【腾讯文档】2024年妈杯+认证杯资料汇总说明 https://docs.qq.com/doc/DSExyRGhNUm1pTHh4
我们直接将我们认为最为合适的数据结果以及最好用的运行代码进行了整理,以便在大家最后一天无法运行代码、急需结果或者不会写代码时提供帮助。
% Outlier diagnostics per sorting center.
% For every sorting center three panels are drawn side by side: a normal
% Q-Q plot, a horizontal box plot, and a histogram with the mean +/- 3*std
% limits. Each figure holds up to three sorting centers (one per row).

% Load the data
data = readtable('附件1.csv', 'Encoding', 'UTF-8');
% Convert the date column from text to datetime
data.x____ = datetime(data.x____, 'InputFormat', 'yyyy/MM/dd');
% Group rows by sorting center
g = findgroups(data.x_______);
% Number of sorting centers
total_centers = max(g);
num_plots = ceil(total_centers / 3); % three sorting centers per figure
% The list of sorting centers is the same for every figure, so build it once
% instead of re-running unique() on the whole column in each iteration.
all_centers = unique(data.x_______);
% Generate one figure for each batch of three sorting centers
for plot_index = 0:num_plots-1
    figure;
    % Slice out this figure's batch; min(end, ...) clips the final batch
    % when the number of centers is not a multiple of three.
    centers = all_centers(1 + plot_index * 3:min(end, 3 + plot_index * 3));
    for i = 1:length(centers)
        center = centers(i);
        idx = ismember(data.x_______, center);
        % presumably the cargo-volume column -- confirm against the CSV header
        series = data.x_____1(idx);
        % centers(i) is a 1x1 cell/string/categorical rather than a char
        % vector; concatenating it directly with a char vector yields a
        % two-element array that title() renders as two separate lines.
        % Convert it to plain text first so each title is a single line.
        center_name = char(string(center));
        % Q-Q plot
        subplot(3, 3, 3 * (i - 1) + 1);
        qqplot(series);
        title(['Q-Q plot for ', center_name]);
        % Box plot
        subplot(3, 3, 3 * (i - 1) + 2);
        boxplot(series, 'Orientation', 'horizontal');
        title(['Box plot for ', center_name]);
        % 3-sigma rule: histogram with the mean +/- 3*std limits marked
        subplot(3, 3, 3 * (i - 1) + 3);
        histogram(series, 30, 'Normalization', 'probability');
        meanVal = mean(series);
        stdVal = std(series);
        xline(meanVal - 3 * stdVal, 'r--', 'Label', 'Lower 3σ');
        xline(meanVal + 3 * stdVal, 'r--', 'Label', 'Upper 3σ');
        title(['3σ Rule for ', center_name]);
    end
end
% Kolmogorov-Smirnov normality test per sorting center.
% For each sorting center a normal distribution is fitted to its series and
% the series is tested against that fitted distribution. The K-S statistic
% and p-value of every center are written to an Excel file.

% Group rows by sorting center; centers(i) is the label of group i
[g, centers] = findgroups(data.x_______);
num_centers = numel(centers);
% Preallocate the result columns instead of growing a table row by row
center_names = strings(num_centers, 1);
statistics = zeros(num_centers, 1);
p_values = zeros(num_centers, 1);
% Run the K-S test for every sorting center
for i = 1:num_centers
    series = data.x_____1(g == i);
    % Estimate the parameters of the reference normal distribution
    [mu, sigma] = normfit(series);
    % kstest returns [h, p, ksstat, cv]: the first output is the 0/1 reject
    % decision, NOT the test statistic. Skip h and take p and ksstat so the
    % "statistic" column really holds the K-S statistic.
    % NOTE(review): mu and sigma are estimated from the same sample being
    % tested, so the standard K-S p-value is only approximate; lillietest
    % targets this situation -- confirm which test is intended.
    [~, p_value, statistic] = kstest(series, 'CDF', [series, normcdf(series, mu, sigma)]);
    % Store the result; string() unwraps cell/categorical group labels
    center_names(i) = string(centers(i));
    statistics(i) = statistic;
    p_values(i) = p_value;
end
% Assemble the result table (sorting center, statistic, p-value)
ks_results = table(center_names, statistics, p_values, ...
    'VariableNames', {'分拣中心', '统计量', 'p值'});
% Save to an Excel file
writetable(ks_results, 'K-S检验结果.xlsx');
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# Outlier diagnostics for the first three sorting centers.
# The figure is a 3x3 grid: each column is one sorting center, and the rows
# are (0) normal Q-Q plot, (1) horizontal box plot, (2) histogram with the
# mean +/- 3*std limits.

# Load the data
data_path = '附件1.csv'
data = pd.read_csv(data_path, encoding='GBK')
# Group rows by sorting center
grouped = data.groupby('分拣中心')
# Select the first three sorting centers
selected_centers = list(grouped.groups.keys())[:3]
# Set up the matplotlib layout
fig, axs = plt.subplots(3, 3, figsize=(18, 12))  # 3 rows x 3 columns
fig.subplots_adjust(hspace=0.4, wspace=0.4)
# The loop body must be indented under the `for`; without the indentation
# the script does not parse at all.
for i, center in enumerate(selected_centers):
    series = grouped.get_group(center)['货量']
    # Q-Q plot
    stats.probplot(series, dist="norm", plot=axs[0, i])
    axs[0, i].set_title(f'Q-Q plot for {center}')
    # Box plot
    axs[1, i].boxplot(series, vert=False)
    axs[1, i].set_title(f'Box plot for {center}')
    # 3-sigma rule: mark mean +/- 3*std on the histogram
    mean = series.mean()
    std = series.std()
    lower_bound = mean - 3 * std
    upper_bound = mean + 3 * std
    axs[2, i].hist(series, bins=30, alpha=0.7, label='Data Distribution')
    axs[2, i].axvline(lower_bound, color='red', linestyle='dashed', linewidth=1, label='Lower 3σ')
    axs[2, i].axvline(upper_bound, color='red', linestyle='dashed', linewidth=1, label='Upper 3σ')
    axs[2, i].set_title(f'3σ Rule for {center}')
    axs[2, i].legend()
# Show the figure once, after all panels have been drawn
plt.show()