# Full code listing:
from sklearn import preprocessing
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from datetime import datetime
import time
import math
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from scipy import stats, integrate
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from sklearn.model_selection import learning_curve
from sklearn.model_selection import validation_curve
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error # 评价指标
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor
from lightgbm import LGBMRegressor
from collections import Counter
#--------------------------------------------
# Load the raw sales export (the file is read as GBK-encoded text).
data = pd.read_csv('销售数据0321-8家店铺.csv', encoding='gbk')
print(data.columns)

# Number of rows per shop, most frequent first; printed for orientation only.
my_dict = Counter(data['店铺'].values)
sorted_dict = dict(my_dict.most_common())
print(sorted_dict)
# Counts recorded by the original author:
# {'AMZUSLA': 53329, 'AMZUS3CZ': 30697, 'AMZUS5HZ': 30277, 'AMZDELAHK': 14695, 'AMZUS6PTXZ': 10742, 'AMZUS2XZ': 8676, 'AMZITLAHK': 4738, 'WayfairUSYZ': 3616}

# Keep the rows of a single shop. `.copy()` detaches the selection from the
# full frame, so the column assignment below writes to an independent
# DataFrame rather than to a slice (which triggers SettingWithCopyWarning in
# pandas versions without copy-on-write).
data = data[data['店铺'] == 'AMZDELAHK'].copy()  # e.g. AMZDELAHK

# Parse the order-date column into datetimes so it can be range-filtered.
data['订单日期'] = pd.to_datetime(data['订单日期'])
print(data)

# Training period; both bounds are inclusive.
# NOTE(review): a bare date string compares as midnight, so if the order date
# carries a time component, rows later on the end date are excluded - confirm.
start_date = '2021-01-01'
end_date = '2023-08-29'
filtered_df = data[(data['订单日期'] >= start_date) & (data['订单日期'] <= end_date)]
print(filtered_df)

# Evaluation period; both bounds are inclusive. Rows dated 2023-08-30 and
# 2023-08-31 fall in neither the training nor the evaluation period.
start_date = '2023-09-01'
end_date = '2023-10-01'
filtered_df_test = data[(data['订单日期'] >= start_date) & (data['订单日期'] <= end_date)]
print(filtered_df_test)
def _make_windows(series, window):
    """Cut a 1-D series into sliding-window samples for one-step-ahead prediction.

    Each input is ``window`` consecutive values and its target is the value
    that immediately follows them. Every position that has a following value
    is used, so a series of length ``n`` yields ``max(n - window, 0)`` samples.

    Returns:
        A pair ``(inputs, targets)`` of equally long lists.
    """
    # range(n - window), not n - window - 1, so that the final value of the
    # series is also used as a target.
    sample_count = len(series) - window
    inputs = [series[start:start + window] for start in range(sample_count)]
    targets = [series[start + window] for start in range(sample_count)]
    return inputs, targets


# Length of the input sequence fed to the model for each sample.
squence = 28  # values noted by the original author: 7 14 28

# Training samples, built from the sales-quantity column in row order.
# NOTE(review): the windows follow the row order of the frame; this presumably
# relies on the rows already being in chronological order - confirm.
filtered_df = filtered_df['店铺SKU_销量'].values
data_x, data_y = _make_windows(filtered_df, squence)

# Test samples, built the same way from the evaluation period.
filtered_df_test = filtered_df_test['店铺SKU_销量'].values
test_x, test_y = _make_windows(filtered_df_test, squence)
test_x = np.array(test_x)
print(test_x.shape)
# -----------------------------------------------------------
# Mid-file imports used by this section.
from sklearn.linear_model import LassoLarsIC as LR  # LassoLarsIC aliased as LR; not used in the visible code
from metra import metric

# Ordered (unshuffled) split of the training windows.
# NOTE(review): test_size=0.99 leaves only about the first 1% of the windows
# in x_train, and x_test / y_test are never used in the visible code - confirm
# this is intended. Per the scikit-learn docs, random_state only controls the
# shuffling, so it should have no effect with shuffle=False.
window_inputs = np.array(data_x)
window_targets = np.array(data_y)
x_train, x_test, y_train, y_test = train_test_split(
    window_inputs,
    window_targets,
    test_size=0.99,
    shuffle=False,
    random_state=1,
)
for shape_label, subset in (('x_train.shape', x_train), ('x_test.shape', x_test)):
    print(shape_label, subset.shape)

# Gradient-boosting regressor. The variable is called `svm`, but the estimator
# is LightGBM's LGBMRegressor with default settings.
svm = LGBMRegressor()
svm.fit(x_train, y_train)
svm_pred = svm.predict(test_x)

# Score the predictions against the targets of the evaluation period.
# `metric` returns six values; r2 is unpacked but not printed.
mae, mse, rmse, mape, mspe, r2 = metric(np.array(svm_pred), np.array(test_y))
print('mae, mse, rmse, mape, mspe')
print(mae, mse, rmse, mape, mspe)
# Configure the Seaborn theme.
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="darkgrid")

# One row per test sample: its position, the prediction and the true value.
plot_frame = pd.DataFrame({
    'x': range(len(test_y)),
    'y_pred': svm_pred,
    'y_true': test_y,
})

# Draw the predicted series first, then the true series, on the same axes.
for series_name in ('y_pred', 'y_true'):
    sns.lineplot(x='x', y=series_name, data=plot_frame, linewidth=1, label=series_name)

# Title and axis labels. The x values are sample positions, not dates, even
# though the axis is labelled 'Date'.
plt.title('Prediction vs True')
plt.xlabel('Date')
plt.ylabel('Values')

# Save the figure to disk before showing it.
plt.savefig('预测1.png')
plt.show()