目录
1.数据预处理
2.对订单状况进行分析
3.不同省份订单数详情
4.省份地图绘制
5.不同星期,订单分布
6.订单金额与订单数量
本次案例演示数据条数为:28010
import pandas as pd
from pyecharts import options as opts
# Load the Tmall order spreadsheet; every later step works on df_data.
orders_path = '../data/天猫订单.xlsx'
df_data = pd.read_excel(orders_path)
# Quick look at the data: first three rows, then the column / dtype / non-null summary.
df_data.head(3)
df_data.info()
1.数据预处理
# Give the four municipalities their "市" suffix (e.g. "北京" -> "北京市").
# The negative lookahead (?!市) leaves values that already carry the suffix
# untouched, so re-running this step (or receiving already-normalised data)
# does not produce "北京市市".
df_data["收货地址"] = df_data["收货地址"].str.replace(
    r"(北京|天津|上海|重庆)(?!市)", r"\1市", regex=True
)
# Derived columns taken from the payment timestamp:
#   "星期" - day of week shifted to start at 1 (dayofweek is 0-based)
#   "hour" - hour of day
# Rows without a payment time have no value in either column.
df_data["星期"] = df_data["订单付款时间"].dt.dayofweek + 1
df_data["hour"] = df_data["订单付款时间"].dt.hour
2.对订单状况进行分析
# Split the orders into three groups: valid, refunded and unpaid.
has_payment_time = df_data["订单付款时间"].notnull()
nothing_refunded = df_data["退款金额"] == 0

# Valid orders: a payment time is present and the refund amount is 0.
valid_count = df_data[has_payment_time & nothing_refunded]
valid_count
# Refunded orders: the refund amount is greater than 0.
refund_count = df_data[df_data["退款金额"] > 0]
refund_count
# Unpaid orders: no payment time recorded.
unpaid_count = df_data[df_data["订单付款时间"].isnull()]
unpaid_count

# Share of each group as a percentage of the three groups combined,
# rounded to two decimals.
n_valid = len(valid_count)
n_refund = len(refund_count)
n_unpaid = len(unpaid_count)
total_count = n_valid + n_refund + n_unpaid
valid_rate = round(n_valid / total_count * 100, 2)
refun_rate = round(n_refund / total_count * 100, 2)
unpaid_rate = round(n_unpaid / total_count * 100, 2)
print(f"有效订单占比:{valid_rate}%\n退款订单占比:{refun_rate}%\n未付款订单占比:{unpaid_rate}%")
# Pie chart of the three order groups (slice size = number of rows in each group).
from pyecharts.charts import Pie
pie_data = [
    (label, len(frame))
    for label, frame in (
        ("有效订单", valid_count),
        ("退款订单", refund_count),
        ("未付款订单", unpaid_count),
    )
]
pie_img = Pie()
pie_img.add("订单状况", pie_data)
pie_img.render_notebook()
3.不同省份订单数详情
# Per-address figures are computed only over orders that have a payment time.
df_new = df_data[df_data["订单付款时间"].notnull()]

# Number of such orders for each shipping address.
province_order_count = (
    df_new.groupby("收货地址")["订单金额"]
    .count()
    .reset_index(name="订单量")
)
# address -> order count, kept as a dict for the map chart.
address_names = province_order_count["收货地址"].tolist()
address_orders = province_order_count["订单量"].tolist()
data_pair = dict(zip(address_names, address_orders))

# Order / paid / refunded totals per address, rescaled to units of 10,000.
amount_columns = ["订单金额", "实付金额", "退款金额"]
province_payment = df_new.groupby("收货地址")[amount_columns].sum().reset_index()
for amount_column in amount_columns:
    province_payment[amount_column] = province_payment[amount_column] / 10000
# Stacked bars (paid + refunded) per address, with the order amount overlaid
# as an effect-scatter series; values are rounded to two decimals for display.
from pyecharts.charts import Bar
from pyecharts.charts import EffectScatter
from pyecharts.charts import Grid

paid_series = round(province_payment["实付金额"], 2).tolist()
refunded_series = round(province_payment["退款金额"], 2).tolist()
ordered_series = round(province_payment["订单金额"], 2).tolist()

bar_img = Bar()
bar_img.add_xaxis(province_payment["收货地址"].tolist())
bar_img.add_yaxis("实付金额", paid_series, category_gap="50%", stack=True)
bar_img.add_yaxis("退款金额", refunded_series, category_gap="50%", stack=True)
bar_img.set_global_opts(
    title_opts=opts.TitleOpts("实付金额与退款金额"),
    # Rotate the address labels so they do not overlap.
    xaxis_opts=opts.AxisOpts(name="省份", axislabel_opts={"rotate": -90}),
    yaxis_opts=opts.AxisOpts(name="金额(万)"),
)
# Hide the per-bar value labels.
bar_img.set_series_opts(label_opts=opts.LabelOpts(is_show=False))

effect = EffectScatter()
effect.add_xaxis(province_payment["收货地址"].tolist())
effect.add_yaxis("订单金额", ordered_series)
effect.set_series_opts(label_opts=opts.LabelOpts(position="top"))

# Draw the scatter on top of the bars, then place the combined chart in a
# grid whose top edge sits at 35%.
overlap = bar_img.overlap(effect)
grid = Grid()
grid.add(overlap, grid_opts=opts.GridOpts(pos_top="35%"))
grid.render_notebook()
4.省份地图绘制
# China map coloured by the number of orders per shipping address.
from pyecharts.charts import Map
map_img = (
    Map(init_opts=opts.InitOpts(width="1000px", height="800px"))
    .add(
        "",
        # Map expects a list of [region name, value] pairs.
        [list(x) for x in data_pair.items()],
        maptype="china",
        is_map_symbol_show=False,
        label_opts=opts.LabelOpts(is_show=True, color="black"),
        itemstyle_opts={
            "normal": {
                # rgba() needs four components (r, g, b, alpha); the previous
                # value "rgba(0,0,0.5)" was not a valid colour, so the shadow
                # colour was not applied as intended.
                "shadowColor": "rgba(0,0,0,0.5)",
                "shadowBlur": 10
            }
        }
    )
    .set_global_opts(
        # Colour scale for the order counts, fixed to the range 0-3100.
        visualmap_opts=opts.VisualMapOpts(
            is_show=True,
            min_=0,
            max_=3100,
            range_color=["red", "green", "blue", "pink", "orange", "yellow", "purple"]
        )
    )
)
map_img.render_notebook()
5.不同星期,订单分布
# Number of orders for each value of the "星期" (day-of-week) column.
week_count = (
    df_data.groupby("星期")["订单创建时间"]
    .count()
    .reset_index(name="订单数量")
)

# Bar chart: one bar per day of week.
week_img = Bar()
week_img.add_xaxis(week_count["星期"].tolist())
week_img.add_yaxis("订单数量", week_count["订单数量"].tolist())
week_img.set_global_opts(
    xaxis_opts=opts.AxisOpts(name="星期"),
    yaxis_opts=opts.AxisOpts(name="订单数"),
)
week_img.render_notebook()
6.订单金额与订单数量
# Dual-axis line chart per hour of day: number of orders (left axis) and
# average order amount (right axis).
from pyecharts.charts import Line
hour_count = df_data.groupby("hour")["订单创建时间"].count()
hour_amount = round(df_data.groupby("hour")["订单金额"].mean(), 2)

# The x axis of a Line chart is a category axis, so its labels must be
# strings. Numeric labels are read as positional indexes, which misplaces
# points as soon as one hour has no orders. Both series are grouped by the
# same "hour" key, so they share these labels.
hour_labels = [str(int(hour)) for hour in hour_count.index]

line1 = Line()
line1.add_xaxis(hour_labels)
line1.add_yaxis(series_name="订单数量", y_axis=hour_count.values.tolist(), yaxis_index=0)
# Second y axis (index 1) for the average order amount.
line1.extend_axis(yaxis=opts.AxisOpts(
    axislabel_opts=opts.LabelOpts(formatter="{value}元"),
    axisline_opts=opts.AxisLineOpts(linestyle_opts=opts.LineStyleOpts(color="green"))
))
line1.set_global_opts(
    title_opts=opts.TitleOpts("订单数量与订单金额"),
    yaxis_opts=opts.AxisOpts(
        axislabel_opts=opts.LabelOpts(formatter="{value}单"),
        axisline_opts=opts.AxisLineOpts(linestyle_opts=opts.LineStyleOpts(color="orange"))
    ),
    xaxis_opts=opts.AxisOpts(name="时间"),
    # One tooltip per x position, listing both series.
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis")
)
line1.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
line2 = Line().add_xaxis(hour_labels).add_yaxis("平均订单金额", hour_amount.values.tolist(), yaxis_index=1)
line2.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
line1.overlap(line2)
# Series colours follow series order (order count first, average amount
# second) and are chosen to match the axis each series is plotted against:
# orange for the order-count axis, green for the amount axis.
line1.set_colors(["orange", "green"])
line1.render_notebook()