groupby+堆叠图
- 计算商品名称和销售数量
- 计算商品名称和销售总额
- 在每个颜色段上标注商品名称和平均销售金额
计算商品名称和销售数量
# Restrict the dataset to the four product categories under study.
categories_of_interest = ['Clothing', 'Accessories', 'Footwear', 'Outerwear']
# Boolean mask: True where the row's Category is one of the listed values.
category_mask = data['Category'].isin(categories_of_interest)
filtered_data = data[category_mask]

# Long-form tally: purchase count for each (Category, Item Purchased) pair.
purchases_by_category = filtered_data.groupby('Category')['Item Purchased']
purchases_by_category.value_counts()

# Item mix per category in wide form: one row per category, one column per
# item; pairs that never occur are filled with 0.
item_counts = purchases_by_category.value_counts().unstack(fill_value=0)
item_counts
关于 stack() 和 unstack() 的使用方法与区别,请参阅《一文详解:7个 Pandas stack() 和 unstack() 使用技巧》。
# Stacked bar chart of purchase counts: one bar per category, one coloured
# segment per item.
#   kind='bar'          -> bar chart
#   stacked=True        -> stack the item segments on top of each other
#   figsize=(16, 12)    -> figure size
#   colormap='viridis'  -> colour map used for the segments
#   legend=False        -> no legend; segments are labelled directly below
ax = item_counts.plot(
    kind='bar',
    stacked=True,
    figsize=(16, 12),
    colormap='viridis',
    legend=False,
)

# Title and axis labels.
ax.set_title('Sales Quantity of Different Items in Each Category', fontsize=16)
ax.set_xlabel('Category', fontsize=12)
ax.set_ylabel('Quantity Sold', fontsize=12)

# Show plain numbers on the y axis instead of scientific notation.
ax.yaxis.get_major_formatter().set_scientific(False)

# Write "<item>:<count>" in the middle of every non-empty segment.
for i, category in enumerate(item_counts.index):
    # The row for this category does not change while its items are walked,
    # so look it up (and print it for inspection) once per category instead
    # of once per item.
    category_counts = item_counts.loc[category]
    print('item_counts.loc[category]:', category_counts)

    # Running height of the segments already stacked in this bar.
    y_offset = 0
    for item, count in category_counts.items():
        if count > 0:
            ax.text(
                i,
                y_offset + count / 2,  # vertical centre of this segment
                f'{item}:{count}',
                ha='center',
                va='center',
                fontsize=12,
                color='white',
                fontweight='bold',
            )
            y_offset += count

# Render the chart with horizontal x tick labels.
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()
- 代码 print('item_counts.loc[category]:', item_counts.loc[category]) 的部分输出:
item_counts.loc[category]: Item Purchased
Backpack 143
Belt 161
Blouse 0
Boots 0
Coat 0
Dress 0
Gloves 140
Handbag 153
Hat 154
Hoodie 0
Jacket 0
Jeans 0
Jewelry 171
Pants 0
Sandals 0
Scarf 157
Shirt 0
Shoes 0
Shorts 0
Skirt 0
Sneakers 0
Socks 0
Sunglasses 161
Sweater 0
T-shirt 0
计算商品名称和销售总额
# Total purchase amount for each (Category, Item Purchased) pair.
# Grouping on both keys in a single groupby always yields a Series indexed by
# (Category, Item Purchased). Nesting a second groupby inside apply() can
# instead return a wide DataFrame when every category shares the same items,
# and it triggers pandas' deprecation warning about apply() operating on the
# grouping columns.
sales_amount = (
    filtered_data
    .groupby(['Category', 'Item Purchased'])['Purchase Amount (USD)']
    .sum()
)
sales_amount
# Total purchase amount for each (Category, Item Purchased) pair.
# A single two-key groupby always produces a MultiIndex Series, so the
# unstack() below reliably pivots items into columns. The nested
# groupby-inside-apply() form can return a DataFrame instead and is
# deprecated in recent pandas when it touches the grouping columns.
sales_amount = (
    filtered_data
    .groupby(['Category', 'Item Purchased'])['Purchase Amount (USD)']
    .sum()
)

# Wide form: one row per category, one column per item; pairs with no sales
# are filled with 0.
sales_amount_df = sales_amount.unstack(fill_value=0)

# Stacked bar chart: one bar per category, one coloured segment per item.
ax = sales_amount_df.plot(
    kind='bar',
    stacked=True,
    figsize=(16, 12),
    colormap='viridis',
    legend=False,
)

# Title and axis labels.
ax.set_title('Sales Amount of Different Items in Each Category', fontsize=16)
ax.set_xlabel('Category', fontsize=12)
ax.set_ylabel('Sales Amount (USD)', fontsize=12)

# Show plain numbers on the y axis instead of scientific notation.
ax.yaxis.get_major_formatter().set_scientific(False)

# Write "<item>:<total>" in the middle of every non-empty segment.
for i, category in enumerate(sales_amount_df.index):
    # Row lookup is invariant across the inner loop; fetch it once.
    category_amounts = sales_amount_df.loc[category]
    # Running height of the segments already stacked in this bar.
    y_offset = 0
    for item, amount in category_amounts.items():
        if amount > 0:
            ax.text(
                i,
                y_offset + amount / 2,  # vertical centre of this segment
                f'{item}:{amount}',
                ha='center',
                va='center',
                fontsize=12,
                color='white',
                fontweight='bold',
            )
            y_offset += amount

# Render the chart with horizontal x tick labels.
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()
在每个颜色段上标注商品名称和平均销售金额
# Average purchase amount per item within each category: element-wise
# division of the total-amount table by the purchase-count table.
# NOTE(review): cells where both tables hold 0 presumably come out as NaN
# (0 / 0); the labelling loop below skips NaN cells.
average_sales_amount = sales_amount_df.div(item_counts)

# Stacked bar chart: one bar per category, one coloured segment per item.
ax = average_sales_amount.plot(
    kind='bar',
    stacked=True,
    figsize=(16, 12),
    colormap='viridis',
    legend=False,
)

# Title and axis labels.
ax.set_title('Average Sales Amount of Different Items in Each Category', fontsize=16)
ax.set_xlabel('Category', fontsize=12)
ax.set_ylabel('Average Sales Amount (USD)', fontsize=12)

# Show plain numbers on the y axis instead of scientific notation.
ax.yaxis.get_major_formatter().set_scientific(False)

# Write "<item>:<average>" (two decimals) in the middle of every segment
# that has a value.
for bar_pos, category in enumerate(average_sales_amount.index):
    stacked_height = 0  # height of the segments already labelled in this bar
    category_averages = average_sales_amount.loc[category]
    for item, amount in category_averages.items():
        if np.isnan(amount):
            # No value for this item in this category: nothing to label,
            # and the running height must not absorb a NaN.
            continue
        label_y = stacked_height + amount / 2
        ax.text(
            bar_pos,
            label_y,
            f'{item}:{amount:.2f}',
            ha='center',
            va='center',
            fontsize=12,
            color='white',
            fontweight='bold',
        )
        stacked_height += amount

# Render the chart with horizontal x tick labels.
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()