Pandas | skill | 将 groupby 分组后的数据用堆叠条形图展示

groupby+堆叠图

- 计算商品名称和销售数量
- 计算商品名称和销售总额
- 在每个颜色段上标注商品名称和平均销售金额

计算商品名称和销售数量

# Keep only the rows whose category is one of the four we want to analyse.
categories_of_interest = ['Clothing', 'Accessories', 'Footwear', 'Outerwear']
# Boolean mask: True where the row's 'Category' value is in the list above.
in_selected_categories = data['Category'].isin(categories_of_interest)
filtered_data = data[in_selected_categories]

# For each category, count how many times each item was purchased.
purchases_by_category = filtered_data.groupby('Category')['Item Purchased']
purchases_by_category.value_counts()

（此处原为截图：各类别下每种商品的购买次数统计结果。）

# Item-mix analysis: pivot the per-category purchase counts into a table with
# one row per category and one column per item. Combinations that never occur
# are filled with 0 instead of being left missing.
item_counts = (
    filtered_data
    .groupby('Category')['Item Purchased']
    .value_counts()
    .unstack(fill_value=0)
)
item_counts

（此处原为截图：item_counts 表，行为类别、列为商品，未出现的组合填充为 0。）

关于 stack() 和 unstack() 的用法与区别，可参考《一文详解：7个 Pandas stack() 和 unstack() 使用技巧》。

# Draw a stacked bar chart from the count table:
#   kind='bar'          -> bar chart
#   stacked=True        -> stack the item columns inside each category bar
#   figsize=(16, 12)    -> figure size
#   colormap='viridis'  -> colour map used for the segments
#   legend=False        -> no legend (segments are labelled directly below)
ax = item_counts.plot(kind='bar', stacked=True, figsize=(16, 12), colormap='viridis', legend=False)

# Chart title and axis labels.
ax.set_title('Sales Quantity of Different Items in Each Category', fontsize=16)
ax.set_xlabel('Category', fontsize=12)
ax.set_ylabel('Quantity Sold', fontsize=12)

# Turn off scientific notation on the y axis so ticks show as plain numbers.
ax.yaxis.get_major_formatter().set_scientific(False)

# Label every colour segment with its item name and count.
for i, category in enumerate(item_counts.index):
    # Debug output: the row about to be labelled. The row does not change
    # while its items are iterated, so it is printed once per category
    # rather than once per item.
    print('item_counts.loc[category]:',item_counts.loc[category])
    y_offset = 0  # combined height of the segments already labelled in this bar
    for item, count in item_counts.loc[category].items():
        if count > 0:  # a zero count has no visible segment to label
            # Place the label at the vertical middle of the segment.
            ax.text(i, y_offset + count / 2, f'{item}:{count}', ha='center', va='center', fontsize=12, color='white',fontweight='bold')
            y_offset += count

# Show the chart with horizontal x tick labels.
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

代码 `print('item_counts.loc[category]:',item_counts.loc[category])` 的部分输出：

item_counts.loc[category]: Item Purchased
Backpack      143
Belt          161
Blouse          0
Boots           0
Coat            0
Dress           0
Gloves        140
Handbag       153
Hat           154
Hoodie          0
Jacket          0
Jeans           0
Jewelry       171
Pants           0
Sandals         0
Scarf         157
Shirt           0
Shoes           0
Shorts          0
Skirt           0
Sneakers        0
Socks           0
Sunglasses    161
Sweater         0
T-shirt         0

（此处原为截图：各类别商品销售数量的堆叠条形图。）

计算商品名称和销售总额

# Total purchase amount for every (category, item) pair.
# Grouping on both keys in one call always produces a Series indexed by
# (Category, Item Purchased). A nested groupby(...).apply(...) would leave the
# result shape to be inferred from the per-group outputs instead.
sales_amount = filtered_data.groupby(['Category', 'Item Purchased'])['Purchase Amount (USD)'].sum()
sales_amount

（此处原为截图：按类别和商品汇总的销售总额 sales_amount。）

# Total purchase amount of each item within each category.
# A single two-key groupby always yields a Series indexed by
# (Category, Item Purchased), which is the shape the unstack() below needs.
sales_amount = filtered_data.groupby(['Category', 'Item Purchased'])['Purchase Amount (USD)'].sum()

# Pivot into a table with one row per category and one column per item;
# combinations with no sales are filled with 0.
sales_amount_df = sales_amount.unstack(fill_value=0)

# Draw the stacked bar chart (one bar per category, one segment per item).
ax = sales_amount_df.plot(kind='bar', stacked=True, figsize=(16, 12), colormap='viridis', legend=False)

# Chart title and axis labels.
ax.set_title('Sales Amount of Different Items in Each Category', fontsize=16)
ax.set_xlabel('Category', fontsize=12)
ax.set_ylabel('Sales Amount (USD)', fontsize=12)

# Turn off scientific notation on the y axis so ticks show as plain numbers.
ax.yaxis.get_major_formatter().set_scientific(False)

# Label every colour segment with its item name and total amount.
for i, category in enumerate(sales_amount_df.index):
    y_offset = 0  # combined height of the segments already labelled in this bar
    for item, amount in sales_amount_df.loc[category].items():
        if amount > 0:  # a zero total has no visible segment to label
            # Place the label at the vertical middle of the segment.
            ax.text(i, y_offset + amount / 2, f'{item}:{amount}', ha='center', va='center', fontsize=12, color='white',fontweight='bold')
            y_offset += amount

# Show the chart with horizontal x tick labels.
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

（此处原为截图：各类别商品销售总额的堆叠条形图。）

在每个颜色段上标注商品名称和平均销售金额

# Average purchase amount of each item within each category:
# total amount divided element-wise by the number of purchases.
# NOTE(review): cells where both tables hold 0 presumably come out as NaN
# (0 / 0); the labelling loop below skips NaN values.
average_sales_amount = sales_amount_df.div(item_counts)

# Draw the stacked bar chart (one bar per category, one segment per item).
ax = average_sales_amount.plot(
    kind='bar',
    stacked=True,
    figsize=(16, 12),
    colormap='viridis',
    legend=False,
)

# Chart title and axis labels.
ax.set_title('Average Sales Amount of Different Items in Each Category', fontsize=16)
ax.set_xlabel('Category', fontsize=12)
ax.set_ylabel('Average Sales Amount (USD)', fontsize=12)

# Turn off scientific notation on the y axis so ticks show as plain numbers.
y_formatter = ax.yaxis.get_major_formatter()
y_formatter.set_scientific(False)

# Label every colour segment with its item name and average amount.
for bar_pos, category in enumerate(average_sales_amount.index):
    stacked_height = 0  # combined height of the segments already labelled
    category_row = average_sales_amount.loc[category]
    for item, amount in category_row.items():
        if np.isnan(amount):
            # No value for this item in this category: nothing to label.
            continue
        # Place the label at the vertical middle of the segment.
        label_y = stacked_height + amount / 2
        ax.text(
            bar_pos,
            label_y,
            f'{item}:{amount:.2f}',
            ha='center',
            va='center',
            fontsize=12,
            color='white',
            fontweight='bold',
        )
        stacked_height += amount

# Show the chart with horizontal x tick labels.
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()