Kmeans聚类模型可视化大合集:
当对K-means聚类模型进行可视化时,有多种展示形式可以帮助我们理解和分析聚类结果。以下是一些常见的K-means聚类模型可视化形式:
kmeans构建:
import os

# OMP_NUM_THREADS has to be in the environment before NumPy / scikit-learn are
# imported: the native threading runtimes typically read it when they are first
# loaded, so assigning it after the imports may silently have no effect.
os.environ['OMP_NUM_THREADS'] = '4'

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load the data set.
data = pd.read_excel('./data/consumption_data.xls')

# Show every row that has at least one missing value before dropping it.
print("查看缺失行:")
missing_rows = data[data.isnull().any(axis=1)]
print(missing_rows)

# Drop the rows that contain missing values.
data = data.dropna(axis=0)

# Feature selection: only the R, F and M columns are used for clustering
# (presumably the usual RFM customer-value features -- confirm against the data).
X = data[['R', 'F', 'M']]

# Standardise the features so that no column dominates the distance
# computation merely because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Number of clusters to build.
n_clusters = 3

# Fit K-means. A fixed random_state keeps the cluster numbering (0/1/2) stable
# between runs; without it the figures saved per cluster index further down
# are not reproducible.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
kmeans.fit(X_scaled)

# Cluster label assigned to each (non-missing) row.
labels = kmeans.labels_
print(labels)

# Attach the labels to the original (unscaled) data for later plotting.
data['Cluster'] = labels
可视化:
指标对比图:
顾名思义,按照各个指标分别展示不同聚类(簇)之间的区别:
# Split the labelled data into one DataFrame per cluster.
cluster0 = data[data['Cluster'] == 0]
cluster1 = data[data['Cluster'] == 1]
cluster2 = data[data['Cluster'] == 2]

# Attribute columns to compare across the three clusters.
attribute_columns = ['R', 'F', 'M']

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./img', exist_ok=True)

# One figure per attribute, overlaying the kernel density estimate of every
# cluster so their distributions can be compared directly.
for attribute in attribute_columns:
    plt.figure()
    for cluster_id, cluster in enumerate((cluster0, cluster1, cluster2)):
        cluster[attribute].plot(kind='kde', label=f'Cluster {cluster_id}')
    plt.xlabel(attribute)
    plt.ylabel('Density')
    plt.legend()
    # Save before show(): the saved file is written from the current figure.
    plt.savefig(f'./img/1_{attribute}_density.png')
    plt.show()
单图多子图展示:
# Attribute columns to compare across the three clusters.
attribute_columns = ['R', 'F', 'M']

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./img', exist_ok=True)

# A single figure with one stacked subplot per attribute.
fig, axes = plt.subplots(len(attribute_columns), 1, figsize=(8, 12))

# In each subplot, overlay the kernel density estimate of every cluster.
for ax, attribute in zip(axes, attribute_columns):
    for cluster_id, cluster in enumerate((cluster0, cluster1, cluster2)):
        cluster[attribute].plot(kind='kde', label=f'Cluster {cluster_id}', ax=ax)
    ax.set_xlabel(attribute)
    ax.set_ylabel('Density')
    ax.legend()

# Apply the layout once all subplots are drawn, then save and display.
plt.tight_layout()
plt.savefig(r'./img/2_mode.png')
plt.show()
3*3子图展示:
# Attribute columns to plot.
attribute_columns = ['R', 'F', 'M']

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./img', exist_ok=True)

# One figure holding a 3x3 grid of subplots:
# each row is a cluster, each column is an attribute.
fig, axes = plt.subplots(3, 3, figsize=(12, 12))

clusters = (cluster0, cluster1, cluster2)
last_row = len(clusters) - 1

# Draw the kernel density estimate of every (cluster, attribute) pair.
for col, attribute in enumerate(attribute_columns):
    for row, cluster in enumerate(clusters):
        ax = axes[row, col]
        cluster[attribute].plot(kind='kde', label=f'Cluster {row}', ax=ax)
        # Only the bottom row carries the attribute name on its x axis;
        # the rows above leave the x label empty.
        ax.set_xlabel(attribute if row == last_row else '')
        ax.set_ylabel('Density')
        ax.legend()

# Adjust the subplot layout, then save and display.
plt.tight_layout()
plt.savefig(r'./img/3_mode.png')
plt.show()
横向单独展示:
# Attribute columns to plot, in display order.
attribute_columns = ['F', 'R', 'M']

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./img', exist_ok=True)

# One separate figure per cluster, with the attributes laid out side by side.
for i, cluster in enumerate([cluster0, cluster1, cluster2]):
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))

    # Kernel density estimate of each attribute within this cluster.
    for ax, attribute in zip(axes, attribute_columns):
        cluster[attribute].plot(kind='kde', label=f'Cluster {i}', ax=ax)
        ax.set_xlabel(attribute)
        ax.set_ylabel('Density')
        ax.legend()

    # tight_layout must run BEFORE savefig; otherwise the file on disk is
    # written without the layout adjustment that the on-screen figure gets.
    plt.tight_layout()
    plt.savefig(f'./img/4_cluster{i}_density.png')
    plt.show()
纵向单独展示:
# Attribute columns to plot, in display order.
attribute_columns = ['F', 'R', 'M']

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./img', exist_ok=True)

# One separate figure per cluster, with the attributes stacked vertically.
for i, cluster in enumerate([cluster0, cluster1, cluster2]):
    fig, axes = plt.subplots(3, 1, figsize=(8, 12))

    # Kernel density estimate of each attribute within this cluster.
    for ax, attribute in zip(axes, attribute_columns):
        cluster[attribute].plot(kind='kde', label=f'Cluster {i}', ax=ax)
        ax.set_xlabel(attribute)
        ax.set_ylabel('Density')
        ax.legend()

    # tight_layout must run BEFORE savefig; otherwise the file on disk is
    # written without the layout adjustment that the on-screen figure gets.
    plt.tight_layout()
    plt.savefig(f'./img/5_cluster{i}_density.png')
    plt.show()