mclust (example 1)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs
# Draw 300 sample points around 4 centers; n_features=2 is the default,
# so X has two columns that can be plotted directly.
X, y = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=0.60,
    random_state=0,
)

# Show the raw, unlabelled points.
plt.scatter(X[:, 0], X[:, 1])
plt.show()
def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """Cluster ``features`` with the ``Mclust`` function of the R package mclust.

    The parameters are the same as those in the R package mclust.

    Parameters
    ----------
    features : numpy.ndarray
        Data to cluster. It is converted with ``numpy2ri.numpy2rpy`` and
        passed to ``Mclust`` as its first positional argument.
    num_cluster : int
        Passed to ``Mclust`` as its second positional argument
        (the number of mixture components).
    modelNames : str, default 'EEE'
        Passed to ``Mclust`` as its third positional argument.
    random_seed : int, default 2020
        Seed applied to NumPy's global random state and to R's ``set.seed``.

    Returns
    -------
    numpy.ndarray
        Integer array built from the second-to-last element of the
        ``Mclust`` result.
    """
    # Seeds the *global* NumPy random state, not a local generator.
    np.random.seed(random_seed)

    # rpy2 is imported inside the function, so R and rpy2 are only needed
    # when mclust() is actually called.
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()

    # Seed R's random number generator as well.
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)

    rmclust = robjects.r['Mclust']
    res = rmclust(
        rpy2.robjects.numpy2ri.numpy2rpy(features),
        num_cluster,
        modelNames,
    )

    # NOTE(review): res[-2] is expected to be the "classification" component
    # of the Mclust result (one label per row, presumably numbered from 1 as
    # in R) -- confirm against the installed mclust version.
    mclust_res = np.array(res[-2])
    return mclust_res.astype(int)
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score

# Cluster the points into 4 groups with mclust.
label_mclust = mclust(X, num_cluster=4)

# Score the predicted labels against the reference labels y.
print("ARI = {}".format(adjusted_rand_score(label_mclust, y)))
print("NMI = {}".format(normalized_mutual_info_score(label_mclust, y)))

# Plot every predicted cluster as its own scatter series.
pred_y = label_mclust.copy()
fig = plt.figure()
for label in np.unique(pred_y):
    in_cluster = pred_y == label
    plt.scatter(X[in_cluster, 0], X[in_cluster, 1], label=label)
# The labels attached to each series are only displayed through a legend.
plt.legend()
# Show the figure once, after all clusters have been drawn.
plt.show()
结果如下
mclust (example 2)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs
def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """Cluster ``features`` with the ``Mclust`` function of the R package mclust.

    The parameters are the same as those in the R package mclust.

    Parameters
    ----------
    features : numpy.ndarray
        Data to cluster. It is converted with ``numpy2ri.numpy2rpy`` and
        passed to ``Mclust`` as its first positional argument.
    num_cluster : int
        Passed to ``Mclust`` as its second positional argument
        (the number of mixture components).
    modelNames : str, default 'EEE'
        Passed to ``Mclust`` as its third positional argument.
    random_seed : int, default 2020
        Seed applied to NumPy's global random state and to R's ``set.seed``.

    Returns
    -------
    numpy.ndarray
        Integer array built from the second-to-last element of the
        ``Mclust`` result.
    """
    # Seeds the *global* NumPy random state, not a local generator.
    np.random.seed(random_seed)

    # rpy2 is imported inside the function, so R and rpy2 are only needed
    # when mclust() is actually called.
    import rpy2.robjects as robjects
    robjects.r.library("mclust")

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()

    # Seed R's random number generator as well.
    r_random_seed = robjects.r['set.seed']
    r_random_seed(random_seed)

    rmclust = robjects.r['Mclust']
    res = rmclust(
        rpy2.robjects.numpy2ri.numpy2rpy(features),
        num_cluster,
        modelNames,
    )

    # NOTE(review): res[-2] is expected to be the "classification" component
    # of the Mclust result (one label per row, presumably numbered from 1 as
    # in R) -- confirm against the installed mclust version.
    mclust_res = np.array(res[-2])
    return mclust_res.astype(int)
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

# Higher-dimensional example: 1000 samples with 50 features around 5 centers.
X, y = make_blobs(
    n_samples=1000,
    n_features=50,
    centers=5,
    random_state=0,
)
print(X.shape)

# Fit mclust with 5 components and score the result against y.
label_mclust = mclust(X, num_cluster=5)
print("ARI = {}".format(adjusted_rand_score(label_mclust, y)))
print("NMI = {}".format(normalized_mutual_info_score(label_mclust, y)))