分享一个Python可视化工具pybaobabdt,可以轻松地对决策树、随机森林进行可视化,例如:
如何读图:每一种颜色代表一个类别(class),连线(link)的宽度表示从一个节点流向另一个节点的样本(items)数量。
安装
pip install pybaobabdt
pip install pygraphviz
可视化决策树
import pybaobabdt
import pandas as pd
from scipy.io import arff
from sklearn.tree import DecisionTreeClassifier #导入决策树算法
# Load the red-wine quality dataset; data[0] holds the records.
data = arff.loadarff('winequality-red.arff')
df = pd.DataFrame(data[0])

# Every column except the 'class' label is used as an input feature.
features = [column for column in df.columns if column != 'class']
X = df[features]
y = list(df['class'])

# Fit a decision tree with default hyperparameters.
clf = DecisionTreeClassifier()
clf.fit(X, y)

# pybaobabdt.drawTree is the main entry point for rendering the fitted tree.
ax = pybaobabdt.drawTree(clf, features=features, size=10, dpi=300)
可视化决策树-设置树深度
通过maxdepth限制需要渲染的树深度,对大型决策树(large trees)特别友好:
# maxdepth caps how many levels of the tree are rendered.
ax = pybaobabdt.drawTree(clf, features=features, maxdepth=6, size=10, dpi=100)
可视化决策树-突出class
通过colormap突出显示感兴趣的class:
from matplotlib.colors import ListedColormap
# Gray out the classes of no interest so the colored ones stand out
# (the original note says this highlights classes 5 and 6).
highlight_colors = ["#01a2d9", "gray", "#d5695d", "gray"]
ax = pybaobabdt.drawTree(
    clf,
    features=features,
    colormap=ListedColormap(highlight_colors),
    size=10,
    dpi=600,
)
可视化随机森林
import pybaobabdt
import pandas as pd
from scipy.io import arff
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier #导入随机森林算法
# Load the vehicle dataset; data[0] holds the records.
data = arff.loadarff('vehicle.arff')
df = pd.DataFrame(data[0])

# Every column except the 'class' label is used as an input feature.
features = [column for column in df.columns if column != 'class']
X = df[features]
y = list(df['class'])

# 20 trees trained in parallel (n_jobs=-1); the fixed seed keeps the forest
# reproducible between runs.
clf = RandomForestClassifier(n_estimators=20, n_jobs=-1, random_state=0).fit(X, y)
# One shared figure; each tree of the forest is drawn into its own subplot.
size = (15, 15)
plt.rcParams['figure.figsize'] = size
fig = plt.figure(figsize=size, dpi=300)

# Size the grid from the forest itself (4 columns, as many rows as needed)
# instead of hard-coding 5 x 4, so changing n_estimators cannot overflow it.
n_cols = 4
n_rows = -(-len(clf.estimators_) // n_cols)  # ceiling division

for idx, tree in enumerate(clf.estimators_):
    # Subplot indices are 1-based.
    ax1 = fig.add_subplot(n_rows, n_cols, idx + 1)
    pybaobabdt.drawTree(
        tree,
        model=clf,
        size=15,
        dpi=300,
        maxdepth=8,
        features=features,
        ax=ax1,
    )
参考:https://pypi.org/project/pybaobabdt/