Experiment objectives and requirements
- Understand the basic operations of topic visualization with sklearn and gensim;
- Collect the text of 《红楼梦》, one of the Four Great Classical Novels, and perform topic analysis
- Visualize the topics of 《红楼梦》
Or:
- Collect the 《二十大报告》 and perform topic analysis;
- Visualize the topics of the 《二十大报告》
- Data sources
  - The novel 《红楼梦》
  - The 《二十大报告》
Collect the text of 《红楼梦》, load the character list, stop-word list and custom dictionary, and complete the following tasks:
1. Output the appearance counts of the top 100 characters, draw a word cloud of character appearances, and finally draw a bar chart of the appearance counts of the 20 most frequent characters.
2. Use the TF-IDF algorithm together with sklearn and gensim to perform topic analysis and visualization of 《红楼梦》.
3. Visualize the character relationships of 《红楼梦》 as a knowledge graph, sorting out the relationships of 贾宝玉, 林黛玉 and 薛宝钗 with the other characters.
Character appearance counts
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # make the Chinese chart titles and labels render correctly
# Read the dataset
data = pd.read_excel("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦数据集.xlsx")
chapters = data['Artical'].tolist()
# Read the character list, stop words, and dictionary
with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt", "r", encoding="utf-8") as file:
characters = file.read().splitlines()
with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦停用词.txt", "r", encoding="utf-8") as file:
stopwords = file.read().splitlines()
with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦词典.txt", "r", encoding="utf-8") as file:
dictionary = file.read().splitlines()
# Count each character's appearances across all chapters
character_counts = {}
for chapter in chapters:
for character in characters:
count = chapter.count(character)
if character in character_counts:
character_counts[character] += count
else:
character_counts[character] = count
# Output the appearance counts of the top 100 characters
sorted_characters = sorted(character_counts.items(), key=lambda x: x[1], reverse=True)[:100]
print("前100个人物的出场次数:")
for character, count in sorted_characters:
print(f"{character}: {count}")
# Build the word cloud from the character frequencies
wordcloud = WordCloud(background_color='white', font_path=r"D:\\coder\\randomnumbers\\Keywords_cloud\\msyh.ttf", width=800, height=600)
wordcloud.generate_from_frequencies(character_counts)
plt.figure(figsize=(10, 8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('红楼梦人物出现词云图')
plt.show()
# Bar chart of the appearance counts of the 20 most frequent characters
top20_characters = sorted_characters[:20]
top20_names = [item[0] for item in top20_characters]
top20_counts = [item[1] for item in top20_characters]
plt.figure(figsize=(10, 6))
plt.bar(top20_names, top20_counts, color='skyblue')
plt.xlabel('人物')
plt.ylabel('出场次数')
plt.title('出现次数最多的前20个人物的出场次数')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Top 10 keywords
from sklearn.feature_extraction.text import TfidfVectorizer
import re
# Uninformative tokens to filter out
meaningless_words = ["说", "道"]
# Custom tokenizer used to filter the uninformative tokens
# (note: \b\w+\b splits the Chinese text at punctuation rather than into true words)
def tokenize(text):
    words = re.findall(r'\b\w+\b', text)
    meaningful_words = [word for word in words if word not in meaningless_words]
    return meaningful_words
# Join the chapter list into a single document string
corpus = [' '.join(chapters)]
# TF-IDF vectorizer with the stop-word list and the custom tokenizer
vectorizer = TfidfVectorizer(stop_words=stopwords, tokenizer=tokenize)
X = vectorizer.fit_transform(corpus)
# Get the feature (token) list
feature_names = vectorizer.get_feature_names_out()  # on sklearn < 1.0 this is get_feature_names()
# Rank tokens by TF-IDF weight in descending order
indices = X.toarray().argsort(axis=1)[:, ::-1]
top_n = 10  # take the top 10 keywords
top_keywords = [feature_names[indices[0, i]] for i in range(top_n)]
print("红楼梦文本的Top 10关键词:")
for i, keyword in enumerate(top_keywords):
print(f"{i+1}. {keyword}")
红楼梦文本的Top 10关键词:
- 说着
- 宝玉道
- 宝玉笑道
- 笑道
- 贾母道
- 贾政道
- 凤姐道
- 袭人道
- 宝玉听了
- 一面说
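The listing above only surfaces TF-IDF keywords, which are dominated by reporting phrases such as 笑道 and 宝玉道; task 2 also asks for topic visualization with gensim. The following is a minimal sketch, assuming the chapters are re-segmented with jieba; the variable names (tokenized_chapters, dictionary_g, corpus_g) and the choice of 5 topics are illustrative assumptions, not part of the original assignment.
import jieba
from gensim import corpora, models
# Segment each chapter with jieba and drop stop words and single-character tokens
tokenized_chapters = [
    [w for w in jieba.lcut(ch) if len(w) > 1 and w not in stopwords]
    for ch in chapters
]
# Build the gensim dictionary and bag-of-words corpus
dictionary_g = corpora.Dictionary(tokenized_chapters)
corpus_g = [dictionary_g.doc2bow(doc) for doc in tokenized_chapters]
# Train an LDA topic model (5 topics is an arbitrary illustrative choice)
lda = models.LdaModel(corpus=corpus_g, id2word=dictionary_g, num_topics=5, passes=10, random_state=42)
for topic_id, topic in lda.print_topics(num_words=10):
    print(topic_id, topic)
# Optional interactive visualization with pyLDAvis, if it is installed:
# import pyLDAvis, pyLDAvis.gensim_models as gensimvis
# pyLDAvis.save_html(gensimvis.prepare(lda, corpus_g, dictionary_g), "hongloumeng_lda.html")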
import networkx as nx
from collections import defaultdict
# Build the character relationship graph
G = nx.Graph()
# Use a defaultdict to accumulate co-occurrence counts
relationships = defaultdict(int)
# Iterate over the chapters and count co-occurrences between characters
for chapter in chapters:
for i in range(len(characters)):
for j in range(i + 1, len(characters)):
if characters[i] in chapter and characters[j] in chapter:
relationships[(characters[i], characters[j])] += 1
# Add the edges to the graph
for edge, weight in relationships.items():
G.add_edge(edge[0], edge[1], weight=weight)
# Draw the relationship graph
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=0.2)  # arrange the nodes with the spring layout
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title('红楼梦人物关系图')
plt.show()
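Task 3 asks specifically about the relationships of 贾宝玉, 林黛玉 and 薛宝钗 with the other characters. One way to read this off the co-occurrence graph is to keep only the edges touching those three names, as in the sketch below; the focus list must match the spelling used in 红楼梦人物.txt, and the minimum weight of 20 is an illustrative choice.
# Keep only edges that touch one of the three focal characters and are reasonably strong
focus = ['宝玉', '林黛玉', '宝钗']  # adjust to the exact names used in 红楼梦人物.txt
H = nx.Graph()
for u, v, d in G.edges(data=True):
    if (u in focus or v in focus) and d['weight'] >= 20:
        H.add_edge(u, v, weight=d['weight'])
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(H, k=0.3, seed=42)
nx.draw(H, pos, with_labels=True, node_size=1000, node_color='lightgreen', font_size=10)
nx.draw_networkx_edge_labels(H, pos, edge_labels=nx.get_edge_attributes(H, 'weight'))
plt.title('宝玉、黛玉、宝钗与其他人物的关系')
plt.show()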
Appearances of the top 20 characters
import re
from collections import Counter
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # use Microsoft YaHei so Chinese labels render
# Load the character list
with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt", "r", encoding="utf-8") as file:
characters = file.read().splitlines()
# Concatenate all chapters into one string and count character appearances
all_text = ''.join(chapters)
character_freq = Counter(re.findall(r'(%s)' % '|'.join(characters), all_text))
# Keep the 100 most frequent characters
top_characters_freq = dict(sorted(character_freq.items(), key=lambda item: item[1], reverse=True)[:100])
# Print the appearance counts of the top 100 characters
for idx, (character, freq) in enumerate(top_characters_freq.items(), 1):
print(f"{idx}. {character}: {freq}")
# Take the top 20 characters and their appearance counts
top_characters = list(top_characters_freq.keys())[:20]
top_frequencies = list(top_characters_freq.values())[:20]
# Bar chart of the appearance counts
plt.figure(figsize=(12, 6))
plt.bar(top_characters, top_frequencies)
plt.title('前20个人物出场情况')
plt.xlabel('人物')
plt.ylabel('出场次数')
plt.xticks(rotation=45)  # rotate the x-axis labels to avoid overlap
plt.tight_layout()  # adjust the layout to prevent label clipping
plt.show()
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
from collections import Counter
# Load the character list
with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt", "r", encoding="utf-8") as file:
characters = file.read().splitlines()
# Concatenate all chapters into one string and count character appearances
all_text = ''.join(chapters)
character_freq = Counter(re.findall(r'(%s)' % '|'.join(characters), all_text))
# Keep the 100 most frequent characters
top_characters_freq = dict(sorted(character_freq.items(), key=lambda item: item[1], reverse=True)[:100])
# Print the appearance counts of the top 100 characters
for idx, (character, freq) in enumerate(top_characters_freq.items(), 1):
print(f"{idx}. {character}: {freq}")
# Draw the character appearance word cloud
wordcloud = WordCloud(background_color='white', font_path=r"D:\\coder\\randomnumbers\\Keywords_cloud\\msyh.ttf")
wordcloud.generate_from_frequencies(top_characters_freq)
plt.figure(figsize=(10, 8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.title('人物出现词云图')
plt.axis('off')
plt.show()
# Take the top 20 characters and their appearance counts
top_characters = list(top_characters_freq.keys())[:20]
top_frequencies = list(top_characters_freq.values())[:20]
# Bar chart of the appearance counts
plt.figure(figsize=(12, 6))
plt.bar(top_characters, top_frequencies)
plt.title('前20个人物出场情况')
plt.xlabel('人物')
plt.ylabel('出场次数')
plt.xticks(rotation=45)  # rotate the x-axis labels to avoid overlap
plt.tight_layout()  # adjust the layout to prevent label clipping
plt.show()
Character appearance word cloud
- 宝玉: 3810
- 凤姐: 1680
- 贾母: 1639
- 袭人: 1123
- 王夫人: 1039
- 宝钗: 1002
- 贾政: 911
- 贾琏: 746
- 平儿: 653
- 薛姨妈: 446
- 紫鹃: 427
- 探春: 426
- 鸳鸯: 406
- 贾珍: 382
- 李纨: 366
- 晴雯: 340
- 尤氏: 336
- 刘姥姥: 288
- 邢夫人: 280
- 小丫头: 279
- 薛蟠: 277
- 林黛玉: 268
- 香菱: 245
- 麝月: 232
- 贾蓉: 222
- 周瑞: 215
- 小厮: 207
- 贾赦: 190
- 贾芸: 185
- 惜春: 182
- 芳官: 156
- 妙玉: 153
- 雪雁: 151
- 贾环: 146
- 林之孝: 142
- 迎春: 134
- 莺儿: 125
- 赵姨娘: 122
- 宝蟾: 114
- 巧姐: 107
- 秦钟: 100
- 薛蝌: 99
- 贾兰: 94
- 秋纹: 93
- 茗烟: 88
- 尤二姐: 88
- 大了: 84
- 史湘云: 83
- 赖大: 82
- 五儿: 80
- 司棋: 76
- 秦氏: 72
- 贾瑞: 68
- 旺儿: 68
- 贾蔷: 67
- 凤丫头: 64
- 兴儿: 58
- 彩云: 57
- 琥珀: 57
- 冯紫英: 55
- 焙茗: 51
- 鲍二: 50
- 包勇: 50
- 金钏: 48
- 门子: 47
- 翠缕: 47
- 北静王: 45
- 丰儿: 43
- 李贵: 41
- 玉钏儿: 41
- 柳家的: 40
- 倪二: 39
- 张华: 39
- 板儿: 38
- 小红: 38
- 李嬷嬷: 37
- 王仁: 36
- 坠儿: 36
- 甄宝玉: 36
- 藕官: 33
- 春燕: 33
- 尤三姐: 33
- 秋桐: 33
- 琏二奶奶: 31
- 金荣: 29
- 贾芹: 29
- 石头: 28
- 玻璃: 28
- 王善保: 28
- 大姐: 27
- 侍书: 27
- 女尼: 27
- 李氏: 26
- 彩屏: 26
- 李纹: 26
- 智能: 25
- 翠墨: 24
- 张道士: 24
- 李十儿: 24
- 王子腾: 23
Relationships among the top 20 characters
import networkx as nx
from collections import defaultdict
# Build the relationship graph for the top 20 characters
G = nx.Graph()
# Use a defaultdict to accumulate co-occurrence counts
top20_relationships = defaultdict(int)
# Iterate over the chapters and count co-occurrences among the top 20 characters
for chapter in chapters:
for i in range(len(top20_characters)):
for j in range(i + 1, len(top20_characters)):
if top20_characters[i] in chapter and top20_characters[j] in chapter:
top20_relationships[(top20_characters[i], top20_characters[j])] += 1
# Add the edges to the graph
for edge, weight in top20_relationships.items():
G.add_edge(edge[0], edge[1], weight=weight)
# Draw the relationship graph
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=0.2)  # arrange the nodes with the spring layout
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title('红楼梦前20个人物关系图')
plt.show()
In this graph, the number on the line between two characters is the edge weight defined in the code: the number of chapters in which the two characters appear together. In the 《红楼梦》 relationship graph this co-occurrence count serves as a measure of how closely the two characters are related.
A large number between two characters means they frequently appear together in the novel and are likely to be closely related or tightly linked in the plot; a small number means the relationship between them is weaker.
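As a concrete check of this weight definition, here is a tiny worked example on two made-up mini "chapters" (purely illustrative data):
from collections import defaultdict
toy_chapters = ['宝玉和黛玉在园中说话', '宝玉又见了宝钗,黛玉也在']  # made-up mini corpus
toy_characters = ['宝玉', '黛玉', '宝钗']
toy_rel = defaultdict(int)
for chap in toy_chapters:
    for i in range(len(toy_characters)):
        for j in range(i + 1, len(toy_characters)):
            if toy_characters[i] in chap and toy_characters[j] in chap:
                toy_rel[(toy_characters[i], toy_characters[j])] += 1
print(dict(toy_rel))
# {('宝玉', '黛玉'): 2, ('宝玉', '宝钗'): 1, ('黛玉', '宝钗'): 1}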
import networkx as nx
from collections import defaultdict
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # use Microsoft YaHei so Chinese labels render
# Build the character relationship graph
G = nx.Graph()
# Use a defaultdict to accumulate co-occurrence counts
relationships = defaultdict(int)
# Iterate over the chapters and count co-occurrences between characters
for chapter in chapters:
for i in range(len(characters)):
for j in range(i + 1, len(characters)):
if characters[i] in chapter and characters[j] in chapter:
relationships[(characters[i], characters[j])] += 1
# Add edges to the graph (keep only pairs that co-occur in at least 10 chapters)
for edge, weight in relationships.items():
    if weight >= 10:
G.add_edge(edge[0], edge[1], weight=weight)
# Draw the relationship graph
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=0.2)  # arrange the nodes with the spring layout
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title('红楼梦人物关系图(关联次数大于等于10)')
plt.show()
Full character relationship graph
import networkx as nx
from collections import defaultdict
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # use Microsoft YaHei so Chinese labels render
# Build the character relationship graph
G = nx.Graph()
# Use a defaultdict to accumulate co-occurrence counts
relationships = defaultdict(int)
# Iterate over the chapters and count co-occurrences between characters
for chapter in chapters:
for i in range(len(characters)):
for j in range(i + 1, len(characters)):
if characters[i] in chapter and characters[j] in chapter:
relationships[(characters[i], characters[j])] += 1
# Add edges to the graph (keep only pairs that co-occur in at least 15 chapters)
for edge, weight in relationships.items():
if weight >= 15:
G.add_edge(edge[0], edge[1], weight=weight)
# Draw the relationship graph
plt.figure(figsize=(12, 8))
pos = nx.circular_layout(G)  # arrange the nodes with the circular layout
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title('红楼梦人物关系图(关联次数大于等于15)')
plt.show()
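With this many nodes the numeric edge labels overlap heavily. A readability tweak (a sketch, not part of the original assignment) is to drop the labels and encode the co-occurrence count as the edge width instead, reusing the graph G built above:
plt.figure(figsize=(12, 8))
pos = nx.circular_layout(G)
weights = [d['weight'] for _, _, d in G.edges(data=True)]
max_w = max(weights) if weights else 1
nx.draw_networkx_nodes(G, pos, node_size=800, node_color='lightblue')
nx.draw_networkx_edges(G, pos, width=[3 * w / max_w for w in weights], alpha=0.6)
nx.draw_networkx_labels(G, pos, font_size=10)
plt.title('红楼梦人物关系图(边宽与共现次数成正比)')
plt.axis('off')
plt.show()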
Worked example
## Load the required packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer,TfidfVectorizer
## Set the Chinese font
fonts = FontProperties(fname = r"C:\Windows\Fonts\STXIHEI.ttf",size=14)
## Import the 3D axes toolkit
from mpl_toolkits.mplot3d import Axes3D
## pandas display options
pd.set_option("display.max_rows",8)
pd.options.mode.chained_assignment = None # default='warn'
## Inline figure display settings
%matplotlib inline
%config InlineBackend.figure_format = "retina"
## Read the stop words
stopword = pd.read_csv("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦停用词.txt",
header=None,names = ["Stopwords"])
## Read the 《红楼梦》 dataset
Red_df = pd.read_excel("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦数据集.xlsx")
Red_df.head(5)
import jieba
## Load the custom user dictionary
jieba.load_userdict("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦词典.txt")
## Segment the full text of 《红楼梦》
## Number of rows in the data table
row, col = Red_df.shape
## Pre-allocate the cutword column
Red_df["cutword"] = "cutword"
for ii in np.arange(row):
    ## Segment the chapter
    cutwords = list(jieba.cut(Red_df.Artical[ii], cut_all=True))
    ## Drop tokens of length 1
    cutwords = pd.Series(cutwords)[pd.Series(cutwords).apply(len) > 1]
    ## Drop stop words (compare against the Stopwords column, not the whole DataFrame)
    cutwords = cutwords[~cutwords.isin(stopword.Stopwords)]
    Red_df.cutword[ii] = cutwords.values
for ii in np.arange(row):  # filter out residual punctuation tokens
    Red_df.cutword[ii] = Red_df.cutword[ii][~(Red_df.cutword[ii] == ':“')]
    Red_df.cutword[ii] = Red_df.cutword[ii][~(Red_df.cutword[ii] == '。”')]
    Red_df.cutword[ii] = Red_df.cutword[ii][~(Red_df.cutword[ii] == '?”')]
    Red_df.cutword[ii] = Red_df.cutword[ii][~(Red_df.cutword[ii] == '!”')]
import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
warnings.filterwarnings("ignore",category=FutureWarning)
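CountVectorizer and TfidfVectorizer are imported above but not used in the snippets that follow. Below is a minimal sketch of applying TfidfVectorizer to the segmented cutword column: each chapter's tokens are joined with spaces so the default token pattern can split them again, and the top 10 terms of the first chapter are printed (the choice of chapter 0 and of 10 terms is illustrative).
## Join each chapter's tokens back into a space-separated string
docs = [" ".join(words) for words in Red_df.cutword]
tfidf = TfidfVectorizer()
tfidf_mat = tfidf.fit_transform(docs)
terms = tfidf.get_feature_names_out()  # on sklearn < 1.0 this is get_feature_names()
## Top 10 terms of the first chapter by TF-IDF weight
row0 = tfidf_mat[0].toarray().ravel()
top_idx = row0.argsort()[::-1][:10]
print([(terms[i], round(row0[i], 3)) for i in top_idx])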
Analyzing the character relationships in 《红楼梦》
## Look at how often a few key characters appear across the whole book
## Read the character-name data for 《红楼梦》
role = pd.read_csv("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt",header=None,names = ["rolename"])
role.head()
## Count how many times each character appears in the book
# Concatenate the segmentation results of all 120 chapters and compute word frequencies
allcutword = np.concatenate(Red_df.cutword)
allcutword = pd.DataFrame({"word":allcutword})
allcutword = allcutword.groupby(by=["word"])["word"].agg(number=np.size)
allcutword = allcutword.reset_index().sort_values(by="number",ascending=False)
## Count the occurrences of each character name
counts = []
for ii in role.index:
rolenam = role.rolename[ii]
number = allcutword["number"][allcutword.word == rolenam]
counts.append(number.values)
role["counts"] = pd.DataFrame(counts)
## Drop missing values and characters appearing no more than 5 times
role = role[role.counts.notnull()].sort_values(by="counts",ascending=False)
role = role[role.counts > 5].reset_index(drop=True)
print(role.head())
rolename counts
0 宝玉 3862.0
1 凤姐 1680.0
2 贾母 1639.0
3 袭人 1123.0
4 王夫人 1039.0
## Trends of the top key characters across the chapters
rolenumber = np.zeros((10,120))
for kk in np.arange(10):
    # Count how many times this character appears in each chapter
    nums = []
    for ii in np.arange(len(Red_df.index)):
        ## Word frequencies of this chapter
        chapcutword = pd.DataFrame({"word": Red_df.cutword[ii]})
        chapcutword = chapcutword.groupby(by=["word"])["word"].agg(number=np.size)
        chapcutword = chapcutword.reset_index()
        # Occurrences in this chapter
        num = chapcutword["number"][chapcutword.word == role.rolename[kk]]
        nums.append(num.values)
    # All chapter counts for this character
    rolenumber[kk, :] = pd.DataFrame(nums).fillna(0).values[:, 0]
## Plot how often each character appears in each chapter
plt.figure(figsize=(12, 8))
for ii in np.arange(6):
    plt.subplot(3, 2, ii + 1)
    plt.bar(np.arange(120) + 1, rolenumber[ii, :], alpha=1)
    plt.title(role.rolename[ii], fontproperties=fonts, size=12)
    plt.ylabel("频次", fontproperties=fonts, size=10)
plt.subplots_adjust(hspace=0.25, wspace=0.15)
plt.show()
plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # display the minus sign correctly instead of a box
## Correlation of the appearance patterns of the 10 characters
import seaborn as sns
datacor = np.corrcoef(rolenumber)
datacor = pd.DataFrame(data=datacor,columns=role.rolename[0:10],index=role.rolename[0:10])
## Correlation heatmap
plt.figure(figsize=(10,10))
ax = sns.heatmap(datacor,square=True,annot=True,fmt = ".3f",
linewidths=.5,cmap="YlGnBu",
cbar_kws={"fraction":0.046, "pad":0.03})
ax.set_xticklabels(role.rolename[0:10],fontproperties = fonts)
ax.set_yticklabels(role.rolename[0:10],fontproperties = fonts)
ax.set_title("人物相关性",fontproperties = fonts)
ax.set_xlabel("", fontproperties=fonts)
ax.set_ylabel("", fontproperties=fonts)
plt.show()
## Compute relationship weights between characters from the segmentation results; only characters appearing more than 100 times are analyzed
## Weight definition: if two characters both appear in the same chapter, the corresponding weight is increased by 1
Red_df.cutword
rolenew = role[role.counts>100]
rolenew
## Build all pairwise relations
from itertools import combinations
relation = combinations(rolenew.rolename,2)
rela = []
weight = []
for ii in relation:
    rela.append(ii)
    ## Compute the weight for this pair
    weig = 0
    for kk in np.arange(len(Red_df.index)):
        ## Do both characters appear (more than once) in this chapter?
        if (sum(Red_df.cutword[kk] == ii[0]) > 1) & (sum(Red_df.cutword[kk] == ii[1]) > 1):
            weig = weig + 1
    weight.append(weig)
Red_rela = pd.DataFrame(rela)
Red_rela.columns = ["First","Second"]
Red_rela["weight"] = weight
Red_rela = Red_rela[Red_rela.weight>20].sort_values(by="weight",ascending=False).reset_index(drop = True)
print(Red_rela.head())
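To answer task 3 directly from this table, the rows involving 宝玉, 黛玉 and 宝钗 can be pulled out; a short sketch (the names in focus must match those that actually appear in Red_rela):
## Rows of Red_rela that involve one of the three focal characters
focus = ["宝玉", "黛玉", "宝钗"]
focus_rela = Red_rela[Red_rela.First.isin(focus) | Red_rela.Second.isin(focus)]
print(focus_rela.sort_values(by="weight", ascending=False))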
import networkx as nx
## Visualize the character relationships
plt.figure(figsize=(12,12))
## Build the social network graph
G = nx.Graph()
## Add the edges
for ii in Red_rela.index:
    G.add_edge(Red_rela.First[ii], Red_rela.Second[ii], weight=Red_rela.weight[ii] / 120)
## Split the edges into two groups by weight
elarge=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >0.25]
esmall=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <=0.25]
## Graph layout
pos = nx.circular_layout(G)  # positions for all nodes
# nodes
nx.draw_networkx_nodes(G,pos,alpha=0.6,node_size=500)
# edges
nx.draw_networkx_edges(G,pos,edgelist=elarge,
width=1.5,alpha=0.6,edge_color='r')
nx.draw_networkx_edges(G,pos,edgelist=esmall,
width=1,alpha=0.8,edge_color='b',style='dashed')
# labels
nx.draw_networkx_labels(G,pos,font_size=10)
plt.axis('off')
plt.title("《红楼梦》社交网络",FontProperties = fonts)
plt.show() # display
## Visualize the character relationships
plt.figure(figsize=(12,12))
## Build the social network graph
G = nx.Graph()
## Add the edges
for ii in Red_rela.index:
    G.add_edge(Red_rela.First[ii], Red_rela.Second[ii], weight=Red_rela.weight[ii] / 120)
## Split the edges into two groups by weight
elarge=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >0.25]
esmall=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <=0.25]
## Graph layout
pos = nx.spring_layout(G)  # positions for all nodes
# nodes
nx.draw_networkx_nodes(G,pos,alpha=0.6,node_size=500)
# edges
nx.draw_networkx_edges(G,pos,edgelist=elarge,
width=1.5,alpha=0.6,edge_color='r')
nx.draw_networkx_edges(G,pos,edgelist=esmall,
width=1,alpha=0.8,edge_color='b',style='dashed')
# labels
nx.draw_networkx_labels(G,pos,font_size=10)
plt.axis('off')
plt.title("《红楼梦》社交网络",FontProperties = fonts)
plt.show() # display
## Compute the degree of each node
Red_degree = pd.DataFrame(list(G.degree))
Red_degree.columns = ["name","degree"]
Red_degree
Red_degree.sort_values(by="degree",ascending=False).plot(kind = "bar",x="name",y = "degree",figsize=(12,6),legend=False)
plt.xticks(fontproperties=fonts, size=12)
plt.ylabel("degree")
plt.show()
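The raw degree above only counts how many distinct characters each node is linked to. A weighted degree, summing the co-occurrence weights of the incident edges, often ranks the central figures more clearly; a short sketch using the same graph G:
## Weighted degree: sum of the weights of the edges incident to each node
w_degree = pd.DataFrame(list(G.degree(weight="weight")), columns=["name", "weighted_degree"])
w_degree.sort_values(by="weighted_degree", ascending=False).plot(
    kind="bar", x="name", y="weighted_degree", figsize=(12, 6), legend=False)
plt.xticks(fontproperties=fonts, size=12)
plt.ylabel("weighted degree")
plt.show()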