import pandas as pd
# Load the sentiment lexicon from the Excel workbook.
# Forward slashes are used as separators: the previous backslash-separated
# literal contained the invalid escape sequences "\大" and "\情" and only
# resolved on Windows.
df = pd.read_excel('Sentiment_dictionary/大连理工情感词汇本体/情感词汇本体.xlsx')
# For now only these columns are used.
df = df[['词语', '词性种类', '词义数', '词义序号', '情感分类', '强度', '极性']]
# Preview of the first rows; the result is not stored, so this only shows
# something when run interactively (e.g. as the last expression of a cell).
df.head()
# Split the lexicon words into seven emotion lists, keyed by the category
# code found in the '情感分类' column.
Happy = []
Good = []
Surprise = []
Anger = []
Sad = []
Fear = []
Disgust = []

# Category code -> the list that collects words carrying that code.
# Codes that are not listed here are ignored.
_bucket_by_code = {
    'PA': Happy, 'PE': Happy,
    'PD': Good, 'PH': Good, 'PG': Good, 'PB': Good, 'PK': Good,
    'PC': Surprise,
    'NA': Anger,
    'NB': Sad, 'NJ': Sad, 'NH': Sad, 'PF': Sad,
    'NI': Fear, 'NC': Fear, 'NG': Fear,
    'NE': Disgust, 'ND': Disgust, 'NN': Disgust, 'NK': Disgust, 'NL': Disgust,
}

# Walk the two needed columns in lockstep instead of using iterrows(), which
# builds a full Series object for every row.
for word, code in zip(df['词语'], df['情感分类']):
    bucket = _bucket_by_code.get(code)
    if bucket is not None:
        bucket.append(word)

# Coarse polarity lists are plain concatenations of the emotion lists, so a
# word that appears in several emotion lists appears several times here too.
Positive = Happy + Good + Surprise
Negative = Anger + Sad + Fear + Disgust
print('情绪词语列表整理完成')
# Emotion scoring function.
# Naive approach: a text's emotion values are simply counts of lexicon words.
import jieba
import time
def _lexicon_hits(token_counts, lexicon):
    """Return how many tokens of the text are words of *lexicon*.

    ``token_counts`` maps each distinct token to its number of occurrences in
    the text; ``lexicon`` is an iterable of words and may contain duplicates.
    """
    # Collect matches in a set so that a word listed more than once in the
    # lexicon is still counted only once per occurrence in the text.
    matched = {word for word in lexicon if word in token_counts}
    return sum(token_counts[word] for word in matched)


def emotion_caculate(text):
    """Count emotion-lexicon words in *text*.

    The text is segmented with ``jieba.lcut`` and every token is looked up in
    the module-level word lists (``Positive``, ``Negative``, ``Anger``,
    ``Disgust``, ``Fear``, ``Sad``, ``Surprise``, ``Good``, ``Happy``).

    Args:
        text: The string to analyse.

    Returns:
        A ``pandas.Series`` indexed by ``length``, ``positive``, ``negative``,
        ``anger``, ``disgust``, ``fear``, ``sadness``, ``surprise``, ``good``
        and ``happy``. ``length`` is the total number of tokens; every other
        entry is the number of tokens found in the corresponding word list.
    """
    from collections import Counter

    wordlist = jieba.lcut(text)
    # One pass to get per-token frequencies, instead of calling
    # wordlist.count() once for every distinct token.
    token_counts = Counter(wordlist)

    # Output label -> word list that feeds it.
    lexicons = {
        'positive': Positive,
        'negative': Negative,
        'anger': Anger,
        'disgust': Disgust,
        'fear': Fear,
        'sadness': Sad,
        'surprise': Surprise,
        'good': Good,
        'happy': Happy,
    }

    emotion_info = {'length': len(wordlist)}
    for label, lexicon in lexicons.items():
        emotion_info[label] = _lexicon_hits(token_counts, lexicon)

    # Explicit index so the order of the returned Series is fixed.
    indexs = ['length', 'positive', 'negative', 'anger', 'disgust', 'fear',
              'sadness', 'surprise', 'good', 'happy']
    return pd.Series(emotion_info, index=indexs)
# Demo: score a single sample sentence with the function defined above.
sample_text = '这个国家再对这些制造假冒伪劣食品药品的人手软的话,那后果真的会相当糟糕。坐牢?从快判个死刑!'
emotion_caculate(text=sample_text)
# Output: