试下先import 所有库之后回车一下
不行就换一个编译器试试
摘要:主要以天池情感分析可视化为例子
from wordcloud import WordCloud
import pandas as pd
import numpy as np
import jieba
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import font_manager
data = pd.read_csv('earphone_sentiment.csv') #文件名要加后缀csv不然会出错
#先分词后删除停用词
def stopwords_remove(text): #data表示待分析的文本
seg_list = jieba.lcut(text) #默认是精确模式cut,得到分词的list
stopwords = []
for line in open('自建停用词.txt','r'):
stopwords.append(line.rstrip())
seg_remove_list = []
for word in seg_list:
if word not in stopwords:
seg_remove_list.append(word) #得到去除停用词的分词list
seg_remove = ' '.join(seg_remove_list) #得到去除停用词的分词文本,因为用wordcloud画图时需要输入文本
return seg_remove
#尝试把词云图以某个形状展现
def wc_1(text_remove): #这里的text_remove是删除停用词之后的分词文本
graph = plt.imread("earphone.png")
#一定要先imread(),不能直接把图片路径放进WordCloud里面的mask中
word_cloud = WordCloud(font_path='simsun.ttc',mask=graph, background_color='white').generate(text_remove)
plt.imshow(word_cloud)
plt.axis("off")
plt.figure(figsize=(10,8))
from matplotlib.font_manager import FontProperties
font = FontProperties(fname=r"simsun.ttc", size=14) #为了可以在词云中显示中文
group_data = data.groupby(['sentiment_value']).get_group(0)
group_text = ' '.join(group_data['content'])
#sentiment = ["positive","neutual","negtive"]
plt.title("中立评价",fontproperties=font)
wc_1(stopwords_remove(group_text))
UserWarning:mask image should be unsigned byte between 0 and 255. Got a float array
解决办法一:graph = plt.imread("earphone.png") graph = graph.astype(np.uint8)
最终不报错但是也没有生成指定形状的词云
解决办法二:将图片数据转化成数组形式
from PIL import Image
def wc_1(text_remove): #这里的text_remove是删除停用词之后的分词文本
graph = np.array(Image.open("earphone.png")) #关键
word_cloud = WordCloud(font_path='simsun.ttc',mask=graph, background_color='white').generate(text_remove)
plt.imshow(word_cloud)
plt.axis("off")
plt.figure(figsize=(10,8))
问题解决啦!