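# Result: see the word-cloud figure (image not included here). This script scrapes film reviews; if it helps, please accept the answer.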
import os

import jieba
import matplotlib.pyplot as plt
import numpy as np
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from wordcloud import WordCloud
# Scrape every short comment from the paginated review listing.
# Produces `results`, a list of non-empty comment strings in page order.
browser = webdriver.Chrome()
url = 'https://movie.douban.com/subject/34841067/comments?status=P'
selector = 'span.short'
results = []
try:
    browser.get(url)
    while True:
        # Collect the stripped text of each matching element on this page.
        for element in browser.find_elements(By.CSS_SELECTOR, selector):
            comment = element.text.strip()
            if comment:
                results.append(comment)
        # Click the "next page" link. Any Selenium error while locating or
        # clicking it (e.g. the link is absent) ends pagination; non-Selenium
        # exceptions such as KeyboardInterrupt are no longer swallowed.
        try:
            browser.find_element(By.LINK_TEXT, '后页>').click()
        except WebDriverException:
            break
finally:
    # Always shut the browser down, even if scraping raised, so the
    # driver/browser process is not leaked.
    browser.quit()
# Join all scraped comments into a single text, one comment per line.
text = '\n'.join(results)
# Segment the Chinese text into words with jieba.
words = jieba.cut(text)
stop_words_path = 'stopwords.txt'
with open(stop_words_path, encoding='utf-8') as f:
    # One stop word per line; a set makes each membership test O(1)
    # instead of a linear scan of a list for every token.
    stop_words = set(f.read().splitlines())
# Keep only tokens that are not stop words, then re-join them with spaces
# to form the text passed on to the word-cloud step.
valid_words = [word for word in words if word not in stop_words]
valid_text = ' '.join(valid_words)
# Build the word cloud from the filtered, space-separated text.
wc = WordCloud(
    # NOTE(review): the font file is resolved relative to the working
    # directory; presumably a CJK-capable font is required for the Chinese
    # words to render -- confirm the file is present.
    font_path="PingFang Bold.ttf",
    width=800,
    height=600,
    background_color='white',
    max_words=200,
    max_font_size=80,
    random_state=42
)
wc.generate(valid_text)
# Define the output path and make sure the directory exists.
output_dir = 'output'
output_path = os.path.join(output_dir, 'wordcloud.png')
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test and is a no-op when the directory is already there.
os.makedirs(output_dir, exist_ok=True)
wc.to_file(output_path)
# Display the same image in a matplotlib window with the axes hidden.
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()