python123生成词云

img


Python123第七章 Python程序设计方法论 词云生成

效果如图(我获取的是影评数据)。如有帮助,请给个采纳。

img

import os

import jieba
import matplotlib.pyplot as plt
import numpy as np
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from wordcloud import WordCloud
# Collect the text of every element matching `selector` from the comment
# listing at `url`, following the "next page" link until it can no longer
# be found or clicked. The non-empty, stripped texts end up in `results`.
browser = webdriver.Chrome()
url = 'https://movie.douban.com/subject/34841067/comments?status=P'
selector = 'span.short'
results = []
try:
    browser.get(url)
    while True:
        elements = browser.find_elements(By.CSS_SELECTOR, selector)
        stripped = (element.text.strip() for element in elements)
        results.extend(comment for comment in stripped if comment)
        # Click the "next page" link. WebDriverException is the base class of
        # Selenium's errors (missing link, click intercepted, ...), so any
        # Selenium-level failure here ends pagination and keeps what was
        # collected so far, while Ctrl-C / SystemExit still propagate.
        try:
            next_btn = browser.find_element(By.LINK_TEXT, '后页>')
            next_btn.click()
        except WebDriverException:
            break
finally:
    # Always close the browser, even if scraping failed part-way, so no
    # Chrome/driver process is left behind.
    browser.quit()
# Merge all scraped comments into one document, one comment per line.
text = '\n'.join(results)
# Segment the Chinese text into words with the jieba library.
words = jieba.cut(text)
stop_words_path = 'stopwords.txt'
with open(stop_words_path, encoding='utf-8') as f:
    # One stop word per line. A set makes each membership test below O(1)
    # instead of a linear scan over the whole stop-word list.
    stop_words = set(f.read().splitlines())
# Keep tokens that are not stop words. Whitespace-only tokens (such as the
# '\n' separators inserted by the join above) are dropped explicitly: they
# can never match a stop-word line and carry no content.
valid_words = [
    word for word in words
    if word.strip() and word not in stop_words
]
# Space-separated words, ready to be passed to the word-cloud generator.
valid_text = ' '.join(valid_words)
# Rendering options for the word cloud, gathered in one place and then
# unpacked into the WordCloud constructor.
cloud_options = dict(
    font_path="PingFang Bold.ttf",  # font file used to draw the words
    width=800,
    height=600,
    background_color='white',
    max_words=200,
    max_font_size=80,
    random_state=42,  # fixed seed for the layout's random choices
)
wc = WordCloud(**cloud_options)
# Build the cloud from the filtered, space-separated words.
wc.generate(valid_text)
# Define the output location and save the rendered cloud as a PNG there.
output_dir = 'output'
output_path = os.path.join(output_dir, 'wordcloud.png')
# exist_ok=True creates the directory only when it is missing, without the
# race-prone "check, then create" sequence.
os.makedirs(output_dir, exist_ok=True)
wc.to_file(output_path)
# Also display the cloud in a matplotlib window, with the axes hidden.
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()