# Tokenize each text with jieba, extract the 50 most frequent words from each,
# and compare them by drawing word-cloud charts with pyecharts.
# -*- coding: utf-8 -*-
"""
Created on Thu May 4 19:21:10 2023
@author: Lenovo
"""
import jieba
from collections import Counter
from pyecharts import options as opts
from pyecharts.charts import WordCloud
# Sample texts to compare: one Chinese, one English.
text1 = '中文文本分词测试中文中文测试测试测试'
text2 = 'aaaaaaaaaaaaaaaaaaadddddddddddddddddbbbbbbbbbbbbbbbss ing big small'


def _top_words(text: str, k: int = 50):
    """Tokenize *text* with jieba and return the *k* most frequent tokens.

    Returns a list of ``(word, count)`` tuples, sorted by descending count,
    which is exactly the data shape ``WordCloud.add`` accepts.
    """
    return Counter(jieba.cut(text)).most_common(k)


def _render_wordcloud(data, title: str, out_path: str) -> None:
    """Render *data* (``(word, count)`` pairs) as a word cloud HTML file.

    ``title`` becomes the chart title; the chart is written to *out_path*.
    """
    chart = WordCloud()
    chart.add('', data, word_size_range=[20, 100])
    chart.set_global_opts(title_opts=opts.TitleOpts(title=title))
    chart.render(out_path)


if __name__ == '__main__':
    # NOTE(review): the original file had this body unindented (a
    # SyntaxError); the logic below is the same steps, fixed and de-duplicated.
    # Chinese text: tokenize, take the top-50 words, draw the cloud.
    _render_wordcloud(_top_words(text1), '中文词云图', 'wordcloud1.html')
    # English text: jieba splits on whitespace/runs for non-Chinese input.
    _render_wordcloud(_top_words(text2), '英文词云图', 'wordcloud2.html')