利用 jieba 库对中英文文本分词，各自提取 50 个高频词，并使用 pyecharts 绘制词云图进行对比。


# -*- coding: utf-8 -*-
"""
Created on Thu May  4 19:21:10 2023

@author: Lenovo
"""

import jieba
from collections import Counter
from pyecharts import options as opts
from pyecharts.charts import WordCloud

# Sample texts to tokenize: one Chinese, one English.
text1 = '中文文本分词测试中文中文测试测试测试'
text2 = 'aaaaaaaaaaaaaaaaaaadddddddddddddddddbbbbbbbbbbbbbbbss ing big small'


def _top_words(text, k=50):
    """Tokenize *text* with jieba and return its *k* most frequent words.

    Returns a list of (word, count) pairs, which is exactly the
    data_pair format pyecharts' WordCloud.add expects.
    """
    return Counter(jieba.cut(text)).most_common(k)


def _render_wordcloud(word_pairs, title, out_path):
    """Render *word_pairs* as a word-cloud chart and save it to *out_path* (HTML)."""
    chart = WordCloud()
    chart.add('', word_pairs, word_size_range=[20, 100])
    chart.set_global_opts(title_opts=opts.TitleOpts(title=title))
    chart.render(out_path)


if __name__ == '__main__':
    # The two pipelines are identical apart from input/title/output,
    # so the shared tokenize -> count -> render logic lives in the helpers.
    _render_wordcloud(_top_words(text1), '中文词云图', 'wordcloud1.html')
    _render_wordcloud(_top_words(text2), '英文词云图', 'wordcloud2.html')

图 1：中文词云图（wordcloud1.html 渲染结果）

图 2：英文词云图（wordcloud2.html 渲染结果）