import jieba
import re
import json
import tqdm
import gensim
def load_data(self):
    """Load the weibo dataset from '0.5-.csv' and parse it as JSON.

    Returns:
        The parsed JSON value — a list of weibo records, each expected
        to carry a 'weibo_cont' key (see fenci_data).
    """
    # The file holds JSON despite its .csv extension. Parse the stream
    # directly with json.load instead of read()+loads, and pin UTF-8 so
    # Chinese text decodes correctly regardless of the platform default.
    with open('0.5-.csv', encoding='utf-8') as file:
        return json.load(file)
def fenci_data(self):
    """Segment each weibo's Chinese text with jieba and drop stop words.

    Returns:
        list[list[str]]: one token list per weibo record, in input order.
    """
    print(".....")
    data = self.load_data()
    # Build a set of individual stop words. The original kept the raw
    # file text as one string, so `word not in stop_word_list` was a
    # SUBSTRING test that wrongly discarded any word occurring anywhere
    # in the file (e.g. inside a longer stop word).
    with open('stoplist.txt', encoding='utf-8') as file:
        stop_words = set(file.read().split())
    text = []
    # `import tqdm` binds the module; the progress-bar callable is
    # tqdm.tqdm — calling the bare module raised TypeError.
    for weibo_item in tqdm.tqdm(data):
        # Keep only CJK unified ideographs, stripping punctuation,
        # latin characters, emoji, etc. before segmentation.
        sentence = ''.join(re.findall(r'[\u4e00-\u9fa5]+', weibo_item['weibo_cont']))
        tokens = [word for word in jieba.lcut(sentence) if word not in stop_words]
        text.append(tokens)
    return text
def weibo_lda(self):
    """Build the gensim dictionary and bag-of-words corpus.

    Returns:
        tuple: (dictionary, corpus) where dictionary maps token -> id
        and corpus is one doc2bow list per segmented weibo.
    """
    text = self.fenci_data()
    # `Dictionary` was referenced unqualified but never imported
    # (NameError at runtime); qualify it through the gensim package
    # that the file already imports.
    dictionary = gensim.corpora.Dictionary(text)
    corpus = [dictionary.doc2bow(tokens) for tokens in text]
    return dictionary, corpus
def choose_topic(self):
    """Train LDA models with 1..15 topics and save each to its own file."""
    dictionary, corpus = self.weibo_lda()
    # len(corpus) equals the number of segmented documents, so there is
    # no need to re-run the expensive fenci_data() a second time just
    # to count them (the original did, doubling the runtime).
    num_docs = len(corpus)
    for num_topics in range(1, 16):
        print('目前的topic个数:{}'.format(num_topics))
        print('目前的数据量:{}'.format(num_docs))
        model = gensim.models.ldamodel.LdaModel(
            corpus, num_topics=num_topics, id2word=dictionary, passes=20)
        # Save each model under its computed name. The original built
        # file_path but then saved every model to 'LDA.csv', so each
        # iteration overwrote all the previous ones.
        file_path = './lda_{}_{}.model'.format(num_topics, num_docs)
        model.save(file_path)
        print('------------------')
代码只是给出了几个函数,并没有定义类及实例化对象和对函数的调用,当然不会有结果输出。
将这些函数写入一个类中,例如:class wordParse:(注意为每个方法加上正确的缩进)。
在文件最后实例化并调用即可:output = wordParse(),然后执行 output.choose_topic()。
您好,我是有问必答小助手,你的问题已经有小伙伴为您解答了问题,您看下是否解决了您的问题,可以追评进行沟通哦~
如果有您比较满意的答案 / 帮您提供解决思路的答案,可以点击【采纳】按钮,给回答的小伙伴一些鼓励哦~~
ps:问答VIP仅需29元,即可享受5次/月 有问必答服务,了解详情>>>https://vip.csdn.net/askvip?utm_source=1146287632