# ... (earlier code omitted) ...
#text = [list(np.arange(0, y)) for y in n_word]
class yy:
    """Train, save and evaluate gensim LDA models over a range of topic counts.

    Every method reads the tokenised documents from the module-level name
    ``text`` (a list of token lists), so ``text`` must be defined before any
    method is called.
    """

    # choose_topic() trains one model for each topic count 1..MAX_TOPICS.
    MAX_TOPICS = 5

    @staticmethod
    def _model_path(topic_count, data_num):
        """Return the path a model with *topic_count* topics is saved to / loaded from."""
        return './lda_{}_{}.model'.format(topic_count, data_num)

    def weibo_lda(self):
        """Build the dictionary and bag-of-words corpus from ``text``.

        Returns:
            A ``(dictionary, corpus)`` tuple.
        """
        dictionary = Dictionary(text)
        corpus = [dictionary.doc2bow(doc) for doc in text]
        return dictionary, corpus

    def choose_topic(self):
        """Train and save one LDA model per topic count in 1..MAX_TOPICS.

        Returns:
            ``(topic_num, data_num)`` where ``topic_num`` is the exclusive
            upper bound of the trained topic counts (``MAX_TOPICS + 1``) and
            ``data_num`` is the number of documents in ``text``. Both values
            are what the plotting methods expect as arguments.
        """
        dictionary, corpus = self.weibo_lda()
        data_num = len(text)
        for topic_count in range(1, self.MAX_TOPICS + 1):
            print('目前的topic个数:{}'.format(topic_count))
            print('目前的数据量:{}'.format(data_num))
            model = gensim.models.ldamodel.LdaModel(
                corpus, num_topics=topic_count, id2word=dictionary, passes=20
            )
            model.save(self._model_path(topic_count, data_num))
            print('------------------')
        # The upper bound is exclusive because callers iterate range(1, topic_num).
        return self.MAX_TOPICS + 1, data_num

    def perplexity_visible_model(self, topic_num, data_num):
        """Plot the ``log_perplexity`` value of each saved model against its topic count.

        Args:
            topic_num: Exclusive upper bound of the topic counts to evaluate;
                models for 1 .. topic_num - 1 are loaded.
            data_num: Document count embedded in the saved model file names.
        """
        _, corpus = self.weibo_lda()
        topic_counts = []
        scores = []
        for topic_count in range(1, topic_num):
            model_name = self._model_path(topic_count, data_num)
            try:
                lda = gensim.models.ldamodel.LdaModel.load(model_name)
                score = lda.log_perplexity(corpus)
            except Exception as e:
                # Best effort: report the problem and continue with the other models.
                print(e)
                continue
            print(score)
            topic_counts.append(topic_count)
            scores.append(score)
        # Label the line itself so legend() has an artist to describe.
        plt.plot(topic_counts, scores, label='perplexity_values')
        plt.xlabel('num topics')
        plt.ylabel('perplexity score')
        plt.legend(loc='best')
        plt.show()

    def visible_model(self, topic_num, data_num, processes=1):
        """Plot the c_v coherence of each saved model against its topic count.

        Args:
            topic_num: Exclusive upper bound of the topic counts to evaluate;
                models for 1 .. topic_num - 1 are loaded.
            data_num: Document count embedded in the saved model file names.
            processes: Worker-process count handed to ``CoherenceModel``.
                The default of 1 keeps the computation in the current process.
                With gensim's own default, c_v coherence starts worker
                processes; on platforms that spawn workers, each one
                re-imports the main script, so any top-level code that is not
                under ``if __name__ == '__main__':`` runs again in every
                worker. Pass a larger value only once all such code is guarded.
        """
        dictionary, _ = self.weibo_lda()
        topic_counts = []
        scores = []
        for topic_count in range(1, topic_num):
            model_name = self._model_path(topic_count, data_num)
            try:
                lda = gensim.models.ldamodel.LdaModel.load(model_name)
            except FileNotFoundError:
                print('没有这个模型:{}'.format(model_name))
                continue
            try:
                coherence_model = CoherenceModel(
                    model=lda,
                    texts=text,
                    dictionary=dictionary,
                    coherence='c_v',
                    processes=processes,
                )
                score = coherence_model.get_coherence()
            except Exception as e:
                # Show the real failure instead of disguising it as a missing model.
                print(e)
                continue
            topic_counts.append(topic_count)
            scores.append(score)
        plt.plot(topic_counts, scores, label='coherence_values')
        plt.xlabel('num topics')
        plt.ylabel('coherence score')
        plt.legend(loc='best')
        plt.show()
if __name__ == '__main__':
    # Script entry point: train the candidate models, then draw both score plots.
    lda_runner = yy()
    # choose_topic() returns the (topic_num, data_num) pair both plot methods take.
    topic_num, data_num = lda_runner.choose_topic()
    lda_runner.perplexity_visible_model(topic_num=topic_num, data_num=data_num)
    lda_runner.visible_model(topic_num=topic_num, data_num=data_num)
为什么运行完 perplexity_visible_model 之后,class 定义之前的代码会被反复执行很多次,而且 visible_model 没有算出结果?
(加上 visible_model(self, topic_num, data_num) 这个方法之后才会这样;删掉这个方法就一切正常。)
“循环跑class前面的代码好多次”是什么意思?是指这里出现了循环吗?