想问一下：编写 LDA 代码时需要设置 alpha（α）和 beta（β）参数吗？如果需要，应该怎么写代码；如果不需要，它们的默认值是多少呢？
import math
def perplexity(ldamodel, testset, dictionary, size_dictionary, num_topics):
    """Compute the per-token perplexity of an LDA model on a bag-of-words test set.

    The probability of a word in a document is the topic mixture
    ``p(w) = sum_z p(z | d) * p(w | z)``. The document log-likelihood is the
    count-weighted sum ``sum_w n_w * log p(w)`` and the perplexity is
    ``exp(-sum_d log p(d) / sum_d N_d)``, where ``N_d`` is the number of
    tokens in document ``d``.

    Args:
        ldamodel: Object providing ``show_topic(topic_id, topn)`` (returning
            ``(word, probability)`` pairs) and
            ``get_document_topics(doc, minimum_probability=0)`` (returning
            ``(topic_id, probability)`` pairs).
        testset: Iterable of documents, each an iterable of
            ``(word_id, count)`` pairs.
        dictionary: Mapping from ``word_id`` to the word as it appears in the
            ``show_topic`` output.
        size_dictionary: Number of words requested from every topic; it has
            to be large enough to cover every word of the test set.
        num_topics: Number of topics; topic ids are ``0 .. num_topics - 1``.

    Returns:
        The perplexity of the model on ``testset``.

    Raises:
        KeyError: If a test word is missing from the words returned by
            ``show_topic`` for some topic.
        ValueError: If the mixture probability of a word is zero
            (``math.log`` domain error).
        ZeroDivisionError: If ``testset`` contains no tokens.
    """
    print('the info of this ldamodel: \n')
    print('num of topics: %s' % num_topics)

    # One {word: p(w | z)} lookup table per topic.
    topic_word_list = [
        dict(ldamodel.show_topic(topic_id, size_dictionary))
        for topic_id in range(num_topics)
    ]

    prob_doc_sum = 0.0  # sum over documents of log p(d)
    testset_word_num = 0  # total number of tokens in the test set
    for doc in testset:
        # Index the topic weights by topic id instead of list position, so a
        # model that leaves negligible topics out of its sparse output cannot
        # misalign the weights; absent topics contribute zero.
        doc_topics = dict(
            ldamodel.get_document_topics(doc, minimum_probability=0)
        )
        for word_id, num in dict(doc).items():
            word = dictionary[word_id]
            # p(w) = sum_z p(z | d) * p(w | z)
            prob_word = sum(
                (
                    doc_topics.get(topic_id, 0.0)
                    * topic_word_list[topic_id][word]
                    for topic_id in range(num_topics)
                ),
                0.0,
            )
            # Every occurrence of the word counts, so that the numerator
            # matches the token count used as the denominator below.
            prob_doc_sum += num * math.log(prob_word)
            testset_word_num += num

    # perplexity = exp(-sum_d log p(d) / sum_d N_d)
    prep = math.exp(-prob_doc_sum / testset_word_num)
    print("模型困惑度的值为 : %s" % prep)
    return prep