输入的可视化代码如下:
# Visualize the topics in the notebook.
# NOTE(review): the traceback below is the known Windows joblib/loky failure
# ("OSError: [Errno 22] Invalid argument" raised from resource_tracker._send
# while spawning worker processes). pyLDAvis.gensim.prepare forwards extra
# kwargs to pyLDAvis._prepare.prepare, which accepts n_jobs; n_jobs=1 keeps
# the term-relevance computation in-process and sidesteps the crash.
# NOTE(review): in pyLDAvis >= 3.0 the module was renamed — there you must
# use `import pyLDAvis.gensim_models` instead of `pyLDAvis.gensim`.
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word, n_jobs=1)
vis
报错如下:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-12-3af4665cd12f> in <module>
1 # Visualize the topics
2 pyLDAvis.enable_notebook()
----> 3 vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)
4 vis
D:\lib\site-packages\pyLDAvis\gensim.py in prepare(topic_model, corpus, dictionary, doc_topic_dist, **kwargs)
117 """
118 opts = fp.merge(_extract_data(topic_model, corpus, dictionary, doc_topic_dist), kwargs)
--> 119 return vis_prepare(**opts)
D:\lib\site-packages\pyLDAvis\_prepare.py in prepare(topic_term_dists, doc_topic_dists, doc_lengths, vocab, term_frequency, R, lambda_step, mds, n_jobs, plot_opts, sort_topics)
396 term_frequency = np.sum(term_topic_freq, axis=0)
397
--> 398 topic_info = _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs)
399 token_table = _token_table(topic_info, term_topic_freq, vocab, term_frequency)
400 topic_coordinates = _topic_coordinates(mds, topic_term_dists, topic_proportion)
D:\lib\site-packages\pyLDAvis\_prepare.py in _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs)
253
254 top_terms = pd.concat(Parallel(n_jobs=n_jobs)(delayed(_find_relevance_chunks)(log_ttd, log_lift, R, ls) \
--> 255 for ls in _job_chunks(lambda_seq, n_jobs)))
256 topic_dfs = map(topic_top_term_df, enumerate(top_terms.T.iterrows(), 1))
257 return pd.concat([default_term_info] + list(topic_dfs))
D:\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1027 # remaining jobs.
1028 self._iterating = False
-> 1029 if self.dispatch_one_batch(iterator):
1030 self._iterating = self._original_iterator is not None
1031
D:\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
845 return False
846 else:
--> 847 self._dispatch(tasks)
848 return True
849
D:\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
763 with self._lock:
764 job_idx = len(self._jobs)
--> 765 job = self._backend.apply_async(batch, callback=cb)
766 # A job can complete so quickly than its callback is
767 # called before we get here, causing self._jobs to
D:\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
529 def apply_async(self, func, callback=None):
530 """Schedule a func to be run"""
--> 531 future = self._workers.submit(SafeFunction(func))
532 future.get = functools.partial(self.wrap_future_result, future)
533 if callback is not None:
D:\lib\site-packages\joblib\externals\loky\reusable_executor.py in submit(self, fn, *args, **kwargs)
176 with self._submit_resize_lock:
177 return super(_ReusablePoolExecutor, self).submit(
--> 178 fn, *args, **kwargs)
179
180 def _resize(self, max_workers):
D:\lib\site-packages\joblib\externals\loky\process_executor.py in submit(self, fn, *args, **kwargs)
1120 self._executor_manager_thread_wakeup.wakeup()
1121
-> 1122 self._ensure_executor_running()
1123 return f
1124 submit.__doc__ = _base.Executor.submit.__doc__
D:\lib\site-packages\joblib\externals\loky\process_executor.py in _ensure_executor_running(self)
1094 with self._processes_management_lock:
1095 if len(self._processes) != self._max_workers:
-> 1096 self._adjust_process_count()
1097 self._start_executor_manager_thread()
1098
D:\lib\site-packages\joblib\externals\loky\process_executor.py in _adjust_process_count(self)
1085 p = self._context.Process(target=_process_worker, args=args)
1086 p._worker_exit_lock = worker_exit_lock
-> 1087 p.start()
1088 self._processes[p.pid] = p
1089 mp.util.debug('Adjust process count : {}'.format(self._processes))
d:\lib\multiprocessing\process.py in start(self)
110 'daemonic processes are not allowed to have children'
111 _cleanup()
--> 112 self._popen = self._Popen(self)
113 self._sentinel = self._popen.sentinel
114 # Avoid a refcycle if the target function holds an indirect
D:\lib\site-packages\joblib\externals\loky\backend\process.py in _Popen(process_obj)
37 else:
38 from .popen_loky_posix import Popen
---> 39 return Popen(process_obj)
40
41 if sys.version_info < (3, 3):
D:\lib\site-packages\joblib\externals\loky\backend\popen_loky_win32.py in __init__(self, process_obj)
53 def __init__(self, process_obj):
54 prep_data = spawn.get_preparation_data(
---> 55 process_obj._name, getattr(process_obj, "init_main_module", True))
56
57 # read end of pipe will be "stolen" by the child process
D:\lib\site-packages\joblib\externals\loky\backend\spawn.py in get_preparation_data(name, init_main_module)
84 # Tell the child how to communicate with the resource_tracker
85 from .resource_tracker import _resource_tracker
---> 86 _resource_tracker.ensure_running()
87 d["tracker_args"] = {"pid": _resource_tracker._pid}
88 if sys.platform == "win32":
D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in ensure_running(self)
100 if self._fd is not None:
101 # resource tracker was launched before, is it still running?
--> 102 if self._check_alive():
103 # => still alive
104 return
D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _check_alive(self)
180 '''Check for the existence of the resource tracker process.'''
181 try:
--> 182 self._send('PROBE', '', '')
183 except BrokenPipeError:
184 return False
D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
207 # bytes are atomic, and that PIPE_BUF >= 512
208 raise ValueError('name too long')
--> 209 nbytes = os.write(self._fd, msg)
210 assert nbytes == len(msg)
211
OSError: [Errno 22] Invalid argument
请检查一下传给 prepare 的这几个参数（lda_model、corpus、id2word），是否有参数的数据类型不符合要求。
出现新的错误提示如下:
第一个错误,
PicklingError Traceback (most recent call last)
D:\lib\site-packages\joblib\parallel.py in retrieve(self)
920 if getattr(self._backend, 'supports_timeout', False):
--> 921 self._output.extend(job.get(timeout=self.timeout))
922 else:
D:\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
d:\lib\concurrent\futures\_base.py in result(self, timeout)
424 elif self._state == FINISHED:
--> 425 return self.__get_result()
426
d:\lib\concurrent\futures\_base.py in __get_result(self)
383 if self._exception:
--> 384 raise self._exception
385 else:
PicklingError: Could not pickle the task to send it to the workers.
第二个错误,
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
D:\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1041 with self._backend.retrieval_context():
-> 1042 self.retrieve()
1043 # Make sure that we get a last message telling us we are done
D:\lib\site-packages\joblib\parallel.py in retrieve(self)
942 ensure_ready = self._managed_backend
--> 943 backend.abort_everything(ensure_ready=ensure_ready)
944 raise
D:\lib\site-packages\joblib\_parallel_backends.py in abort_everything(self, ensure_ready)
560 """
--> 561 self._workers.terminate(kill_workers=True)
562 self._workers = None
D:\lib\site-packages\joblib\executor.py in terminate(self, kill_workers)
85 with self._submit_resize_lock:
---> 86 self._temp_folder_manager._unregister_temporary_resources()
87 self._temp_folder_manager._try_delete_folder(
D:\lib\site-packages\joblib\_memmapping_reducer.py in _unregister_temporary_resources(self, context_id)
634 for context_id in self._cached_temp_folders:
--> 635 self._unregister_temporary_resources(context_id)
636 else:
D:\lib\site-packages\joblib\_memmapping_reducer.py in _unregister_temporary_resources(self, context_id)
640 resource_tracker.unregister(
--> 641 os.path.join(temp_folder, filename), "file"
642 )
D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in unregister(self, name, rtype)
194 '''Unregister a named resource with resource tracker.'''
--> 195 self.ensure_running()
196 self._send('UNREGISTER', name, rtype)
D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in ensure_running(self)
101 # resource tracker was launched before, is it still running?
--> 102 if self._check_alive():
103 # => still alive
D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _check_alive(self)
181 try:
--> 182 self._send('PROBE', '', '')
183 except BrokenPipeError:
D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
208 raise ValueError('name too long')
--> 209 nbytes = os.write(self._fd, msg)
210 assert nbytes == len(msg)
OSError: [Errno 22] Invalid argument
输入的代码如下:
# Build the dictionary and bag-of-words corpus from the lemmatized documents,
# train an LDA model, and persist all three artifacts so the visualization
# step can reload them later.
id2word = corpora.Dictionary(data_lemmatized)
id2word.save('idw.dict')

# Create Corpus
texts = data_lemmatized
corpus = [id2word.doc2bow(text) for text in texts]
corpora.MmCorpus.serialize('corpusbow.mm', corpus)

lda_model = gensim.models.ldamodel.LdaModel(
    corpus=corpus,
    id2word=id2word,
    num_topics=45,
    random_state=100,       # fixed seed for reproducible topics
    update_every=1,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
)
lda_model.save('idw.model')


def lda_vis():
    """Reload the saved dictionary/corpus/model and render the pyLDAvis view.

    Writes no return value; serves the visualization via ``pyLDAvis.show``.
    """
    id2word = gensim.corpora.Dictionary.load('idw.dict')
    corpus = gensim.corpora.MmCorpus('corpusbow.mm')
    # Was `models.ldamodel...` — `models` is never imported here; use the
    # fully-qualified `gensim.models` path consistently with the training code.
    lda_model = gensim.models.ldamodel.LdaModel.load('idw.model')
    # n_jobs=1 avoids the Windows joblib/loky crash
    # ("OSError: [Errno 22] Invalid argument") seen in the traceback above
    # by keeping the relevance computation in the current process.
    vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word,
                                  mds='mmds', n_jobs=1)
    pyLDAvis.show(vis, open_browser=False)


if __name__ == '__main__':
    lda_vis()
你好,我是有问必答小助手,非常抱歉,本次您提出的有问必答问题,技术专家团超时未为您做出解答
本次提问扣除的有问必答次数，已经为您补发到账户，我们后续会持续优化，扩大我们的服务范围，为您带来更好的服务。