jupyter notebook中可视化出现 Invalid argument

输入的可视化代码如下:

# Render pyLDAvis output inline in the Jupyter notebook.
pyLDAvis.enable_notebook()
# n_jobs=1 keeps the term-relevance computation single-process.
# Without it, joblib spawns a loky worker pool whose resource tracker
# raises "OSError: [Errno 22] Invalid argument" on some Windows /
# notebook setups (exactly the traceback reported below).
vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word, n_jobs=1)
vis

报错如下:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-12-3af4665cd12f> in <module>
      1 # Visualize the topics
      2 pyLDAvis.enable_notebook()
----> 3 vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)
      4 vis

D:\lib\site-packages\pyLDAvis\gensim.py in prepare(topic_model, corpus, dictionary, doc_topic_dist, **kwargs)
    117     """
    118     opts = fp.merge(_extract_data(topic_model, corpus, dictionary, doc_topic_dist), kwargs)
--> 119     return vis_prepare(**opts)

D:\lib\site-packages\pyLDAvis\_prepare.py in prepare(topic_term_dists, doc_topic_dists, doc_lengths, vocab, term_frequency, R, lambda_step, mds, n_jobs, plot_opts, sort_topics)
    396    term_frequency = np.sum(term_topic_freq, axis=0)
    397 
--> 398    topic_info         = _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs)
    399    token_table        = _token_table(topic_info, term_topic_freq, vocab, term_frequency)
    400    topic_coordinates = _topic_coordinates(mds, topic_term_dists, topic_proportion)

D:\lib\site-packages\pyLDAvis\_prepare.py in _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs)
    253 
    254    top_terms = pd.concat(Parallel(n_jobs=n_jobs)(delayed(_find_relevance_chunks)(log_ttd, log_lift, R, ls) \
--> 255                                                  for ls in _job_chunks(lambda_seq, n_jobs)))
    256    topic_dfs = map(topic_top_term_df, enumerate(top_terms.T.iterrows(), 1))
    257    return pd.concat([default_term_info] + list(topic_dfs))

D:\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1027             # remaining jobs.
   1028             self._iterating = False
-> 1029             if self.dispatch_one_batch(iterator):
   1030                 self._iterating = self._original_iterator is not None
   1031 

D:\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    845                 return False
    846             else:
--> 847                 self._dispatch(tasks)
    848                 return True
    849 

D:\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    763         with self._lock:
    764             job_idx = len(self._jobs)
--> 765             job = self._backend.apply_async(batch, callback=cb)
    766             # A job can complete so quickly than its callback is
    767             # called before we get here, causing self._jobs to

D:\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    529     def apply_async(self, func, callback=None):
    530         """Schedule a func to be run"""
--> 531         future = self._workers.submit(SafeFunction(func))
    532         future.get = functools.partial(self.wrap_future_result, future)
    533         if callback is not None:

D:\lib\site-packages\joblib\externals\loky\reusable_executor.py in submit(self, fn, *args, **kwargs)
    176         with self._submit_resize_lock:
    177             return super(_ReusablePoolExecutor, self).submit(
--> 178                 fn, *args, **kwargs)
    179 
    180     def _resize(self, max_workers):

D:\lib\site-packages\joblib\externals\loky\process_executor.py in submit(self, fn, *args, **kwargs)
   1120             self._executor_manager_thread_wakeup.wakeup()
   1121 
-> 1122             self._ensure_executor_running()
   1123             return f
   1124     submit.__doc__ = _base.Executor.submit.__doc__

D:\lib\site-packages\joblib\externals\loky\process_executor.py in _ensure_executor_running(self)
   1094         with self._processes_management_lock:
   1095             if len(self._processes) != self._max_workers:
-> 1096                 self._adjust_process_count()
   1097             self._start_executor_manager_thread()
   1098 

D:\lib\site-packages\joblib\externals\loky\process_executor.py in _adjust_process_count(self)
   1085                 p = self._context.Process(target=_process_worker, args=args)
   1086             p._worker_exit_lock = worker_exit_lock
-> 1087             p.start()
   1088             self._processes[p.pid] = p
   1089         mp.util.debug('Adjust process count : {}'.format(self._processes))

d:\lib\multiprocessing\process.py in start(self)
    110                'daemonic processes are not allowed to have children'
    111         _cleanup()
--> 112         self._popen = self._Popen(self)
    113         self._sentinel = self._popen.sentinel
    114         # Avoid a refcycle if the target function holds an indirect

D:\lib\site-packages\joblib\externals\loky\backend\process.py in _Popen(process_obj)
     37         else:
     38             from .popen_loky_posix import Popen
---> 39         return Popen(process_obj)
     40 
     41     if sys.version_info < (3, 3):

D:\lib\site-packages\joblib\externals\loky\backend\popen_loky_win32.py in __init__(self, process_obj)
     53     def __init__(self, process_obj):
     54         prep_data = spawn.get_preparation_data(
---> 55             process_obj._name, getattr(process_obj, "init_main_module", True))
     56 
     57         # read end of pipe will be "stolen" by the child process

D:\lib\site-packages\joblib\externals\loky\backend\spawn.py in get_preparation_data(name, init_main_module)
     84     # Tell the child how to communicate with the resource_tracker
     85     from .resource_tracker import _resource_tracker
---> 86     _resource_tracker.ensure_running()
     87     d["tracker_args"] = {"pid": _resource_tracker._pid}
     88     if sys.platform == "win32":

D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in ensure_running(self)
    100             if self._fd is not None:
    101                 # resource tracker was launched before, is it still running?
--> 102                 if self._check_alive():
    103                     # => still alive
    104                     return

D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _check_alive(self)
    180         '''Check for the existence of the resource tracker process.'''
    181         try:
--> 182             self._send('PROBE', '', '')
    183         except BrokenPipeError:
    184             return False

D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
    207             # bytes are atomic, and that PIPE_BUF >= 512
    208             raise ValueError('name too long')
--> 209         nbytes = os.write(self._fd, msg)
    210         assert nbytes == len(msg)
    211 

OSError: [Errno 22] Invalid argument

 

 

请检查一下:是不是传给 prepare 的这几个参数(lda_model、corpus、id2word)中,有的数据类型不符合要求呢?

 出现新的错误提示如下:

第一个错误,

PicklingError                             Traceback (most recent call last)
D:\lib\site-packages\joblib\parallel.py in retrieve(self)
    920                 if getattr(self._backend, 'supports_timeout', False):
--> 921                     self._output.extend(job.get(timeout=self.timeout))
    922                 else:

D:\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
    541         try:
--> 542             return future.result(timeout=timeout)
    543         except CfTimeoutError as e:

d:\lib\concurrent\futures\_base.py in result(self, timeout)
    424             elif self._state == FINISHED:
--> 425                 return self.__get_result()
    426 

d:\lib\concurrent\futures\_base.py in __get_result(self)
    383         if self._exception:
--> 384             raise self._exception
    385         else:

PicklingError: Could not pickle the task to send it to the workers.

第二个错误, 

During handling of the above exception, another exception occurred:

OSError                                   Traceback (most recent call last)
D:\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1041             with self._backend.retrieval_context():
-> 1042                 self.retrieve()
   1043             # Make sure that we get a last message telling us we are done

D:\lib\site-packages\joblib\parallel.py in retrieve(self)
    942                     ensure_ready = self._managed_backend
--> 943                     backend.abort_everything(ensure_ready=ensure_ready)
    944                 raise

D:\lib\site-packages\joblib\_parallel_backends.py in abort_everything(self, ensure_ready)
    560         """
--> 561         self._workers.terminate(kill_workers=True)
    562         self._workers = None

D:\lib\site-packages\joblib\executor.py in terminate(self, kill_workers)
     85             with self._submit_resize_lock:
---> 86                 self._temp_folder_manager._unregister_temporary_resources()
     87                 self._temp_folder_manager._try_delete_folder(

D:\lib\site-packages\joblib\_memmapping_reducer.py in _unregister_temporary_resources(self, context_id)
    634             for context_id in self._cached_temp_folders:
--> 635                 self._unregister_temporary_resources(context_id)
    636         else:

D:\lib\site-packages\joblib\_memmapping_reducer.py in _unregister_temporary_resources(self, context_id)
    640                     resource_tracker.unregister(
--> 641                         os.path.join(temp_folder, filename), "file"
    642                     )

D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in unregister(self, name, rtype)
    194         '''Unregister a named resource with resource tracker.'''
--> 195         self.ensure_running()
    196         self._send('UNREGISTER', name, rtype)

D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in ensure_running(self)
    101                 # resource tracker was launched before, is it still running?
--> 102                 if self._check_alive():
    103                     # => still alive

D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _check_alive(self)
    181         try:
--> 182             self._send('PROBE', '', '')
    183         except BrokenPipeError:

D:\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
    208             raise ValueError('name too long')
--> 209         nbytes = os.write(self._fd, msg)
    210         assert nbytes == len(msg)

OSError: [Errno 22] Invalid argument

 

输入的代码如下:

# Build the token -> id dictionary from the lemmatized documents and persist it.
id2word = corpora.Dictionary(data_lemmatized)
id2word.save('idw.dict')
# Create Corpus: bag-of-words vector for each document.
texts = data_lemmatized
corpus = [id2word.doc2bow(text) for text in texts]
# Persist the corpus in Matrix Market format so it can be reloaded later.
corpora.MmCorpus.serialize('corpusbow.mm', corpus)
# Train the LDA model: 45 topics, fixed random_state for reproducibility,
# 10 full passes over the corpus in chunks of 100 documents.
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                           id2word=id2word,
                                           num_topics=45, 
                                           random_state=100,
                                           update_every=1,
                                           chunksize=100,
                                           passes=10,
                                           alpha='auto',
                                           per_word_topics=True)
# Persist the trained model next to the dictionary and corpus.
lda_model.save('idw.model')

 

def lda_vis():
    """Load the saved dictionary, corpus and LDA model, then serve the
    pyLDAvis visualization page (without opening a browser)."""
    id2word = gensim.corpora.Dictionary.load('idw.dict')
    corpus = gensim.corpora.MmCorpus('corpusbow.mm')
    lda_model = models.ldamodel.LdaModel.load('idw.model')
    # n_jobs=1 disables joblib's loky process pool, which on some
    # Windows installations fails with "PicklingError: Could not pickle
    # the task" followed by "OSError: [Errno 22] Invalid argument"
    # from the resource tracker (see the tracebacks above).
    vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word,
                                  mds='mmds', n_jobs=1)
    pyLDAvis.show(vis, open_browser=False)


if __name__ == '__main__':
    lda_vis()

 

 

你好,我是有问必答小助手,非常抱歉,本次您提出的有问必答问题,技术专家团超时未为您做出解答

本次提问扣除的有问必答次数,已经为您补发到账户,我们后续会持续优化,扩大我们的服务范围,为您带来更好的服务。