short_Cases1 = ["我们伟大祖国的每一寸领土都绝对不能也绝对不可能从中国分割出去"]
import jieba
import jieba.posseg as pseg
import thulac
from pyhanlp import *
import pkuseg
from ltp import LTP
import nltk
thu1 = thulac.thulac() # 默认模式
seg = pkuseg.pkuseg(postag=True) # 以默认配置加载模型
ltp = LTP() # 默认加载 Small 模型
for sentence in short_Cases1:
seg_list = pseg.cut(sentence)
print('jieba:')
for w in seg_list:
a = w.word + w.flag
print(' ', a, end=' ')
print('\t')
text = thu1.cut(sentence, text=True) # 进行一句话分词
print("thulac:", '\n', text)
print("HanLP:", '\n', HanLP.segment(sentence))
text = seg.cut(sentence)
print("pkuseg:", '\n', text)
seg, hidden = ltp.seg(short_Cases1)
pos = ltp.pos(hidden) # 词性标注
print("ltp:", '\n', seg)
print('', pos)
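The toolkits report their results in different shapes: jieba's pseg.cut yields pair objects with .word and .flag, thulac's cut(sentence, text=True) returns a single "word_tag word_tag ..." string, pkuseg (with postag=True) gives (word, tag) tuples, and LTP returns parallel word/tag lists. A minimal sketch, using only the calls already shown above, that puts the jieba and thulac results into the same (word, tag) list so they can be compared side by side:

import jieba.posseg as pseg
import thulac

sentence = "我们伟大祖国的每一寸领土都绝对不能也绝对不可能从中国分割出去"

# jieba: each yielded item already carries .word and .flag
jieba_pairs = [(w.word, w.flag) for w in pseg.cut(sentence)]

# thulac: split the "word_tag word_tag ..." string on the last underscore of each token
thu = thulac.thulac()
thulac_pairs = [tuple(token.rsplit('_', 1)) for token in thu.cut(sentence, text=True).split()]

print(jieba_pairs)
print(thulac_pairs)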
import jieba
import thulac
from pyhanlp import *
import pkuseg
from ltp import LTP
import time

# Gold-standard segmentation of the test sentence.
A = ['刘二狗', '真', '狗', ',', '刘大狗', '比', '他', '还', '狗']
sentence = "刘二狗真狗,刘大狗比他还狗"

t1 = time.time()
jiebajg = list(jieba.cut(sentence))
t2 = time.time()

thu1 = thulac.thulac(seg_only=True)
thulacjg = thu1.cut(sentence, text=True).split()
t3 = time.time()

pyhanlpjg = []
for term in HanLP.segment(sentence):
    pyhanlpjg.append(term.word)
t4 = time.time()

se = pkuseg.pkuseg()
pkusegjg = list(se.cut(sentence))
t5 = time.time()

ltp = LTP()
ltpjg, hidden = ltp.seg([sentence])
t6 = time.time()


def jqzhf1(x, y, n):
    """Word-level precision/recall/F1: x is the gold segmentation,
    y the predicted one, n the character length of the sentence."""
    a = 0   # index into the gold segmentation x
    b = 0   # index into the predicted segmentation y
    c = 0   # number of predicted words whose span exactly matches a gold word
    za = 0  # character offset reached in x
    zb = 0  # character offset reached in y
    while True:
        if za == zb:
            # Both segmentations start a word at the same offset; if the two
            # words also have the same length, the spans match exactly.
            if len(x[a]) == len(y[b]):
                c += 1
            zb += len(y[b])
            za += len(x[a])
            a += 1
            b += 1
        if za < zb:
            za += len(x[a])
            a += 1
        if za > zb:
            zb += len(y[b])
            b += 1
        if zb == n or za == n:  # stop once either segmentation has consumed the whole sentence
            break
    P = c / len(y)
    R = c / len(x)
    F = 2 * P * R / (P + R)
    return P, R, F


precision, recall, F1 = jqzhf1(A, jiebajg, len(sentence))
print("jieba:\n精准率{}:\n召回率:{}\nF1:{}".format(precision, recall, F1))
print("所用时间", t2 - t1)
print(jiebajg)

precision, recall, F1 = jqzhf1(A, thulacjg, len(sentence))
print("thulac:\n精准率{}:\n召回率:{}\nF1:{}".format(precision, recall, F1))
print("所用时间", t3 - t2)
print(thulacjg)

precision, recall, F1 = jqzhf1(A, pyhanlpjg, len(sentence))
print("pyhanlp:\n精准率{}:\n召回率:{}\nF1:{}".format(precision, recall, F1))
print("所用时间", t4 - t3)
print(pyhanlpjg)

precision, recall, F1 = jqzhf1(A, pkusegjg, len(sentence))
print("pkuseg:\n精准率{}:\n召回率:{}\nF1:{}".format(precision, recall, F1))
print("所用时间", t5 - t4)
print(pkusegjg)

precision, recall, F1 = jqzhf1(A, ltpjg[0], len(sentence))
print("ltp:\n精准率{}:\n召回率:{}\nF1:{}".format(precision, recall, F1))
print("所用时间", t6 - t5)
print(ltpjg[0])
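As a hand-check of what jqzhf1 counts (a sketch, not part of the benchmark above): take the gold list A and a hypothetical prediction that merges "真" and "狗" into "真狗". Seven of the eight predicted words then share both boundaries with a gold word, so P = 7/8, R = 7/9, and F1 = 2PR/(P+R) ≈ 0.8235.

# Hand-check of jqzhf1 on a hypothetical prediction (not real toolkit output)
gold = ['刘二狗', '真', '狗', ',', '刘大狗', '比', '他', '还', '狗']
pred = ['刘二狗', '真狗', ',', '刘大狗', '比', '他', '还', '狗']  # 真 and 狗 merged

p, r, f = jqzhf1(gold, pred, len("刘二狗真狗,刘大狗比他还狗"))
# 7 predicted words line up exactly with gold words, so:
#   p == 7 / 8 == 0.875
#   r == 7 / 9 ≈ 0.7778
#   f == 2 * p * r / (p + r) ≈ 0.8235
print(p, r, f)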
Could anyone run these two pieces of code and send me a screenshot of the output?
F:\Python37>python f1.py
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\showbo\AppData\Local\Temp\jieba.cache
Loading model cost 1.066 seconds.
Prefix dict has been built successfully.
Model loaded succeed
jieba:
精准率0.875:
召回率:0.7777777777777778
F1:0.823529411764706
所用时间 1.067995309829712
['刘二狗', '真狗', ',', '刘大狗', '比', '他', '还', '狗']
thulac:
精准率1.0:
召回率:1.0
F1:1.0
所用时间 6.114999532699585
['刘二狗', '真', '狗', ',', '刘大狗', '比', '他', '还', '狗']
pyhanlp:
精准率1.0:
召回率:1.0
F1:1.0
所用时间 0.6090002059936523
['刘二狗', '真', '狗', ',', '刘大狗', '比', '他', '还', '狗']
pkuseg:
精准率0.7272727272727273:
召回率:0.8888888888888888
F1:0.7999999999999999
所用时间 4.620006561279297
['刘', '二', '狗', '真', '狗', ',', '刘大狗', '比', '他', '还', '狗']
ltp:
精准率0.875:
召回率:0.7777777777777778
F1:0.823529411764706
所用时间 2.550997495651245
['刘二狗', '真', '狗', ',', '刘大狗', '比', '他', '还狗']
F:\Python37>
For reference, here are the segmentation results using jieba and thulac:
short_Cases1 = ["我们伟大祖国的每一寸领土都绝对不能也绝对不可能从中国分割出去"]
import jieba
import jieba.posseg as pseg
import thulac
import nltk
thu1 = thulac.thulac() # 默认模式
for sentence in short_Cases1:
seg_list = pseg.cut(sentence)
print('jieba:')
for w in seg_list:
a = w.word + w.flag
print(' ', a, end=' ')
print()
text = thu1.cut(sentence, text=True) # 进行一句话分词
print("thulac:", '\n', text)
jieba:
我们r 伟大祖国n 的uj 每一寸l 领土n 都d 绝对d 不能v 也d 绝对d 不d 可能v 从p 中国ns 分割v 出去v
thulac:
我们_r 伟大_a 祖国_n 的_u 每_r 一_m 寸_q 领土_n 都_d 绝对_d 不_d 能_v 也_d 绝对_d 不_d 可能_v 从_p 中国_ns 分割_v 出去_v