def getText(path='Ci.txt'):
    """Read a UTF-8 text file and return its content with separators removed.

    Newlines, spaces, backslashes, slashes and the punctuation marks listed
    in ``unwanted`` are deleted; every other character is kept in order.

    Args:
        path: File to read. Defaults to ``'Ci.txt'``, the file this script
            has always read.

    Returns:
        The file content as a single string without the unwanted characters.
    """
    # The backslash is spelled as an explicit escape ("\\"); a bare "\ " is
    # an invalid escape sequence that newer Python versions warn about.
    unwanted = '\n \\ / ,。、:!?”“#¥%'
    # The context manager closes the file even if reading raises.
    with open(path, 'r', encoding="utf-8") as source:
        txt = source.read()
    # A single translate pass deletes every unwanted character at once.
    return txt.translate(str.maketrans('', '', unwanted))
from collections import Counter

# Count how often each individual character occurs in the cleaned text.
# Iterating over a str yields one-character strings, so no length check is
# required.
test = getText()
counts = Counter(test)
# most_common() orders by descending count and keeps first-seen order for
# ties, the same result as a stable sort with reverse=True on the pairs.
items = counts.most_common()
# Write as UTF-8 explicitly so the Chinese characters are stored correctly
# regardless of the platform's default encoding.
with open('2.txt', 'w', encoding="utf-8") as a:
    a.write(str(items))
文件就不上传了。问题是:上面的代码只能统计单个汉字的出现次数,无法统计由两个字符(汉字)组成的词;但我又不想使用 jieba 包,请问该如何处理?求解决方案。
可以像下面这样,每次取相邻的两个字符来统计:
def getText(path='1.txt'):
    """Read a UTF-8 text file and return its content with separators removed.

    Newlines, spaces, backslashes, slashes and the punctuation marks listed
    in ``unwanted`` are deleted; every other character is kept in order.

    Args:
        path: File to read. Defaults to ``'1.txt'``, the file this script
            has always read.

    Returns:
        The file content as a single string without the unwanted characters.
    """
    # The backslash is spelled as an explicit escape ("\\"); a bare "\ " is
    # an invalid escape sequence that newer Python versions warn about.
    unwanted = '\n \\ / ,。、:!?”“#¥%'
    # The context manager closes the file even if reading raises.
    with open(path, 'r', encoding="utf-8") as source:
        txt = source.read()
    # A single translate pass deletes every unwanted character at once.
    return txt.translate(str.maketrans('', '', unwanted))
from collections import Counter

test = getText()
print(test)
# Count every pair of adjacent characters (overlapping two-character
# windows). zip pairs each character with its successor, so a text shorter
# than two characters simply yields no pairs.
# NOTE(review): getText() has already removed punctuation and newlines, so a
# pair can straddle what used to be a sentence boundary — confirm that this
# is acceptable for the intended statistics.
counts = Counter(first + second for first, second in zip(test, test[1:]))
items = list(counts.items())
print(items)  # pairs in first-seen order, before sorting
# Stable sort by descending count; ties keep first-seen order.
items.sort(key=lambda x: x[1], reverse=True)
# Write as UTF-8 explicitly so the Chinese characters are stored correctly
# regardless of the platform's default encoding.
with open('2.txt', 'w', encoding="utf-8") as a:
    a.write(str(items))