目前的情况是这样的:有一个 Excel 文件存放文档,另有一个 txt 文件存放关键词。
然后将 txt 文件与 Excel 进行匹配,统计 txt 文件里的每个关键词在文档中出现的次数。
txt文件的关键词是以
成都
北京
天津
上海
香港
这样的形式存放的
我想
def frequency(list_word, top_n=100):
    """Count tokens and report the most frequent ones.

    Tokens whose length is 1 or less, and the CR LF line-ending token, are
    ignored. Every reported token is also printed as "<token> <count>".

    Args:
        list_word: Iterable of string tokens to count.
        top_n: Maximum number of distinct tokens to report (default 100).

    Returns:
        A ``(key, word)`` tuple of two parallel lists ordered by descending
        count. Note the naming: ``key`` holds the counts and ``word`` holds
        the tokens.
    """
    # Local import keeps the function usable even when the surrounding
    # file does not import Counter at module level.
    from collections import Counter

    c = Counter(x for x in list_word if len(x) > 1 and x != '\r\n')
    ranked = c.most_common(top_n)
    for k, v in ranked:
        print('%s %d' % (k, v))
    word = [k for k, _ in ranked]
    key = [v for _, v in ranked]
    return key, word
def high_frequency():
    """Collect one list entry per keyword occurrence found in ``result``.

    Reads the module-level names ``result`` (the items to search) and
    ``data_keyword`` (whose first element is the keyword sequence).

    Returns:
        A list in which each keyword is repeated once for every time it
        occurs in an item of ``result``.
    """
    list_keyword = []
    for mes in result:
        # Count inside the current item. Counting on ``result`` itself would
        # only find elements that equal the keyword exactly, so substring
        # matches were never detected.
        # NOTE(review): assumes each item of ``result`` is a str - confirm.
        content = mes
        for keyword in data_keyword[0]:
            list_keyword.extend([keyword] * content.count(keyword))
    return list_keyword
想通过这样的方式来实现,但是无法匹配,求更快的方法。
用字典记录关键字出现的次数
# Read the keywords
def get_key_list(txt_file):
    """Load keywords from a text file, one keyword per line.

    Args:
        txt_file: Path of a UTF-8 text file (with or without a BOM).

    Returns:
        The non-empty lines of the file with their trailing newline removed,
        in file order. An empty list is returned when the file cannot be
        opened or decoded.
    """
    try:
        # 'utf-8-sig' also reads plain UTF-8, but drops a leading BOM that
        # would otherwise become part of the first keyword.
        with open(txt_file, 'r', encoding='utf-8-sig') as file_data:
            lines = [line.strip('\n') for line in file_data]
    except (OSError, UnicodeDecodeError):
        # Best effort: an unreadable keyword file yields no keywords.
        return []
    # Drop blank lines: an empty keyword matches at every position of a
    # string and would inflate the counts.
    return [line for line in lines if line]
# Scan the Excel sheet with the keyword list and write the summary workbook
def xls_select_proc(xls_sr, xls_tg, key_list):
    """Count keyword occurrences in the first column of a workbook.

    Only the first worksheet of ``xls_sr`` is read, and only the first cell
    of each row. The totals are written to a new workbook with a sheet titled
    "关键词统计": a header row followed by one row per keyword that occurred at
    least once.

    Args:
        xls_sr: Path of the source workbook.
        xls_tg: Path the summary workbook is saved to.
        key_list: Sequence of two-item entries; the keyword is taken from
            index 1 of each entry (index 0 is not used here).
    """
    from openpyxl import Workbook
    from openpyxl import load_workbook

    # Unique, non-empty keywords in first-seen order. A duplicate would be
    # counted twice, and an empty keyword matches at every string position.
    keywords = list(dict.fromkeys(k[1] for k in key_list if k[1]))

    wb = load_workbook(xls_sr)
    wb_tg = Workbook()
    try:
        # Only the first worksheet is processed; the text is in column one.
        ws = wb[wb.sheetnames[0]]

        # Keyword -> total occurrences, in order of first match.
        key_dict = {}
        for row in ws.rows:
            value = row[0].value if row else None
            if value is None:
                # Empty cell: skip it instead of searching the text "None".
                continue
            text = str(value)
            for kw in keywords:
                key_sum = text.count(kw)
                if key_sum > 0:
                    key_dict[kw] = key_dict.get(kw, 0) + key_sum

        sheet_tg = wb_tg.active
        sheet_tg.title = "关键词统计"
        # Header row, then one row per matched keyword.
        sheet_tg.append(['关键词', '出现次数'])
        for kw, total in key_dict.items():
            sheet_tg.append([kw, total])

        # Save the summary file.
        wb_tg.save(xls_tg)
    finally:
        wb.close()
        wb_tg.close()
def xls_select3(xls_sr, xls_tg, key_file_data):
    """Flatten the per-file keyword data and run the keyword count.

    Args:
        xls_sr: Source workbook path, passed through to ``xls_select_proc``.
        xls_tg: Output name; ".xlsx" is appended to form the target path.
        key_file_data: Sequence of entries whose index 0 is a keyword file
            name and whose index 1 is the keywords read from that file.
    """
    # Pair every keyword with its file name minus the last four characters
    # (presumably the ".txt" extension).
    key_list = [
        [entry[0][:-4], keyword]
        for entry in key_file_data
        for keyword in entry[1]
    ]
    xls_select_proc(xls_sr, f"{xls_tg}.xlsx", key_list)
# Keyword files to load; each one is paired below with the keywords read from it.
key_file = ['关键词.txt']
key_file_data = []
print('开始处理')
try:
    key_file_data.extend([path, get_key_list(path)] for path in key_file)
    xls_select3('文本.xlsx', '结果', key_file_data)
except Exception as e:
    # Top-level boundary: report the failure instead of crashing.
    print('处理出错:\n', repr(e))
print('处理完成')