python 匹配excel和txt文件,取得txt文件中词语出现的频率

目前情况是这样的,有一个excel文件存放文档,用txt文件存放关键词
然后将txt文件和excel进行匹配,看txt文件里的关键词出现的次数
txt文件的关键词是以

成都
北京
天津
上海
香港

这样的形式存放的
我想用下面的代码来统计:

def frequency(list_word):
    """Tally the words in ``list_word`` and report the 100 most common.

    Entries whose length is 1 or less, and the literal ``'\\r\\n'`` entry,
    are left out of the tally.  Each reported word is printed together
    with its count.

    Returns:
        A ``(counts, words)`` tuple of two parallel lists ordered from the
        most to the least frequent word; note that the counts come first.
    """
    tally = Counter(
        token for token in list_word
        if len(token) > 1 and token != '\r\n'
    )
    ranked = tally.most_common(100)
    for token, hits in ranked:
        print('%s  %d' % (token, hits))
    words = [token for token, _ in ranked]
    counts = [hits for _, hits in ranked]
    return counts, words
def high_frequency():
    """Collect one list entry per keyword occurrence found in ``result``.

    Reads two module-level names that must be defined before the call:
    ``result`` (an iterable of texts to search) and ``data_keyword``
    (whose first element is the iterable of keywords).
    NOTE(review): assumes every item of ``result`` is a ``str`` so that
    ``count`` performs a substring search -- confirm against the caller.

    Returns:
        A list in which each keyword is repeated once per occurrence,
        in the order the texts and keywords were visited.
    """
    list_keyword = []
    for mes in result:
        # Search the current item.  Searching the whole ``result``
        # container instead only counts items *equal* to the keyword,
        # which is why nothing was ever matched.
        content = mes
        for keyword in data_keyword[0]:
            res = content.count(keyword)
            list_keyword.extend([keyword] * res)
    # Hand the hits back to the caller; they were previously discarded.
    return list_keyword

通过这样的方式来统计,但是无法匹配,求更快的方法

用字典记录关键字出现的次数


# Read the keywords
def get_key_list(txt_file):
    """Return the keywords stored one per line in ``txt_file``.

    The file is decoded as UTF-8; a leading byte-order mark, if present,
    is dropped so it does not end up glued to the first keyword.  Line
    endings are removed and blank lines are skipped, because an empty
    keyword would "match" at every position of every text.

    Args:
        txt_file: Path of the keyword file.

    Returns:
        The list of keywords in file order, or an empty list when the
        file cannot be opened or decoded (best-effort, as before).
    """
    try:
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(txt_file, 'r', encoding='utf-8-sig') as file_data:
            lines = [line.rstrip('\r\n') for line in file_data]
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError covers
        # UnicodeDecodeError (not valid UTF-8) and malformed paths.
        return []
    return [line for line in lines if line]

# Scan the Excel sheet for every keyword and write the totals to a new workbook
def xls_select_proc(xls_sr, xls_tg, key_list):
    """Count keyword occurrences in an Excel column and save a summary.

    Only the first worksheet of ``xls_sr`` is read, and only the first
    cell of each row is searched.  The summary workbook gets a header row
    followed by one row per keyword that occurred at least once, in the
    order the keywords were first found.

    Args:
        xls_sr: Path of the source workbook.
        xls_tg: Path the summary workbook is saved to.
        key_list: Sequence of two-item entries; item 1 of each entry is
            the keyword to search for (item 0 is not used here).
    """
    from openpyxl import Workbook
    from openpyxl import load_workbook

    # Distinct, non-empty keywords in first-seen order.  A keyword listed
    # twice would otherwise be counted twice, and str.count('') reports
    # len(text) + 1 "matches" for an empty keyword.
    keywords = [kw for kw in dict.fromkeys(k[1] for k in key_list) if kw]

    # Keyword -> total number of occurrences
    key_dict = {}

    wb = load_workbook(xls_sr)
    try:
        # Only the first worksheet is processed; the text is in column A
        ws = wb[wb.sheetnames[0]]
        for row in ws.rows:
            # row[0] is a Cell object and is never None itself; the empty
            # case is a cell whose *value* is None.  Skip it so it is not
            # searched as the literal text "None".
            if not row or row[0].value is None:
                continue
            text = str(row[0].value)
            for kw in keywords:
                key_sum = text.count(kw)
                if key_sum > 0:
                    key_dict[kw] = key_dict.get(kw, 0) + key_sum
    finally:
        wb.close()

    wb_tg = Workbook()
    try:
        sheet_tg = wb_tg.active
        sheet_tg.title = "关键词统计"
        # Header row, then one row per keyword (columns A and B)
        sheet_tg.append(['关键词', '出现次数'])
        for kw, total in key_dict.items():
            sheet_tg.append([kw, total])
        # Save the file
        wb_tg.save(xls_tg)
    finally:
        wb_tg.close()

def xls_select3(xls_sr, xls_tg, key_file_data):
    """Flatten the per-file keyword data and run the keyword count.

    Args:
        xls_sr: Source workbook, passed through to ``xls_select_proc``.
        xls_tg: Target name; ``.xlsx`` is appended to it.
        key_file_data: Sequence of ``[file_name, keywords]`` entries.

    Every keyword becomes a ``[file_name[:-4], keyword]`` pair, i.e. it is
    tagged with its file name minus the last four characters.
    """
    target_path = "{}.xlsx".format(xls_tg)
    tagged_keywords = [
        [entry[0][:-4], keyword]
        for entry in key_file_data
        for keyword in entry[1]
    ]
    xls_select_proc(xls_sr, target_path, tagged_keywords)

# Keyword files to load; each is paired with the keywords read from it.
key_file = ['关键词.txt']
key_file_data = []
print('开始处理')
try:
    # Entries are appended one by one, so anything loaded before a
    # failure stays in key_file_data.
    key_file_data.extend([name, get_key_list(name)] for name in key_file)
    xls_select3('文本.xlsx', '结果', key_file_data)
except Exception as err:
    # Top-level boundary: report the failure instead of crashing.
    print('处理出错:\n', repr(err))

print('处理完成')



https://blog.csdn.net/fu_jian_ping/article/details/88907572