模糊查询python



import os
from win32com import client
import fitz


def convert_word_to_pdf(word_file, pdf_file):
    try:
        word_app = client.Dispatch("Word.Application")
        doc = word_app.Documents.Open(word_file)
        doc.SaveAs(pdf_file, FileFormat=17)
        doc.Close()
        word_app.Quit()
        return True
    except Exception as e:
        print(f"转换 Word 到 PDF 失败：{e}")
        return False


def fuzzy_match_keywords(text, keywords):
    matches = []
    for keyword in keywords:
        if keyword in text:
            index = text.index(keyword)
            match_text = text[index:]
            colon_index = match_text.find(":")  # 寻找第一个冒号的位置
            if colon_index != -1:
                value = match_text[colon_index + 1:]  # 获取冒号右侧的内容
                value = value.strip()
                matches.append((keyword, value))
    return matches


if __name__ == '__main__':
    folder_path = "D:/haha"
    keywords = ["联系人及电话", "启 运 港"]  # 请输入您要寻找的关键字

    files = os.listdir(folder_path)
    for file in files:
        if file.endswith(".docx") or file.endswith(".doc"):
            word_file = os.path.join(folder_path, file)
            pdf_file = os.path.splitext(word_file)[0] + ".pdf"

            if convert_word_to_pdf(word_file, pdf_file):
                with fitz.open(pdf_file) as doc:
                    try:
                        all_text = ""
                        for page in doc:
                            all_text += page.get_text()

                        matches = fuzzy_match_keywords(all_text, keywords)
                        if matches:
                            print(f"在文件 {file} 中找到了 {len(matches)} 个匹配关键字：")
                            for match in matches:
                                keyword, value = match
                                print(f"关键字：{keyword} 内容：{value}")
                        else:
                            print(f"在文件 {file} 中没有找到关键字的模糊匹配。")
                    except UnicodeDecodeError:
                        print(f"解码错误：无法解码文件 {file}")
            else:
                print(f"转换 Word 到 PDF 失败：{word_file}")

为什么我的代码要么查询不到感觉信息要么就是查询到的货不对板
这是咋回事啊

帮你找了个相似的问题, 你可以看下: https://ask.csdn.net/questions/7452934
你也可以参考下这篇文章：硬币问题python
你还可以看下python参考手册中的 python-具体异常
除此之外, 这篇博客: 聊聊Python中的部分也许能够解决你的问题, 你可以仔细阅读以下内容或跳转源博客中阅读:
前面已经说过，Python 3打破了对Python 2的向后兼容。但它并不是完全重新设计的。而且，也并不是说2.x版本的Python模块在Python 3下都无法运行。代码可以完全跨版本兼容，无需其他工具或技术在两大版本上都可以运行，但一般只有简单应用才能做到这一点。
您还可以看一下龙豪杰老师的Python网络爬虫课程中的为什么学习python小节, 巩固相关知识点
以下回答由chatgpt基于相关博客总结生成:
我可以使用Python中的正则表达式模块re来实现模糊查询的功能。具体步骤如下： 1. 导入re模块。 2. 根据需求编写正则表达式，使用“.”表示匹配任意字符，“*”表示匹配前一个字符0次或多次，使用“+”表示匹配前一个字符1次或多次。 3. 使用re模块的search()、findall()或match()方法实现匹配。 4. 将匹配结果返回或进行进一步的处理。
下面是一个例子，实现在字符串中查找包含“Python”的所有单词：
```
import re

# 示例字符串
string = "Python is a widely used high-level programming language for general-purpose programming."

# 正则表达式，匹配包含Python的单词
pattern = r"\b\w*Python\w*\b"

# 匹配
result = re.findall(pattern, string)

# 输出结果
print(result)
```
输出：
```
['Python']
```
这个例子中，使用\b表示单词边界，\w表示单词字符，*表示匹配前一个字符0次或多次。