import re
# Characters treated as the end of a sentence.
SENTENCE_ENDINGS = '.!?'


def find_sentence_span(text, start, end):
    """Return ``(begin, stop)`` of the sentence around ``text[start:end]``.

    The sentence begins just after the previous ``.``, ``!`` or ``?``
    (leading whitespace skipped) and runs up to and including the next one.
    When there is no terminator on a side, the start of *text* or the end of
    its non-whitespace content is used instead.
    """
    begin = max(text.rfind(mark, 0, start) for mark in SENTENCE_ENDINGS) + 1
    while begin < start and text[begin].isspace():
        begin += 1
    following = [text.find(mark, end) for mark in SENTENCE_ENDINGS]
    following = [pos for pos in following if pos != -1]
    if following:
        stop = min(following) + 1
    else:
        # Last sentence has no closing punctuation: take the rest of the text.
        stop = max(end, len(text.rstrip()))
    return begin, stop


def wrap_spans(text, spans):
    """Return *text* with every ``(begin, stop)`` span wrapped in ``<b></b>``.

    Identical or overlapping spans are merged first so the tags never nest.
    """
    merged = []
    for begin, stop in sorted(spans):
        if merged and begin < merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], stop)
        else:
            merged.append([begin, stop])
    pieces = []
    cursor = 0
    for begin, stop in merged:
        pieces += [text[cursor:begin], '<b>', text[begin:stop], '</b>']
        cursor = stop
    pieces.append(text[cursor:])
    return ''.join(pieces)


def highlight_sentences(article, keywords):
    """Bold the first sentence that contains each keyword as a whole word.

    Args:
        article: Text to search.
        keywords: Iterable of keyword strings; empty strings are ignored.

    Returns:
        A tuple ``(new_article, matched_sentences, unmatched_keywords)``:
        the article with the matched sentences wrapped in ``<b></b>``, the
        plain sentence found for each matched keyword (in keyword order),
        and the keywords that do not occur in the article.
    """
    spans = []
    matched_sentences = []
    unmatched_keywords = []
    for word in keywords:
        if not word:
            # A blank line in the keyword file would match everywhere.
            continue
        match = re.search(r'\b' + re.escape(word) + r'\b', article)
        if match is None:
            unmatched_keywords.append(word)
            continue
        # Spans are computed on the untouched article, so tags inserted for
        # one keyword can never leak into another keyword's sentence.
        begin, stop = find_sentence_span(article, match.start(), match.end())
        matched_sentences.append(article[begin:stop])
        spans.append((begin, stop))
    return wrap_spans(article, spans), matched_sentences, unmatched_keywords


def main():
    """Read keyword.txt and file.txt, then write the three result files."""
    with open('keyword.txt', 'r') as file:
        keywords = [line.strip() for line in file]
    with open('file.txt', 'r') as file:
        article = file.read()

    new_article, matched_sentences, unmatched_keywords = highlight_sentences(
        article, keywords
    )

    with open("new.txt", "w") as file:
        file.write(new_article)
    with open('unmatched_keywords.txt', 'w') as file:
        file.writelines(word + '\n' for word in unmatched_keywords)
    with open("sentences.txt", "w") as file:
        file.write('\n'.join(matched_sentences))


if __name__ == '__main__':
    main()
以上这段代码用 keyword.txt 中的关键字去匹配 file.txt,同时生成三个文件:一个是修改以后的文章 new.txt,一个是未匹配的关键字 unmatched_keywords.txt,还有一个是提取关键字所在句子的 sentences.txt(每个 keyword 只匹配一次)。
如何修改代码,才能在修改后的文件以及 sentences 文件中高亮匹配到的关键词(以 <b></b> 的形式表示)?
以下回答参考了 GPT 生成的内容,请确认是否解决了问题:
要在文件和句子中以 <b></b> 的形式突出显示匹配的关键词,需要对替换逻辑进行调整。请使用以下修改后的代码:
import re
# Characters treated as the end of a sentence.
SENTENCE_ENDINGS = '.!?'


def find_sentence_span(text, start, end):
    """Return ``(begin, stop)`` of the sentence around ``text[start:end]``.

    The sentence begins just after the previous ``.``, ``!`` or ``?``
    (leading whitespace skipped) and runs up to and including the next one.
    When there is no terminator on a side, the start of *text* or the end of
    its non-whitespace content is used instead.
    """
    begin = max(text.rfind(mark, 0, start) for mark in SENTENCE_ENDINGS) + 1
    while begin < start and text[begin].isspace():
        begin += 1
    following = [text.find(mark, end) for mark in SENTENCE_ENDINGS]
    following = [pos for pos in following if pos != -1]
    if following:
        stop = min(following) + 1
    else:
        # Last sentence has no closing punctuation: take the rest of the text.
        stop = max(end, len(text.rstrip()))
    return begin, stop


def wrap_spans(text, spans):
    """Return *text* with every ``(begin, stop)`` span wrapped in ``<b></b>``.

    Identical or overlapping spans (e.g. the keywords "New York" and "York")
    are merged first so the tags never nest.
    """
    merged = []
    for begin, stop in sorted(spans):
        if merged and begin < merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], stop)
        else:
            merged.append([begin, stop])
    pieces = []
    cursor = 0
    for begin, stop in merged:
        pieces += [text[cursor:begin], '<b>', text[begin:stop], '</b>']
        cursor = stop
    pieces.append(text[cursor:])
    return ''.join(pieces)


def highlight_keywords(article, keywords):
    """Bold the first whole-word occurrence of each keyword.

    Args:
        article: Text to search.
        keywords: Iterable of keyword strings; empty strings are ignored.

    Returns:
        A tuple ``(new_article, matched_sentences, unmatched_keywords)``:
        the article with each matched keyword wrapped in ``<b></b>``, the
        sentence containing each matched keyword with that keyword wrapped
        in ``<b></b>`` (in keyword order), and the keywords that do not
        occur in the article.
    """
    hits = []
    matched_sentences = []
    unmatched_keywords = []
    for keyword in keywords:
        if not keyword:
            # A blank line in the keyword file would match everywhere.
            continue
        match = re.search(r'\b' + re.escape(keyword) + r'\b', article)
        if match is None:
            unmatched_keywords.append(keyword)
            continue
        start, end = match.span()
        # All offsets refer to the untouched article, so tags inserted for
        # one keyword can never be matched or swallowed by another.
        begin, stop = find_sentence_span(article, start, end)
        matched_sentences.append(
            article[begin:start] + '<b>' + article[start:end] + '</b>'
            + article[end:stop]
        )
        hits.append((start, end))
    return wrap_spans(article, hits), matched_sentences, unmatched_keywords


def main():
    """Read keyword.txt and file.txt, then write the three result files."""
    with open('keyword.txt', 'r') as file:
        keywords = [line.strip() for line in file]
    with open('file.txt', 'r') as file:
        article = file.read()

    new_article, matched_sentences, unmatched_keywords = highlight_keywords(
        article, keywords
    )

    with open("new.txt", "w") as file:
        file.write(new_article)
    with open("unmatched_keywords.txt", "w") as file:
        file.writelines(keyword + '\n' for keyword in unmatched_keywords)
    with open("sentences.txt", "w") as file:
        file.write('\n'.join(matched_sentences))


if __name__ == '__main__':
    main()
以上代码会读取 file.txt,将匹配到关键词的内容用 <b> 和 </b> 标签包裹起来表示高亮,并把结果写入 new.txt。同时,匹配到的句子将会被保存在 sentences.txt 文件中。