如果利用正则表达式在某一行中匹配到“受到xx影响”,则删除该字符串所在的行,并返回该行行首的编号(本例中为2877和2881)。
a.txt内容如下:
2877 a3 1-1 9:16,部分地区受到雾霾影响出行不便。
2878 a2 1-1 9:42,床前明月光,疑是地上霜。
2880 a2 1-1 10:09,举头望明月,低头思故乡。
2881 a3 1-1 9:16,受到事故影响出行不便。
希望得到的结果是:
“2877”匹配到:“受到雾霾影响”
“2881”匹配到:“受到事故影响”
写入b.txt的内容为:
2878 a2 1-1 9:42,床前明月光,疑是地上霜。
2880 a2 1-1 10:09,举头望明月,低头思故乡。
# _*_ coding:utf-8 _*_
def get_math_line(source, keyword, encoding='gb2312'):
    """Return the leading number of *source* if the line contains *keyword*.

    Args:
        source: One line of the input file.  Bytes are decoded with
            *encoding*; already-decoded text is used as is.
        keyword: Regular-expression fragment to look for.  It is inserted
            into the pattern unescaped, so a fragment such as
            '受到.*影响' is allowed.  Bytes (a plain Python 2 literal in a
            UTF-8 source file) are decoded as UTF-8.
        encoding: Encoding of *source* when it is given as bytes.

    Returns:
        The ASCII digits the matching line starts with ('' when it does
        not start with a digit), or None when *keyword* does not occur.
    """
    import re

    # Match on text, not bytes, so multi-byte Chinese characters are
    # compared as whole characters.
    if isinstance(source, bytes):
        source = source.decode(encoding)
    if isinstance(keyword, bytes):
        keyword = keyword.decode('utf-8')

    # [0-9] instead of \d: keeps the Python 2 behaviour (ASCII digits only)
    # when run on Python 3, where \d also matches other Unicode digits.
    result = re.search(r'([0-9]*).*' + keyword, source)
    return result.group(1) if result else None
# Copy every line of source.txt that does not contain the keyword into
# dest.txt, and print the leading number of each line that does.
# Both files are opened in binary mode so that kept lines are copied
# byte-for-byte, including their original line terminator.
with open('source.txt', 'rb') as src_file, open('dest.txt', 'wb') as dst_file:
    for line in src_file:
        val = get_math_line(line, '受到雾霾影响')
        # Compare with None: a matching line that has no leading number
        # yields '' and must still be dropped from the output.
        if val is not None:
            print(val)
        else:
            dst_file.write(line)
# _*_ coding:utf-8 _*_
def get_math_line(source, encoding='gb2312'):
    """Find the first "受到...影响" phrase in *source*.

    Args:
        source: One line of the input file.  Bytes are decoded with
            *encoding*; already-decoded text is used as is.
        encoding: Encoding of *source* when it is given as bytes.

    Returns:
        A ``(number, phrase)`` tuple, where *number* is the ASCII digits
        the matching line starts with ('' if there are none) and *phrase*
        is the matched "受到...影响" text; None when no phrase is found.
    """
    import re

    # Match on text, not bytes, so multi-byte Chinese characters are
    # compared as whole characters.
    if isinstance(source, bytes):
        source = source.decode(encoding)

    # Non-greedy quantifiers stop at the first "受到" and the nearest
    # following "影响" instead of running on to the last "影响" of the line.
    # [0-9] keeps the Python 2 meaning of \d (ASCII digits) on Python 3.
    pattern = r'([0-9]*).*?(受到.*?影响)'
    if isinstance(pattern, bytes):
        # Python 2: a plain literal is UTF-8 bytes (per the coding line).
        pattern = pattern.decode('utf-8')

    result = re.search(pattern, source)
    if result is None:
        return None
    return result.group(1), result.group(2)
# Copy every line of source.txt that contains no "受到...影响" phrase into
# dest.txt, and report the number and phrase of each line that does.
# Both files are opened in binary mode so that kept lines are copied
# byte-for-byte, including their original line terminator.
with open('source.txt', 'rb') as src_file, open('dest.txt', 'wb') as dst_file:
    for line in src_file:
        val = get_math_line(line)
        if val is not None:
            message = u"%s 匹配到 %s" % (val[0], val[1])
            # On Python 2 (where str is bytes) encode explicitly, so the
            # output does not depend on the console's encoding.
            if str is bytes:
                message = message.encode('utf-8')
            print(message)
        else:
            dst_file.write(line)