# linelist is a list; each element is written to savefile in order.
# NOTE: no encoding is passed here, so open() falls back to the platform's
# locale encoding. Any character that encoding cannot represent makes
# w.write() raise UnicodeEncodeError.
with open(savefile, 'w') as w:
    for line in linelist:
        w.write(line)
w.write(line)
UnicodeEncodeError: 'gbk' codec can't encode character '\xa0' in position 32: illegal multibyte sequence
# Process every entry of the current working directory: load it with pandas,
# dump it to a scratch text file, then clean that file.
# count/filename are bound before the try so the handlers below can always
# format their message, even when os.listdir() itself is what fails.
count = 0
filename = None
try:
    lisdir = os.listdir()
    for count, filename in enumerate(lisdir, start=1):
        # 0. Read the raw CSV file.
        # on_bad_lines='warn' skips malformed rows and emits a warning for
        # each; it replaces error_bad_lines=False, which was deprecated in
        # pandas 1.3 and removed in pandas 2.0.
        df = pd.read_csv(filename, encoding='utf-8', on_bad_lines='warn')
        # 1. Each CSV that is read gets saved to the intermediate file
        #    topsave.txt (overwritten on every iteration).
        savetxtname = 'topsave.txt'
        df.to_csv(savetxtname, na_rep='NA')
        # 2. Call remove_noise_return_two to clean the text file and drop
        #    the noisy first line.
        pack_class, linelist = remove_noise_return_two(savetxtname)
        # Short pause before moving on to the next file.
        time.sleep(0.1)
except FileNotFoundError as e:
    print(f'{count}:{filename},文件没有找到',e)
except KeyError as k:
    print(f'{count}:{filename},关键词Key错误',k)
except UnicodeError as u:
    print(f'{count}:{filename},编码存在问题',u)
写入和读取的编码要保持一致。open() 不指定 encoding 时使用系统区域设置的默认编码:简体中文 Windows 上默认为 gbk,Linux 上通常默认为 utf-8,因此应显式指定编码:
# Pass the encoding explicitly so the output does not depend on the
# platform's locale default; utf-8 can encode every Unicode character.
with open(savefile, 'w', encoding='utf-8') as w:
    ...