# Objective questions: flatten every <div> under the objective container to
# plain text, tidy it, and append it to the running buffer `h`.
# `tree` and `h` are expected to be defined before this point.
for div in tree.xpath('//*[@id="ctl00_CPHMain_divObjective"]/div'):
    h += (
        div.xpath('string(.)')
        .replace('\n\n', '\n')  # collapse doubled newlines
        .replace('。', '。\n')  # break the line after each full stop
        .replace('参考答案', '\n参考答案')  # start a new line before the answer marker
        # Was r'\ax0', a literal backslash sequence that never occurs in the
        # extracted text; the non-breaking space U+00A0 is what has to go.
        .replace('\xa0', '')
        .replace(' ', '')  # drop remaining ordinary spaces
    )

# Subjective questions: same idea, with a different set of line-break markers.
for div in tree.xpath('//*[@id="ctl00_CPHMain_divSubjective"]/div'):
    try:
        h += (
            div.xpath('string(.)')
            .replace('<br/>', '\n')  # literal "<br/>" text becomes a newline
            .replace('问题:', '\n问题:')  # start a new line before each question marker
            .replace('参考答案', '\n参考答案')  # start a new line before the answer marker
            .replace('?', '?\n')  # break the line after each question mark
            # Same fix as above: remove U+00A0 rather than the literal r'\ax0'.
            .replace('\xa0', '')
            .replace(' ', '')  # drop remaining ordinary spaces
        )
    except Exception as e:
        # Best effort: report the failure and carry on with the next <div>.
        print(e)
先用 xpath 把所有文本提取出来，然后把每一段文本当作字符串，用 replace 逐项替换即可。