import requests
import csv
from lxml import etree
def amd():
for i in range(1,6):
url = "http://www.spiderbuf.cn/beginner?level=4&pageno={}".format(i)
print(url)
header = {
"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
}
response = requests.get(url=url,headers=header).text
# s1 = ".*?"
# asd = re.compile(s1,response)
# print(asd)
# f = open("01.html","w",encoding="utf8")
# f.write(response)
# f.close()
s2 = etree.HTML(response)
# s3 = s2.xpath('//tr/td/text()')
# for td in s3:
# ts = td + "|"
# print(ts)
s3 = s2.xpath('//tr/td')
s = ""
for j in s3:
# s = s + str(j.text()) + "|"
"""
在字符串里面text后面不能加()
"""
# s = s + str(j.text) + "|"
"""
最大的改变在这里,
"""
s = s + str(j.xpath("string(.)")) + "|"
# 此处的string会绕过一些干扰直接去提取
print("--------以下为第%d页数--------"%(i))
print(s)
with open("训练3.csv","a+",newline="",encoding="utf8") as fin:
writes = csv.writer(fin)
writes.writerow(s)
# 上面的csv文件能存进去
"""
对比csv文件和txt文件的写入方式
"""
with open("训练3.txt","a+",encoding="utf8") as fins:
fins.writelines(s)
# f = open("01.html","w",encoding="utf8")
# f.write(td)
# f.close()
if __name__ == '__main__':
amd()
而在txt文件中保存时,提示没有定义变量s,怎么回事?缩进了取消都一样

import requests
import csv
from lxml import etree
def amd():
for i in range(1,6):
url = "http://www.spiderbuf.cn/beginner?level=4&pageno={}".format(i)
print(url)
header = {
"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
}
response = requests.get(url=url,headers=header).text
s2 = etree.HTML(response)
s3 = s2.xpath('//tr/td')
s = ""
for j in s3:
s += str(j.xpath("string(.)")) + "|"
print("--------以下为第%d页数--------"%(i))
print(s)
with open("训练3.csv","a+",newline="",encoding="utf8") as fin:
writes = csv.writer(fin)
writes.writerow(s)
with open("训练3.txt","a+",encoding="utf8") as fins:
fins.write(s + "\n") # add a newline character at the end of each line
if __name__ == '__main__':
amd()
没有在循环之前定义变量 s
def amd():
s = ""
for i in range(1,6):
# ...省略部分代码...