import requests
from lxml import etree
import csv
from concurrent.futures import ThreadPoolExecutor
# Shared output file and CSV writer used by download_page().
# newline="" is required by the csv module for file objects: the writer emits
# its own "\r\n" row terminator, and without it platforms that translate
# newlines would produce "\r\r\n" (seen as blank lines between rows).
f = open("data.csv", "w", encoding="utf-8", newline="")
csvwriter = csv.writer(f)
def download_page(url):
    """Download one listing page and append its table rows to the CSV file.

    Fetches ``url``, locates the first ``<table class="hq_table">`` in the
    returned HTML, skips that table's first row (presumably the header row)
    and writes the text of every remaining row's ``<td>`` cells through the
    module-level ``csvwriter``. Backslashes and forward slashes are removed
    from each cell before it is written.

    Args:
        url: Address of the page to download.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout keeps a stalled connection from blocking a worker forever.
    response = requests.get(url, timeout=10)
    tree = etree.HTML(response.text)

    # Guard against a missing tree (e.g. an empty response body) as well as a
    # page without the expected table, instead of failing with an IndexError.
    tables = [] if tree is None else tree.xpath(".//table[@class='hq_table']")
    if not tables:
        print(url, "未找到数据表格, 已跳过")
        return
    table = tables[0]

    # Rows must be addressed relative to the table element itself; repeating
    # the ".//table[...]" prefix here would look for a nested table and match
    # nothing. lxml keeps the markup's own structure (it does not add a
    # <tbody> the way browsers do), so accept rows both directly under the
    # table and under a <tbody>.
    rows = table.xpath("./tr | ./tbody/tr")[1:]
    for row in rows:
        # Build a real list so the debug print shows the values rather than
        # a generator object.
        cells = [
            text.replace("\\", "").replace("/", "")
            for text in row.xpath("./td/text()")
        ]
        print(cells)
        csvwriter.writerow(cells)
    print(url, "下载完成!")
if __name__ == '__main__':
    # Download listing pages 1-199 on a pool of 50 worker threads.
    try:
        with ThreadPoolExecutor(50) as t:
            futures = {}
            for i in range(1, 200):
                url = f"http://www.xinfadi.com.cn/marketanalysis/0/list/{i}.shtml"
                futures[t.submit(download_page, url)] = url
        # Leaving the ``with`` block waits for every task to finish. An
        # exception raised inside a worker is stored on its future and would
        # otherwise vanish silently, so report each failed page explicitly.
        for future, url in futures.items():
            error = future.exception()
            if error is not None:
                print(url, "下载失败:", repr(error))
    finally:
        # Close the shared output file so all buffered rows reach the disk.
        f.close()
# Note: this problem occurs because the program did not capture the expected data when scraping the page.