import requests
from lxml import etree
# Download the first listing page of 89ip.cn and save every table row as
# "http://<host>:<port>", one entry per line, into text.txt.
url_all = 'https://www.89ip.cn/index_1.html'
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
# Without a timeout requests waits indefinitely on a stalled connection.
response = requests.get(url=url_all, headers=header, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html = etree.HTML(response.content.decode())
# The first cell of each row is used as the host, the second as the port.
h_list = html.xpath('//tbody/tr/td[1]/text()')
p_list = html.xpath('//tbody/tr/td[2]/text()')
with open('text.txt', 'w', encoding='utf-8') as f:
    for h, p in zip(h_list, p_list):
        # Cell text carries surrounding whitespace; strip it before joining.
        f.write('http://' + h.strip() + ':' + p.strip() + '\n')
想获取前 100 个 IP 和端口，上面的代码应该如何改进？
需要翻页，但不知道该如何循环遍历各个页面。
只需要改变网址 https://www.89ip.cn/index_**1**.html 中的数字 1 就可以翻页；对这个页码进行循环，并根据网页地址的实际变化规律调整即可。
import requests
from lxml import etree
# Walk the paginated proxy listing on 89ip.cn and save the first 100 entries
# as "http://<host>:<port>", one per line, into text.txt.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
remaining = 100  # how many entries are still wanted
max_pages = 20   # safety cap so a layout change cannot cause endless paging
with open('text.txt', 'w', encoding='utf-8') as f:
    # Listing pages are numbered from 1 (index_1.html), not from 0.
    for i in range(1, max_pages + 1):
        url_all = f'https://www.89ip.cn/index_{i}.html'
        # Without a timeout requests waits indefinitely on a stalled connection.
        response = requests.get(url=url_all, headers=header, timeout=10)
        # Fail loudly on an HTTP error instead of silently parsing an error page.
        response.raise_for_status()
        html = etree.HTML(response.content.decode())
        # The first cell of each row is used as the host, the second as the port.
        h_list = html.xpath('//tbody/tr/td[1]/text()')
        p_list = html.xpath('//tbody/tr/td[2]/text()')
        # Keep only as many rows from this page as are still needed.
        rows = list(zip(h_list, p_list))[:remaining]
        if not rows:
            # A page without rows means the listing is exhausted.
            break
        f.writelines(
            'http://' + h.strip() + ':' + p.strip() + '\n' for h, p in rows
        )
        remaining -= len(rows)
        if remaining <= 0:
            break
如果有帮助，请点击一下采纳，谢谢。