# Desired result (我想要达到的结果)
import urllib.request
from lxml import etree
def create_request(page):
    """Build the HTTP request for one page of the image listing.

    Args:
        page: Page number of the listing; ``1`` selects the first page.

    Returns:
        A ``urllib.request.Request`` for that page carrying a browser-like
        ``User-Agent`` header.
    """
    base_url = 'https://sc.chinaz.com/tupian/feizhuliutupian'
    # NOTE(review): the first listing page appears to be served without any
    # suffix ("feizhuliutupian.html"); only later pages use "_<page>".
    # The previous page-1 URL ended in "_.html" (dangling underscore) --
    # confirm against the live site.
    suffix = '' if page == 1 else '_' + str(page)
    url = base_url + suffix + '.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Mobile Safari/537.36 Edg/103.0.1264.62',
    }
    return urllib.request.Request(url=url, headers=headers)
def get_content(request):
    """Fetch *request* and return the response body as text.

    Args:
        request: A ``urllib.request.Request`` (or URL string) to open.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the resource cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its connection) even if
    # reading or decoding fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def down_load(content):
    """Parse one listing page and print how many image titles were matched.

    Args:
        content: HTML text of a listing page.

    Nothing is downloaded yet: the image addresses are extracted but unused.
    """
    # TODO: save each image, e.g. with
    # urllib.request.urlretrieve(image_url, file_name).
    document = etree.HTML(content)
    # Alternative selectors noted by the original author:
    #   //div[@id="ulcontent"]//a/img/@src
    #   //div[@id="ulcontent"]//a/img/@alt
    titles = document.xpath('//div[@id="container"]//a/img/@alt')
    # Collected for the future download step; not used at the moment.
    image_urls = document.xpath('//div[@id="container"]//a/img/@src')
    title_count = len(titles)
    print(title_count)
if __name__ == '__main__':
    # Ask for an inclusive page range, then fetch and process each page in
    # ascending order: build request -> download HTML -> parse.
    start_page = int(input("请输入开始的页码"))
    end_page = int(input("请输入结束的页码"))
    for page in range(start_page, end_page + 1):
        down_load(get_content(create_request(page)))