import os

import requests
from lxml import etree

# Page whose article images are being collected.
url = 'https://www.jdlingyu.com/38203.html'
# Browser-like user agent sent with the page request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36'
}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops it from parsing an error page as if it were
# the article.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()
page = response.text

# XPath of the image paragraphs: //*[@id="primary-home"]/article/div[2]/p
tree = etree.HTML(page)
li_list = tree.xpath('//div[@id="primary-home"]/article/div[2]')
for li in li_list:
    # NOTE: li.xpath('./p/img/@loading="lazy"') is an XPath comparison, so it
    # returns True rather than an attribute value.
    img_data = li.xpath('./p/img/@src')
给你个例子,这个是我抓取图片存储的
import requests
from urllib.request import urlretrieve
import re
# encoding: utf-8
"""
@version: 1.0
@author: AusKa_T
@file: ex_umei
@time: 2019/11/9 0009 10:42
"""
def download_images(url, regex, save_dir='downloads', timeout=10):
    """Download every image that ``regex`` matches on the page at ``url``.

    Each match is saved as ``<save_dir>/<position>_<caption>.png``, where
    ``position`` starts at 1 and follows the order of matches in the page.

    Args:
        url: Address of the HTML page to scan.
        regex: Pattern with at least two capture groups; group 1 is the
            image URL and group 2 is the text used in the saved file name.
        save_dir: Directory the images are written to; created if missing.
        timeout: Seconds to wait for the page request before giving up.

    Raises:
        requests.HTTPError: If the page request returns an error status.
    """
    # Imported locally so the function carries its own new dependencies.
    import os
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Use the charset detected from the body rather than the declared one.
    response.encoding = response.apparent_encoding
    html = response.text

    # urlretrieve does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    for position, match in enumerate(re.findall(regex, html), start=1):
        # The caption comes from page text, so replace characters that are
        # path separators or illegal in file names on common platforms.
        caption = re.sub(r'[\\/:*?"<>|]', '_', match[1])
        savename = os.path.join(save_dir, '{}_{}.png'.format(position, caption))
        # Resolve relative image paths against the page address; absolute
        # URLs pass through unchanged.
        urlretrieve(urljoin(url, match[0]), savename)
if __name__ == '__main__':
    # Script entry point: scan the listing page. Group 1 of the pattern is
    # the image URL and group 2 is the caption used in the file name.
    download_images(
        'http://www.umei.cc/weimeitupian/oumeitupian/',
        r'<img src="(.*?)" width="180" height="270" /><span>(.*?)</span>',
    )