```python
import os
from urllib.parse import urljoin, urlsplit

import requests
from pyquery import PyQuery as pq
# Folder the downloaded images are written to. The stray leading backslash of
# the old value ('\C:\...') made it an invalid Windows path. The trailing
# separator is kept because save() builds file names by string concatenation.
path = r'C:\Users\yijiaxing\Desktop\tu/'
# Running number of the next image to download; save() uses it as file name.
count = 1
def tools(url, referer=None, timeout=10):
    """Fetch *url* with browser-like headers and return the response.

    Args:
        url: Address to request with HTTP GET.
        referer: Value for the ``Referer`` header. Defaults to the site root,
            because hotlink protection rejects requests without a referer.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so a stalled connection cannot hang the crawl forever.

    Returns:
        The ``requests.Response``. For non-image responses its ``encoding``
        is set to the detected encoding so ``.text`` decodes correctly.
    """
    headers = {
        'Cookie': 'UM_distinctid=17bfc299039199-0a4aaeb4a2ed0e-5d432b17-fa000-17bfc29903c10; CNZZDATA1260801081=1163808556-1632014198-null%7C1632575161; CNZZDATA1258947629=1737818012-1632014191-null%7C1632575796; CNZZDATA1278036569=1495289316-1632013499-null%7C1632575105',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
        'Referer': referer or 'http://s1.555xnxn.net/',
    }
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Charset detection scans the whole body; it is only meaningful for text,
    # so skip it for image payloads that callers read through ``.content``.
    content_type = resp.headers.get('Content-Type', '')
    if not content_type.lower().startswith('image/'):
        resp.encoding = resp.apparent_encoding
    return resp
def save(img_url, name=None):
    """Download one image and write it to ``path`` as ``<count>.gif``.

    Args:
        img_url: URL of the image to download. Falsy values are ignored.
        name: Unused. It is now optional so that the existing one-argument
            call ``save(img_url)`` no longer raises ``TypeError``; files are
            still named after the global running counter.
    """
    global count
    if not img_url:
        return
    data = tools(img_url).content
    # Create the target folder on first use so open() does not fail.
    os.makedirs(path, exist_ok=True)
    with open(path + '{}.gif'.format(count), 'wb') as f:
        f.write(data)
    print('已下载第{}张图片'.format(count))
    count += 1
def details(detail):
    """Download every image found in the ``.content`` area of a detail page.

    Args:
        detail: ``href`` of the detail page, absolute or relative to the
            site root. ``None`` or an empty string is ignored.
    """
    if not detail:
        return
    # urljoin copes with hrefs that are absolute or lack a leading slash,
    # which plain string concatenation turned into malformed URLs.
    url = urljoin('http://s1.555xnxn.net', detail)
    doc = pq(tools(url).text)
    for img in doc('.content img').items():
        src = img.attr('src')
        if not src:
            # <img> without a src attribute: nothing to download.
            continue
        img_url = urljoin(url, src)
        # save() takes a second ``name`` argument; the old one-argument call
        # raised TypeError. Pass the file name taken from the image URL.
        name = os.path.basename(urlsplit(img_url).path)
        save(img_url, name)
def details1(href, pages=2):
    """Crawl the listing pages of one category and visit each detail link.

    Args:
        href: Category link taken from the navigation bar.
            NOTE(review): assumed to be a path relative to ``/art`` (for
            example ``/name/``) -- confirm against the site's markup.
        pages: Number of listing pages to crawl, starting at page 1.
            Defaults to 2, the range that was previously hard-coded.
    """
    if not href:
        return
    # The old templates disagreed on slash placement ('/art{}' versus
    # '/art/{}/'), so one of them was malformed for any given href. Build
    # both from a single base with exactly one slash after 'art'.
    base = 'http://s1.555xnxn.net/art/' + href.lstrip('/')
    for page in range(1, pages + 1):
        if page == 1:
            url = base
        else:
            url = '{}/index-{}.html'.format(base.rstrip('/'), page)
        doc = pq(tools(url).text)
        for link in doc('.zuo li a').items():
            detail = link.attr('href')
            if detail:
                details(detail)
def main():
    """Read the home page and crawl every category linked from ``.navc``."""
    url = 'http://s1.555xnxn.net'
    # Named ``home`` rather than ``re`` to avoid shadowing the stdlib module.
    home = tools(url)
    try:
        html = home.text
    finally:
        # Release the connection right away, even if decoding fails, instead
        # of holding it open for the whole crawl.
        home.close()
    doc = pq(html)
    for link in doc('.navc a').items():
        href = link.attr('href')
        if href:
            details1(href)


if __name__ == '__main__':
    main()
```

可能的原因:
第一,请求被网站识别为爬虫;
第二,你的 IP 有问题;
第三,你请求的地址有问题。
很多图片网站使用防盗链技术,会检查请求是从哪个页面发起的,所以你的 headers 里面还要加上 Referer。