源代码如下:
import requests
from lxml import etree
import queue
import threading
import os
def test(url1):
    """Download every image found on one article page.

    The folder name for the page is taken ONCE from the module-global
    ``name_list`` queue (the main block enqueues exactly one name per
    page URL, in the same order the URLs are enqueued).

    BUG FIX: the original called ``name_list.get()`` once per *image*,
    so a page containing several images drained extra names and the
    next ``get()`` blocked forever (``Queue.get`` is blocking by
    default) — exactly the hang described in the post.
    """
    tree = etree.HTML(requests.get(url1).text)
    src_list = tree.xpath('//*[@id="primary-home"]/article/div[2]/p[2]/img/@src')
    # Take exactly one name per page, BEFORE the image loop.
    # name_list holds xpath results (lists); [0] is the title text.
    name = name_list.get()[0]
    folder = os.path.join('D:\\爬虫爬取\\图片', name)
    # Create the folder once per page, not once per image.
    os.makedirs(folder, exist_ok=True)
    for src in src_list:
        resp = requests.get(src)
        # Filename is the 7th path segment of the image URL.
        # NOTE(review): assumes every src has at least 7 segments — verify.
        src_name = src.split('/')[6]
        with open(os.path.join(folder, src_name), 'wb') as f:
            f.write(resp.content)
        print(src_name, '完成')
if __name__ == '__main__':
    url = 'https://www.ababbb.com/'
    url_queue = queue.Queue()
    threads = []
    # Global queue read by test(); one title per page, same order as URLs.
    name_list = queue.Queue()
    tree = etree.HTML(requests.get(url).text)
    li_list = tree.xpath('//*[@id="primary-home"]/div[4]/div/div[1]/div/div[2]/ul/li/@id')
    for li in li_list:
        item_id = li.split('-')[1]
        name = tree.xpath('//*[@id="item-' + item_id + '"]/div/div[2]/h2/a/text()')
        url_queue.put(url + item_id + '.html')
        name_list.put(name)
    # BUG FIX: the original hard-coded range(8), which blocks forever on
    # url_queue.get() when fewer than 8 pages were found, and silently
    # drops any page beyond the eighth.  Spawn one thread per queued URL.
    while not url_queue.empty():
        threads.append(threading.Thread(target=test, args=(url_queue.get(),)))
    for t in threads:
        t.start()
    for t in threads:
        t.join()
运行的结果:
第 11 行的 src_list 可以 debug 一下：它可能包含多张图片。这样一来，name_list 这个 queue 会被 get 光；再 get 一次时，Python 的 Queue 默认是阻塞的，程序就卡住了。可以用 debug 排查，也可以 print 一些信息出来观察。