在获取图片链接并保存图片时,图片数据并没有被写入,保存下来的图片无法打开,使用 os 模块也无法创建出文件夹。这是什么原因?
代码如下:
import queue # 队列
import threading
from queue import Queue
import requests
import parsel
import re
import os
import time
from threading import Thread
def product(into, pages=2):  # producer
    """Enqueue each listing-page URL onto ``into`` exactly once, then return.

    Args:
        into: a ``queue.Queue`` that the consumer threads read from. If the
            queue is bounded and full, ``put`` blocks until a slot is free.
        pages: number of listing pages to enqueue; page indexes run from
            0 to ``pages - 1``.
    """
    for i in range(pages):
        # NOTE(review): confirm the site really serves index_0.html; the
        # index range is kept exactly as it was.
        url = 'http://www.woyaogexing.com/touxiang/z/nvkeaqi/index_{}.html'.format(i)
        # The page URL itself must be the queued item: Queue.put's second
        # positional parameter is ``block``, not a path suffix.
        into.put(url)
def consume(into, out):  # consumer
    """Download the avatar images linked from every listing page in ``into``.

    Loops forever (intended to run in a daemon thread): takes one
    listing-page URL from ``into``, saves the images it links to, and always
    calls ``into.task_done()`` afterwards so that ``into.join()`` cannot hang
    when a page fails.

    Args:
        into: queue of listing-page URLs to process.
        out: queue that receives one ``"<thread name>-<picture url>"`` string
            per image that is downloaded.
    """
    # No explicit Host header: requests derives it from each URL. The image
    # URLs are built from the pages' own src values and may point at a
    # different host, where a hard-coded Host would be wrong.
    headers = {
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/98.0.4758.82 Safari/537.36',
    }
    while True:
        page_url = into.get()
        try:
            _save_listing_page(page_url, headers, out)
        except (requests.RequestException, OSError) as err:
            # Report and keep the worker alive; one bad page must not stop
            # the thread.
            print('{}处理失败: {}'.format(page_url, err))
        finally:
            into.task_done()  # exactly one task_done() per get()


# Characters stripped from album titles before they are used as folder names.
_ILLEGAL_NAME_CHARS = re.compile(r'[【】/\\ :|*?"<>]')


def _fetch_selector(url, headers):
    """GET ``url`` and return a parsel Selector over its UTF-8 decoded body."""
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = 'utf-8'
    return parsel.Selector(response.text)


def _save_listing_page(page_url, headers, out):
    """Save every image of every album linked from one listing page."""
    from urllib.parse import urljoin  # local import keeps this block self-contained

    listing = _fetch_selector(page_url, headers)
    for div in listing.xpath('//div[3]/div[3]/div/div[2]/div'):
        # Relative paths: a leading '//' would search the whole document and
        # return the first album's link/title on every iteration.
        # NOTE(review): assumes the album <a> is a direct child of each
        # selected div -- confirm against the live page markup.
        href = div.xpath('./a/@href').get()
        title = div.xpath('./a/@title').get()
        if not href or not title:
            continue
        minute_page_url = urljoin('http://www.woyaogexing.com', href)
        folder = os.path.join('头像', _ILLEGAL_NAME_CHARS.sub('', title))
        # exist_ok avoids the check-then-create race between worker threads.
        os.makedirs(folder, exist_ok=True)

        detail = _fetch_selector(minute_page_url, headers)
        for li in detail.xpath('//div[3]/div/div/ul/li[@class = "tx-img"]'):
            src = li.xpath('./a/img/@src').get()
            if not src:
                continue
            # urljoin turns a protocol-relative '//host/path' src into
            # 'http://host/path' and leaves absolute URLs untouched.
            picture_url = urljoin(minute_page_url, src)
            print(picture_url)
            try:
                _download_picture(picture_url, folder, headers, out)
            except requests.RequestException as err:
                print('{}下载失败: {}'.format(picture_url, err))


def _download_picture(picture_url, folder, headers, out):
    """Fetch one image and write its bytes into ``folder``."""
    response = requests.get(url=picture_url, headers=headers, timeout=10)
    # Do not write an error page to disk under an image file name.
    response.raise_for_status()
    picture_name = picture_url.split('/')[-1]
    out.put(str(threading.current_thread().name) + '-' + str(picture_url))
    with open(os.path.join(folder, picture_name), mode='wb') as f:
        f.write(response.content)
    print('{}保存完成'.format(picture_name))
if __name__ == '__main__':
    # Script entry point: start one producer and five consumer daemon
    # threads, wait for the work queue to drain, then report the elapsed time.
    start_time = time.time()  # start of the timed run
    # Bounded work queue holding at most 5 pending listing-page URLs. It is
    # named task_queue so it does not shadow the imported ``queue`` module.
    task_queue = Queue(maxsize=5)
    url_list = Queue()  # unbounded queue the consumers report into
    print('queue 开始大小 %d' % task_queue.qsize())
    # Producer
    product_thread = Thread(target=product, args=(task_queue,))
    product_thread.daemon = True  # daemon: does not keep the process alive
    product_thread.start()
    # Consumers
    for index in range(5):
        consume_thread = Thread(target=consume, args=(task_queue, url_list))
        consume_thread.daemon = True  # daemon: killed when the main thread exits
        consume_thread.start()
    # NOTE(review): Queue.join() returns as soon as the unfinished-task count
    # is zero, so if it runs before the producer has enqueued anything the
    # script exits at once and the daemon workers are killed before saving
    # any file. Consider waiting for the producer first once it is known to
    # terminate.
    task_queue.join()
    end_time = time.time()
    print('总耗时: %s' % (end_time - start_time))
    print('queue 结束大小 %d' % task_queue.qsize())
# Create the output folder; exist_ok makes this safe if it already exists.
e = "视频"
os.makedirs(e, exist_ok=True)
# Save the downloaded file inside that folder.
# ``name``, ``urls`` and ``headers`` are expected to be defined by the
# surrounding code; they are not part of this snippet.
name_ = re.sub(r'[/:*?"<>|\n]', '', name)  # drop characters illegal in file names
response = requests.get(url=urls, headers=headers).content
# Use the sanitized name_ for the path; the raw name may contain characters
# that make open() fail.
with open(e + "/" + name_ + ".mp4", mode='wb') as f:
    f.write(response)
print('保存完成:', name_)
把保存路径改成绝对路径试试。