python协程爬取视频的ts文件时,总在快要下载完是卡住,然后报错,最后视频有时依然能够出来,报错如下
Task exception was never retrieved
future: <Task finished coro=<down_load_ts1() done, defined at D:/pycharm/PycharmProjects/PycharmProjects/爬虫/第四章/爬取网吧电影.py:91> exception=ServerDisconnectedError('Server disconnected',)>
Traceback (most recent call last):
File "D:/pycharm/PycharmProjects/PycharmProjects/爬虫/第四章/爬取网吧电影.py", line 92, in down_load_ts1
async with session.get(url=url) as resp:
File "D:\python\lib\site-packages\aiohttp\client.py", line 1138, in aenter
self._resp = await self._coro
File "D:\python\lib\site-packages\aiohttp\client.py", line 559, in _request
await resp.start(conn)
File "D:\python\lib\site-packages\aiohttp\client_reqrep.py", line 898, in start
message, payload = await protocol.read() # type: ignore[union-attr]
File "D:\python\lib\site-packages\aiohttp\streams.py", line 616, in read
await self._waiter
aiohttp.client_exceptions.ServerDisconnectedError: Server disconnected
import requests
import re
import aiohttp
import aiofiles
import asyncio
import os
import time
import shutil
from Crypto.Cipher import AES
headers = {
'Cookie': 'PHPSESSID=0gakl8p9tc46n2ivcsjg6ccvq2; Hm_lvt_68f31e3bbfb6b0778abdd0c25d0bd6e0=1638432662; Hm_lpvt_68f31e3bbfb6b0778abdd0c25d0bd6e0=1638450480',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
}
def get_index_url(url):
resp = requests.get(url=url, headers=headers)
time.sleep(2)
resp_text = resp.text
# print(resp.text)
obj = re.compile(r'url=(?P<url>.*?)&id=.*?alt="(?P<title>.*?)" />', re.S)
index_url = obj.search(resp_text).group('url')
title = obj.search(resp_text).group('title')
# print(index_url, title)
return index_url, title
def get_m3u8_url(url):
resp = requests.get(url=url, headers=headers)
time.sleep(2)
m3u8_url = resp.text.split()[2]
# print(index_url_list[0])
# print(m3u8_url)
return m3u8_url
def save_m3u8(main_url, url, title):
fin_url = main_url + url
# print(fin_url)
resp = requests.get(url=fin_url, headers=headers)
folder = os.path.exists(f'movie/{title}')
if not folder:
os.mkdir(f'movie/{title}')
with open(f'movie/{title}/{title}_m3u8.txt', mode='w', encoding='utf-8') as f:
f.write(resp.text)
print('m3u8下载完毕')
def get_key(title):
flag = False
with open(f'movie/{title}/{title}_m3u8.txt', mode='r', encoding='utf-8') as f:
for line in f:
if line.startswith('#EXT-X-KEY'):
flag = True
break
if flag == False:
return '0'
obj = re.compile('URI="(?P<key_url>.*?)"', re.S)
url = obj.search(line).group('key_url')
resp = requests.get(url=url, headers=headers)
key = resp.text.encode('utf-8')
return key
def dec_ts(key):
IV = b'0000000000000000'
aes = AES.new(key=key, IV=IV, mode=AES.MODE_CBC)
return aes
async def down_load_ts1(url, title, name, session):
async with session.get(url=url) as resp:
async with aiofiles.open(f'movie/{title}/{name}', mode='wb') as f:
await f.write(await resp.content.read()) # 将内容写入文件中
print(f'{name}下载完毕')
async def down_load_ts2(url, title, name, session, aes):
async with session.get(url=url) as resp:
async with aiofiles.open(f'movie/{title}/{name}', mode='wb') as f:
await f.write(aes.decrypt(await resp.content.read())) # 将内容写入文件中
print(f'{name}下载完毕')
async def down_load(title):
# 下载ts
tasks = []
key = get_key(title)
# timeout = aiohttp.ClientTimeout(total=200) # 将超时时间
# connector = aiohttp.TCPConnector(limit=90) # 将并发数量降低
# connector = aiohttp.TCPConnector(force_close=True)
async with aiohttp.ClientSession(headers=headers) as session:
async with aiofiles.open(f'movie/{title}/{title}_m3u8.txt', mode='r', encoding='utf-8') as f:
async for line in f:
if line.startswith('#'):
continue
ts_url = line.strip() # 去掉无用的空格
line = ts_url.split('/')[-1]
if key == '0':
task = asyncio.get_event_loop().create_task(down_load_ts1(ts_url, title, line, session)) # 创建任务
else:
aes = dec_ts(key)
task = asyncio.get_event_loop().create_task(down_load_ts2(ts_url, title, line, session, aes))
tasks.append(task)
await asyncio.wait(tasks) # 等待任务结束
# async def dec_ts(key, name):
# IV = b'0' * len(key)
# aes = AES.new(key=key, IV=IV, mode=AES.MODE_CBC)
# async with aiofiles.open(f'movie/{name}', mode='rb') as f1, \
# aiofiles.open(f'movie/temp_{name}', mode='wb') as f2:
# bs = await f1.read() # 从源文件读取
# await f2.write(aes.decrypt(bs)) # 把解密好的文件写入文件
# print(f'{name}处理完毕')
# async def aio_dec(title, key):
# tasks = []
# path = os.listdir(f'movie/{title}')
# for i in path:
# task = asyncio.create_task(dec_ts(name=i, key=key))
# tasks.append(task)
# await asyncio.wait(tasks)
def merge_ts(title):
all_list_ts = []
list_ts = []
with open(f'movie/{title}/{title}_m3u8.txt', mode='r', encoding='utf-8') as f:
for line in f:
if line.startswith('#'):
continue
line = line.strip()
line = line.split('/')[-1]
list_ts.append(f'movie\\{title}\\{line}')
if len(list_ts) == 200:
all_list_ts.append(list_ts)
list_ts = []
n = 1
for fin_ts in all_list_ts:
fin_ts = '+'.join(fin_ts)
cmd = 'copy /b ' + repr(fin_ts).strip("'") + fr' movie\{title}\{n}.mp4'
os.system(cmd)
n += 1
print(n)
num_list = []
for i in range(1, n):
num_list.append(f'movie\\{title}\\{i}.mp4')
# print(num_list)
num_list = '+'.join(num_list)
cmd = 'copy /b ' + repr(num_list).strip("'") + fr' movie\{title}.mp4'
os.system(cmd)
shutil.rmtree(fr'movie\{title}')
def main(url):
index = get_index_url(url=url)
index_url = index[0]
main_url = index_url.split('com')[0] + 'com'
title = index[1]
m3u8_url = get_m3u8_url(url=index_url)
save_m3u8(main_url=main_url, url=m3u8_url, title=title)
asyncio.get_event_loop().run_until_complete(down_load(title=title))
merge_ts(title=title)
# asyncio.get_event_loop().run_until_complete(down_load(url))
if __name__ == '__main__':
movie_url = 'http://www.wbdy.tv/play/31042_1_1.html'
folder = os.path.exists('movie')
if not folder:
os.mkdir('movie')
main(movie_url)
应该是aiohttp的问题,你还用requests+多线程试试,aiohttp爬某些网站会有这个问题