在用协程下载视频网站(涉及的网站仅用作个人学习).ts文件,总是有十几个.ts文件无法下载,报错如下
import asyncio
import requests
import aiohttp
import aiofiles
import time
from bs4 import BeautifulSoup
import re
def main():
head = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
"Referer": "https://player.yunbtv.net/"
}
m3u8_url = get_m3u8_url()
# "https://vod8.wenshibaowenbei.com/20210917/l7VYklhA/index.m3u8"
get_m3u8_file(m3u8_url)
asyncio.run(get_video())
def get_m3u8_url():
com = re.compile(r'"link_pre":"","url":"(?P<m3u8_url>.*?)","url_next', re.S)
url = "https://www.yunbtv.net/vodplay/youyuyouxi-1-1.html"
requ_1 = requests.get(url)
m3u8_url = com.search(requ_1.text).group("m3u8_url").replace("\\", "")
return m3u8_url
def get_m3u8_file(m3u8_url):
requ = requests.get(m3u8_url)
with open("鱿鱼游戏/m3u8/1.m3u8", mode="wb") as f_1:
f_1.write(requ.content)
requ.close()
with open("鱿鱼游戏/m3u8/1.m3u8", mode="r", encoding="utf-8") as f_2:
for line in f_2:
line = line.strip()
if line.startswith("#"):
continue
index_url = m3u8_url.split("/20210917")[0]+line # /20210917/l7VYklhA//1000kb/hls/index.m3u8
# "https://vod8.wenshibaowenbei.com/20210917/l7VYklhA//1000kb/hls/index.m3u8"
with open("鱿鱼游戏/m3u8/1.m3u8", mode="wb") as f_3:
f_3.write(requests.get(index_url).content)
async def get_video():
tasks = []
i = 1
# 将默认并发量由100降低至50
connector = aiohttp.TCPConnector(limit=40)
# 将默认超时时间提高至600秒
timeout = aiohttp.ClientTimeout(total=1000)
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
async with aiofiles.open("鱿鱼游戏/m3u8/1.m3u8", mode="r", encoding="utf-8") as f_1:
async for line in f_1:
line = line.strip()
if line.startswith("#"):
continue
else:
task = asyncio.create_task(get_download_ts(session, line, i))
tasks.append(task)
i = i+1
await asyncio.wait(tasks)
async def get_download_ts(session, line, i):
async with session.get(line) as requ:
async with aiofiles.open("鱿鱼游戏/ts/第1集/{}.ts".format(i), mode="wb") as f:
await f.write(await requ.content.read())
print("第1集/{}.ts下载完成---".format(i))
time.sleep(0.2)
requ.close()
if __name__ == "__main__":
main()
Task exception was never retrieved
future: <Task finished coro=<get_download_ts() done, defined at C:/python/Practice/爬虫第一阶段/08爬取云播TV鱿鱼游戏/08爬取云播TV鱿鱼游戏学习版.py:79> exception=ClientOSError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None)>
Traceback (most recent call last):
File "C:/python/Practice/爬虫第一阶段/08爬取云播TV鱿鱼游戏/08爬取云播TV鱿鱼游戏学习版.py", line 80, in get_download_ts
async with session.get(line) as requ:
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
self._resp = await self._coro
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\client.py", line 559, in _request
await resp.start(conn)
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\client_reqrep.py", line 898, in start
message, payload = await protocol.read() # type: ignore[union-attr]
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\streams.py", line 616, in read
await self._waiter
aiohttp.client_exceptions.ClientOSError: [WinError 10054] 远程主机强迫关闭了一个现有的连接。
Task exception was never retrieved
future: <Task finished coro=<get_download_ts() done, defined at C:/python/Practice/爬虫第一阶段/08爬取云播TV鱿鱼游戏/08爬取云播TV鱿鱼游戏学习版.py:79> exception=ClientOSError(10053, '你的主机中的软件中止了一个已建立的连接。', None, 10053, None)>
Traceback (most recent call last):
File "C:/python/Practice/爬虫第一阶段/08爬取云播TV鱿鱼游戏/08爬取云播TV鱿鱼游戏学习版.py", line 80, in get_download_ts
async with session.get(line) as requ:
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
self._resp = await self._coro
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\client.py", line 559, in _request
await resp.start(conn)
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\client_reqrep.py", line 898, in start
message, payload = await protocol.read() # type: ignore[union-attr]
File "C:\python\Practice\爬虫第一阶段\venv\lib\site-packages\aiohttp\streams.py", line 616, in read
await self._waiter
aiohttp.client_exceptions.ClientOSError: [WinError 10053] 你的主机中的软件中止了一个已建立的连接。
1、.close(),关闭打开的连接
2、网上有搜到是因为 await挂起操作导致连接超时,尝试增加默认连接时间
1、能完整下载所有m3u8里的.ts文件
2、帮我理解相关问题
万分感谢
这个应该是连接数过多,用requests模块,aiohttp做爬虫很容易被反爬
请求过去频繁,时间间隔太短。将get_download_ts中休眠时间加长些,time.sleep(1)或者更长些,测试可以获取。
如有帮助,请点采纳。