import os
import re
import time
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin

import requests
def main():
    """Run the whole pipeline: locate the playlist, save it, download segments."""
    # Step 1 + 2: scrape the playlist URL and store the playlist on disk.
    get_m3u8_file(get_m3u8_url())
    # Step 3: fetch every segment listed in the stored playlist.
    get_video()
def get_m3u8_url(timeout=30):
    """Scrape the episode page and return the m3u8 playlist URL found in it.

    Args:
        timeout: Seconds to wait for the server to connect/respond before
            giving up, so a stalled connection cannot hang the program.

    Returns:
        The playlist URL as a string, with all backslashes removed.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an HTTP error status.
        ValueError: If the page does not contain the expected player data.
    """
    page_url = "https://www.yunbtv.net/vodplay/youyuyouxi-1-1.html"
    pattern = re.compile(
        r'"link_pre":"","url":"(?P<m3u8_url>.*?)","url_next', re.S
    )

    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of searching an error page.
    response.raise_for_status()

    match = pattern.search(response.text)
    if match is None:
        # Without this check the failure would surface as an opaque
        # AttributeError on None.
        raise ValueError("m3u8 url not found in page: {}".format(page_url))

    # NOTE(review): presumably the URL is embedded with escaped slashes
    # ("\/"); stripping the backslashes yields a usable URL -- confirm.
    return match.group("m3u8_url").replace("\\", "")
def get_m3u8_file(m3u8_url, timeout=30):
    """Download the playlist at *m3u8_url* and replace it with the playlist it references.

    The first playlist is saved to ``鱿鱼游戏/m3u8/1.m3u8``. Every
    non-comment, non-blank line in it is treated as the URI of a second
    playlist, which is downloaded and written over the same file (if
    several URIs are listed, the last one wins).

    Args:
        m3u8_url: URL of the first playlist.
        timeout: Seconds to wait for the server to connect/respond before
            giving up on a request.

    Raises:
        requests.RequestException: If a download fails or returns an HTTP
            error status.
    """
    playlist_path = "鱿鱼游戏/m3u8/1.m3u8"
    # Create the target directory up front so open() cannot fail on it.
    os.makedirs(os.path.dirname(playlist_path), exist_ok=True)

    first_response = requests.get(m3u8_url, timeout=timeout)
    first_response.raise_for_status()
    with open(playlist_path, mode="wb") as f_1:
        f_1.write(first_response.content)

    # Read all entries and close the file BEFORE overwriting it; truncating
    # a file that is still being iterated is fragile.
    with open(playlist_path, mode="r", encoding="utf-8") as f_2:
        stripped_lines = [raw_line.strip() for raw_line in f_2]
    # Lines starting with "#" are playlist tags/comments; blank lines carry
    # no URI and must not be turned into a request.
    entries = [
        entry for entry in stripped_lines
        if entry and not entry.startswith("#")
    ]

    for entry in entries:
        # Example first-playlist URL:
        # "https://vod8.wenshibaowenbei.com/20210917/l7VYklhA/index.m3u8"
        # urljoin resolves host-absolute paths ("/2021.../index.m3u8") to the
        # same URL as splitting on the hard-coded "/20210917" did, and also
        # handles relative and fully-qualified URIs.
        second_url = urljoin(m3u8_url, entry)
        second_response = requests.get(second_url, timeout=timeout)
        second_response.raise_for_status()
        with open(playlist_path, mode="wb") as f_3:
            f_3.write(second_response.content)
def get_video():
    """Download every segment listed in the saved playlist using a thread pool.

    Reads ``鱿鱼游戏/m3u8/1.m3u8``, skips tag/comment lines (starting with
    "#") and blank lines, and submits one ``get_download_ts`` task per
    remaining line, numbering the segments from 1 in playlist order.

    Failures inside worker threads are reported on stdout after all tasks
    have finished; previously they were silently discarded with the unused
    futures.
    """
    with open("鱿鱼游戏/m3u8/1.m3u8", mode="r", encoding="utf-8") as f:
        stripped_lines = [raw_line.strip() for raw_line in f]
    segment_urls = [
        entry for entry in stripped_lines
        if entry and not entry.startswith("#")
    ]

    with ThreadPoolExecutor(50) as threadpool:
        futures = [
            (i, threadpool.submit(get_download_ts, segment_url, i))
            for i, segment_url in enumerate(segment_urls, start=1)
        ]
    # Leaving the executor's with-block waits for every submitted task, so
    # all futures are finished here.

    for i, future in futures:
        error = future.exception()
        if error is not None:
            # Best-effort download: report the failed segment, keep going.
            print("第1集/{}.ts下载失败: {!r}".format(i, error))
def get_download_ts(line, i, timeout=30):
    """Download one segment and save it as ``鱿鱼游戏/ts/第1集/<i>.ts``.

    Args:
        line: URL of the segment to download.
        i: Number used for the output file name and the progress message.
        timeout: Seconds to wait for the server to connect/respond before
            giving up. Without a timeout a single stalled connection blocks
            its worker thread forever and the program never exits.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an HTTP error status.
    """
    ts_path = "鱿鱼游戏/ts/第1集/{}.ts".format(i)
    os.makedirs(os.path.dirname(ts_path), exist_ok=True)

    # Fetch first and open the file only on success, so a failed request
    # does not leave an empty .ts file behind.
    response = requests.get(line, timeout=timeout)
    response.raise_for_status()
    with open(ts_path, mode="wb") as f:
        f.write(response.content)

    print("第1集/{}.ts下载完成---".format(i))
    # NOTE(review): presumably a small pause to throttle requests -- confirm.
    time.sleep(0.3)
    # Example key URL noted by the author:
    # "https://ts8.hhmm0.com:9999/20210917/ahQ89ILO/1000kb/hls/key.key"
    # NOTE(review): presumably the segments are encrypted with this key;
    # nothing here decrypts them -- confirm before merging the .ts files.
if __name__ == "__main__":
    # Script entry point: run the pipeline only when executed directly.
    # To time a run, record time.time() before and after main() and print
    # the elapsed seconds.
    main()
程序没有报错,也没有输出最终结果,并且一直不停止;但所有ts文件都已经下载完成。
第1集/909.ts下载完成---
第1集/898.ts下载完成---
第1集/1036.ts下载完成---
第1集/700.ts下载完成---
完整运行
ts文件太多了,需要跑很久:一个两小时的视频,每个ts文件只有两三秒到五六秒,所以大概有好几千个ts文件需要爬取。如果反爬处理都没有问题,就让它慢慢跑吧。有帮助的话采纳一下哦🙈🙈🙈