import requests
from bs4 import BeautifulSoup
import time
a=time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
print(a)
for i in range(1, 700):
    baseurl = "https://www.nfmovies.com/list/{}"
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
               "cookie": "UM_distinctid=17b6e6bcbb8326-071f159c8878cc-2343360-1fa400-17b6e6bcbb9c9e; _ga=GA1.2.1305466438.1629645164; say=hbnl1.85.41.44; PHPSESSID=28brtg9cisppcubj0flmpj74k4; first_h=1632664371199; count_m=1; _gid=GA1.2.1111362904.1632664371; __music_index__=2; CNZZDATA1277238851=2006611711-1629641537-https%253A%252F%252Fwww.nfmovies.com%252F%7C1632667258; count_h=3; first_m=1632669512872; _gat_gtag_UA_170166758_1=1"}
    url = baseurl.format(i)
    response = requests.get(url, headers=headers)
    # Parse the list page, then grab every movie thumbnail and detail block
    response1 = BeautifulSoup(response.text, "html.parser")
    response2 = response1.find("ul", attrs={"class": "myui-vodlist clearfix"}).find_all(class_="myui-vodlist__thumb lazyload")
    response3 = response1.find("ul", attrs={"class": "myui-vodlist clearfix"}).find_all(class_="myui-vodlist__detail")
    for p in response2:
        # Each thumbnail is an <a> tag: href holds the relative detail link, title holds the movie name
        v = "https://www.nfmovies.com" + str(p.get('href'))
        b = p.get("title")
        print(str(b) + " " + v)
When you write code, try not to string together a long chain like a.b.c.d.e, especially when you are not even sure what a actually is.
Since a is fetched from the network, there is a real chance it simply does not contain the tag you are looking for.
You need to check after every find whether it actually found anything, rather than assuming it always will.
The simplest option is to put a try around the code; otherwise, split the chain into separate steps instead of joining everything with ., and check each step for None before using it, as in the sketch below.
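A minimal sketch of the split-up version, reusing the OP's URL and class names; the page_movies helper and the skip message are only illustrative, not part of the original code:

import requests
from bs4 import BeautifulSoup

def page_movies(page):
    """Return (title, url) pairs for one list page, or [] if the page lacks the expected markup."""
    url = "https://www.nfmovies.com/list/{}".format(page)
    response = requests.get(url, headers={"user-agent": "Mozilla/5.0"})
    soup = BeautifulSoup(response.text, "html.parser")

    # Step 1: find the <ul> that holds the list; it may be missing on error or anti-bot pages
    vod_list = soup.find("ul", attrs={"class": "myui-vodlist clearfix"})
    if vod_list is None:
        print("page {}: no movie list found, skipping".format(page))
        return []

    # Step 2: only now search inside it for the thumbnail links
    thumbs = vod_list.find_all(class_="myui-vodlist__thumb lazyload")
    results = []
    for a_tag in thumbs:
        href = a_tag.get("href")
        title = a_tag.get("title")
        # Step 3: either attribute may be absent, so check before building the URL
        if href is None or title is None:
            continue
        results.append((title, "https://www.nfmovies.com" + href))
    return results

for page in range(1, 700):
    for title, link in page_movies(page):
        print(title, link)

Wrapping the original chained call in try/except AttributeError would also stop the crash, but it hides which step returned None; checking each step explicitly tells you exactly which page lacked the expected markup.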