"""Scrape the image URLs from the qiushibaike "imgrank" listing page."""
import re

# Each picture on the listing page is wrapped in markup of this shape:
#   <div class="thumb">
#   <a href="/article/124293385" target="_blank">
#   <img src="//pic.qiushibaike.com/system/pictures/12429/124293385/medium/XEYQOYMNVEQP8RA1.jpg" alt="..." class="illustration" width="100%" height="auto">
#   </div>
# The single capture group is the value of the <img> ``src`` attribute.
# The attribute following ``src`` is ``alt`` (the original pattern said
# ``atl`` and therefore never matched).  A raw string keeps any future
# backslash escapes from being interpreted by Python before ``re`` sees
# them, and ``re.S`` lets ``.`` cross the newlines between the tags.
IMG_SRC_PATTERN = re.compile(
    r'<div class="thumb">.*?<img src="(.*?)" alt.*?</div>',
    re.S,
)


def extract_img_srcs(page_text):
    """Return the ``src`` of every thumbnail image found in *page_text*.

    Args:
        page_text: HTML source of the listing page.

    Returns:
        A list of ``src`` attribute values in document order; empty when
        no ``<div class="thumb">`` block is present.
    """
    return IMG_SRC_PATTERN.findall(page_text)


def main():
    """Download the listing page and print the image URLs it contains."""
    # Imported here so that ``extract_img_srcs`` stays importable (and
    # testable) on machines without the third-party ``requests`` package.
    import requests

    url = 'https://www.qiushibaike.com/imgrank/'
    # Present a regular browser User-Agent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/90.0.4430.85 Safari/537.36'
    }
    # Without a timeout ``requests`` waits forever on an unresponsive server.
    page_text = requests.get(url=url, headers=headers, timeout=10).text
    img_src_list = extract_img_srcs(page_text)
    print(img_src_list)


if __name__ == '__main__':
    main()
正则表达式中的 `atl` 应为 `alt`,题主写错了,所以匹配不到任何结果。另外,正则表达式字符串前面加 `r`(原始字符串),可以避免反斜杠转义带来的很多麻烦,是一个好习惯。
啥问题?