关于#网站#的问题,如何解决?

import requests
from bs4 import BeautifulSoup
from lxml import etree
import time

爬取网站图片

# Scrape image page URLs from the listing page, then extract the image src
# from each detail page.
# NOTE(review): reconstructed the two truncated URL literals from the
# accepted answer below — confirm they match the intended site.
lj = "https://www.chenbie.com"          # site root, prepended to relative hrefs
url = "https://www.chenbie.com/mm/"     # listing page with the <ul id="pins"> gallery

resp0 = requests.get(url)
resp0.raise_for_status()                # fail fast on 404/5xx instead of parsing an error page
resp0.encoding = "utf-8"

# Collect the absolute URL of every gallery detail page.
tp_list = []
img_list = BeautifulSoup(resp0.text, "html.parser")
pins = img_list.find("ul", id="pins")
if pins is not None:                    # guard: page layout may change / request may be blocked
    for img in pins.find_all("a"):
        href = img.get("href")
        if href:                        # skip anchors without an href
            tp_list.append(lj + href)   # join root + relative path into a full URL

# Visit each detail page and print the image sources found by the XPath.
for i in tp_list:
    resp1 = requests.get(i)
    resp1.encoding = "utf-8"
    tree = etree.HTML(resp1.text)
    # Absolute XPath is brittle — tied to the exact page structure.
    divs = tree.xpath("/html/body/div[3]/div[1]/div[3]/p/a/img/@src")
    print(divs)
    time.sleep(0.5)                     # be polite: throttle requests to the server

1.网址错误,应该分别是:
lj = "https://www.chenbie.com"
url = "https://www.chenbie.com/mm/"


2.解析器选择不正确,img_list = BeautifulSoup(resp0.text, "html.parser")改成
img_list = BeautifulSoup(resp0.text, "lxml")
完整代码:

import requests
from bs4 import BeautifulSoup


# Fetch the listing page and print the lazy-loaded image URLs
# (stored in the `data-original` attribute of each <img>).
url = "https://www.chenbie.com/mm/"
resp0 = requests.get(url, headers={
                     # Browser-like UA so the site does not reject the request.
                     'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36 Edg/93.0.961.38'})
resp0.raise_for_status()       # surface 404/5xx instead of parsing an error page
resp0.encoding = "utf-8"
img_list = BeautifulSoup(resp0.text, "lxml")
# Guard with has_attr so an <img> missing data-original is skipped
# instead of raising KeyError.
imgs = [x['data-original']
        for x in img_list.select('#pins  li a img')
        if x.has_attr('data-original')]
print(imgs)
['https://pic.chenbie.com/d/file/titlepic/2021/03/24/11/65b53534a3.jpg', 'https://pic.chenbie.com/d/file/titlepic/2021/03/24/11/9f4a3cf03a_副本.jpg', ...]

如有帮助,请点采纳。

你这网页地址打不开啊

具体你碰到了什么问题?