这个 XPath 表达式哪里出错了?我无法爬取到网页上的内容。
```python
import requests
import pandas as pd
from lxml import etree
from bs4 import BeautifulSoup
# Question snippet: request each result page and print what the XPath below matches.
# Browser-like User-Agent header sent with every request.
headers={'User-Agent':'Mozilla/5.0(Windows NT 10.0;win64;x64)ApplewebKit/537.36(KHTML,like Gecko)Chrome/78.0.3904.108Safari/537.36'}
# Iterate over page numbers 1 through 25; the number fills the trailing "{}" of the URL.
# NOTE(review): the loop body below is shown without indentation, presumably lost when
# the snippet was pasted; as written Python rejects it. Confirm against the original post.
for i in range(1,26):
url=r"http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-recent30-0-0-1-{}".format(i)
# Fetch the page with the headers above and timeout=10.
html_date=requests.get(url,headers=headers,timeout=10)
html_text=html_date.text
#print(html_text)
# Parse the response text into an lxml element tree.
html=etree.HTML(html_text)
# NOTE(review): the last step, img[@class='alt'], selects <img> elements whose class
# attribute equals 'alt'; it does not read the alt attribute (that would be .../img/@alt).
# The absolute, position-based path also depends on the exact page layout.
print(html.xpath("/html/body/div[3]/div[3]/div[2]/ul/li[1]/div[2]/a/img[@class='alt']"))
```
import requests
import pandas as pd
from lxml import etree
from bs4 import BeautifulSoup
# Print the alt text of every <img> in the result list, for pages 1 through 25.
# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 10.0;win64;x64)ApplewebKit/537.36(KHTML,like Gecko)Chrome/78.0.3904.108Safari/537.36'
}

# The page number fills the trailing "{}" of the URL.
for i in range(1, 26):
    url = "http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-recent30-0-0-1-{}".format(i)
    html_date = requests.get(url, headers=headers, timeout=10)
    html_text = html_date.text
    html = etree.HTML(html_text)
    # Select by class names taken from the page source instead of by absolute
    # position, and read the alt attribute with /@alt (not an [@class=...] test).
    print(html.xpath('//ul[@class="bang_list clearfix bang_list_mode"]/li/div[@class="pic"]/a/img/@alt'))
代码看起来没有出错,但可能是网页源代码中并没有提供你想要爬取的信息,或者你的 XPath 表达式没有正确指向你想要爬取的信息。
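`html.xpath("/html/body/div[3]/div[3]/div[2]/ul/li[1]/div[2]/a/img[@class='alt']")` 这个表达式不对:`img[@class='alt']` 筛选的是 class 属性等于 'alt' 的 img 元素,而不是读取 img 的 alt 属性(读取属性应写成 `img/@alt`)。请对照网页源代码检查一下。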
import requests
from lxml import etree
# Print the text of the matched links (book titles) for result pages 1 through 25.
# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 10.0;win64;x64)ApplewebKit/537.36(KHTML,like Gecko)Chrome/78.0.3904.108Safari/537.36'
}

for i in range(1, 26):
    url = f"http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-recent30-0-0-1-{i}"
    html_date = requests.get(url, headers=headers, timeout=10).text
    html = etree.HTML(html_date)
    # The text() query already yields a list of strings, so it is iterated
    # directly instead of being copied through an identity comprehension.
    titles = html.xpath("//li/div[3]/a/text()")
    for name in titles:
        print(f'书名:{name}')
        # NOTE(review): the snippet's indentation was lost; the separator is
        # assumed to follow every title. Dedent it one level if it was meant
        # to be printed once per page.
        print('-'*60)