```python
import requests
from bs4 import BeautifulSoup
def get_page_source(url, timeout=10):
    """Download *url* and return its body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before requests gives up,
            so an unresponsive server cannot hang the crawl indefinitely.

    Returns:
        The response body as a string, decoded with the 'gbk' codec.
    """
    # The header name must be spelled exactly "User-Agent". The previous
    # key "User - Agent" (with spaces) was a different, meaningless header,
    # so the browser identification never reached the server as intended.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/111.0.0.0 Safari/537.36"
        ),
    }
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Force the charset before reading .text so the body is decoded as GBK.
    resp.encoding = 'gbk'
    return resp.text
def _node_text(node):
    """Return the text of a bs4 node, or "" when the lookup found nothing."""
    return node.text if node is not None else ""


def _node_attr(node, name):
    """Return attribute *name* of a bs4 node, or "" when node/attribute is missing."""
    if node is None:
        return ""
    return node.get(name) or ""


def parser_source(html):
    """Parse a search-result page and return one record per listed book.

    ``find()`` returns ``None`` when an element is absent, so every lookup
    goes through a None-safe helper instead of dereferencing the result
    directly; a book that lacks a field gets "" for it rather than raising
    ``AttributeError: 'NoneType' object has no attribute 'text'``.

    Args:
        html: Page source as a string.

    Returns:
        A list of dicts with the keys ``link``, ``bookname``, ``author``,
        ``date``, ``press``, ``price``, ``ebookprice`` and ``comment``.
        The list is empty when the page has no ``<ul class="bigimg">``.
    """
    page = BeautifulSoup(html, 'html.parser')
    ul = page.find("ul", attrs={"class": "bigimg"})
    if ul is None:
        # No result list on this page (empty search or unexpected markup).
        return []

    books = []
    for li in ul.find_all("li"):
        # Author and publication date live in the same <p> element.
        author_line = li.find("p", attrs={"class": "search_book_author"})
        if author_line is None:
            author = ""
            date = ""
        else:
            author = _node_attr(
                author_line.find("a", attrs={'dd_name': '单品作者'}), "title"
            )
            spans = author_line.find_all("span")
            # The date is read from the second <span>; guard against fewer.
            date = spans[1].text if len(spans) > 1 else ""

        href = _node_attr(li.find("a", attrs={"class": "pic"}), "href")

        books.append({
            # The href is prefixed with the scheme, as in the original code.
            "link": "http:" + href if href else "",
            # Book title comes from the cover image's alt text.
            "bookname": _node_attr(li.find("img"), "alt"),
            "author": author,
            "date": date,
            # Publisher.
            "press": _node_text(li.find("a", attrs={'dd_name': '单品出版社'})),
            # Book price.
            "price": _node_text(li.find("span", attrs={"class": "search_pre_price"})),
            # E-book price, taken from the first <i> element in the item.
            "ebookprice": _node_text(li.find("i")),
            # Number of comments.
            "comment": _node_text(li.find("a", attrs={"dd_name": "单品评论"})),
        })
    return books
def main(i):
    """Fetch and parse page *i* of the search results, then report progress.

    Args:
        i: 1-based page number; it is used as the ``page_index`` query
            parameter and in the progress message.
    """
    # Interpolate the page number: the URL previously hard-coded
    # page_index=1, so every call re-fetched the first page.
    url = f'http://search.dangdang.com/?key=%D0%A1%CB%B5&act=input&page_index={i}'
    html = get_page_source(url)
    parser_source(html)
    print(f"第{i}页爬取完成")
if __name__ == '__main__':
    for i in range(1, 100):
        main(i)
        # NOTE(review): this unconditional break stops the crawl after the
        # first page; remove it to walk every page in range(1, 100).
        break
    # The trailing f.close() was removed: no file object `f` is opened
    # anywhere in this script, so that call raised NameError.
AttributeError: 'NoneType' object has no attribute 'text'
已经几个小时了,还没有解决这个问题。没有数据时就会报上面的错误,该怎么解决?
# Alternative suggested in the reply: use XPath to collect the title
# attribute of the <a> elements directly under each <li> of the element
# whose class is "bigimg".
# NOTE(review): `res` is not defined in this snippet; presumably a response
# object exposing `.html.xpath` (e.g. from requests_html) — confirm.
name=res.html.xpath('//*[@class="bigimg"]/li/a/@title')
print(name)
应该是定位标签元素没有成功,定位到的是空值:`find()` 没有匹配到元素时会返回 None,再对它取 `.text` 就会报这个错。因地制宜,bs4 匹配失败的话,可以先判断返回值是否为 None,也可以尝试使用 XPath 或者正则表达式来匹配。