# Question: this script only retrieves page 1's data — what must be added so it
# also fetches the subsequent pages? (Answer: parse each response inside the
# loop and accumulate rows across pages; see fixes below.)
import requests
from bs4 import BeautifulSoup
import pandas as pd
from lxml import etree
# HTTP request headers sent with every page fetch. Spoofing a desktop Chrome
# User-Agent makes the scraper look like a normal browser so the site is less
# likely to reject the request.
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
}
# Crawl all 8 pages of the donation list and save every row to one Excel file.
#
# Fixes relative to the original:
#  * `soup` was built once BEFORE the loop from an undefined `response`
#    (NameError; and even if defined, every page would re-parse stale HTML) —
#    each response must be parsed inside the loop, after its own fetch.
#  * The URL contained a stray '?' ("page=?%s"), so the page parameter was
#    malformed and the site always served page 1.
#  * `data = []` was reset every iteration and the Excel file was rewritten
#    inside the loop, so only one page's rows ever survived.
data = []  # accumulates rows from ALL pages
for page in range(0, 8):
    print("正在获取第%s页" % page)
    url = 'https://www.ynredcross.cn/donate/donatelist.jsp?page=%s' % page
    response = requests.get(url=url, headers=headers)
    # Parse THIS page's HTML — must happen after the fetch, inside the loop.
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table')
    if table is None:
        # No table on this page (presumably past the last page) — skip it.
        continue
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if cells:  # header rows use <th>, not <td>, and are skipped here
            data.append([cell.text.strip() for cell in cells])

# Write the combined result once, after the whole crawl finishes.
df = pd.DataFrame(data, columns=['name', 'use', 'money', 'date', 'bz'])
df.to_excel('donation2.xlsx', index=False)
df.head(100)