Python 爬虫
程序运行时没有报错(大概算运行成功),但没有抓取到任何结果。
目标页面:https://www.fivb.com/en/volleyball/rankings/rankingseniorwomen
import requests
from bs4 import BeautifulSoup
def M23_info():
    """Scrape the FIVB senior women's ranking page and save the rows as text.

    Downloads the ranking page, selects table rows with the CSS selector
    ``.context-box-text tr`` and writes one comma-separated line per row to
    ``./世界排名/男排.txt``.  The target directory must already exist, because
    ``open`` does not create missing directories.  Prints ``爬虫结束`` when
    finished.

    Each output line holds the stripped text of the first seven cells plus
    the last cell of the row (named RK, Teams, Points, RO, WGP, OG, WC and
    WCH in the original code).

    NOTE(review): the output file name means "men's volleyball" while the URL
    is the women's ranking -- confirm which one is intended.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        "user-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/107.0.0.0 Safari/537.36"
        )
    }
    url = 'https://www.fivb.com/en/volleyball/rankings/rankingseniorwomen'

    # The headers used to be built but never sent; pass them so the request
    # actually carries the browser User-Agent.  The timeout keeps a stalled
    # connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Surface HTTP errors instead of silently parsing an error page, which
    # would look like "ran fine but produced no results".
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Collect every <tr> below an element with class "context-box-text".
    tr_list = soup.select('.context-box-text tr')

    # The context manager guarantees the file is flushed and closed, even if
    # a row fails to parse.
    with open('./世界排名/男排.txt', 'w+', encoding='utf-8') as f:
        # Skip the first matched row (the table's own header row) and, as the
        # original index range did, the last matched row as well.
        for tr in tr_list[1:-1]:
            td_list = tr.select('td')
            # Rows without the seven expected data cells (e.g. header rows
            # built from <th>) cannot be unpacked; skip them instead of
            # crashing with IndexError halfway through the file.
            if len(td_list) < 7:
                continue
            fields = [td.text.strip() for td in td_list[:7]]
            fields.append(td_list[-1].text.strip())
            f.write(','.join(fields) + '\n')

    print('爬虫结束')
# Run the scraper when this script is executed.
M23_info()
import requests
from bs4 import BeautifulSoup
def M23_info():
    """Scrape the FIVB senior women's ranking page and save the rows as text.

    Downloads the ranking page, selects the ``<tr>`` elements whose ``class``
    attribute is exactly ``telerik-reTableOddRow-5`` and writes one
    comma-separated line per row to ``test.txt`` in the current working
    directory.  Prints the number of matched rows together with the text of
    the first one as a quick diagnostic, and ``爬虫结束`` when finished.

    Each output line holds the stripped text of the first seven cells plus
    the last cell of the row (named RK, Teams, Points, RO, WGP, OG, WC and
    WCH in the original code).

    NOTE(review): the selector only matches rows carrying the "OddRow" class;
    presumably every second table row is therefore missing from the output --
    confirm against the page markup.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        "user-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/107.0.0.0 Safari/537.36"
        )
    }
    url = 'https://www.fivb.com/en/volleyball/rankings/rankingseniorwomen'

    # The headers used to be built but never sent; pass them so the request
    # actually carries the browser User-Agent.  The timeout keeps a stalled
    # connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Surface HTTP errors instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Attribute-equals selector: matches only <tr> elements whose class
    # attribute is exactly this value.
    tr_list = soup.select('tr[class="telerik-reTableOddRow-5"]')

    # Diagnostic output.  Guard the index so an empty match reports
    # "0" instead of raising IndexError before anything is written.
    first_row_text = tr_list[0].get_text() if tr_list else ''
    print(len(tr_list), first_row_text)

    # The context manager guarantees the file is flushed and closed, even if
    # a row fails to parse.
    with open('test.txt', 'w+', encoding='utf-8') as f:
        # Skip the first and the last matched row, exactly as the original
        # index range did (its comment says this drops the table's own
        # header row).
        for tr in tr_list[1:-1]:
            td_list = tr.select('td')
            # Rows without the seven expected data cells cannot be unpacked;
            # skip them instead of crashing with IndexError.
            if len(td_list) < 7:
                continue
            fields = [td.text.strip() for td in td_list[:7]]
            fields.append(td_list[-1].text.strip())
            f.write(','.join(fields) + '\n')

    print('爬虫结束')
# Run the scraper when this script is executed.
M23_info()
问题就出在选择条件上:tr_list = soup.select('.context-box-text tr')。
爬虫常遇到的问题就是网页的一些选择条件(结构、class 名称)经常变更,导致爬虫失效。
tr_list = soup.select('tr[class="telerik-reTableOddRow-5"]')
改成这个试试,它选择的是单数行。
tr_list = soup.select('tr[class="telerik-reTableEvenRow-5"]')
这个选择的是偶数行;偶数行与单数行的 class 不同,需要相应调整代码。
另一个建议:把 bs 的 select 换成 telerik-reTable-5 tr 试试(作为 class 选择器应写成 '.telerik-reTable-5 tr'),感觉你的样式类名写错了。