爬虫成功(大概),但没有结果

python爬虫
运行成功(大概),但没有结果
https://www.fivb.com/en/volleyball/rankings/rankingseniorwomen

import requests
from bs4 import BeautifulSoup

def M23_info():
    """Scrape the FIVB senior women's ranking page and save the rows as text.

    Fetches the ranking page, selects the rows matched by the CSS selector
    ``.context-box-text tr`` and writes one comma-separated line per row to
    ``./世界排名/男排.txt``.  Each line holds the stripped text of the first
    seven ``<td>`` cells followed by the text of the last ``<td>`` cell
    (named RK, Teams, Points, RO, WGP, OG, WC, WCH in the original script).

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output file cannot be opened (for example when the
            ``./世界排名`` directory does not exist).
    """
    headers = {
        "user-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/107.0.0.0 Safari/537.36"
    }
    url = 'https://www.fivb.com/en/volleyball/rankings/rankingseniorwomen'
    # The headers were previously built but never sent; pass them so the
    # request carries the browser User-Agent.  The timeout keeps a stalled
    # connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # Collect every <tr> under elements carrying the class "context-box-text".
    tr_list = soup.select('.context-box-text tr')
    if not tr_list:
        # An empty match is why the script "succeeds" without output.
        # NOTE(review): the class name may be wrong or the table may be
        # rendered client-side -- confirm against the actual page source.
        print('未匹配到任何表格行,请检查选择器或页面是否由JavaScript动态渲染')
    # NOTE(review): the file name reads "men's volleyball" while the URL is
    # the women's ranking -- confirm the intended output path.
    with open('./世界排名/男排.txt', 'w+', encoding='utf-8') as f:
        # Skip the first and last matched rows, as the original loop did
        # (its comment says this drops the table's own navigation/header row).
        for tr in tr_list[1:-1]:
            td_list = tr.select('td')
            if len(td_list) < 7:
                # Rows without the expected cells (e.g. header rows built
                # from <th>) would otherwise raise IndexError.
                continue
            fields = [td.text.strip() for td in td_list[:7]]
            # The final column is taken from the end of the row, not index 7.
            fields.append(td_list[-1].text.strip())
            f.write(','.join(fields) + '\n')
    print('爬虫结束')
# Invoke the scraper at module level.
M23_info()

import requests
from bs4 import BeautifulSoup
 
def M23_info():
    """Scrape the FIVB senior women's ranking page and save the rows as text.

    Fetches the ranking page, selects the ``<tr>`` elements whose class
    attribute is exactly ``telerik-reTableOddRow-5`` and writes one
    comma-separated line per row to ``test.txt``.  Each line holds the
    stripped text of the first seven ``<td>`` cells followed by the text of
    the last ``<td>`` cell (named RK, Teams, Points, RO, WGP, OG, WC, WCH in
    the original script).

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output file cannot be opened.
    """
    headers = {
        "user-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/107.0.0.0 Safari/537.36"
    }
    url = 'https://www.fivb.com/en/volleyball/rankings/rankingseniorwomen'
    # The headers were previously built but never sent; pass them so the
    # request carries the browser User-Agent.  The timeout keeps a stalled
    # connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # Select only the rows tagged with the "odd row" table class.
    tr_list = soup.select('tr[class="telerik-reTableOddRow-5"]')
    if tr_list:
        # Debug output: number of matched rows and the text of the first one.
        print(len(tr_list), tr_list[0].get_text())
    else:
        # Indexing tr_list[0] on an empty match used to raise IndexError.
        # NOTE(review): the class name may differ or the table may be
        # rendered client-side -- confirm against the actual page source.
        print('未匹配到任何表格行,请检查选择器或页面是否由JavaScript动态渲染')
    with open('test.txt', 'w+', encoding='utf-8') as f:
        # NOTE(review): this slice skips the first and last matched rows, as
        # the original loop did.  With a selector that targets data rows only,
        # that probably drops two real entries -- confirm whether intended.
        for tr in tr_list[1:-1]:
            td_list = tr.select('td')
            if len(td_list) < 7:
                # Rows without the expected cells would otherwise raise
                # IndexError.
                continue
            fields = [td.text.strip() for td in td_list[:7]]
            # The final column is taken from the end of the row, not index 7.
            fields.append(td_list[-1].text.strip())
            f.write(','.join(fields) + '\n')
    print('爬虫结束')
# Invoke the scraper at module level.
M23_info()

问题就出在选择条件上:tr_list = soup.select('.context-box-text tr')。爬虫常遇到的问题就是网页的一些选择条件(类名、结构等)经常变更,导致爬虫失效。

  • 把选择器改成 tr_list = soup.select('tr[class="telerik-reTableOddRow-5"]') 试试,这样选中的是奇数行
  • tr_list = soup.select('tr[class="telerik-reTableEvenRow-5"]') 选中的是偶数行;偶数行与奇数行的 class 不同,需要相应调整代码

img


并且实际上,我只想要以上这些国家名称和排名数据,不需要导航栏和其他内容。奈何前端学得稀烂,不太会。

把 bs 的 select 选择器换成 .telerik-reTable-5 tr 试试(类选择器前面要加点号),感觉你的样式类名写错了