import requests
from bs4 import BeautifulSoup
allUniv = []
def getHTMLText(url):
try:
r = requests.get(url, timeout=30)
r.raise_for_status()
r.encoding = 'utf-8'
return r.text
except:
return ""
def fillUnivList(soup):
data = soup.find_all('tr')
for tr in data:
ltd = tr.find_all('td')
if len(ltd)==0:
continue
singleUniv = []
for td in ltd:
singleUniv.append(td.string)
allUniv.append(singleUniv)
def printUnivList(num):
print("{:4}{:10}{:5}{:8}{:10}".format("排名","学校名称","省市","学校类型","总分"))
for i in range(num):
u=allUniv[i]
print("{:4}{:10}{:5}{:8}{:10}".format(u[0],u[1],u[2],u[3],u[4]))
def main():
url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2020.html'
html = getHTMLText(url)
soup = BeautifulSoup(html, "html.parser")
fillUnivList(soup)
printUnivList(100)
main()
源代码是这样,怎么解决呀0.0
IndexError: list index out of range
数组越界异常了,看下具体报错的代码行,是不是使用了超过列表最大的索引。
你这个http://www.zuihaodaxue.cn/zuihaodaxuepaiming2020.html 地址是 404 错误,地址不存在
getHTMLText(url):函数返回的是except:中return ""