初学网络爬虫,运行报错,修改几次都不行,总是报错。
代码如下:
改成下面这样就可以了:
import requests
from bs4 import BeautifulSoup
import bs4
import csv
import urllib3
urllib3.disable_warnings()  # Silence urllib3 warnings; getHTMLText requests pages with verify=False
def getHTMLText(url):
    """Download *url* and return the response body as text.

    Certificate verification is switched off (``verify=False``) because the
    target site's SSL certificate is faulty and the request fails otherwise.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``''`` when the request fails or the server
        answers with an HTTP error status. In that case the message
        ``getHTMLText Error`` is printed.
    """
    try:
        r = requests.get(url, timeout=30, verify=False)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; anything else
        # (Ctrl-C, programming errors) is allowed to propagate.
        print('getHTMLText Error')
        return ''
    # Use the encoding guessed from the content instead of the header value.
    r.encoding = r.apparent_encoding
    return r.text
def fillUnivlist(ulist, html):
    """Append the first six cell values of each table row in *html* to *ulist*.

    Only rows that are direct children of the first ``<tbody>`` element are
    considered. Rows with fewer than six ``<td>`` cells are skipped.

    Args:
        ulist: List that receives one six-item list per table row; it is
            modified in place.
        html: HTML text to parse. May be empty.

    Returns:
        The same *ulist* object. It is returned unchanged when *html* holds
        no ``<tbody>`` element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # No table body, e.g. the download failed and html is '': nothing to add.
        return ulist
    for tr in tbody.children:
        # .children also yields text nodes between the rows; only tags hold cells.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all('td')
        if len(tds) < 6:
            # Too few data cells to fill the six expected columns.
            continue
        ulist.append([td.string for td in tds[:6]])
    return ulist
def printUnivList(ulinst, num):
    """Print the first *num* rows of *ulinst* as tab-separated columns.

    Each row is expected to hold at least six values. The first is
    left-aligned and the other five are centred, each in a field 8 wide.

    Args:
        ulinst: Sequence of rows, each an indexable sequence of cell values.
        num: Number of rows to print, starting at index 0.

    A row that is missing, too short, or holds a value that cannot be
    formatted (such as ``None``) is reported with the line
    ``printUnivList Error`` and the loop continues with the next row.
    """
    tplt = "{0:8}\t{1:^8}\t{2:^8}\t{3:^8}\t{4:^8}\t{5:^8}"
    for i in range(num):
        try:
            u = ulinst[i]
            print(tplt.format(u[0], u[1], u[2], u[3], u[4], u[5]))
        except (LookupError, TypeError):
            # LookupError: row or cell index out of range.
            # TypeError: value does not accept the width/alignment spec.
            print('printUnivList Error')
def writeUlistfile(ulist):
    """Write every row of *ulist* to the CSV file ``ulist1.csv``.

    The file is opened in ``'w'`` mode, so existing content is replaced.

    Args:
        ulist: Iterable of rows, each an iterable of cell values.
    """
    # newline='' leaves line-ending handling to the csv writer.
    with open('ulist1.csv', 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(ulist)
def main():
    """Fetch the report page, extract its table rows and print all of them."""
    url = 'https://www.chyxx.com/industry/202103/938485.html'
    page = getHTMLText(url)
    # fillUnivlist fills and returns the list handed to it.
    rows = fillUnivlist([], page)
    printUnivList(rows, len(rows))
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
您好,我是有问必答小助手,您的问题已经有小伙伴帮您解答,感谢您对有问必答的支持与关注!