仿写了一个爬取网站最新通知的标题和日期的代码,但不知道为什么不显示结果,求指正!
import requests
from bs4 import BeautifulSoup
import csv
ur="https://edu.nju.edu.cn/_s297/8782/list.psp"
def get_one_page(ur1):
headers= {
'User-Agent': 'Mozilla/5.0 (Linux; Android 4.1.2; Nexus 7 Build/JZ054K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
}
try:
info_list_page=[]
resp=requests.get(ur1,headers=headers)
resp.encoding=resp.status_code
page_text=resp.text
soup=BeautifulSoup(page_text,"lxml")
tr_list=soup.select(".news_list > table > tr")
for tr in tr_list:
tds=tr.select("td")
title=tds[0].a.title
date=tds[1].string
info=[date,title]
info_list_page.append(info)
print(info_list_page)
想要输出最新通知公告的日期+标题
修改:代码是:
import requests
from bs4 import BeautifulSoup
import csv
ur="https://edu.nju.edu.cn/_s297/8782/list.psp"
def get_one_page(ur1):
headers= {
'User-Agent': 'Mozilla/5.0 (Linux; Android 4.1.2; Nexus 7 Build/JZ054K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
}
try:
info_list_page=[]
resp=requests.get(ur1,headers=headers)
resp.encoding=resp.status_code
page_text=resp.text
soup=BeautifulSoup(page_text,"lxml")
tr_list=soup.select(".news_list > table > tr")
for tr in tr_list:
tds=tr.select("td")
title=tds[0].a.title
date=tds[1].string
info=[date,title]
info_list_page.append(info)
print(info_list_page)
except Exception as e:
print('爬取'+ur1+"错误")
print(e)
return None
else:
resp.close()
print('爬取'+ur1+"成功")
return info_list_page
print(get_one_page(ur))