希望大家帮帮我:想爬取校园网站,但不知道哪里出错了,返回结果不对,希望学霸帮忙看看。

import requests
from bs4 import BeautifulSoup
import requests as requets
def GetHTMLText(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    # The 30 second timeout keeps an unresponsive server from blocking forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Force UTF-8 decoding instead of relying on the encoding requests guesses.
    response.encoding = "utf-8"
    return response.text
def maskSoup(url):
    """Scrape one news article and append its title and body to a text file.

    The page is downloaded with ``GetHTMLText`` and parsed with
    BeautifulSoup. Every (title, body) pair found is printed as a dict and
    appended to ``guiyuan_news.txt`` as one ``key:value`` line per field.

    Args:
        url: Address of the article page.
    """
    guiyuan_news = GetHTMLText(url)
    soup = BeautifulSoup(guiyuan_news, "html.parser")

    # Several classes on the same element are chained with dots. Separating
    # them with spaces is a descendant selector that looks for <pt20> and
    # <mt5> tags, which matches nothing.
    # NOTE(review): the whole title container is selected rather than an
    # inner <h1>, because the page markup is not visible here - confirm.
    titles = soup.select(".c_title.pt20.mt5")
    bodies = soup.select("div.v_news_content")

    # The context manager guarantees the file is flushed and closed.
    with open("guiyuan_news.txt", "a+", encoding="utf-8") as fp:
        for title, body in zip(titles, bodies):
            data = {
                "题目": title.get_text().strip(),
                "内容": body.get_text().strip(),
            }
            print(data)
            # Passing the dict itself to writelines() would write only its
            # keys, so each field is formatted explicitly.
            fp.writelines(
                "{}:{}\n".format(key, value) for key, value in data.items()
            )
# Run the scraper only when this file is executed as a script. The guard must
# compare the dunder name __name__ with '__main__'; a bare `name` is undefined.
if __name__ == '__main__':
    url = "https://www.gxljcollege.cn/info/1012/33907.htm"
    maskSoup(url)

img

问题有些多,已经改好了,修改后的代码如下:


import requests
from bs4 import BeautifulSoup
def GetHTMLText(url):
    """Download ``url`` with a browser-like User-Agent and return its text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    # Send a desktop-browser User-Agent instead of the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 SLBrowser/7.0.0.12151 SLBChan/30'}
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    # Force UTF-8 decoding instead of relying on the encoding requests guesses.
    response.encoding = "utf-8"
    return response.text
def maskSoup(url):
    """Scrape one news article and append its title and body to a text file.

    The page is downloaded with ``GetHTMLText`` and parsed with
    BeautifulSoup. The first matched title is printed, then every
    (title, body) pair is printed as a dict and appended to
    ``guiyuan_news.txt`` as one ``key:value`` line per field.

    Args:
        url: Address of the article page.
    """
    guiyuan_news = GetHTMLText(url)
    soup = BeautifulSoup(guiyuan_news, "html.parser")

    # Attribute selector: matches elements whose class attribute is exactly
    # this string.
    titles = soup.select('[class="c_title pt20 mt5"]')
    # Guard the preview so a page without a matching title does not raise
    # IndexError.
    if titles:
        print(titles[0].get_text())
    bodies = soup.select("div.v_news_content")

    # The context manager guarantees the file is flushed and closed.
    with open('guiyuan_news.txt', "a+", encoding="utf-8") as fp:
        for body, title in zip(bodies, titles):
            data = {
                "题目": title.get_text().strip(),
                "内容": body.get_text().strip(),
            }
            print(data)
            for key, value in data.items():
                fp.write("{}:{}\n".format(key, value))
# Script entry point: scrape the single hard-coded news article.
if __name__ == "__main__":
    url = "https://www.gxljcollege.cn/info/1012/33907.htm"
    maskSoup(url)