New to Python web scraping: the script runs without errors but produces no output?

import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
    try:
        r=request.get(url,timeout=30)
        r.raise_for_status()
        r.encoding=r.apparent_encoding
        return r.text
    except:
        return ""
    return ""
def fillUnivList(ulist,html):
    soup=BeautifulSoup(html,"html.parser")
    for tr in soup.find('tbody').children:
        if isinstance(tr,bs4.element.Tag):
            tds=th('td')
            ulist.append([tds[0].string,tds[1].string,tds[2].string])
    pass
def printUnivList(ulist,num):
    print("{:^10}\t{:^6}\t{:^10}".format("排名","学校名称","省市"))
    for i in range(num):
        u=ulist[i]
    print("{:^10}\t{:^6}\t{:^10}".format(u[0],u[1],u[2]))  
def main():
    uinfo=[]
    url="https://www.dxsbb.com/news/44368.html"
    html =getHTMLText(url)
    fillUnivList(uinfo,html)
    printUnivList(uinfo,30)
    main()

 

Problems with the code:

1. request is missing an s: it should be requests.get.

2. fillUnivList(ulist, html) has no return.

3. printUnivList(ulist, num) has no exception handling.

4. main() is never called; the call to main() belongs outside the main() function body.

(The corrected code below also fixes tds=th('td'): th is undefined there, it should be tr('td').)

Change it to this and it will work:

import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
    try:
        r=requests.get(url,timeout=30)
        r.raise_for_status()
        r.encoding=r.apparent_encoding
        return r.text
    except:
        return ""
    return ""
def fillUnivList(ulist,html):
    soup=BeautifulSoup(html,"html.parser")
    for tr in soup.find('tbody').children:
        if isinstance(tr,bs4.element.Tag):
            tds=tr('td')
            ulist.append([tds[0].string,tds[1].string,tds[2].string])
    return ulist
def printUnivList(ulist,num):
    print("{:^10}\t{:^6}\t{:^10}".format("排名","学校名称","省市"))
    for i in range(num):
        try:
            u=ulist[i]
            print("{:^10}\t{:^6}\t{:^10}".format(u[0],u[1],u[2]))
        except:
            pass  
def main():
    uinfo=[]
    url="https://www.dxsbb.com/news/44368.html"
    html =getHTMLText(url)
    ulist=fillUnivList(uinfo,html)
    printUnivList(ulist,30)
if __name__=='__main__':
    main()
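
One remaining caveat (my note, not part of the answer above): if getHTMLText() fails and returns "", then soup.find('tbody') is None and iterating over None.children raises an AttributeError. A slightly more defensive fillUnivList, as a drop-in replacement, could look like this:

def fillUnivList(ulist,html):
    soup=BeautifulSoup(html,"html.parser")
    tbody=soup.find('tbody')
    if tbody is None:          # download failed or the page has no <tbody>
        return ulist
    for tr in tbody.children:
        if isinstance(tr,bs4.element.Tag):
            tds=tr('td')       # shorthand for tr.find_all('td')
            if len(tds)>=3:    # skip rows that do not have three cells
                ulist.append([tds[0].string,tds[1].string,tds[2].string])
    return ulist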
try:
    r=request.get(url,timeout=30)
    r.raise_for_status()
    r.encoding=r.apparent_encoding
    return r.text
except:
    return ""


No error is reported because the try block catches the exception. Your request call is missing an s: it should be requests.get, not request.get.
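
To see why the script stays completely silent, here is a minimal sketch (my illustration, not from the original post): request is undefined, so request.get(...) raises a NameError, but the bare except: catches it and the function quietly returns an empty string.

import requests   # imported correctly, but the call below misspells it as "request"

def getHTMLText(url):
    try:
        r=request.get(url,timeout=30)   # NameError: name 'request' is not defined
        r.raise_for_status()
        return r.text
    except:                             # the bare except swallows the NameError as well
        return ""

print(repr(getHTMLText("https://www.example.com")))   # prints '' with no traceback

Catching only the exceptions you expect, for example except requests.RequestException, would have let the NameError propagate and expose the typo immediately.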

 


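For reference, here is another, longer crawler example from the thread: it scrapes the Dangdang five-star book rankings (bang.dangdang.com) with requests and lxml XPath and writes each book's details to a CSV file.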
#!/usr/bin/env python

# -*- coding: utf-8 -*-

from lxml import etree
import requests
import time

url = ''   # NOTE: the ranking-page URL is missing from the post; fill it in before running

headers = {
    'Host': 'bang.dangdang.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
}

data = requests.get(url,headers=headers).text
s = etree.HTML(data)

with open('dangdang.csv','w') as outputfile:
    # category links sit in the left-hand "sortRanking" menu of the ranking page
    items = s.xpath('//*[@id="sortRanking"]/div')

    for item in items:
        book_url=item.xpath('./a/@href')
        item_name=item.xpath('./a/text()')

        if len(book_url)>0:
            href=book_url[0]
            item_title=item_name[0]
            a=href[41:46]   # category code embedded in the link
            print(item_title)

            for page in range(1,26):

                per_url= 'http://bang.dangdang.com/books/fivestars/{}.00.00.00.00-all-0-0-1-{}'.format(a,page)
                data2=requests.get(per_url).text
                f=etree.HTML(data2)

                try:
                    file=f.xpath('//ul[@class="bang_list clearfix bang_list_mode"]/li')
                    print('Fetching {} page {} ...'.format(item_title,page))
                    time.sleep(2)   # pause between page requests

                    for book in file:
                        title=book.xpath('./div[@class="name"]/a/@title')[0]
                        author=book.xpath('string(./div[@class="publisher_info"][1])')
                        pinglun=book.xpath('./div[@class="star"]/a/text()')[0].strip('条评论')   # review count, suffix stripped
                        wuxing=book.xpath('./div[@class="biaosheng"]/span/text()')[0].strip('')
                        price_now=book.xpath('./div[@class="price"]/p/span[1]/text()')[0]
                        price_before=book.xpath('./div[@class="price"]/p/span[2]/text()')[0]
                        price_sale=book.xpath('./div[@class="price"]/p/span[3]/text()')[0]

                        try:
                            date=book.xpath('./div[@class="publisher_info"]/span/text()')[0]
                        except:
                            date='publication date unknown'

                        try:
                            company=book.xpath('./div[@class="publisher_info"][2]/a/text()')[0]
                        except:
                            company='publisher unknown'

                        try:
                            price_e=book.xpath('./div[@class="price"]/p[@class="price_e"]/span/text()')[0]
                        except:
                            price_e='no e-book edition'

                        # write one comma-separated record per book (with a trailing newline)
                        outputfile.write('{},{},{},{},{},{},{},{},{},{}\n'.format(title,author,date,company,pinglun,wuxing,price_now,price_before,price_sale,price_e))

                except:
                    pass
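
A note on the output format (my suggestion, not part of the code above): book titles and author strings often contain commas themselves, so joining fields with ',' can produce rows that no longer parse cleanly as CSV. The csv module quotes such fields automatically; a minimal sketch with made-up data:

import csv

row=['Example Title, Second Edition','Author A, Author B','2017-01-01']   # hypothetical record
with open('dangdang.csv','w',newline='',encoding='utf-8') as outputfile:
    writer=csv.writer(outputfile)   # quotes any field that contains a comma
    writer.writerow(row)

In the crawler above, the outputfile.write(...) line would then become writer.writerow([title,author,date,company,pinglun,wuxing,price_now,price_before,price_sale,price_e]).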