httpx async crawling: only some of the requests succeed

I wrote an async crawler with httpx. The code schedules 9 URLs (pages 1 through 9), so by rights getDomain should be executed 9 times. But at runtime getDomain only succeeds 1 to 3 times; the other calls never seem to run at all. Could anyone take a look at what's causing this? It shouldn't be the target site blocking me.


import asyncio
import json

import httpx


cookies =""
# log in so the session cookie is set on the shared client
async def loginWest(client: httpx.AsyncClient):
    headers = {
        'cache-control': 'max-age=0',
        'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'upgrade-insecure-requests': '1',
        'content-type': 'application/x-www-form-urlencoded',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9'
    }
    payload = 'u_name=tylrr1&u_password=123qwe&adds323sdsdsad=3546234134sadsa233&back_path2=&m=&module=enterzone'
    response0 = await client.post("https://www.xxxxx.cn/login.asp", headers=headers, data=payload ,follow_redirects=True)

    # the marker "1902326" only appears in the page after a successful login
    if "1902326" in response0.content.decode("gbk"):
        print("login OK")
    print(response0.cookies)
    return response0.cookies


# fetch one page of the domain list; returns a dict on success, None on failure
async def getDomain(page: int, client: httpx.AsyncClient):
    headers = {
        'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
        'accept': 'application/json, text/plain, */*',
        'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
        'x-requested-with': 'XMLHttpRequest',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
    }

    payload = f'domeq=&domkey=&domeq1=&domkey1=&domuneq=&domunkey=&domuneq1=&domunkey1=&domlen1=&domlen2=&topmoney=&topmoneymax=&price=&pricemax=&expday=&expdaymax=&arrdomext=&domclass=&domleiab=&deldate=&regyear=&regyearmax=&freeyd=0&deltype=&ordby=deldate&ordtp=&sogoupr=&sogouprmax=&baidupr=&baiduprmax=&sgsoulu=&sgsoulumax=&bdsoulu=&bdsoulumax=&bdfanlian=&bdfanlianmax=&wailian=&wailianmax=&sitehis=&sitehismax=&siteinfohis=&siteinfohismax=&bdrenzheng=&wxcheck=&qqcheck=&wallcheck=&bdpingjia=&ismiiban=&guonei=&linktype=&isqy=&viewcount=&sitetitle=&icpwzmc=&haveuser=&isbid=&pageno={page}&pagesize=1000&mode=wedel'

    try:
        response0 = await client.post("https://www.xxxx.cn/services/newlist.asp",
                                      headers=headers, data=payload)
        if response0.status_code == 200:
            # escape stray backslashes so json.loads accepts the response body
            jsonstr = json.loads(response0.text.replace('\\', '\\\\'))
            if jsonstr['code'] == 200:
                print("list fetched OK")
                total = jsonstr['body']['total']
                page = jsonstr['body']['pageno']
                itemJson = jsonstr['body']['items']
                print({"total": total, "page": page})
                return {"total": total, "page": page, "itemJson": itemJson}
            else:
                print("failed to fetch list:", jsonstr)
        else:
            print("failed to fetch list:", response0.status_code)
    except Exception as e:
        # NOTE: print(e) can show an empty message for some httpx transport
        # errors, so a failure here is easy to miss; the function then
        # implicitly returns None
        print(e)



async def main():
    client = httpx.AsyncClient()
    global cookies
    if cookies == "":
        cookies = await loginWest(client)
        print(cookies)

    # schedule all 9 page fetches concurrently on the shared client
    tasks = [asyncio.ensure_future(getDomain(i, client)) for i in range(1, 10)]
    results = await asyncio.gather(*tasks)
    print(f"本次任务共{len(results)}页")
    for item in results:
        print(str(item)[:120])
        #saveData(item)
    await asyncio.sleep(60)
    print("任务完成")


if __name__ == '__main__':
    asyncio.run(main())

# Run output:

list fetched OK
{'total': 2030589, 'page': 1}

this run returned 9 pages
{'total': 2030589, 'page': 1, 'itemJson': [{'refsmoney': 0, 'domext': 'online', 'domain':
None
None
None
None
None
None
None
None
done

# Only 1 of the 9 URLs above completed; the rest printed None. I can't figure out why.
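While writing this up I noticed that the except block in getDomain only does print(e), and some httpx transport errors stringify to an empty message, so whatever is failing there may be invisible. A minimal diagnostic variant I plan to try (same endpoint, login skipped, payload trimmed to the paging fields for brevity; the real code sends the full form body):

import asyncio

import httpx


async def getDomainDebug(page: int, client: httpx.AsyncClient):
    # same request as getDomain, but let any exception propagate
    payload = f'pageno={page}&pagesize=1000&mode=wedel'
    response = await client.post("https://www.xxxx.cn/services/newlist.asp", data=payload)
    response.raise_for_status()
    return response.status_code


async def debugMain():
    async with httpx.AsyncClient() as client:
        tasks = [getDomainDebug(i, client) for i in range(1, 10)]
        # return_exceptions=True puts exception objects into the result list
        # instead of raising the first one, so every failure stays visible
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for page, res in enumerate(results, start=1):
            if isinstance(res, Exception):
                print(f"page {page} failed: {type(res).__name__}: {res!r}")
            else:
                print(f"page {page} ok: {res}")

If every page except the first comes back as an exception, that would explain the None results, since getDomain returns None on any error.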

Could anyone point me in the right direction?
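P.S. In case the server (or the client's connection pool) doesn't cope with 9 simultaneous POSTs, I also plan to test capping the concurrency. A sketch reusing loginWest and getDomain from above; the limit values are guesses, not anything documented for this site:

import asyncio

import httpx

# pool limits plus an application-level semaphore; both numbers are guesses
limits = httpx.Limits(max_connections=3, max_keepalive_connections=3)
sem = asyncio.Semaphore(3)


async def getDomainThrottled(page: int, client: httpx.AsyncClient):
    async with sem:  # allow at most 3 requests in flight at a time
        return await getDomain(page, client)


async def throttledMain():
    async with httpx.AsyncClient(limits=limits, timeout=httpx.Timeout(30.0)) as client:
        await loginWest(client)
        results = await asyncio.gather(*(getDomainThrottled(i, client) for i in range(1, 10)))
        print([r is not None for r in results])  # True for each page that succeeded

If the throttled version succeeds on all 9 pages, that would point at concurrency rather than at the payload or the login.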