requests.exceptions.InvalidSchema: No connection adapters were found for {!r}


import requests
import re

# Script as posted in the question: fetch the index page, collect the links
# inside the "最新影片推荐" list, then request each linked page and print its
# download address.
url = "https://www.dytt8.net/index2.htm"
# Browser-like User-Agent sent with the index-page request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
}
resp = requests.get(url, headers=headers)
# Decode the response body as gb2312 instead of the encoding requests guessed.
resp.encoding = "gb2312"
# print(resp.text)
# obj1: the text between "最新影片推荐" and the next </ul> (re.S lets "." span newlines).
obj1 = re.compile(r"最新影片推荐(?P<ul>.*?)</ul>", re.S)
# obj2: the value of every single-quoted href attribute.
obj2 = re.compile(r"<a href='(?P<href>.*?)'", re.S)
# obj3: the translated title and the "下载地址2" link on a detail page.
# NOTE(review): the pattern begins with a literal double quote before "◎" and
# requires exactly one space between the href attribute and target=.
obj3 = re.compile(r'"◎译  名(?P<title>.*?)<br />.*?<font color=red>下载地址2:<a href="(?P<download>.*?)" '
                  r'target="_blank"  title="迅雷电影">', re.S)
result1 = obj1.finditer(resp.text)
child_href_list = []
for it in result1:
    ul = it.group("ul")
    # print(ul)
    result2 = obj2.finditer(ul)
    for itt in result2:
        # NOTE(review): str.strip() treats its argument as a set of characters,
        # not a substring, so besides the trailing "index2.htm" it also removes
        # the leading "htt" of "https". The result is "ps://www.dytt8.net/",
        # which requests rejects with InvalidSchema.
        child_href = url.strip("index2.htm") + itt.group('href').strip("/")
        child_href_list.append(child_href)
        # print(itt.group("href"))
for href in child_href_list:
    # NOTE(review): unlike the index request, this call does not pass headers.
    child_resp = requests.get(href)
    child_resp.encoding = "gb2312"
    result3 = obj3.finditer(child_resp.text)
    # NOTE(review): finditer() returns an iterator, which has no group() method;
    # a match object has to be taken from it first.
    print(result3.group("download"))

报错信息如下

Traceback (most recent call last):
  File "C:/Users/15348/PycharmProjects/untitled5/爬虫/ziji.py", line 27, in <module>
    child_resp = requests.get(href)
  File "C:\Python\Python37\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "C:\Python\Python37\lib\site-packages\requests\api.py", line 61, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Python\Python37\lib\site-packages\requests\sessions.py", line 529, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Python\Python37\lib\site-packages\requests\sessions.py", line 639, in send
    adapter = self.get_adapter(url=request.url)
  File "C:\Python\Python37\lib\site-packages\requests\sessions.py", line 732, in get_adapter
    raise InvalidSchema("No connection adapters were found for {!r}".format(url))
requests.exceptions.InvalidSchema: No connection adapters were found for 'ps://www.dytt8.net/html/gndy/dyzz/20211225/62153.html'


求各位帮忙看看是什么问题:报错信息里的网址点击打不开,但是复制到浏览器里却能打开,不知道是怎么回事,求解。

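第一,url.strip("index2.htm") 改成 url.replace("index2.htm","")。strip() 的参数是一组要去掉的字符,而不是一个子串,所以开头的 "htt" 也被去掉了,地址就变成了 ps://... 开头。
第二,obj3 正则表达式不对:开头多了一个双引号;修正后在"名"后面加了 \s+,并把 href 属性和 target 之间写死的单个空格换成了 .*?(见下面的完整代码)。
第三,obj3.finditer(child_resp.text) 返回的是迭代器对象,迭代器没有 group() 方法,要先用 next() 取出匹配对象: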

result3 = obj3.finditer(child_resp.text)
# finditer() returns an iterator; next() pulls the first match out of it.
# The None default avoids StopIteration when the page contains no match.
dg = next(result3, None)
if dg is not None:
    print(dg.group("download"))

你题目的解答代码如下:

import requests
import re

# Fetch the index page, collect the links inside the "最新影片推荐" list, then
# request each linked page and print its "下载地址2" download address.
url = "https://www.dytt8.net/index2.htm"
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
}
# Site root used to build absolute links. replace() removes the exact
# substring; strip() would treat "index2.htm" as a character set and also eat
# the leading "htt" of "https". Computed once, outside the loops.
base_url = url.replace("index2.htm", "")
# A timeout keeps an unresponsive server from hanging the script forever.
resp = requests.get(url, headers=headers, timeout=10)
# Decode the response body as gb2312 instead of the encoding requests guessed.
resp.encoding = "gb2312"
# obj1: the text between "最新影片推荐" and the next </ul> (re.S lets "." span newlines).
obj1 = re.compile(r"最新影片推荐(?P<ul>.*?)</ul>", re.S)
# obj2: the value of every single-quoted href attribute.
obj2 = re.compile(r"<a href='(?P<href>.*?)'", re.S)
# obj3: the translated title and the "下载地址2" link on a detail page.
obj3 = re.compile(r'◎译  名\s+(?P<title>.*?)<br />.*?<font color=red>下载地址2:<a href="(?P<download>.*?)".*?target="_blank"  title="迅雷电影">', re.S)
result1 = obj1.finditer(resp.text)
child_href_list = []
for it in result1:
    ul = it.group("ul")
    result2 = obj2.finditer(ul)
    for itt in result2:
        # Only the leading "/" must go (base_url already ends with one);
        # a trailing "/" is part of the link and is kept.
        child_href = base_url + itt.group("href").lstrip("/")
        child_href_list.append(child_href)
for href in child_href_list:
    # Send the same browser-like headers as the index request.
    child_resp = requests.get(href, headers=headers, timeout=10)
    child_resp.encoding = "gb2312"
    result3 = obj3.finditer(child_resp.text)
    # finditer() returns an iterator; next() takes the first match, and the
    # None default avoids StopIteration on pages with a different layout.
    dg = next(result3, None)
    if dg is None:
        print(f"no download link found: {href}")
        continue
    print(dg.group("download"))

如有帮助,请点击我的回答下方的【采纳该答案】按钮帮忙采纳下,谢谢!

img

从报错信息看,是子链接的url不全导致的,链接地址https://开头被截取成了ps://开头,不是有效的url地址,重点检查代码中这行:
child_href = url.strip("index2.htm") + itt.group('href').strip("/")
看看获取的子链接地址是不是不完整,并作出相应修改。

如有帮助,请点采纳。