python创建代理IP池，检测可用ip出错，求大神解惑

from requests.exceptions import Timeout
from selenium import webdriver
import time

import requests
import lxml
from lxml import etree
import os
import re
import parsel

def check_ip(proxies_list):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'}

    can_use = []
    for ip in proxies_list:
        try:
            requests.get(url='https//www.bilibili.com/', headers = headers, proxies = ip, timeout = 0.1)
            if response.status_code == 200:
                can_use.append(ip)

        except Exception:
            print('当前代理ip: ', ip, "请求超时，检测不合格")

        finally:
            print('当前代理ip: ', ip, '检测通过')
    return can_use

proxies_list = []

for page in range(1,8):

    print('============正在爬取第{}页数据============'.format(str(page)))

    base_url = 'http://www.ip3366.net/free/?stype=1&page{}.format(str(page))'
    
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'}
    response = requests.get(url=base_url , headers = headers )
    data = response.text
    html_data = parsel.Selector(data)
    parse_list = html_data.xpath('//table[@class="table table-bordered table-striped"]/tbody/tr')

    #代理IP的结构  {'ip的协议’：‘ip: ip端口’}
    for tr in parse_list:
        http_type = tr.xpath('./td[4]/text()').extract_first()   #协议类型
        ip_num = tr.xpath('./td[1]/text()').extract_first()   #IP地址
        ip_port = tr.xpath('./td[2]/text()').extract_first()   #IP端口
        print(http_type, ip_num, ip_port)
        
        proxies_dict = {}
        print(http_type)
        proxies_dict[http_type] = ip_num + ':' + ip_port
        print('保存成功： ' , proxies_dict)
        proxies_list.append(proxies_dict)

print(proxies_list)
print('获取到的代理IP数量 ：',len(proxies_list))

print('===================正在检测ip质量============================')
yes_can = check_ip(proxies_list)
print('质量高的： ', yes_can)
print('质量高的代理ip数量： ', len(yes_can))

以下是检测ip质量返回的部分结果

不知道为什么对既显示不合格，又显示检测通过

最后返回的还是一个空列表

===================正在检测ip质量============================
当前代理ip:  {'HTTPS': '175.43.57.24:9999'} 请求超时，检测不合格
当前代理ip:  {'HTTPS': '175.43.57.24:9999'} 检测通过
当前代理ip:  {'HTTP': '180.118.128.220:9000'} 请求超时，检测不合格
当前代理ip:  {'HTTP': '180.118.128.220:9000'} 检测通过
当前代理ip:  {'HTTP': '182.34.20.143:9999'} 请求超时，检测不合格
当前代理ip:  {'HTTP': '182.34.20.143:9999'} 检测通过
当前代理ip:  {'HTTP': '183.145.58.210:9000'} 请求超时，检测不合格
当前代理ip:  {'HTTP': '183.145.58.210:9000'} 检测通过
当前代理ip:  {'HTTP': '61.92.188.117:8080'} 请求超时，检测不合格
当前代理ip:  {'HTTP': '61.92.188.117:8080'} 检测通过
当前代理ip:  {'HTTPS': '182.46.114.174:9999'} 请求超时，检测不合格
当前代理ip:  {'HTTPS': '182.46.114.174:9999'} 检测通过
当前代理ip:  {'HTTP': '182.105.201.5:9000'} 请求超时，检测不合格
当前代理ip:  {'HTTP': '182.105.201.5:9000'} 检测通过

哥们，你搞错try,except,finally的用法了。

1、当执行try...except之间的语句序列没有发生异常时，则忽略异常处理部分(except)的语句。

2、Except括起来的语句，则只有在产生异常的情况下会被执行，其他情况一概不执行的。

3、Finally括起来的语句是铁定会被执行的，无论是否有异常产生；

你这个“检测通过”压根不应该放在finally语句下面呀，应该放在try:语句里面。

换句话来说，其实你的这些IP都检测不合格，只是你的检测通过语句放的位置出错了。

=======代码内容已经更新。

def check_ip(proxies_list):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'}
 
    can_use = []
    for ip in proxies_list:
        try:
            requests.get(url='https://www.bilibili.com/', headers = headers, proxies = ip, timeout = 0.2)
            if response.status_code == 200:
                can_use.append(ip)
            print('当前代理ip: ', ip, '检测通过')

        except Exception:
            print('当前代理ip: ', ip, "请求超时，检测不合格")
 
        finally:
            print('检测结束。')
    return can_use

我实际运行了一下你的代码。

大量显示IP错误的原因是你的超链接错误了，题目中你的超链接少了个冒号。应该是https://bilibili.com。

第二个导致大量显示IP错误的原因是，家里的网速比较慢，跟这个博主家里的网速不太一致。如果你将条件从timeout的0.1换成0.2，就会有更多的IP显示为优质IP并通过。条件继续放宽到timeout=1，基本所有的IP都可以显示正确了。

我可以给你解释下，代理ip池为何会出错，这种情况很多。一是你自己的参数要写对。这可能会与Python的版本有关系。Python经常会应为版本不同而导致相关的方法不可使用。

还有就是可能你的ip代理已经被相关的网站屏蔽，还有可能就是本生ip服务器现在不可用了。如果你的代码并没有问题，多半是这两个问题，这种爬虫代码创建请求代理的格式一般是固定的，你可以先自己检查是否有问题。如果没有问题的话，就是我上述说的那样，本来网站也有反爬机制的。而且网站的源码会有渲染，人家也不一定给你返回你想要的数据。

不管有没有异常，finally都会执行，所以except执行后还会执行finally