怎么避免爬取重复数据呀,报错table jobdata already exists

def jiexi(url, head, parse_list):
    # 循环遍历,二次提取
    proxies_list = []
    for tr in parse_list:
        http_type = tr.xpath('./td[2]/text()').extract_first().replace("代理", "")  # extract_first,返回第一个元素,不加first则返回所有元素
        http_type = http_type.replace("HTTP,", "")
        ip_num = tr.xpath('./td[1]/text()').extract_first()
        # port_num = tr.xpath('./td[2]/text()').extract_first()
        # print(http_type, ip_num, port_num)
        # 构建代理ip字典
        # proxies_dict[http_type] = ip_num
        '''+ ':' + port_num'''
        proxies_dict = {"%s" % http_type: 'HTTPS://%s' % ip_num}
        # print(len(proxies_dict))
        proxies_list.append(proxies_dict)
        # print(type(proxies_list))
        # print(len(proxies_list))
        print(proxies_list)
        # geturl(url, head, proxies_list, proxies_dict)


    return proxies_list


``````python

def geturl(url, head, proxies_list, proxies_dict):
    html = ""
    for proxy in proxies_dict:
        response = requests.get(url, headers=head, proxies=random.choice(proxies_list), timeout=3)  # 超时报错
        if response.status_code != 200:
            proxies_dict.remove(proxy)
            continue
        else:
            try:
                html = response.text
                # print(html)
                # print(len(proxies_dict))
                gethtml(html)  # 提取网页数据

            # print(url)
            # print(html)
            except Exception as error:
                print(f"错误异常信息为:{error}")
    return html

一个IP爬完一页,下一个IP又重新爬一遍,这种情况应该在哪里写判断。截止19.42发现IP地址在重复写入列表,语法是不是有误,怎么才能一次写入到列表呢

不清楚你在表达啥,既然重复写入,那肯定是写错了