爬虫出现IndexError: list index out of range

应该是有的用户没有生日那一行,才出现了错误。但我不知道怎么改,请帮忙看看,谢谢。


# 代码来自龙王山小青椒https://www.bilibili.com/video/BV1M64y1u7wE
import requests
from lxml import etree
from collections import OrderedDict
from urllib.parse import quote
import csv
import traceback
import random
import re
from time import sleep
import os
from datetime import datetime, timedelta
import sys
import numpy as np
import pandas as pd
import time

header = {'Content-Type':'xx','User-Agent':'xx'}
Cookie = {'Cookie':'xxx'}

# 导入用户id
weibo_comment_df = pd.read_csv('weibo_comment.csv')
weibo_comments = weibo_comment_df.values.tolist()
print(len(weibo_comments))

for i in range(len(weibo_comments)):
    url_base_1 = "https://weibo.cn/"
    url_base_2 = "/info"
    url = url_base_1 + str(weibo_comments[i][0]) + url_base_2
    print(i)
    print(url)
    html = requests.get(url, headers=header, cookies=Cookie)
    html.encoding='utf-8'  
    nickname = re.findall(r'<div class="c">昵称:(.*?)<br/>', html.text)
    sex = re.findall(r'<br/>性别:(.*?)<br/>', html.text)
    location = re.findall(r'<br/>地区:(.*?)<br/>', html.text)
    birthday = re.findall(r'<br/>生日:(.*?)<br/>', html.text)
    if birthday == []:
        data1 = [(nickname[0], sex[0], location[0], ' ')]
    else:
        data1 = [(nickname[0], sex[0], location[0], birthday[0])]
    data2 = pd.DataFrame(data1)
    print(data2)
    print(type(data2))
    data2.to_csv('id_2011.csv')
    time.sleep(1)

 

    nickname = re.findall(r'<div class="c">昵称:(.*?)<br/>', html.text)
    sex = re.findall(r'<br/>性别:(.*?)<br/>', html.text)
    location = re.findall(r'<br/>地区:(.*?)<br/>', html.text)
    birthday = re.findall(r'<br/>生日:(.*?)<br/>', html.text)

这4个变量用findall()赋值,都有可能得到的数值为空,这样你后面的nickname[0], sex[0], location[0] 就会导致异常:list index out of range。解决办法是这些变量在用下标索引的时候需要检查是否为空  。if nickname and sex and location :do something

if not birthday:
    data1 = [(nickname[0], sex[0], location[0], ' ')]