python火车票余票监测(通过微信提醒)

问题遇到的现象和发生背景

用python写火车余票监测通过server酱以微信的形式进行提送告知
最后查询的时候一直是No connection adapters were found for "['\nhttps://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=--&leftTicketDTO.from_station=--&leftTicketDTO.to_station=--&purpose_codes=ADULT\\n%27]%22%EF%BC%8C%E7%BD%91%E5%9D%80%E9%87%8C%E4%B8%8D%E6%98%AF%E6%88%91%E6%83%B3%E6%9F%A5%E8%AF%A2%E7%9A%842022-07-18%E5%8C%97%E4%BA%AC%E5%88%B0%E5%8E%A6%E9%97%A8%E7%9A%84%E7%BD%91%E5%9D%80

问题相关代码,请勿粘贴截图

import requests
import re
import time

requests.packages.urllib3.disable_warnings()#关闭https证书验证警告

调用service 酱,推送到微信

def send_msg(title, info):
url = 'https://sctapi.ftqq.com/SCT159966T6Ou8yOocBYzaesamsSBzzkoM.send'\
% (title, info)
requests.post(url)

def get_station():
# 12306的城市名和城市代码js文件url
url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9235%27
r = requests.get(url, verify=False)
pattern = u'([\u4e00-\u9fa5]+)|([A-Z]+)' # \u4e00-\u9fa5是所有汉字的unicode编码范围
result = re.findall(pattern, r.text) # 按正则表达式规则匹配
station = dict(result)
return station

生成查询的url

def get_query_url(text):
# 城市名代码查询字典
# key:城市名 value:城市代码
try:
date = '2022-07-18'
from_station_name = '北京'
to_station_name = '厦门'
from_station = text[from_station_name] # 将城市名转换为城市代码
to_station = text[to_station_name]
except:
date, from_station, to_station = '--', '--', '--'

    # api url 构造
    url = (
    'https://kyfw.12306.cn/otn/leftTicket/query?'
    'leftTicketDTO.train_date={}&'
    'leftTicketDTO.from_station={}&'
    'leftTicketDTO.to_station={}&'
    'purpose_codes=ADULT'
    ).format(date, from_station, to_station)
    print(url)
    
    return url

获取车次信息

def query_train_info(url, text):

try:
    r = requests.get(url, verify=False)

    # 获取返回的json数据里的data字段的result结果
    raw_trains = r.json()['data']['result']

    for raw_train in raw_trains:
        # 循环遍历每辆列车的信息
        data_list = raw_train.split('|')

        # 车次号码
        train_no = data_list[3]
        # 出发站
        from_station_code = data_list[6]
        from_station_name = text['北京']
        # 终点站
        to_station_code = data_list[7]
        to_station_name = text['厦门']
        # 出发时间
        start_time = data_list[8]
        # 到达时间
        arrive_time = data_list[9]
        # 总耗时
        time_fucked_up = data_list[10]
        # 一等座
        first_class_seat = data_list[31] or '--'
        # 二等座
        second_class_seat = data_list[30] or '--'
        # 软卧
        soft_sleep = data_list[23] or '--'
        # 硬卧
        hard_sleep = data_list[28] or '--'
        # 硬座
        hard_seat = data_list[29] or '--'
        # 无座
        no_seat = data_list[26] or '--'

        # 打印查询结果
        info = (
        '车次:{}\n出发站:{}\n目的地:{}\n出发时间:{}\n到达时间:{}\n消耗时间:{}\n座位情况:\n一等座:「{}」 \n二等座:「{}」\n软卧:「{}」\n硬卧:「{}」\n硬座:「{}」\n无座:「{}」\n\n'.format(
            train_no, from_station_name, to_station_name, start_time, arrive_time, time_fucked_up, first_class_seat,
            second_class_seat, soft_sleep, hard_sleep, hard_seat, no_seat))

        print(info)
        if (second_class_seat and second_class_seat!= '无' and train_no == "G321"):
            send_msg("G321次高铁二等座有票了", info)
            return True
        else:
            continue

except Exception as e:
    print(e)

text = [get_station()]
print(text)
url = [get_query_url(text)]

循环查询,直到查询到想要的车次有票终止

while True:
time.sleep(1) # 刷票频率
if query_train_info(url,text):
break

运行结果及报错内容

img

我的解答思路和尝试过的方法
我想要达到的结果

可以正确访问2022-07-18 北京到厦门 G321车次二等座的网址和查询余票信息

img


这块应该是url[0]

下边我会给出可以成功爬取12306车次信息的python代码,遇到了两点问题:
1.如果你的请求不携带cookie,以及host,访问会一直显示网络异常,应该是12306反爬的措施
2.12306返回的响应做了信息压缩以及混淆,舍弃了相关属性只保留属性值,一方面应该是提高请求速度,另一方面应该也是为了反爬,查阅相关12306经混淆的前台js,得出相关页面信息对应关系seat_info

运行结果

img



```python

import requests

import random

USER_AGENTS = [
    "Mozilla/5.0 (compatible MSIE 9.0 Windows NT 6.1 Win64 x64 Trident/5.0 .NET CLR 3.5.30729 .NET CLR 3.0.30729 .NET CLR 2.0.50727 Media Center PC 6.0)",
    "Mozilla/5.0 (compatible MSIE 8.0 Windows NT 6.0 Trident/4.0 WOW64 Trident/4.0 SLCC2 .NET CLR 2.0.50727 .NET CLR 3.5.30729 .NET CLR 3.0.30729 .NET CLR 1.0.3705 .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible MSIE 7.0b Windows NT 5.2 .NET CLR 1.1.4322 .NET CLR 2.0.50727 InfoPath.2 .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows U Windows NT 5.1 zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11 U Linux en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows U Windows NT 5.1 en-US rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows U Windows NT 5.1 zh-CN rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11 Linux i686 U) Gecko/20070322 Kazehakase/0.4.5"
]


def get_user_agent():
    user_agent = random.choice(USER_AGENTS)
    return user_agent

import json

# 解密12306返回的数据
def cA(cY, c0):
    cX = []
    for cV in cY:
        c1 ={}
        cV = cV.split("|")
        c1["secretStr"] = cV[0]
        c1["buttonTextInfo"] = cV[1]
        cZ ={}
        cZ["train_no"] = cV[2]
        cZ["station_train_code"] = cV[3]
        cZ["start_station_telecode"] = cV[4]
        cZ["end_station_telecode"] = cV[5]
        cZ["from_station_telecode"] = cV[6]
        cZ["to_station_telecode"] = cV[7]
        cZ["start_time"] = cV[8]
        cZ["arrive_time"] = cV[9]
        cZ["lishi"] = cV[10]
        cZ["canWebBuy"] = cV[11]
        cZ["yp_info"] = cV[12]
        cZ["start_train_date"] = cV[13]
        cZ["train_seat_feature"] = cV[14]
        cZ["location_code"] = cV[15]
        cZ["from_station_no"] = cV[16]
        cZ["to_station_no"] = cV[17]
        cZ["is_support_card"] = cV[18]
        cZ["controlled_train_flag"] = cV[19]
        cZ["gg_num"] = cV[20] if cV[20] else "--"
        cZ["gr_num"] = cV[21] if cV[21] else "--"
        cZ["qt_num"] = cV[22] if cV[22] else "--"
        cZ["rw_num"] = cV[23] if cV[23] else "--"
        cZ["rz_num"] = cV[24] if cV[24] else "--"
        cZ["tz_num"] = cV[25] if cV[25] else "--"
        cZ["wz_num"] = cV[26] if cV[26] else "--"
        cZ["yb_num"] = cV[27] if cV[27] else "--"
        cZ["yw_num"] = cV[28] if cV[28] else "--"
        cZ["yz_num"] = cV[29] if cV[29] else "--"
        cZ["ze_num"] = cV[30] if cV[30] else "--"
        cZ["zy_num"] = cV[31] if cV[31] else "--"
        cZ["swz_num"] = cV[32] if cV[32] else "--"
        cZ["srrb_num"] = cV[33] if cV[33] else "--"
        cZ["yp_ex"] = cV[34]
        cZ["seat_types"] = cV[35]
        cZ["exchange_train_flag"] = cV[36]
        cZ["houbu_train_flag"] = cV[37]
        cZ["houbu_seat_limit"] = cV[38]
        cZ["yp_info_new"] = cV[39]
        if len(cV) > 46:
            cZ["dw_flag"] = cV[46]
        if len(cV) > 48:
            cZ["stopcheckTime"] = cV[48]
        cZ["from_station_name"] = c0[cV[6]]
        cZ["to_station_name"] = c0[cV[7]]
        c1["queryLeftNewDTO"] = cZ
        cX.append(c1)
    return cX


seat_info={'tz_num':'特等座','zy_num':'一等座','ze_num':'二等座',
           'gr_num':'高级软座','rw_num':'软卧','srrb_num':'高级软卧',
           'yw_num':'硬卧','rz_num':'软座','yz_num':'硬座','wz_num':'无座','qt_num':'其他'}


def print_train_info(train_info):
    msg = train_info['station_train_code']
    msg = msg + '始发地' + train_info['from_station_name']
    msg = msg + '目的地' + train_info['to_station_name']
    for seat in seat_info:
        msg = msg + '{%s:%s}' % (seat_info[seat],str(train_info[seat]))
    print(msg)

if __name__ == '__main__':
    # 座位类型
    seat_type = "ze_num"
    # 车次
    station_train_code = "G321"

    session = requests.session()
    headers = {
        'User-Agent': get_user_agent(),
        "Referer": 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc',
        'Host': 'kyfw.12306.cn',
        'Cookie': '_uab_collina=165769445955648824998766 JSESSIONID=9FE05F311AF8ECA9B5B3B4FE0231615B route=c5c62a339e7744272a54643b3be5bf64 BIGipServerotn=484966666.24610.0000 guidesStatus=off highContrastMode=defaltMode cursorStatus=off RAIL_EXPIRATION=1657954851984 RAIL_DEVICEID=XTnDYvjXyZNfhf9ZK4oC4FVJ9o_jJwhvZDX6iZSD0kUBeQvOCefBDtdNzWxYHpvj4_TQE14lkd2pE9JK3AvAVGvv4aAmLkDzqG2APmu9DIn_VWKpzIkow5o8avlOqy1f6iU5Kk6lx8hVUFd5niujT8m7OVO90955 _jc_save_fromStation=%u5317%u4EAC%2CBJP _jc_save_toStation=%u53A6%u95E8%2CXMS _jc_save_fromDate=2022-07-13 _jc_save_toDate=2022-07-13 _jc_save_wfdc_flag=dc'
    }
    session.headers = headers
    r = session.get(
        'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2022-07-18&leftTicketDTO.from_station=BJP&leftTicketDTO.to_station=XMS&purpose_codes=ADULT',
        verify=False)
    print(r.content.decode())
    result = json.loads(r.content.decode())

    if result['status']:
        # 解密相应后,拿到每一条列车信息
        for train_info_all in cA(result['data']['result'], result['data']['map']):
            train_info = train_info_all['queryLeftNewDTO']
            # 打印列车信息
            print_train_info(train_info)
            # 判断该条列车信息是否是G321
            if train_info['station_train_code'] == station_train_code:
                # 判断二等座是否有票
                if train_info[seat_type] != '无' and train_info[seat_type] != '--':
                        print("找到%s%s" % (station_train_code, seat_info[seat_type]))
                        break




try:
    r = requests.get(url, verify=False)
 
    # 获取返回的json数据里的data字段的result结果
    raw_trains = r.json()['data']['result']
 
    for raw_train in raw_trains:
        # 循环遍历每辆列车的信息
        data_list = raw_train.split('|')
 
        # 车次号码
        train_no = data_list[3]
        # 出发站
        from_station_code = data_list[6]
        from_station_name = text['北京']
        # 终点站
        to_station_code = data_list[7]
        to_station_name = text['厦门']
        # 出发时间
        start_time = data_list[8]
        # 到达时间
        arrive_time = data_list[9]
        # 总耗时
        time_fucked_up = data_list[10]
        # 一等座
        first_class_seat = data_list[31] or '--'
        # 二等座
        second_class_seat = data_list[30] or '--'
        # 软卧
        soft_sleep = data_list[23] or '--'
        # 硬卧
        hard_sleep = data_list[28] or '--'
        # 硬座
        hard_seat = data_list[29] or '--'
        # 无座
        no_seat = data_list[26] or '--'
 
        # 打印查询结果
        info = (
        '车次:{}\n出发站:{}\n目的地:{}\n出发时间:{}\n到达时间:{}\n消耗时间:{}\n座位情况:\n一等座:「{}」 \n二等座:「{}」\n软卧:「{}」\n硬卧:「{}」\n硬座:「{}」\n无座:「{}」\n\n'.format(
            train_no, from_station_name, to_station_name, start_time, arrive_time, time_fucked_up, first_class_seat,
            second_class_seat, soft_sleep, hard_sleep, hard_seat, no_seat))
 
        print(info)
        if (second_class_seat and second_class_seat!= '无' and train_no == "G321"):
            send_msg("G321次高铁二等座有票了", info)
            return True
        else:
            continue
 
except Exception as e:
    print(e)

报错信息是url问题造成的,首先你先用postman或者其他工具手动调用你的url,并设置好需要的参数,查看是否能返回正确数据,确认没问题之后再排查是否是你的代码问题