用python写火车余票监测,通过server酱以微信的形式进行推送告知
最后查询的时候一直报错:No connection adapters were found for "['\nhttps://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=--&leftTicketDTO.from_station=--&leftTicketDTO.to_station=--&purpose_codes=ADULT\\n']",网址里不是我想查询的2022-07-18北京到厦门的网址
import requests
import re
import time
requests.packages.urllib3.disable_warnings()  # silence the HTTPS certificate-verification warnings (requests below pass verify=False)
def send_msg(title, info):
    """Push a notification to WeChat through the ServerChan (Server酱) API.

    Args:
        title: Message title.
        info: Message body; sent as the ``desp`` form field.
    """
    # NOTE(review): the SendKey is hard-coded in the URL; anyone who can read
    # this file can push messages with it. Consider loading it from config.
    url = 'https://sctapi.ftqq.com/SCT159966T6Ou8yOocBYzaesamsSBzzkoM.send'
    # The URL has no format placeholders, so the previous ``% (title, info)``
    # raised TypeError and nothing was ever sent. Send the fields as form
    # data instead, which also lets requests encode the Chinese text.
    requests.post(url, data={'title': title, 'desp': info}, timeout=10)
def get_station():
    """Download the 12306 station list and return a name -> code mapping.

    Returns:
        dict: Chinese station name mapped to its upper-case station code.
    """
    # URL of the 12306 JS file that holds station names and station codes.
    # (The closing quote of this literal had been pasted as "%27".)
    url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9235'
    r = requests.get(url, verify=False, timeout=10)
    # \u4e00-\u9fa5 is the Unicode range of common Chinese characters.
    # The "|" must be escaped: unescaped it is regex alternation, so every
    # match filled only one group and dict() mapped each name to ''.
    # Assumes a name is immediately followed by "|" and its upper-case code
    # in the file - TODO confirm against the live file.
    pattern = r'([\u4e00-\u9fa5]+)\|([A-Z]+)'
    pairs = re.findall(pattern, r.text)  # list of (name, code) tuples
    return dict(pairs)
def get_query_url(text, date='2022-07-18', from_station_name='北京',
                  to_station_name='厦门'):
    """Build the 12306 left-ticket query URL for one trip.

    Args:
        text: Mapping of station name -> station code.
        date: Travel date as ``YYYY-MM-DD``.
        from_station_name: Departure station name, a key of ``text``.
        to_station_name: Arrival station name, a key of ``text``.

    Returns:
        str: The query URL. If a station name cannot be looked up in
        ``text``, the date and both stations are replaced with ``'--'``
        (the original fallback) and the reason is printed.
    """
    try:
        # Translate station names into station codes.
        from_station = text[from_station_name]
        to_station = text[to_station_name]
    except (KeyError, IndexError, TypeError) as e:
        # Previously a bare ``except`` hid the cause (e.g. ``text`` being a
        # list instead of a dict) and silently produced the '--' URL.
        print('无法将城市名转换为城市代码: {!r}'.format(e))
        date, from_station, to_station = '--', '--', '--'
    # Assemble the API URL.
    url = (
        'https://kyfw.12306.cn/otn/leftTicket/query?'
        'leftTicketDTO.train_date={}&'
        'leftTicketDTO.from_station={}&'
        'leftTicketDTO.to_station={}&'
        'purpose_codes=ADULT'
    ).format(date, from_station, to_station)
    print(url)
    return url
def query_train_info(url, text, train_code="G321"):
    """Query the left-ticket API once and notify when second-class seats exist.

    Every train in the response is printed. When the train ``train_code``
    has second-class seats available, a message is pushed via ``send_msg``.

    Args:
        url: Left-ticket query URL (a string, as built by ``get_query_url``).
        text: Mapping of station name -> station code; used only to show
            station names when the response carries no code -> name table.
        train_code: Train number to watch.

    Returns:
        bool: True if a notification was sent, otherwise False (including
        when the request or the parsing failed; the error is printed).
    """
    # Reverse lookup (code -> name) built from the caller's table.
    code_to_name = {}
    if isinstance(text, dict):
        code_to_name = {code: name for name, code in text.items()}
    try:
        # A timeout keeps one stalled request from hanging the polling loop.
        r = requests.get(url, verify=False, timeout=10)
        data = r.json()['data']
        # ``result`` holds one '|'-delimited string per train.
        raw_trains = data['result']
        # ``map`` (when present) translates station codes to names.
        code_to_name.update(data.get('map') or {})
        for raw_train in raw_trains:
            data_list = raw_train.split('|')
            train_no = data_list[3]
            # Show the stations this row actually refers to instead of a
            # fixed lookup, which displayed the code of 北京/厦门 as a name.
            from_station_code = data_list[6]
            from_station_name = code_to_name.get(from_station_code, from_station_code)
            to_station_code = data_list[7]
            to_station_name = code_to_name.get(to_station_code, to_station_code)
            start_time = data_list[8]
            arrive_time = data_list[9]
            duration = data_list[10]
            # Seat columns; an empty column is shown as '--'.
            first_class_seat = data_list[31] or '--'
            second_class_seat = data_list[30] or '--'
            soft_sleep = data_list[23] or '--'
            hard_sleep = data_list[28] or '--'
            hard_seat = data_list[29] or '--'
            no_seat = data_list[26] or '--'
            info = (
                '车次:{}\n出发站:{}\n目的地:{}\n出发时间:{}\n到达时间:{}\n消耗时间:{}\n座位情况:\n一等座:「{}」 \n二等座:「{}」\n软卧:「{}」\n硬卧:「{}」\n硬座:「{}」\n无座:「{}」\n\n'.format(
                    train_no, from_station_name, to_station_name, start_time,
                    arrive_time, duration, first_class_seat,
                    second_class_seat, soft_sleep, hard_sleep, hard_seat,
                    no_seat))
            print(info)
            # '--' is the placeholder for an empty column, so it must not be
            # treated as "tickets available" (the old check only excluded '无').
            if train_no == train_code and second_class_seat not in ('无', '--'):
                send_msg("{}次高铁二等座有票了".format(train_code), info)
                return True
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError) as e:
        # Best effort: report the failure and let the caller poll again.
        print(e)
    return False
# Station name -> station code table. It has to stay a plain dict: wrapped in
# a list, every ``text['北京']`` lookup fails and the query URL degrades to
# the "--" placeholders.
text = get_station()
print(text)
# The URL has to be a plain string: passing a one-element list to requests is
# what raised "No connection adapters were found for ['https://...']".
url = get_query_url(text)
while True:
    time.sleep(1)  # polling interval in seconds
    if query_train_info(url, text):
        break
可以正确访问2022-07-18 北京到厦门 G321车次二等座的网址和查询余票信息
下边我会给出可以成功爬取12306车次信息的python代码,遇到了两点问题:
1.如果你的请求不携带cookie,以及host,访问会一直显示网络异常,应该是12306反爬的措施
2.12306返回的响应做了信息压缩以及混淆,舍弃了相关属性只保留属性值,一方面应该是提高请求速度,另一方面应该也是为了反爬,查阅相关12306经混淆的前台js,得出相关页面信息对应关系seat_info
运行结果
```python
import requests
import random
# Pool of User-Agent header values; get_user_agent() picks one at random.
# NOTE(review): none of these strings contains a ';' separator, which browser
# User-Agent values normally have - possibly stripped during copy/paste; confirm.
USER_AGENTS = [
"Mozilla/5.0 (compatible MSIE 9.0 Windows NT 6.1 Win64 x64 Trident/5.0 .NET CLR 3.5.30729 .NET CLR 3.0.30729 .NET CLR 2.0.50727 Media Center PC 6.0)",
"Mozilla/5.0 (compatible MSIE 8.0 Windows NT 6.0 Trident/4.0 WOW64 Trident/4.0 SLCC2 .NET CLR 2.0.50727 .NET CLR 3.5.30729 .NET CLR 3.0.30729 .NET CLR 1.0.3705 .NET CLR 1.1.4322)",
"Mozilla/4.0 (compatible MSIE 7.0b Windows NT 5.2 .NET CLR 1.1.4322 .NET CLR 2.0.50727 InfoPath.2 .NET CLR 3.0.04506.30)",
"Mozilla/5.0 (Windows U Windows NT 5.1 zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
"Mozilla/5.0 (X11 U Linux en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
"Mozilla/5.0 (Windows U Windows NT 5.1 en-US rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
"Mozilla/5.0 (Windows U Windows NT 5.1 zh-CN rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
"Mozilla/5.0 (X11 Linux i686 U) Gecko/20070322 Kazehakase/0.4.5"
]
def get_user_agent():
    """Return one User-Agent string chosen at random from USER_AGENTS."""
    return random.choice(USER_AGENTS)
import json
# Decode the rows returned by 12306 into named fields.
# Field names for columns 2..19 of a row, in column order.
_CA_PLAIN_FIELDS = (
    "train_no", "station_train_code", "start_station_telecode",
    "end_station_telecode", "from_station_telecode", "to_station_telecode",
    "start_time", "arrive_time", "lishi", "canWebBuy", "yp_info",
    "start_train_date", "train_seat_feature", "location_code",
    "from_station_no", "to_station_no", "is_support_card",
    "controlled_train_flag",
)
# Field names for columns 20..33; an empty column is stored as "--".
_CA_COUNT_FIELDS = (
    "gg_num", "gr_num", "qt_num", "rw_num", "rz_num", "tz_num", "wz_num",
    "yb_num", "yw_num", "yz_num", "ze_num", "zy_num", "swz_num", "srrb_num",
)
# Field names for columns 34..39.
_CA_TAIL_FIELDS = (
    "yp_ex", "seat_types", "exchange_train_flag", "houbu_train_flag",
    "houbu_seat_limit", "yp_info_new",
)


def cA(cY, c0):
    """Split each '|'-delimited row into a dict of named fields.

    Args:
        cY: Iterable of row strings whose columns are separated by '|'.
        c0: Mapping used to translate the telecodes in columns 6 and 7 into
            ``from_station_name`` / ``to_station_name``.

    Returns:
        list: One dict per row with the keys ``secretStr`` (column 0),
        ``buttonTextInfo`` (column 1) and ``queryLeftNewDTO`` (a dict of the
        remaining named columns).

    Raises:
        IndexError: If a row has fewer than 40 columns.
        KeyError: If a row's column 6 or 7 is not a key of ``c0``.
    """
    decoded = []
    for row in cY:
        cols = row.split("|")
        entry = {"secretStr": cols[0], "buttonTextInfo": cols[1]}
        dto = {}
        for idx, key in enumerate(_CA_PLAIN_FIELDS, start=2):
            dto[key] = cols[idx]
        for idx, key in enumerate(_CA_COUNT_FIELDS, start=20):
            dto[key] = cols[idx] or "--"
        for idx, key in enumerate(_CA_TAIL_FIELDS, start=34):
            dto[key] = cols[idx]
        # Columns 46 and 48 are only read when the row is long enough.
        if len(cols) > 46:
            dto["dw_flag"] = cols[46]
        if len(cols) > 48:
            dto["stopcheckTime"] = cols[48]
        dto["from_station_name"] = c0[cols[6]]
        dto["to_station_name"] = c0[cols[7]]
        entry["queryLeftNewDTO"] = dto
        decoded.append(entry)
    return decoded
# Display name for each seat-count key that cA() stores in "queryLeftNewDTO".
# print_train_info() prints the seats in this order.
seat_info={'tz_num':'特等座','zy_num':'一等座','ze_num':'二等座',
'gr_num':'高级软座','rw_num':'软卧','srrb_num':'高级软卧',
'yw_num':'硬卧','rz_num':'软座','yz_num':'硬座','wz_num':'无座','qt_num':'其他'}
def print_train_info(train_info):
    """Print a one-line summary of one train.

    The line holds the train code, the departure and arrival station names,
    and one ``{label:value}`` group per entry of ``seat_info``.

    Args:
        train_info: Dict with ``station_train_code``, ``from_station_name``,
            ``to_station_name`` and every key of ``seat_info``.
    """
    pieces = [
        train_info['station_train_code'],
        '始发地',
        train_info['from_station_name'],
        '目的地',
        train_info['to_station_name'],
    ]
    for key, label in seat_info.items():
        pieces.append('{%s:%s}' % (label, str(train_info[key])))
    print(''.join(pieces))
if __name__ == '__main__':
    # Seat type to watch (a key of seat_info).
    seat_type = "ze_num"
    # Train number to watch.
    station_train_code = "G321"
    session = requests.session()
    # Cookie pairs copied from a browser session. A Cookie header separates
    # pairs with "; "; the pasted value had only spaces between them, which
    # would be read as a single malformed cookie.
    # NOTE(review): these are session values and will go stale - refresh them
    # from the browser when the query starts being rejected.
    cookie_pairs = (
        '_uab_collina=165769445955648824998766',
        'JSESSIONID=9FE05F311AF8ECA9B5B3B4FE0231615B',
        'route=c5c62a339e7744272a54643b3be5bf64',
        'BIGipServerotn=484966666.24610.0000',
        'guidesStatus=off',
        'highContrastMode=defaltMode',
        'cursorStatus=off',
        'RAIL_EXPIRATION=1657954851984',
        'RAIL_DEVICEID=XTnDYvjXyZNfhf9ZK4oC4FVJ9o_jJwhvZDX6iZSD0kUBeQvOCefBDtdNzWxYHpvj4_TQE14lkd2pE9JK3AvAVGvv4aAmLkDzqG2APmu9DIn_VWKpzIkow5o8avlOqy1f6iU5Kk6lx8hVUFd5niujT8m7OVO90955',
        '_jc_save_fromStation=%u5317%u4EAC%2CBJP',
        '_jc_save_toStation=%u53A6%u95E8%2CXMS',
        '_jc_save_fromDate=2022-07-13',
        '_jc_save_toDate=2022-07-13',
        '_jc_save_wfdc_flag=dc',
    )
    headers = {
        'User-Agent': get_user_agent(),
        "Referer": 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc',
        'Host': 'kyfw.12306.cn',
        'Cookie': '; '.join(cookie_pairs),
    }
    # Merge into the session's header mapping instead of replacing it with a
    # plain dict, so the session keeps its default headers.
    session.headers.update(headers)
    r = session.get(
        'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2022-07-18&leftTicketDTO.from_station=BJP&leftTicketDTO.to_station=XMS&purpose_codes=ADULT',
        verify=False,
        timeout=10)
    body = r.content.decode()
    print(body)
    try:
        result = json.loads(body)
    except ValueError:
        # A non-JSON body presumably means the request was rejected
        # (e.g. stale cookie) - report it instead of dying with a traceback.
        print('返回内容不是JSON,请检查Cookie是否已过期')
        result = {}
    if result.get('status'):
        # Decode the response, then walk through every train.
        for train_info_all in cA(result['data']['result'], result['data']['map']):
            train_info = train_info_all['queryLeftNewDTO']
            # Print the train's summary line.
            print_train_info(train_info)
            # Is this the train being watched?
            if train_info['station_train_code'] == station_train_code:
                # Does the watched seat type have tickets?
                if train_info[seat_type] not in ('无', '--'):
                    print("找到%s%s" % (station_train_code, seat_info[seat_type]))
                    break
# NOTE(review): this fragment repeats the body of query_train_info without its
# "def" line and without indentation; the "return True" below is only valid
# inside a function, and url/text must come from the caller.
try:
r = requests.get(url, verify=False)
# Take the "result" list from the "data" field of the returned JSON.
raw_trains = r.json()['data']['result']
for raw_train in raw_trains:
# Walk through the information of every train.
data_list = raw_train.split('|')
# Train number
train_no = data_list[3]
# Departure station
from_station_code = data_list[6]
from_station_name = text['北京']
# Arrival station
to_station_code = data_list[7]
to_station_name = text['厦门']
# Departure time
start_time = data_list[8]
# Arrival time
arrive_time = data_list[9]
# Total travel time
time_fucked_up = data_list[10]
# First-class seat
first_class_seat = data_list[31] or '--'
# Second-class seat
second_class_seat = data_list[30] or '--'
# Soft sleeper
soft_sleep = data_list[23] or '--'
# Hard sleeper
hard_sleep = data_list[28] or '--'
# Hard seat
hard_seat = data_list[29] or '--'
# Standing (no seat)
no_seat = data_list[26] or '--'
# Print the query result.
info = (
'车次:{}\n出发站:{}\n目的地:{}\n出发时间:{}\n到达时间:{}\n消耗时间:{}\n座位情况:\n一等座:「{}」 \n二等座:「{}」\n软卧:「{}」\n硬卧:「{}」\n硬座:「{}」\n无座:「{}」\n\n'.format(
train_no, from_station_name, to_station_name, start_time, arrive_time, time_fucked_up, first_class_seat,
second_class_seat, soft_sleep, hard_sleep, hard_seat, no_seat))
print(info)
if (second_class_seat and second_class_seat!= '无' and train_no == "G321"):
send_msg("G321次高铁二等座有票了", info)
return True
else:
continue
except Exception as e:
print(e)
报错信息是由url问题造成的。建议先用Postman或者其他工具手动调用你的url,并设置好需要的参数,查看是否能返回正确数据;确认没问题之后,再排查是否是你的代码问题