携程酒店爬虫遇到问题求解答(语言-python)

在获取携程数据时,headers 里的 'content-length' 只能使用一次:第一次可获得正确数据,再次使用则不返回正确数据。

代码如下

import requests
import json
# JSON body for the HotelSearch request; it is serialized with json.dumps()
# and sent as the POST payload further down in this script.
# NOTE: the "tsid" value must be a single-line string. A raw line break inside
# the literal is a SyntaxError, and the rejoined value contains a well-formed
# UUID (76a281b3-5dd7-4661-8779-e9d4c08f1db6).
# NOTE(review): seqid / pageTraceId / tsid / hotelUuidKey look like per-session
# tokens captured from a browser; presumably they expire - confirm against
# fresh browser traffic.
datadic = {
    "meta": {"fgt":"","hotelId":"","priceToleranceData":"","priceToleranceDataValidationCode":"","mpRoom":[],"hotelUniqueKey":"","shoppingid":"","minPrice":"","minCurr":""},
    "seqid": "645fc4bf3041490a9f86222b7da55368",
    "deduplication": [54946072,26191611,1578931,72992898,10246623,7270168,425161,28416187,430879,992765,346412,453317,6952329,484249,15878972,346395,45693446,930222,1256624,80397610,374477,7253324,9151811,6929152,446917,42165326,686139,434019,45081510,11536398,2784701,43968356,23706628,68192855,2302699,374924,1064521,1349663],
    "filterCondition": {"star":[],"rate":"","rateCount":[],"priceRange":{"lowPrice":0,"highPrice":-1},"priceType":"","breakfast":[],"payType":[],"bedType":[],"bookPolicy":[],"bookable":[],"discount":[],"hotPoi":[],"zone":[],"landmark":[],"metro":[],"airportTrainstation":[],"location":[],"cityId":[],"amenty":[],"promotion":[],"category":[],"feature":[],"brand":[],"popularFilters":[],"hotArea":[],"ctripService":[],"priceQuickFilters":[],"applicablePeople":[]},
    "searchCondition": {"sortType":"1","adult":1,"child":0,"age":"","pageNo":4,"optionType":"City","optionId":"2","lat":0,"destination":"","keyword":"","cityName":"上海","lng":0,"cityId":2,"checkIn":"2022-01-14","checkOut":"2022-01-15","roomNum":1,"mapType":"gd","travelPurpose":0,"countryId":1,"url":"https://hotels.ctrip.com/hotels/listPage?cityename=shanghai&city=2&checkin=2022/1/14&checkout=2022/01/15&optionName=(%E9%80%89%E5%A1%AB)%E9%85%92%E5%BA%97%E5%90%8D/%E5%9C%B0%E6%A0%87/%E5%95%86%E5%9C%88&display=(%E9%80%89%E5%A1%AB)%E9%85%92%E5%BA%97%E5%90%8D/%E5%9C%B0%E6%A0%87/%E5%95%86%E5%9C%88&directSearch=0&optionId=2&optionType=City&crn=1&adult=1&children=0#ctm_ref=ctr_hp_sb_lst","pageSize":10,"timeOffset":28800,"radius":0,"directSearch":0,"signInHotelId":0,"signInType":0,"hotelIdList":[]},
    "queryTag": "NORMAL",
    "genk": "true",
    "genKeyParam": {"a":0,"b":"2022-01-14","c":"2022-01-15","d":"zh-cn","e":2},
    "pageTraceId": "515cc6e5-d573-4f4c-a3be-ef8d43b0fe17",
    "tsid": "prd-2022_1_14_38-76a281b3-5dd7-4661-8779-e9d4c08f1db6-hotel_online_list-3.2.1-DOM-online",
    "webpSupport": "true",
    "platform": "online",
    "pageID": "102002",
    "head": {"Version":"","userRegion":"CN","Locale":"zh-CN","LocaleController":"zh-CN","TimeZone":"8","Currency":"CNY","PageId":"102002","webpSupport":"true","userIP":"","P":"92367645156","ticket":"","clientID":"09031065116756219104","group":"ctrip","Frontend":{"vid":"1629473964863.35463n","sessionID":17,"pvid":304},"Union":{"AllianceID":"","SID":"","Ouid":""},"HotelExtension":{"group":"CTRIP","hasAidInUrl":"false","Qid":"369422895407","WebpSupport":"true","hotelUuidKey":"k93yzmYX7IA4wapWTgeb0E6Zrssxs7x3hY9YDditQIfZRaTY3lESBjMlYGARFYSQy3DrForf4eAkE4AjNAW0ZE3GIUQJbYtpKfGrO8IF8jp7w0drLGjp5ypGJT0WsYZ8r30Y8qYN9j5PKd7vGdjaneaAYnkicYmw3lW0PekfjSpwfXvkqj0XeMqY3oKDYPyXgYAXwmkvltYXOy9ljP7vQoeNgYBTjFOyg6WQFElAKGYD8wD3IQ6y5GvPlv1aYSOwLtj3qeQni0tY5SIhEfkYcYgLjXZEzLJQORMNj0Tr1Y0dJaLv7aKmdvgZeHQYpginGY7XWa7vMZx1Ym0KFbeBXydwo1w75IPYLLxAoJ6sW0ajZ4iQAycYn3E7lWUUxQnY56isFiNliBcj8vBAKQQxtYD8YBLJs5vzayPlwoaYdnwDhygAw08Y6nwLor9HKUXIsYhHxOMxpOyLDRFdwLhEdhYS3Rn1J05EagWcFW4XjX6v3sj4MJXdrlXrfBwkYNsKaljZ3KmHJNbyO4vhsJkNY6oEoFesgiblJFQvgFyQbEm7YpzKasyQMYkZIGcrH4jgYdpKMlYNGilsRfPwXtYZNvf0EQSyfQinZvhgwG9EbMjPfwpZi99KsmIh6eLYtqKnoYg3Yalj8FwZ8v46jGpKbkEhbY3YMdxBhefsWmoR8GvZtYMTWMfeQhRM6WABjbcWmdy4Jp7KoYP3E40xZOjBmR8avf6YhNWkze3kRfbWUpEXlW5FKAqY9hR7YmpEXOI6hvG5RqTwdDE1pYtnRFzJ83ikQvMsictj81yo3JB8JL6W1Deq8KTYXoYdoeAjmPE15jf3WFfWTXWPmYzLYHdYToRAsY0BWdFYMSYXtYQ5jkOeqSEOqWzAenswAZef7jT9Ysny7lEmqjPOEcOrn1jS9w3OyaPWUPv4UrZYcSRZSWG5W3LW8OWBPYbYHZeFkKgwFQvOMEpDWBLyM9j3JGHvSdElQWPHynZjTaJcAjU6e5Y3HxQvzqWUcEHGEB8EFSRakE5Sv8JsfrkYa5R89iXzxhpEQlEN6EGlYHnYLsYoaY1PeZhWoT"}},
}

# Request headers copied from a browser session.
#
# Content-Length is deliberately NOT set here. It must equal the byte length
# of the body actually sent, and `requests` computes it automatically from
# `data=`. A value copied from the browser only matches that one browser
# body, so any change to the payload (page number, dates, tokens, or merely
# json.dumps() escaping non-ASCII text) makes the declared length wrong and
# the server cannot read the body correctly.
#
# The Cookie value must stay on one line: a raw newline inside the literal is
# a SyntaxError and would also be an illegal HTTP header value.
headers = {
    'Host': 'm.ctrip.com',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="97", "Chromium";v="97"',
    'Accept': 'application/json',
    'P': '92367645156',
    'sec-ch-ua-mobile': '?0',
    'Content-Type': 'application/json;charset=UTF-8',
    'User-Agent': '',
    'sec-ch-ua-platform': '"Windows"',
    'Origin': 'https://hotels.ctrip.com',
    'Sec-Fetch-Site': 'same-site',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://hotels.ctrip.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': '_RSG=NSqm17aC9TFrgRCbIofmKA; _RGUID=f7493241-c1c3-4717-a530-a724a531bab5; _RDG=289809879435062d32354c4793bcd67492; MKT_CKID=1629473967711.a2nyn.xweo; _ga=GA1.2.1165280513.1629473968; _abtest_userid=b3fdfae6-7594-4a53-855f-dc5123312c79; GUID=09031065116756219104; login_type=0; AHeadUserInfo=VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0; UUID=F4025F124E064D6784286F576404968D; IsPersonalizedLogin=T; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; _bfaStatusPVSend=1; _gcl_au=1.1.1045722555.1641706144; MKT_Pagesource=PC; login_uid=DC527C63F15747F18FEA7A5A11B91D32; cticket=C9E5BA5F7CE86DD6CD88C377DB286CB2376290186BDBEEC27884F2C8FF997791; DUID=u=DC527C63F15747F18FEA7A5A11B91D32&v=0; IsNonUser=F; Session=SmartLinkCode=csdn&SmartLinkKeyWord=&SmartLinkQuary=_UTF.&SmartLinkHost=link.csdn.net&SmartLinkLanguage=zh; nfes_isSupportWebP=1; nfes_isSupportWebP=1; StartCity_Pkg=PkgStartCity=21185; intl_ht1=h4=17_445476,17_65143245,17_435439,1_4722025,30_67064593,477_476251; HotelCityID=2split%E4%B8%8A%E6%B5%B7splitShanghaisplit2022-1-14split2022-01-15split0; _RF1=171.14.146.147; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1642153936&Expires=1642758736082; MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1642153936087&CURL=https%3A%2F%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={"pc_vid":"1629473964863.35463n"}; MKT_CKID_LMT=1642153936095; _gid=GA1.2.556630122.1642153937; _bfa=1.1629473964863.35463n.1.1641993915950.1642153933448.17.304.212093; _bfs=1.2; _uetsid=aaee4860751f11eca86dc13392f82df7; _uetvid=d9ea359001cc11ec8839af5725765409; _bfi=p1%3D102002%26p2%3D0%26v1%3D304%26v2%3D0; _jzqco=%7C%7C%7C%7C1642153936617%7C1.1019211311.1629473967707.1642153936100.1642153943305.1642153936100.1642153943305.undefined.0.0.259.259; __zpspc=9.20.1642153936.1642153943.2%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%7C%23; appFloatCnt=223; librauuid=qFfS2aZQSf742hdn; _bfaStatus=send',
}
url = "https://m.ctrip.com/restapi/soa2/21881/json/HotelSearch?testab=418e7b4bece90f23c02b4440726032ff921dd9f7677e74b117956128a68dd263"

# Serialize once so the exact body being sent is known.
payload = json.dumps(datadic)

# Never forward a hand-written Content-Length. It has to match the byte length
# of `payload`, and requests fills it in automatically when the header is
# absent. A stale value copied from the browser stops matching as soon as the
# payload differs by a single byte, which is why only the first request
# appeared to work.
request_headers = {
    name: value
    for name, value in headers.items()
    if name.lower() != 'content-length'
}

# timeout keeps the script from hanging forever if the server never answers.
html = requests.post(url, data=payload, headers=request_headers, timeout=10)
print(html.text)

同一个 content-length运行结果如下

{"Response":{"isOversea":false,"isEnd":false,"traceId":0},"ResponseStatus":{"Timestamp":"/Date(1642649249790+0800)/","Ack":"Success","Errors":[],"Extension":[{"Id":"CLOGGING_TRACE_ID","Value":"4063403568887217632"},{"Id":"RootMessageId","Value":"100025527-0a70c299-456291-5260889"}]},"ErrorCode":0}

我的解答思路和尝试过的方法

尝试过将 'content-length' 增加 50,大多数情况下无用。

你在浏览器里多操作几次、多发送几次 POST 请求,看看 content-length 是否每次都变化,找找变化规律。

这个是有反爬的,有个参数是几乎每次请求都需要变动的