在爬虫过程中,网页可以正常访问,但是打印出来的内容里找不到想爬取的信息。
import requests
from bs4 import BeautifulSoup
from csv import writer
import re
# Request configuration.
# main_url is a Taobao search URL; the search keyword is passed URL-encoded
# in the `q` query parameter.
main_url = 'https://s.taobao.com/search?q=%E4%BF%9D%E9%99%A9%E7%AE%B1&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.jianhua.201856-taobao-item.2&ie=utf8&initiative_id=tbindexz_20170306'
# HTTP headers sent with the request: a cookie string and a desktop Chrome
# user-agent.
# NOTE(review): the cookie contains session-looking values (cookie2,
# _tb_token_, JSESSIONID, ...) — presumably copied from a logged-in browser;
# it will likely expire and should not be committed or shared. Confirm.
headers = {
'cookie': 'cna=T+KtGthl61wCAUPmoqSB703V; tracknick=tb936245298; enc=AMAPZoFUGgyJnojU7zemZgsqslbAnQIUj3AcMSfrf93SApPc%2FdZcZqQcb3gbYL8OeTv35G0qkcH2YbYeli%2BtLjU9LZRi3xGnsds8BuyRoys%3D; thw=cn; cookie2=176216f6fbfeea020c82b83dfdd140a3; t=7ea407e2e202f28793b52d623c0081a4; _tb_token_=7b8765eb3b35b; _samesite_flag_=true; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; sgcookie=E100wILHixWPryBdoNcQ941s%2F50rq4MLn9o8Bwzy0Tm0HbXwQPSd3jBCiizMtv7oPJ1uAH6fiwvASF2GfSgn6Ieq1I1XS89kA6F1CIdlXH%2FIo2yjQgXFnOvd9CU0F5rxwEAK; uc3=vt3=F8dCv4CCSu1mUxUyLnc%3D&nk2=F5RMGoKC7Z3wcfc%3D&id2=UUphwoX%2BLwIrQ8u8KQ%3D%3D&lg2=VT5L2FSpMGV7TQ%3D%3D; csg=ded498f1; lgc=tb936245298; cancelledSubSites=empty; dnk=tb936245298; skt=ff94d2cbcaf74244; existShop=MTY2MTI0MjE2Ng%3D%3D; uc4=nk4=0%40FY4HX7CqDhb9hiDqKzlvbQCvEv%2BcRg%3D%3D&id4=0%40U2grGR1tYM5Z4j5trjZiNuVhsQLtPTEM; _cc_=V32FPkk%2Fhw%3D%3D; mt=ci=-1_0; _m_h5_tk=f87e9d31948cd403e0c35a5d117a472c_1661826999200; _m_h5_tk_enc=23c1e19e5ec79e0e292d22ccfe612e70; uc1=cookie14=UoeyD4qtVcDkDA%3D%3D&pas=0&existShop=false&cookie16=W5iHLLyFPlMGbLDwA%2BdvAGZqLg%3D%3D&cookie21=URm48syIYn73; JSESSIONID=54A2B08EE515FC6969D5D5A6A70B1675; xlly_s=1; tfstk=cCNhBdw__JkQwpU4zWGCZhoyH8WAZoIEY7PL_wTm31hbIJPNibJw3KfFScSfYL1..; isg=BGRk0dnK29qmeS6K8CeGTXGqNWJW_Yhnsnd1oX6F8C_yKQTzpg1Y95qL6Ikx2MC_; l=eBEsFPd7LohmD67kBOfZourza77OSIRvIuPzaNbMiOCPOkCH5EeGW6lQh3TMC3hNh6f9R35Wn1oBBeYBqIv4n5U62j-la_kmn',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}
# Fetch the search page. An explicit timeout is passed because requests
# otherwise waits indefinitely on a stalled connection.
r = requests.get(url=main_url, headers=headers, timeout=10)
# Stop on HTTP 4xx/5xx instead of silently parsing an error page.
r.raise_for_status()
# Parse the raw response bytes so BeautifulSoup/lxml can detect the encoding.
soup = BeautifulSoup(r.content, 'lxml')
# Print the parsed document. prettify is a method and must be *called*;
# printing `soup.prettify` without parentheses only shows the bound-method
# object, not the formatted HTML.
# NOTE(review): the item listings may be rendered client-side (loaded by a
# separate ajax request), in which case they will not appear in this HTML —
# verify in the browser's network panel.
print(soup.prettify())
想请问是哪里出了问题,还是方法用得不对?
你打印的信息放出来啊?
你输出的时候可能需要把编码设置为 UTF-8。
因为你需要的内容根本就不在这个 URL 返回的页面里,它是通过 Ajax 加载的。
打开浏览器,重新抓包,找到正确的 URL 再请求就行了。
啥乱的英文,这不是中文?