import requests
from lxml import etree
import os
if __name__ == "__main__":
    # Exploratory scraper: fetch the Sogou image-search result page for a
    # fixed query, print the result <li> nodes matched by XPath, and dump
    # every <script> element so its embedded content can be inspected.

    # Send a desktop-browser User-Agent so the request looks like it comes
    # from a regular browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36'
    }

    # NOTE(review): these counters are not used anywhere in the visible
    # code; kept in case later code relies on them.
    num = 0
    num_1 = 0
    num_2 = 0

    # Number of times the search page is requested.
    x = 2

    # exist_ok=True replaces the separate exists()/mkdir() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs('./picLibs', exist_ok=True)

    # The URL does not depend on the loop variable, so build it once.
    url = 'https://pic.sogou.com/pics?query=' + '橘子皮'

    # NOTE(review): the loop index is never used to build the request, so
    # every pass fetches the same page -- presumably a paging parameter was
    # intended; confirm before relying on more than one iteration.
    for i in range(int(x)):
        # A timeout keeps a stalled connection from hanging the script
        # forever (requests waits indefinitely by default).
        page_text = requests.get(url=url, headers=headers, timeout=10).text
        tree = etree.HTML(page_text)

        li_list = tree.xpath('//div[@class="figure-result"]/ul/li')
        print(li_list)

        # Dump each <script> element as serialized bytes for inspection.
        js = tree.xpath('//script')
        for k in js:
            print(etree.tostring(k))
# Collect the contents of every <script> element, then parse the data embedded in them.