使用 XPath 解析页面时,只能获取到前五个 div 标签中的 href 链接,其余 div 标签的链接获取不到。
import requests
from lxml import etree
# Download the Digi-Key product index page and print one absolute link for
# each <div> selected by the XPath below.
url = 'https://www.digikey.cn/zh/products'
headers = {
    'referer': 'https://www.digikey.cn/zh/products',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
}
# Bound the request so a stalled connection cannot hang the script forever.
resp = requests.get(url=url, headers=headers, timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
resp.raise_for_status()
tree = etree.HTML(resp.text)
div = tree.xpath('//div[@id="__next"]/main/div/div/div[2]/div[2]/section/div')
for item in div:
    # Get the link address of the second page.
    # NOTE(review): the "jss..." class names look auto-generated; if they
    # differ between <div>s this predicate will match only some of them --
    # confirm against the live markup.
    matches = item.xpath('./a[@class="jss92 jss99 jss103"]/@href')
    if not matches:
        # Indexing an empty result with [0] would raise IndexError and abort
        # the loop, hiding every later <div>; skip this one and keep going.
        continue
    href = 'https://www.digikey.cn' + matches[0]
    print(href)
import requests
from bs4 import BeautifulSoup
# Download the Digi-Key product index page with BeautifulSoup and print the
# text and href of every anchor (except the first) found inside the first
# element matched by the 'section div' selector.
url = 'https://www.digikey.cn/zh/products'
headers = {
    'referer': 'https://www.digikey.cn/zh/products',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
}
session = requests.Session()
# Bound the request so a stalled connection cannot hang the script forever.
resp = session.get(url=url, headers=headers, timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
resp.raise_for_status()
page_Soup = BeautifulSoup(resp.content, 'html.parser')
page_spec_label = page_Soup.select('section div')
# An empty selection would make page_spec_label[0] raise IndexError, so fall
# back to an empty list of anchors. The [1:] slice drops the first anchor.
anchors = page_spec_label[0].select('a')[1:] if page_spec_label else []
for anchor in anchors:
    # Tag.get returns None for a missing attribute, whereas anchor['href']
    # would raise KeyError and abort the loop.
    href = anchor.get('href')
    if href is None:
        continue
    print(anchor.string, href)