写了个 Nike 爬虫,但是爬取到折扣商品时,控制台打印的结果老是出错😑
import time
import csv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
def crawl_nike(
    driver_path='/Users/zilong/PycharmProjects/pythonProject/chromedriver',
    start_url='https://www.nike.com.cn/w/shoes-y7ok',
):
    """Scrape a Nike product listing and export prices to two CSV files.

    The listing page is opened in Chrome, scrolled until its height stops
    growing, and the resulting HTML is parsed with BeautifulSoup.

    Output files (written to the current working directory):
      * ``nike_products_all.csv``      - style code and current price of
        every product card that has a link.
      * ``nike_products_discount.csv`` - style code, current price,
        original price and discount for cards that show both a current and
        a struck-through price.

    The same information is printed to the console for every product.

    Args:
        driver_path: Path to the ChromeDriver executable.
        start_url: URL of the listing page to scrape.
    """
    options = Options()
    # False shows the browser window; True would run it headless (hidden).
    # NOTE(review): recent Selenium releases dropped this attribute in
    # favour of a command-line argument - confirm for the installed version.
    options.headless = False
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=options)
    try:
        page_source = _load_full_page(driver, start_url)
    finally:
        # Always close the browser, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    product_list = soup.find_all('div', {'class': 'product-card'})

    with open('nike_products_all.csv', 'w', newline='', encoding='utf-8') as csvfile_all, \
            open('nike_products_discount.csv', 'w', newline='', encoding='utf-8') as csvfile_discount:
        writer_all = csv.writer(csvfile_all)
        writer_discount = csv.writer(csvfile_discount)
        writer_all.writerow(['款式', '现价'])
        writer_discount.writerow(['款式', '现价', '原价', '折扣'])

        for product in product_list:
            # The style code is the last path segment of the product link.
            link_element = product.find('a', {'class': 'product-card__img-link-overlay'})
            link = link_element.get('href') if link_element is not None else None
            if not link:
                # Card without a usable link: nothing to identify it by.
                continue
            code = link.split('/')[-1]

            # Whole price area; used as the current price when the card
            # shows no discount.
            price_element = product.find('div', {'class': 'product-card__price'})
            price = price_element.text.strip() if price_element else 'N/A'

            # CSS selectors match on individual classes, so extra classes on
            # the element do not break the lookup.
            # Assumes `is--current-price` marks the selling price and
            # `is--striked-out` the pre-discount price, as the class names
            # suggest - confirm against the live page markup.
            current_price_element = product.select_one('div.product-price.is--current-price')
            striked_out_elements = product.select('div.product-price.is--striked-out')

            discount_percent = None
            if current_price_element is not None and striked_out_elements:
                current_price = current_price_element.text.strip()
                original_price = striked_out_elements[-1].text.strip()
                discount_percent = _discount_percent(original_price, current_price)

            if discount_percent is not None:
                discount = f'{discount_percent}% 折让'
                writer_discount.writerow([code, current_price, original_price, discount])
            else:
                # No discount shown, or the prices could not be parsed.
                original_price = '无'
                current_price = price
                discount = '无'

            print('款式:', code)
            print('现价:', current_price)
            print('原价:', original_price)
            print('折扣:', discount)
            print('------------------------')

            writer_all.writerow([code, current_price])


def _load_full_page(driver, url):
    """Open *url*, scroll until the page stops growing, return its HTML."""
    driver.get(url)
    time.sleep(5)  # give the initial page time to load

    # Keep jumping to the bottom until the document height stops changing,
    # which is taken to mean no more products are being lazy-loaded.
    last_height = driver.execute_script("return document.documentElement.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        time.sleep(1)  # wait for newly requested content
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    time.sleep(3)  # extra settling time before taking the snapshot
    return driver.page_source


def _parse_price(text):
    """Return the number in a price label such as '¥1,299', or None."""
    # Keep digits and the decimal point only, so any currency symbol,
    # thousands separator or whitespace is ignored.
    digits = ''.join(ch for ch in text if ch.isdigit() or ch == '.')
    try:
        return float(digits)
    except ValueError:
        return None


def _discount_percent(original_text, current_text):
    """Return the percentage saved versus the original price, or None."""
    original = _parse_price(original_text)
    current = _parse_price(current_text)
    if not original or current is None:
        # Unparseable label or a zero original price: no meaningful result.
        return None
    return round((original - current) / original * 100, 2)
# Start the crawl only when this file is run directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    crawl_nike()
import time
import os
# Print the name and last-modified time of every entry in the current
# working directory whose name ends in ".txt".
with os.scandir() as entries:  # the context manager closes the directory handle
    for file in entries:
        # endswith() rather than a substring test, so names such as
        # "notes.txt.bak" are not reported as text files.
        if file.name.endswith(".txt"):
            print(file.name, time.ctime(file.stat().st_mtime))
123文本.txt Sun Jul 12 11:04:24 2020
234文本.txt Sun Jul 12 11:05:12 2020
345文本.txt Sun Jul 12 11:05:19 2020
456文本.txt Sun Jul 12 10:47:54 2020
567文本.txt Sun Jul 12 10:49:56 2020