from pyquery import PyQuery as pq
import time
from selenium import webdriver
# Base JD listing URL; jdPhone_spider appends "&page=<n>" to it for each page.
url = 'https://list.jd.com/list.html?cat=9987%2C653%2C655&enc=utf-8'
# Module-level Chrome WebDriver, created as soon as this file is executed and
# used by jdPhone_spider to load every listing page.
bw = webdriver.Chrome()
def jdPhone_spider(url, beginPage, endPage):
    """Scrape phone attributes from JD listing pages and return them.

    For every page number in ``range(beginPage, endPage)`` (``endPage`` itself
    is excluded) the listing URL is loaded in the module-level browser ``bw``,
    the resulting HTML is parsed with PyQuery, and the fields of each product
    are printed one per line in this order: brand, CPU model, memory, screen
    material, colour, size, pixels.

    The shared browser is not closed here; the caller owns its lifetime.

    Args:
        url: Base listing URL; ``&page=<n>`` is appended to it.
        beginPage: First logical page to scrape.
        endPage: Logical page to stop before (exclusive).

    Returns:
        A list with one row per product, each row being the list of the seven
        field strings in the order given above, ready to be passed to
        ``csv.writer(...).writerows``. Empty when the page range is empty.
    """
    # CSS selectors for the attribute cells of one product, in output order.
    attr_selectors = (
        'span .attr:first-child',
        'span .attr:nth-child(2)',
        'span .attr:nth-child(3)',
        'span .attr:nth-child(4)',
        'span .attr:nth-child(5)',
        'span .attr:last-child',
    )
    rows = []
    for page in range(beginPage, endPage):
        # Logical pages 1, 2, 3, ... map to page=1, 3, 5, ... in the URL.
        pn = page * 2 - 1
        print("正在抓取第" + str(page) + "页")
        fullurl = url + "&page=" + str(pn)
        # Pause before every request.
        time.sleep(5)
        bw.get(fullurl)
        doc = pq(bw.page_source)
        for item in doc('.p-name.p-name-type-3').items():
            # The brand is the first space-separated token of the <em> text.
            row = [item('em').text().split(' ')[0]]
            row.extend(item(selector).text() for selector in attr_selectors)
            for value in row:
                print(value)
            rows.append(row)
    return rows
# Run the spider from beginPage=1; the function iterates
# range(beginPage, endPage), so endPage=3 itself is not scraped.
jdPhone_spider(url,beginPage=1,endPage=3)
使用 Python 标准库自带的 csv 模块就可以把数据写入 CSV 文件。举个例子:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Import the csv module from the standard library.
import csv

# 1. Create the file object.
#    A bare call such as
#        f = open('文件名.csv', 'w', encoding='utf-8', newline='')
#    also works, but then f.close() must be called explicitly once writing is
#    finished. The with-statement below closes the file automatically, so it
#    is the form actually used here.
#    newline='' leaves line endings to the csv writer.
#    Mode 'a' appends, so the header row is written again on every run;
#    use 'w' instead to overwrite the file.
with open('A.csv', 'a', encoding='utf-8', newline='') as f:
    # 2. Build a csv writer on top of the file object.
    csv_writer = csv.writer(f)
    # 3. Write the header row.
    csv_writer.writerow(["姓名", "年龄", "性别"])
    # 4. Write the data rows.
    csv_writer.writerow(["l", '18', '男'])
    csv_writer.writerow(["c", '20', '男'])
    csv_writer.writerow(["w", '22', '女'])
# 5. No explicit f.close() is needed: leaving the with-block closes the file.