import time
import pymysql
from selenium import webdriver  # selenium package
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
s = Service(r"C:\Program Files\Google\Chrome\Application\chromedriver.exe")
driver = webdriver.Chrome(service=s)  # initialize a browser; created inside a function it flashes and closes immediately, as a global it stays open
# Open the browser --- enter the JD URL --- search for products
# 1. Visit the JD page and print the output
def spider(url):
    driver.get(url)  # open the JD search URL
    element = driver.find_element(By.CLASS_NAME, "pn-next")
    driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")  # use selenium to scroll the page down so the site loads the full product list
    driver.implicitly_wait(5)
    # driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    driver.execute_script("arguments[0].scrollIntoView()", element)
    time.sleep(3)
    driver.execute_script("window.scrollTo(0,-document.body.scrollHeight)")
    # input_tag = driver.find_element_by_id('key')  # find the tag whose id attribute is 'key'
    # input_tag.send_keys(keyword)  # simulate typing the keyword
    # input_tag.send_keys(Keys.ENTER)  # simulate pressing the Enter key
    # time.sleep(1)  # wait
    get_goods(driver)
# 2. Locate and scrape the products
def get_goods(driver):
    num = 0
    goods = driver.find_element(By.CLASS_NAME, "gl-i-wrap")  # look up the product nodes (all the li tags)
    for good in goods:
        link = good.find_elements(By.TAG_NAME, "a").get_attribute("href")  # product link
        name = good.find_elements(By.CSS_SELECTOR, ".p-name em").text.replace("\n", "")  # product name, via CSS selector
        # price = good.find_element_by_css_selector('.p-price i').text.replace('\n','')  # price
        # commit = good.find_element_by_css_selector('.p-commit a').text.replace('\n','')  # comments
        # pages = good.find_element_by_css_selector('.p-skip em').text.replace('\n','')
        shops = good.find_elements(By.CSS_SELECTOR, ".p-shop").text.replace("\n", "")
        if len(shops) > 0:
            shop = shops
        else:
            shop = ""
        itemID = good.find_elements(By.CSS_SELECTOR, "p-operate a").get_attribute("data-sku")
        # msg = '''
        # Product: %s
        # Link: %s
        # Price: %s
        # Comments: %s
        # ''' % (name, link, price, commit)  # string formatting
        # print(msg)
        # print(pages)
        num = num + 1
        # print(name)
        # print(itemID)
        intoDatabase(itemID, name, shops, link)
    print("=" * 40 + "crawled %d items in total" % num + "=" * 40)
# Save a record into the database
def intoDatabase(itemID, name, shop, link):
    print()
    try:
        db = pymysql.connect(host="localhost", user="root", password="1234", db="jd")
        print("database connection succeeded!")
        # create a cursor; the commented lines drop/create the table if it exists (Student is the table name)
        cur = db.cursor()
        # cur.execute("DROP TABLE IF EXISTS Student")
        # sql1 = 'CREATE TABLE comment (Name char(100) not NULL,comment text(1500))'
        sql = '''INSERT INTO sheyingshexiang(itemID,name,shop,link) value (%s,%s,%s,%s)'''
        value = (itemID, name, shop, link)
        # cur.execute(sql1)
        cur.execute(sql, value)
        db.commit()  # commit to the database
        print("insert succeeded!")
    except pymysql.Error as e:
        print("insert failed! " + str(e))
        db.rollback()  # roll back the transaction
    db.close()
if __name__ == "__main__":  # standard way to mark the script entry point
    number = 0
    for i in range(1, 200, 2):
        number = 1 + number
        page = str(i)
        spider(
            "https://search.jd.com/Search?keyword=%E6%91%84%E5%BD%B1%E6%91%84%E5%83%8F&&page=" + page)
        print("===============page " + str(number) + " crawled=================")
Traceback (most recent call last):
  File "D:\Project\productSpider\productSpider.py", line 92, in <module>
    spider(
  File "D:\Project\productSpider\productSpider.py", line 27, in spider
    get_goods(driver)
  File "D:\Project\productSpider\productSpider.py", line 34, in get_goods
    for good in goods:
TypeError: 'WebElement' object is not iterable
I searched online and everyone says the TypeError: 'WebElement' object is not iterable error happens when find_element is missing the s, but I added the s and it still keeps throwing the error. I can't figure out what is going on. Could someone please help me find the mistake?
goods = driver.find_element(By.CLASS_NAME, "gl-i-wrap")
This line should use find_elements. Without the s it returns a single element, which cannot be iterated in a for loop.
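For reference, a minimal sketch of the corrected lookup, assuming the driver from the post and that the .gl-i-wrap and .p-name selectors are still valid: the plural find_elements returns a list you can loop over, while the per-item calls inside the loop need the singular find_element so that .get_attribute() and .text run on one element instead of a list.

from selenium.webdriver.common.by import By

goods = driver.find_elements(By.CLASS_NAME, "gl-i-wrap")  # plural: a (possibly empty) list of WebElements
for good in goods:
    link = good.find_element(By.TAG_NAME, "a").get_attribute("href")  # singular: one WebElement
    name = good.find_element(By.CSS_SELECTOR, ".p-name em").text.replace("\n", "")
    print(name, link)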
Set a breakpoint and check what line 34 actually returns.
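For example, you could print what the lookup returns right after that line (a quick check, not part of the original script): the plural call should give a list, while the singular call gives a single WebElement, which is exactly what the "not iterable" error is complaining about.

goods = driver.find_elements(By.CLASS_NAME, "gl-i-wrap")
print(type(goods), len(goods) if isinstance(goods, list) else goods)  # a list and its length means the plural call is in effect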