#Create the Spider class
class DC_Description_Spider(scrapy.Spider):
name = "dc_chapter_spider"
# start_requests method
def start_requests(self):
url_short='https://beauty.jd.com/'
yield scrapy.Request(url = url_short, callback = self.parse_front)
# First parsing method
def parse_front(self, response):
parent_node = response.css('#app > div > div.lc-floor.lc-xfloor--id-1574665740220.lc-floor--lg > div > div:nth-child(9) > div > div > div.lc-nav-tabs__body > div:nth-child(2) > div > div > div')
item_dict=[]
for i in range(4):
item_name = parent_node.css('>div:nth-child({i}) > div > div.goods-item__info > div.goods-item__title.goods-item__title--twoline').extract()
item_price=parent_node.css('>div:nth-child({i}) > div > div.goods-item__info > div.goods-item__content > div').extract()
if item_price == []:
item_price="补货中"
item_dict.append({"name":item_name, "price":item_price})
else:
item_dict.append({"name":item_name, "price":item_price})
return item_dict
#for url in links_to_follow:
#yield response.follow(url = url,
# callback = self.parse_pages)
#Run the spider
process = CrawlerProcess()
process.crawl(DC_Description_Spider)
process.start()
# Print a preview of courses
#previewCourses(dc_dict)
有可能是动态网站的原因。