import scrapy
class ThreedmSpider(scrapy.Spider):
    """Crawl a 3dmgame.com "bagua" listing page and print each article.

    ``parse`` handles the listing page: it prints the title text of every
    entry and schedules a request for the entry's detail page.
    ``parse_detail`` handles a detail page and prints its text content.
    """

    name = 'threedm'
    # allowed_domains = ['www.3xxxdm.com']
    start_urls = ['https://www.3dmgame.com/bagua_62_1/']

    # Pagination is currently disabled. To re-enable it, restore the generic
    # URL template and page counter below together with the commented-out
    # block at the end of parse().
    # url = "https://www.3dmgame.com/bagua_62_%d/"
    # page_num = 2

    def parse(self, response):
        """Print every listing entry's title and request its detail page.

        Args:
            response: The Scrapy response for a listing page.

        Yields:
            scrapy.Request: One request per entry that has a link, handled
            by ``parse_detail``.
        """
        # Select each <li> entry rather than the enclosing <ul>. Iterating
        # over the <ul> and calling extract_first() on "./li/..." only ever
        # produced the first entry of the list.
        entries = response.xpath("/html/body/div[3]/div[2]/div[2]/ul/li")
        for entry in entries:
            text = entry.xpath("./a/div[2]/text()").extract_first()
            print(text)

            detail_url = entry.xpath("./a/@href").extract_first()
            if not detail_url:
                # scrapy.Request rejects a missing URL, so skip entries
                # that carry no link instead of aborting the whole page.
                continue

            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the listing page's URL.
            yield scrapy.Request(
                url=response.urljoin(detail_url),
                callback=self.parse_detail,
            )

        # Disabled pagination (requires the class attributes `url` and
        # `page_num` above):
        # if self.page_num <= 2:
        #     new_url = format(self.url % self.page_num)
        #     self.page_num += 1
        #     # Send the next-page request manually; the callback is the
        #     # function that parses the response.
        #     yield scrapy.Request(url=new_url, callback=self.parse)

    def parse_detail(self, response):
        """Print the text content of an article detail page.

        Args:
            response: The Scrapy response for a detail page.
        """
        # Collect every text node under the selected container and merge
        # them into one string, trimming surrounding whitespace.
        fragments = response.xpath(
            "/html/body/div[2]/div[2]/div[3]//text()"
        ).extract()
        detail = ''.join(fragments).strip()
        print(detail)
(venv) PS C:\Users\Administrator\Desktop\douban\threedmPro> scrapy crawl threedm
周星驰《食神》取景地珍宝海鲜舫 正式告别香港
周星驰电影《食神》取景地——珍宝海鲜舫,正式告别香港,转移至东南亚。
因为就一组