import time

import matplotlib.pyplot as plt
import pandas as pd
import requests
from lxml import etree
# Fetch two pages of the Douban book "top250" listing and collect, per page,
# the title, author line, rating and detail link of every entry into a
# DataFrame; the per-page frames are then concatenated into `df`.

# Listing URL; the value of the `start` query parameter is appended per page.
pre_url = "https://book.douban.com/top250?start="

# Browser-like User-Agent sent with every request.
# NOTE(review): the original literal was cut off after "(KH"; everything from
# "KHTML" onwards is a conventional Chrome UA reconstruction -- replace it with
# the intended string if it differs.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/70.0.3538.102 Safari/537.36'
}

douban_book = []  # one DataFrame per fetched page

for x in range(0, 26, 25):  # x takes the values 0 and 25
    # A timeout keeps a stalled connection from hanging the script forever.
    html = requests.get(pre_url + str(x), headers=headers, timeout=10)
    time.sleep(1)  # wait one second after each request (presumably to throttle)
    html.encoding = 'utf-8'  # decode the body as UTF-8 regardless of the guessed charset
    selector = etree.HTML(html.text)

    # NOTE(review): the class names "pl2", "pl" and "rating_nums" were restored
    # from a garbled source ("p12", " rating_nums") -- confirm against the live
    # page markup.
    name = selector.xpath('//div[@class="pl2"]/a/@title')
    writer = selector.xpath('//p[@class="pl"]/text()')
    links = selector.xpath('//div[@class="pl2"]/a/@href')
    sorce = selector.xpath('//span[@class="rating_nums"]/text()')  # ratings (name kept as-is)

    # Each list is one row of the intermediate frame; transposing turns them
    # into columns 0..3 = name, writer, sorce, links. Lists of unequal length
    # are padded with missing values by pandas.
    book = pd.DataFrame([name, writer, sorce, links]).T
    douban_book.append(book)

# Stack the per-page frames; the index is not reset, so each page keeps its
# own 0-based row labels.
df = pd.concat(douban_book)