def _first_number(text, pattern=r'\d+(?:\.\d+)?'):
    """Return the first match of *pattern* in *text* as a float, or None.

    Thousands separators (",") are removed before matching. None is returned
    when *text* is empty/None or nothing matches, so callers never have to
    catch ValueError from float().
    """
    import re

    match = re.search(pattern, (text or '').replace(',', ''))
    return float(match.group()) if match else None


def parseHtml(html, csv_path='d:/new/dangdang_python.csv'):
    """Extract book records from a search-result page and save them as CSV.

    Args:
        html: Markup of the result page; it is parsed with the 'lxml' parser.
        csv_path: Destination CSV file. The file is overwritten on each call.
            Pass None to skip writing and only return the parsed data.

    Returns:
        A dict mapping each book title to
        ``[img_src, price, detail, stars, num_comments]``. Numeric fields are
        floats, or None when the value is missing or unparseable; ``stars`` is
        the style width percentage divided by 20. Items sharing a title
        overwrite each other because the title is the dict key. The dict is
        empty when the page has no 'con shoplist' container.

    Raises:
        OSError: If *csv_path* cannot be opened for writing (for example when
            its parent directory does not exist).
    """
    import csv

    data = {}
    # Build the soup with the lxml parser.
    soup = BeautifulSoup(html, 'lxml')

    # find() returns None instead of raising IndexError when nothing matches.
    container = soup.find('div', {'class': 'con shoplist'})
    items = container.find_all('li') if container is not None else []

    for item in items:
        # Title: taken from the first link; items without one are skipped
        # because the title is the record key.
        link = item.find('a')
        title = link.get('title') if link is not None else None
        if not title:
            continue
        bookname = title.strip(' ')

        # Cover image: use 'data-original' when present, otherwise fall back
        # to the plain 'src' attribute.
        img = link.img
        img_src = None
        if img is not None:
            img_src = img.get('data-original')
            if img_src is None:
                img_src = img.get('src')
        img_src = img_src.strip(' ') if img_src else ''

        # Price: first number in the price span (a leading currency symbol
        # is ignored by the numeric match).
        price_tag = item.find('p', {'class': 'price'})
        price_span = price_tag.span if price_tag is not None else None
        price = _first_number(price_span.text if price_span is not None else '')

        # Description: empty string when the item has no detail paragraph.
        detail_tag = item.find('p', {'class': 'detail'})
        detail = detail_tag.text if detail_tag is not None else ''

        # Rating: the inner span's style carries a percentage; dividing by 20
        # maps 0-100 % onto a 0-5 scale.
        star_line = item.find('p', {'class': 'search_star_line'})
        outer_span = star_line.span if star_line is not None else None
        inner_span = outer_span.span if outer_span is not None else None
        style = inner_span.get('style') if inner_span is not None else None
        percent = _first_number(style, r'\d+(?:\.\d+)?(?=\s*%)')
        stars = percent / 20 if percent is not None else None

        # Number of comments: first number in the review link's text.
        comment_link = star_line.a if star_line is not None else None
        num_comments = _first_number(
            comment_link.text if comment_link is not None else ''
        )

        data[bookname] = [img_src, price, detail, stars, num_comments]

    # A list has no .save() method; persist the rows with the csv module.
    if csv_path is not None:
        # newline='' is required by the csv module; 'utf-8-sig' adds a BOM so
        # spreadsheet tools detect the encoding of non-ASCII text.
        with open(csv_path, 'w', newline='', encoding='utf-8-sig') as handle:
            writer = csv.writer(handle)
            writer.writerow(
                ['bookname', 'img_src', 'price', 'detail', 'stars', 'num_comments']
            )
            for name, fields in data.items():
                writer.writerow([name] + fields)

    return data
# Question from the original post (有什么问题请详细说明): please explain in detail what is wrong with this code.