from bs4 import BeautifulSoup
import pandas as pd
import requests
def crawer_travel_introduction2(url):
    """Download *url* and return the page parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``lxml`` parser.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    # Send a browser-like User-Agent header with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server.
    req = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an error status instead of parsing an error page as if
    # it were the real content.
    req.raise_for_status()
    return BeautifulSoup(req.text, 'lxml')
def _element_text(node):
    """Return the text of a parsed element, or '' when the element is missing."""
    return node.text if node is not None else ''


def _descend(node, *tag_names):
    """Follow first-match ``find`` calls in order; return None if any step misses."""
    for tag_name in tag_names:
        if node is None:
            return None
        node = node.find(tag_name)
    return node


def get_jd_introduction2(url):
    """Scrape four descriptive fields from the page at *url*.

    Args:
        url: Address of the page to scrape.

    Returns:
        A 4-tuple ``(cat_advice_time, cat_location, cat_tel, cat_open_time)``.
        Each item is a one-element list holding the text of the matching
        element, or ``''`` when that element is not present on the page.

    Raises:
        Whatever ``crawer_travel_introduction2`` raises while fetching.
    """
    bsobj = crawer_travel_introduction2(url)

    advice_time = bsobj.find('div', {'class': 'time'})

    # The class filter takes the bare class name; the previous value
    # 'class="td_l"' was the raw attribute text and could never match.
    info_cell = bsobj.find('td', {'class': 'td_l'})
    # 'dl'[1] indexed the *string* "dl" (giving "l"); the intent is the
    # second and third <dl> elements inside the cell.
    dl_items = info_cell.find_all('dl') if info_cell is not None else []
    location_dl = dl_items[1] if len(dl_items) > 1 else None
    tel_dl = dl_items[2] if len(dl_items) > 2 else None

    # NOTE(review): the original looked for a <td> carrying a "dl"
    # attribute; presumably a <dl class="m_desc_right_col"> was meant --
    # confirm against the live page markup.
    open_time_dl = bsobj.find('dl', {'class': 'm_desc_right_col'})

    cat_advice_time = [_element_text(advice_time)]
    cat_location = [_element_text(_descend(location_dl, 'dd', 'span'))]
    cat_tel = [_element_text(_descend(tel_dl, 'dd', 'span'))]
    cat_open_time = [_element_text(_descend(open_time_dl, 'dd', 'span', 'p'))]
    return cat_advice_time, cat_location, cat_tel, cat_open_time
# Page to scrape.
url = 'http://travel.qunar.com/p-oi708952-xihu'

# Each returned value is a list and becomes one column of the table.
cat_advice_time, cat_location, cat_tel, cat_open_time = get_jd_introduction2(url)

city = pd.DataFrame(
    {
        'advice_time': cat_advice_time,
        'location': cat_location,
        'tel': cat_tel,
        'open_time': cat_open_time,
    }
)

# Write the table to a UTF-8 encoded CSV file in the working directory.
city.to_csv('travel_introduction.csv', encoding='utf-8')
# 哪来的报错?发出来。还有,爬虫不能爬电话这些信息。
####### 我放的是完整代码