爬取数据时返回报的错错误
Traceback (most recent call last):
File "D:/untitled2/数据爬取.py", line 45, in <module>
floor = removeSpaces(div.css('.property-content-info-text.property-content-info-text:nth-child(4)::text').get())
File "D:/untitled2/数据爬取.py", line 8, in removeSpaces
str = str.strip( )
AttributeError: 'NoneType' object has no attribute 'strip'
这是我的代码
import requests # 发送请求
import parsel #解析数据
import csv #保存数据
def removeSpaces(str):
str = str.strip( )
return str
#保存格式
f =open('广州二手房源信息.csv',mode='a',encoding='utf-8',newline='')
csv_writer = csv.writer(f)
csv_writer.writerow(['title','unitType','area','towards','floor','year','totalPrice','average','village','address'])
#伪装
headers ={
'cookie': 'aQQ_ajkguid=BF21B465-6B86-4EA9-8999-4741B3C48CFF; sessid=B841AC2F-E128-4D17-8044-622DDBCD2232; ajk-appVersion=; seo_source_type=0; id58=QcqK9mOlpR4jPJOfbtuQAg==; ctid=12; fzq_h=82359579a9776fe0f21e4d1fcf0fd6bf_1678172249394_26dbed738dbc4efe8573f1e254bb16a7_3072636441; twe=2; isp=true; 58tj_uuid=2fab36d4-1520-4f54-9f11-2bcc623c7d3a; new_session=1; init_refer=https%253A%252F%252Fguangzhou.anjuke.com%252F; new_uv=1; obtain_by=1; fzq_js_anjuke_ershoufang_pc=40848104fc3fcc91c1b1f0825858bd81_1678177151566_24; xxzl_cid=26e246cb78004e20a4ed0f636aae432e; xxzl_deviceid=cYDgdx/SEH2j7cHterkQIkJ7zU1Z0dHRe7W5wYoztqMws4sCveNqjKJsqrZ859/7',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
}
for page in range(1,51):
url = f'https://guangzhou.anjuke.com/sale/p{page}/?from=HomePage_TopBar'
#发送请求
response = requests.get(url, headers=headers)
#获取数据
html_data = response.text
#解析数据 提取房源信息
#.property
selector = parsel.Selector(html_data)
divs = selector.css('.property')
# 把每一个div挨个取出来,针对每一个div进行详情内容的提取
for div in divs:
#标题
title = removeSpaces(div.css('.property-content-title-name::text').get())
#房源信息
unitType = div.css('.property-content-info-text .property-content-info-attribute span::text').getall()
unitType = ''.join(unitType)
area = removeSpaces(div.css('.property-content-info-text.property-content-info-text:nth-child(2)::text').get())
towards = removeSpaces(div.css('.property-content-info-text.property-content-info-text:nth-child(3)::text').get())
floor = removeSpaces(div.css('.property-content-info-text.property-content-info-text:nth-child(4)::text').get())
year = removeSpaces(div.css('.property-content-info-text.property-content-info-text:nth-child(5)::text').get())
totalPrice = removeSpaces(div.css('.property-price-total-num::text').get())
average = removeSpaces(div.css('.property-price-average::text').get())
village = removeSpaces(div.css('.property-content-info-comm-name::text').get())
address = div.css('.property-content-info.property-content-info-comm .property-content-info-comm-address span::text').getall()
address = ''.join(address)
print(title,unitType,area,towards,floor,year,totalPrice,average,village,address)
#保存数据
csv_writer.writerow([title,unitType,area,towards,floor,year,totalPrice,average,village,address])
回答如下,记得采纳一下哦!
你打印str看看,应该是你的str为空了,所以没法调用strip()。报错都说了是nonetype类型
说明
div.css('.xxx').get()
的结果是个None