用 Python 写爬虫,为什么会报错啊?
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
# Page to scrape; passed to get_html() below. The trailing '?' only starts an
# empty query string.
url = 'http://quotes.money.163.com/data/caibao/yjgl_ALL.html?'
def get_html(url, timeout=10):
    """Download ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Send a browser-like User-Agent instead of the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, likeGecko) Chrome/87.0.4280.88 Safari/537.36'
    }
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail here on an error status rather than handing an error page to the
    # HTML parser, where the problem would surface as a confusing None lookup.
    res.raise_for_status()
    # Force UTF-8 decoding instead of the encoding requests infers.
    res.encoding = 'utf-8'
    return res.text
# Question script: fetch the page, locate a table by id, and try to collect
# stock ids from its cells.
# NOTE(review): indentation was lost in this paste, so the extent of the loop
# body below cannot be determined from the text alone.
html= get_html(url)
soup = BeautifulSoup(html, 'html.parser')
# NOTE(review): soup.find() returns None when nothing matches. The traceback
# shows stock_list was None, i.e. no table with this id was found. The answer's
# code looks up 'plate_performance' (underscore instead of a space).
stock_list = soup.find('table',attrs={'id':'plate performance'})
print(stock_list)
# This is the line the traceback points at: find_all() is called on None.
items = stock_list.find_all('td')
for item in items:
# NOTE(review): item.a is presumably None for cells without a link, in which
# case the subscript would fail - confirm against the page markup.
url = item.a['href']
# NOTE(review): the re module has no find_all() (its function is findall(),
# which takes a pattern and a string), so this call would raise AttributeError
# if it were reached.
id = re.find_all(attrs={'target':'_blank'})
# NOTE(review): stock_id_list is never defined anywhere in this script.
stock_id_list.append(id)
print(stock_id_list)
、、、
###### 运行结果及报错内容
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/var/folders/_6/zyz5xzm502l3h_c4258pb1yh0000gn/T/ipykernel_32130/2526818991.py in <module>
22 print(stock_list)
23
---> 24 items = stock_list.find_all('td')
25
26 for item in items:
AttributeError: 'NoneType' object has no attribute 'find_all'
###### 我的解答思路和尝试过的方法
###### 我想要达到的结果
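报错的直接原因是 stock_list 为 None:soup.find() 没有找到 id 为 'plate performance' 的 table(下面的代码用的是带下划线的 'plate_performance'),所以在 None 上调用 find_all('td') 时抛出了 AttributeError。你可以先把 stock_list 打印出来,确认拿到的是你想要的标签,再看看里面哪个标签才是你想要的数据,必要时把 td 换成对应的标签。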
你可以试试下面代码,可以获取你给的页面上的所有股票代码。
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
# Page to scrape; passed to get_html() below. The trailing '?' only starts an
# empty query string.
url = 'http://quotes.money.163.com/data/caibao/yjgl_ALL.html?'
def get_html(url, timeout=10):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Send a browser-like User-Agent instead of the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, likeGecko) Chrome/87.0.4280.88 Safari/537.36'
    }
    res = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail here on an error status rather than handing an error page to the
    # HTML parser, where the problem would surface as a confusing None lookup.
    res.raise_for_status()
    # Force UTF-8 decoding instead of the encoding requests infers.
    res.encoding = 'utf-8'
    return res.text
# Download the page and parse it with Python's built-in HTML parser backend.
html = get_html(url)
soup = BeautifulSoup(html, 'html.parser')

# soup.find() returns None when no element matches. Check explicitly so that a
# changed or mistyped table id produces a clear message instead of
# "'NoneType' object has no attribute 'find_all'".
stock_list = soup.find('table', attrs={'id': 'plate_performance'})
if stock_list is None:
    raise RuntimeError("table with id 'plate_performance' not found in the page")

items = stock_list.find_all('td')

# Keep the text of the first link in each cell when it is purely numeric (the
# stock code). Cells without a link, or whose link text is not numeric, are
# skipped. The text is stripped first so surrounding whitespace does not make
# isdigit() reject an otherwise valid code.
stock_id_list = []
for item in items:
    link = item.find('a')
    if link is None:
        continue
    code = link.text.strip()
    if code.isdigit():
        stock_id_list.append(code)
print(stock_id_list)