import requests
from bs4 import BeautifulSoup
# Download the job listing page and force UTF-8 decoding of the body.
response = requests.get('https://job.chizhouren.com/search?region=100&subarea_id=33')
response.encoding = 'utf-8'
Soup = BeautifulSoup(response.text, 'html.parser')
# Collect every <td class="wages"> cell found under the first <tbody>.
wages = Soup.find('tbody').find_all('td', {'class': 'wages'})
n = 0  # never read anywhere in this snippet
for td in wages:
    tdlst = td.text
    # Trim whitespace, then trim the characters of '元/月' and of '薪资面议'
    # from both ends (str.strip works on a character set, not a substring),
    # split on '-' and keep the last piece, i.e. the upper bound of a range.
    # NOTE: a cell whose text is exactly '薪资面议' is reduced to '' here,
    # so the print() below emits a blank line for it.
    tdlst = [(tdlst.strip().strip('元/月').strip('薪资面议').split('-'))[-1]]
    tdlst = (','.join(tdlst))
    print(tdlst)
2700
12000
8000
10000
5000
6000
8000
3000
12000
2700
6000
10000
10000
进程已结束,退出代码0
2700
12000
8000
10000
5000
6000
8000
3000
12000
2700
6000
10000
10000
我想要达到这种结果
那个并不是换行符哦,而是获取到了空的数据('薪资面议'被剔除后剩下的空字符串),是你提取工资的语句没有写好哦
下面是我的理解,供参考:
出现空行的原因是:'薪资面议'字符串被 strip 剔除了,留下的是一个空字符串,而 print 打印空字符串就只会输出一个换行。可以通过逐项判断、再进行相应操作来去除:在 for 循环中逐个访问爬虫返回的数据,提取之后如果是'薪资面议'字符串,则跳过,不添加到 tdlst;如果不是,则添加到 tdlst 并打印。(此修改代码仅限用于学习研究之用。)
修改如下:
import requests
from bs4 import BeautifulSoup
WAGES_URL = 'https://job.chizhouren.com/search?region=100&subarea_id=33'
WAGE_UNIT = '元/月'      # unit suffix that follows a numeric wage
NEGOTIABLE = '薪资面议'   # placeholder text used when no figure is given


def max_wage(cell_text):
    """Return the upper wage figure from one table cell, or None.

    Args:
        cell_text: Raw text of a wage cell, e.g. '2000-2700元/月',
            '5000元/月' or '薪资面议'. Surrounding whitespace is ignored.

    Returns:
        The part after the last '-' with the '元/月' suffix removed, as a
        string ('2700' for '2000-2700元/月'). None when the cell is empty
        or contains only the '薪资面议' placeholder, so callers can skip it
        instead of printing a blank line.
    """
    text = cell_text.strip()
    # Remove the unit as a real suffix. str.strip('元/月') would instead
    # remove any of the characters '元', '/', '月' from both ends.
    if text.endswith(WAGE_UNIT):
        text = text[:-len(WAGE_UNIT)]
    # For a range 'low-high' the last piece is the maximum wage.
    upper = text.split('-')[-1].strip()
    if not upper or upper == NEGOTIABLE:
        return None
    return upper


def main():
    """Fetch the job listing page and print the top wage of every row.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        SystemExit: If the page contains no <tbody> element.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(WAGES_URL, timeout=10)
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        raise SystemExit('No <tbody> element found in the response.')
    for td in tbody.find_all('td', {'class': 'wages'}):
        wage = max_wage(td.text)
        # Rows without a numeric wage are skipped, not printed as blanks.
        if wage is not None:
            print(wage)


if __name__ == '__main__':
    main()