
# Module-level result lists. get_job_list() appends to these, one entry per
# job posting found on a search-result page.
job_name = []  # posting titles (the "job_name" field)
company_name = []  # employer names (the "company_name" field)
workarea = []  # work location text (the "workarea_text" field)
# Salary text; NOTE(review): confirm get_job_list() appends to this list.
providesalary = []
attribute = []  # comma-joined "attribute_text" tags of each posting
def get_job_list(keyword=None, page=1, timeout=10):
    """Fetch one 51job search-result page and record its job postings.

    Downloads the search page for ``keyword``/``page``, saves the raw HTML to
    ``/home/aistudio/work/index.html``, extracts the JSON object assigned to
    ``window.__SEARCH_RESULT__`` in the page, and appends one entry per item
    of its ``engine_jds`` array to the module-level lists ``job_name``,
    ``company_name``, ``workarea``, ``providesalary`` and ``attribute``.

    Args:
        keyword: Search term. Required, despite the ``None`` default.
        page: Result page number placed in the URL.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        None. Nothing is saved or recorded when the HTTP status is not 200.

    Raises:
        TypeError: If ``keyword`` is None.
        IndexError: If the page holds no ``window.__SEARCH_RESULT__`` payload.
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    if keyword is None:
        raise TypeError("get_job_list() requires a search keyword, got None")

    # The keyword is percent-encoded twice before being placed in the path
    # (presumably what the site expects -- confirm against a live URL).
    key = urllib.parse.quote(urllib.parse.quote(keyword))
    url = (
        'https://search.51job.com/list/000000,000000,0000,00,9,99,'
        + key + ',2,' + str(page) + '.html'
    )
    headers = {
        'Host': 'search.51job.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400',
    }
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return

    html = response.text
    # Keep a copy of the raw page on disk.
    with open('/home/aistudio/work/index.html', mode='w', encoding='gbk') as f:
        f.write(html)

    # Raw string so that "\." reaches the regex engine unchanged. The marker
    # spelling matches the later get_job_list definition in this file;
    # DOTALL lets the JSON payload span several lines.
    pattern = r'window\.__SEARCH_RESULT__ = ({.*?})</script>'
    match = re.search(pattern, html, re.DOTALL)
    if match is None:
        # Same exception type as indexing an empty findall() result.
        raise IndexError("window.__SEARCH_RESULT__ payload not found in page")

    job_list = json.loads(match.group(1))
    for job in job_list["engine_jds"]:
        job_name.append(job["job_name"])
        company_name.append(job["company_name"])
        workarea.append(job["workarea_text"])
        # The module-level list is named providesalary, not salary.
        providesalary.append(job["providesalary_text"])
        attribute.append(",".join(job["attribute_text"]))
import json
import re
import urllib
import urllib.parse

import requests
# Result lists filled by get_job_list(); each call appends the postings of
# one search-result page.
job_name = []  # "job_name" of each posting
company_name = []  # "company_name" of each posting
workarea = []  # "workarea_text" of each posting
# Intended for salary text; NOTE(review): verify get_job_list() fills it.
providesalary = []
attribute = []  # "attribute_text" entries of each posting joined with ","
def get_job_list(keyword=None, page=1, timeout=10):
    """Fetch one 51job search-result page and record its job postings.

    Downloads the search page for ``keyword``/``page``, saves the raw HTML to
    ``/index.html``, extracts the JSON object assigned to
    ``window.__SEARCH_RESULT__`` in the page, and appends one entry per item
    of its ``engine_jds`` array to the module-level lists ``job_name``,
    ``company_name``, ``workarea``, ``providesalary`` and ``attribute``.

    Args:
        keyword: Search term. Required, despite the ``None`` default.
        page: Result page number placed in the URL.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        None. Nothing is saved or recorded when the HTTP status is not 200.

    Raises:
        TypeError: If ``keyword`` is None.
        IndexError: If the page holds no ``window.__SEARCH_RESULT__`` payload.
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    if keyword is None:
        raise TypeError("get_job_list() requires a search keyword, got None")

    # The keyword is percent-encoded twice before being placed in the path
    # (presumably what the site expects -- confirm against a live URL).
    key = urllib.parse.quote(urllib.parse.quote(keyword))
    url = (
        'https://search.51job.com/list/000000,000000,0000,00,9,99,'
        + key + ',2,' + str(page) + '.html'
    )
    headers = {
        'Host': 'search.51job.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400',
    }
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return

    html = response.text
    # Keep a copy of the raw page on disk.
    # NOTE(review): the target is the filesystem root; confirm it is writable.
    with open('/index.html', mode='w', encoding='gbk') as f:
        f.write(html)

    # Raw string so that "\." reaches the regex engine unchanged; DOTALL lets
    # the JSON payload span several lines.
    pattern = r'window\.__SEARCH_RESULT__ = ({.*?})</script>'
    match = re.search(pattern, html, re.DOTALL)
    if match is None:
        # Same exception type as indexing an empty findall() result.
        raise IndexError("window.__SEARCH_RESULT__ payload not found in page")

    job_list = json.loads(match.group(1))
    for job in job_list["engine_jds"]:
        job_name.append(job["job_name"])
        company_name.append(job["company_name"])
        workarea.append(job["workarea_text"])
        # Keeps providesalary in step with the other per-posting lists.
        providesalary.append(job["providesalary_text"])
        attribute.append(",".join(job["attribute_text"]))
# Script driver: collect the first result page for the search keyword, then
# print every job title gathered so far.
get_job_list(keyword="计算机", page=1)
print(job_name)