爬取到的薪资数据有以下几种单位格式:
xx千-xx千
xx千-xx万
xx万-xx万
参考代码如下,望采纳
import re
from decimal import Decimal
# One salary range: "<low><unit>-<high><unit>", optionally followed by a
# "·N薪" (months-of-pay) marker or a "元/月" (yuan per month) suffix.
# Units: k / K / 千 mean thousands, 万 means ten-thousands.
_SALARY_RANGE_RE = re.compile(
    r'(\d+\.?\d*)([kK千万])-(\d+\.?\d*)([kK千万])(?:·\d+薪|元/月)?'
)


def _to_wan(amount, unit):
    """Return the numeric string *amount*, given in *unit*, re-expressed in 万."""
    if unit == '万':
        # Already in the target unit: keep the scraped text untouched.
        return amount
    # k / K / 千 are thousands and 1万 = 10千. Decimal avoids binary-float
    # artifacts; the 'f' format rules out scientific notation.
    return format(Decimal(amount) / 10, 'f')


def convert_salary_unit(salary):
    """Normalize a scraped salary range so that both bounds are in 万.

    Each bound is converted according to its own unit, so thousand-based
    values (k, K, 千) are divided by 10 instead of merely being relabelled:

        "5千-8千"       -> "0.5万-0.8万"
        "8千-1.2万"     -> "0.8万-1.2万"
        "15k-20K"       -> "1.5万-2万"
        "1万-2万·13薪"  -> "1万-2万"   (the "·N薪" marker is dropped)
        "8千-1万元/月"  -> "0.8万-1万" (the "元/月" suffix is dropped)

    Args:
        salary: The raw salary string. Surrounding whitespace is ignored
            when matching.

    Returns:
        The range formatted as "<low>万-<high>万". Any string that is not a
        recognized unit-bearing range is returned unchanged; this includes
        daily rates such as "150-200/天", which are plain yuan per day and
        cannot be put on the same scale without assuming working days.
    """
    match = _SALARY_RANGE_RE.fullmatch(salary.strip())
    if match is None:
        return salary
    low, low_unit, high, high_unit = match.groups()
    return '{}万-{}万'.format(_to_wan(low, low_unit), _to_wan(high, high_unit))
# Print the normalized form of every scraped salary string, one per line.
# `salary_data` is expected to be defined elsewhere as an iterable of strings.
for salary in salary_data:
    print(convert_salary_unit(salary))