①提取str文本中两个小于号判断的变量:age、height
②删除str文本中两个小于号判断的逻辑,返回"df.loc[(df['salary']>=4000)]"
import re
# Column name inside ['...'] that is immediately followed by "<" (this also
# covers "<="). The name may not contain a quote or "]", so a match can never
# stretch across a neighbouring condition.
_LT_COLUMN = re.compile(r"\['([^'\]]+)'\]<")

# One parenthesised condition containing "<" plus exactly ONE adjacent "&":
# either the "&" that follows it or, failing that, the "&" that precedes it.
# Consuming only one side keeps the "&" between two surviving conditions.
_LT_CONDITION = re.compile(r"\([^()>]*<[^()>]*\)&?|&\([^()>]*<[^()>]*\)")


def extract_lt_columns(expr):
    """Return the column names that are compared with ``<`` in *expr*."""
    return _LT_COLUMN.findall(expr)


def drop_lt_conditions(expr):
    """Return *expr* with every parenthesised ``<`` condition removed."""
    return _LT_CONDITION.sub("", expr)


# "expr" instead of "str" so the builtin type is not shadowed.
expr = "df.loc[(df['age']<23)&(df['salary']>=4000)&(df['height']<160)]"
res1 = extract_lt_columns(expr)
res2 = drop_lt_conditions(expr)
print(res1)
print(res2)
输出:
['age', 'height']
df.loc[(df['salary']>=4000)]
讲解:
import re
str1 = "df.loc[(df['age']<23)&(df['salary']>=4000)&(df['height']<160)]"
# re.findall(pattern, string) returns a list with the captured group of every
# match, or [] when nothing matches.
# Raw strings (r"...") keep the backslashes for the regex engine instead of
# being read as (invalid) Python string escapes.
# Here: capture the name inside ['...'] when it is directly followed by "<".
lis1 = re.findall(r"\['(\w+)'\]<", str1)
print(lis1)  # ['age', 'height']
# re.sub(pattern, replacement, string) replaces every match.
# Here: delete each parenthesised condition that contains "<" together with
# exactly one neighbouring "&" (the one after it, otherwise the one before it),
# so two remaining conditions always stay joined by "&".
str2 = re.sub(r"\([^()>]*<[^()>]*\)&?|&\([^()>]*<[^()>]*\)", '', str1)
print(str2)  # df.loc[(df['salary']>=4000)]
import re
# "expr" instead of "str" so the builtin type is not shadowed.
expr = "df.loc[(df['age']<23)&(df['salary']>=4000)&(df['height']<160)]"
# Parse every condition of the form (df['name']<operator><value>) into a
# (name, operator, value) tuple. Two-character operators are listed first so
# that "<=" is not read as "<" followed by a value starting with "=".
conditions = re.findall(
    r"\(df\['([^'\]]+)'\](<=|>=|==|!=|<|>)([^()]+)\)", expr
)
print(conditions)
variables = []  # column names used in a strict "<" comparison
remain = []     # conditions to keep, rebuilt in their original textual form
for name, op, value in conditions:
    if op == '<':
        variables.append(name)
    else:
        remain.append(f"(df['{name}']{op}{value})")
print(variables)
# Re-assemble the filter from the conditions that were kept.
result = f"df.loc[{'&'.join(remain)}]"
print(result)
import re
content = "df.loc[(df['salary']>=4000)&(df['age']<23)&(df['age']<23)&(df['age']<23)&(df['salary']>=4000)&(df['age']<23)&(df['salary']>=4000)&(df['salary']>=4000)&(df['height']<160)&(df['salary']>=4000)]"

# A word wrapped in ['...'] that is immediately followed by "<"; the captured
# group is the word itself.
lt_name_pattern = re.compile(r"\['(\w+)'\]<")
result1 = lt_name_pattern.findall(content)
print(result1)

# An optional "&" ("&?" = zero or one "&") followed by a parenthesised run
# that contains no ">". "[^>]" is a negated class: just as "[a]" matches "a",
# "[^a]" matches anything that is not "a".
lt_condition_pattern = re.compile(r"&?\([^>]*\)")
result2 = lt_condition_pattern.sub("", content)

# When the very first condition was removed, a stray "&" is left right after
# the opening "[" - strip it.
cleaned = result2.replace('[&(', '[(')
print(cleaned)
写代码试了一下,不知道能不能满足你的要求。
# -*- coding: utf-8 -*-
import re
str1 = "df.loc[(df['age']<23)&(df['salary']>=4000)&(df['height']<160)]"
# First pull out the text of every parenthesised condition (without the
# parentheses themselves); the column names are extracted from these pieces.
res_kuohao = re.findall(r"\((.+?)\)", str1)
# Column names used in a "<" comparison: age, height.
# The pattern is compiled once and each condition is searched a single time.
lt_column = re.compile(r"\['(.+?)'\]<")
res1 = [m.group(1) for m in map(lt_column.search, res_kuohao) if m]
print(res1)
# ['age', 'height']
# Keep the conditions that contain no "<".
res2_cond = [each for each in res_kuohao if '<' not in each]
# Put the parentheses back around every kept condition and join them with
# "&": pandas needs them because "&" binds tighter than the comparisons.
# All kept conditions are used, and an empty list simply yields "df.loc[]".
res2 = "df.loc[{}]".format("&".join("({})".format(cond) for cond in res2_cond))
print(res2)
# df.loc[(df['salary']>=4000)]
参考一下呢 https://www.jb51.net/article/238575.htm
这么多正确答案
java使用正则表达式截取单引号之间的内容
https://blog.csdn.net/weixin_45453133/article/details/113418049