需要用正则和js匹配一段内容,该内容是产品配料表,包括名称,括号内备注,和含量百分比。需要提取出每一种配料和百分比。
注意:
需要得到如下结果:
[ Water 52.64%,
sand 13.36%,
glass12%,
concret (-4°-0° brix) 6%,
grand sand 4.6%,
fine added sand (powder, smoke) 0.1%,
Sodium sulfate (sodium, calcium, iron (ion), modified glue(FAX)) 4.5%,
alkaline sulfides 0.65% (sodium 45%, carbon 0.2%, heptahydrate sodium 0.1%, black stone 0.1%),
sulfate ion 0.35%,
Potassium sulfate 0.3%,
POTAssium chloride 0.3% ]
JS 的正则不支持 balancing group(平衡组),所以很难匹配出嵌套括号的最外层括号。由于正则本身就是用状态机实现的,所以这里使用状态机的思路完成配料表的提取。
https://www.regular-expressions.info/refrecurse.html
时间复杂度:O(n)
空间复杂度:O(1),不考虑返回值
/**
 * Splits an ingredient list into one entry per ingredient using a small
 * hand-written state machine (single pass over the characters).
 *
 * Rules applied while scanning:
 *  - Text inside square brackets (e.g. footnote markers like "[1]") is dropped,
 *    together with the brackets themselves.
 *  - A comma ends the current ingredient only when it is outside every
 *    parenthesis AND a "%" has already been seen outside parentheses. This keeps
 *    decimal commas ("13,36%") and commas inside nested notes intact.
 *  - A single trailing "." is removed from the last ingredient.
 *
 * @param {string} str - Raw ingredient list; null/undefined yields an empty array.
 * @returns {string[]} Trimmed ingredient entries in source order, never containing
 *   empty strings.
 */
function getChargeMixture(str) {
  const ans = [];
  if (str == null) return ans;

  let cur = '';
  // Nesting depth of "(" ... ")" for the ingredient being collected.
  let parenDepth = 0;
  // Nesting depth of "[" ... "]"; anything at depth > 0 is discarded.
  let squareDepth = 0;
  // True once a "%" was seen at parenthesis depth 0 for the current ingredient.
  let hasPercentSymbol = false;

  for (const ch of str) {
    if (ch === '[') {
      squareDepth++;
      continue;
    }
    if (ch === ']') {
      // Clamp at 0 so a stray "]" cannot disable the skipping of later "[...]".
      squareDepth = Math.max(0, squareDepth - 1);
      continue;
    }
    if (squareDepth > 0) continue;

    if (ch === ',' && parenDepth === 0 && hasPercentSymbol) {
      // Top-level comma after a percentage: the ingredient is complete.
      // cur is guaranteed to contain "%" here, so it is never blank.
      ans.push(cur.trim());
      cur = '';
      hasPercentSymbol = false;
      continue;
    }

    if (ch === '(') {
      parenDepth++;
    } else if (ch === ')') {
      // Clamp at 0 so a stray ")" cannot stop every later comma from splitting.
      parenDepth = Math.max(0, parenDepth - 1);
    } else if (ch === '%' && parenDepth === 0) {
      hasPercentSymbol = true;
    }
    cur += ch;
  }

  // Flush the last ingredient; skip it when only whitespace or "." is left
  // (e.g. the input ended with ", " or is just ".").
  const last = cur.trim().replace(/\.$/, '').trimEnd();
  if (last) ans.push(last);
  return ans;
}
// Demo input: note the decimal comma in "13,36%" and the "[n]" footnote markers.
const str = `Water 52.64%, sand 13,36% [1], glass12%, concret (-4°-0° brix) 6% [2], grand sand 4.6%, fine added sand (powder, smoke) 0.1%, Sodium sulfate (sodium, calcium, iron (ion), modified glue(FAX)) 4.5%, alkaline sulfides 0.65% (sodium 45%, carbon 0.2%, heptahydrate sodium 0.1%, black stone 0.1%), sulfate ion 0.35%, Potassium sulfate 0.3%, POTAssium chloride 0.3%.`;
// Run the extractor on the demo input and print the resulting array.
const mixture = getChargeMixture(str);
console.log(mixture);
// Regex-split approach: cut the list at commas that follow "%", "]" or ")",
// strip trailing "[n]" footnote markers, then glue back together the pieces
// that were cut in the middle of a parenthesised note.

// True when `text` contains as many "(" as ")".
const hasBalancedParens = (text) => {
  const chars = [...text];
  const opened = chars.filter((c) => c === '(').length;
  const closed = chars.filter((c) => c === ')').length;
  return opened === closed;
};

const ingredients = `Water 52.64%, sand 13.36% [1], glass12%, concret (-4°-0° brix) 6% [2], grand sand 4.6%, fine added sand (powder, smoke) 0.1%, Sodium sulfate (sodium, calcium, iron (ion), modified glue(FAX)) 4.5%, alkaline sulfides 0.65% (sodium 45%, carbon 0.2%, heptahydrate sodium 0.1%, black stone 0.1%), sulfate ion 0.35%, Potassium sulfate 0.3%, POTAssium chloride 0.3%`
  // Split only on commas directly preceded by "%", "]" or ")".
  .split(/(?<=%|]|\))(?=,),/g)
  // Drop a trailing footnote marker such as "[1]" and surrounding whitespace.
  .map((piece) => piece.replace(/^(.+)\[\d+\]$/, '$1').trim())
  .reduce((groups, piece) => {
    const last = groups.at(-1);
    if (Array.isArray(last) && !hasBalancedParens(last.join(''))) {
      // The open group still has an unclosed "(": this piece belongs to it.
      last.push(piece);
    } else {
      // Start a new entry; an array marks a group waiting for its closing ")".
      groups.push(hasBalancedParens(piece) ? piece : [piece]);
    }
    return groups;
  }, [])
  // Pieces were trimmed, so re-join with ", " to restore the original spacing
  // (assumes the source separates items with a comma followed by one space).
  .map((group) => (Array.isArray(group) ? group.join(', ') : group));

console.log(ingredients);
alkaline sulfides 0.65% (sodium 45%, carbon 0.2%, heptahydrate sodium 0.1%, black stone 0.1%), 这部分不太可能作为一个整体匹配,因为它里面本身就有百分比,肯定会被匹配到的
不太准确,仅供参考吧
// Approximate, regex-only extraction (the author notes it is for reference only).
// The character class contains no comma, so a match can never span a comma:
// parenthesised notes that contain commas are cut into fragments, and the
// percentages inside "(...)" are reported as separate matches.
const str = "Water 52.64%, sand 13.36% [1], glass12%, concret (-4°-0° brix) 6% [2], grand sand 4.6%, fine added sand (powder, smoke) 0.1%, Sodium sulfate (sodium, calcium, iron (ion), modified glue(FAX)) 4.5%, alkaline sulfides 0.65% (sodium 45%, carbon 0.2%, heptahydrate sodium 0.1%, black stone 0.1%), sulfate ion 0.35%, Potassium sulfate 0.3%, POTAssium chloride 0.3%.";
// Shortest run of allowed characters that ends in "%"; whitespace is allowed,
// so matches keep their leading space.
const reg = /([\w+\s\.\(\)°-]*?%)/g;
const matches = str.match(reg);
console.log(matches);
注意这个:百分比可能是英文逗号做小数点:33,4% 或者33.4% 都有可能