How to split a string on "," or "[" / "]", unless the "," is inside '{}'

I am looking for a regex to split the following line:

aaa[bbb,ccc[ddd,{eee:1,mmm:999}],nnn[0,3]]
aaa[bbb,ccc[ddd,{eee:1, mmm:[123,555]}],nnn[0,3]]
aaa[bbb, ccc[ddd, ddd],nnn[0,3]]
aaa[bbb,ddd[0,3]]

on '[' or ']' or ',', except when the ',' is inside '{}'. For example: splitting 'aaa[bbb,ccc[ddd,' into aaa, bbb, ccc, ddd is allowed, but {eee:1,mmm:999} must stay in one piece.

result:

aaa, bbb, ccc, ddd, {eee:1,mmm:999}, nnn, 0, 3
aaa, bbb, ccc, ddd, {eee:1, mmm:[123,555]}, nnn, 0, 3
aaa, bbb, ccc, ddd, ddd, nnn, 0, 3
aaa, bbb, ddd, 0, 3

I have read a few other questions, but I couldn't adapt their regexes to do what I want.

The target language for the expression is javascript.

+3
source share
5 answers

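This Perl / PCRE regex does the job as long as the {} are not nested. It also works in JS, provided the `]` inside the character class is escaped (`[^\][,{}]`), because JavaScript reads an unescaped `[^]` as "any character":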

# Sample input: four records held in one multi-line string. It is assigned to
# $_ so that the bare match below is applied to it implicitly.
$_ = 'aaa[bbb,ccc[ddd,{eee:1,mmm:999}],nnn[0,3]]
aaa[bbb,ccc[ddd,{eee:1, mmm:[123,555]}],nnn[0,3]]
aaa[bbb, ccc[ddd, ddd],nnn[0,3]]
aaa[bbb,ddd[0,3]]';

# Collect every token (a /g match in list context returns all matches).
# A token is either a run of characters other than ] [ , { } or a whole
# {...} group up to the first closing brace, so {} must not be nested.
# In Perl a ] placed right after [^ is a literal member of the class.
# Newlines and spaces are not separators, so they stay inside the tokens.
@r = /[^][,{}]+|\{[^}]*}/g;
# Print the tokens separated by ", ".
print join ", ", @r;

Output:

aaa, bbb, ccc, ddd, {eee:1,mmm:999}, nnn, 0, 3,
aaa, bbb, ccc, ddd, {eee:1, mmm:[123,555]}, nnn, 0, 3,
aaa, bbb,  ccc, ddd,  ddd, nnn, 0, 3,
aaa, bbb, ddd, 0, 3

Rough JavaScript Translation:

// Sample input: four records separated by newlines.
var input =
    "aaa[bbb,ccc[ddd,{eee:1,mmm:999}],nnn[0,3]]\n" +
    "aaa[bbb,ccc[ddd,{eee:1, mmm:[123,555]}],nnn[0,3]]\n" +
    "aaa[bbb, ccc[ddd, ddd],nnn[0,3]]\n" +
    "aaa[bbb,ddd[0,3]]";

// A token is either a run of characters other than ] [ , { } or a whole
// {...} group up to the first closing brace (so {} must not be nested).
// The ] inside the class MUST be escaped in JavaScript: an unescaped "[^]"
// is a complete class meaning "any character", which is not what the
// Perl/PCRE original "[^][,{}]" means.
var re = /[^\][,{}]+|\{[^}]*\}/g;

var result = [];
// Declared explicitly: assigning to an undeclared name creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
var match;
while ((match = re.exec(input)) !== null)
{
    result.push(match[0]);
}

// Using <<value>> rather than just a comma, for clarity around
// whether and how "{...}" was processed or not.
// `write` is expected to be supplied by the hosting environment.
write("<<" + result.join(">><<") + ">>");

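Output, which matches what the OP asked for (each token is wrapped in << >> instead of being separated by commas, so it is clear how the {...} groups were handled):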

<<aaa>><<bbb>><<ccc>><<ddd>><<{eee:1,mmm:999}>><<nnn>><<0>><<3>><<
aaa>><<bbb>><<ccc>><<ddd>><<{eee:1, mmm:[123,555]}>><<nnn>><<0>><<3>><<
aaa>><<bbb>><< ccc>><<ddd>><< ddd>><<nnn>><<0>><<3>><<
aaa>><<bbb>><<ddd>><<0>><<3>>
+1

; .

+2

- , . {, . }, . ,, , /, , , . , , .

, { }, , .

+1

{}, -

/**
 * Splits a string on "[", "]", "," and whitespace, keeping every
 * non-nested {...} group intact as a single piece.
 *
 * Trailing separators are ignored. Empty tokens (produced when a chunk
 * starts or ends with a separator) are dropped, and a chunk that holds
 * only separators contributes nothing to the result.
 *
 * @param {string} s - Text to split.
 * @returns {Array<string[]|string>} In input order: an array of tokens for
 *   each stretch of text outside braces, and a plain string for each {...}
 *   group. Calling .join(',') on the result yields a flat comma-separated
 *   list because nested arrays stringify with commas.
 */
function customRx(s){
 // Rx: a {...} group plus an optional comma on either side.
 // Rs: any run of separators between ordinary tokens.
 var Rx= /,?(\{[^}]+\}),?/g, Rs= /[\[\],\s]+/;
 // Work on a local copy with trailing separators removed.
 var text= s.replace(/[\[\],\s]+$/g,'');
 var A= [], M, z= 0;

 // Splits one brace-free chunk and appends its non-empty tokens as a group.
 function pushTokens(chunk){
  var tokens= chunk.split(Rs).filter(function(token){
   return token!== '';
  });
  if(tokens.length> 0){
   A.push(tokens);
  }
 }

 while((M= Rx.exec(text))!== null){
  // Text between the previous brace group (or the start) and this one.
  if(M.index> z){
   pushTokens(text.substring(z, M.index));
  }
  z= Rx.lastIndex;
  // Capture group 1 is the brace group without the surrounding commas.
  A.push(M[1]);
 }
 // Whatever follows the last brace group.
 if(text.length> z){
  pushTokens(text.substring(z));
 }
 return A;
}

//test

// The four sample records, concatenated back to back with no separator
// between them (the closing "]]" of one record borders the next record).
var s1= [
 'aaa[bbb,ccc[ddd,{eee:1,mmm:999}],nnn[0,3]]',
 'aaa[bbb,ccc[ddd,{eee:1, mmm:[123,555]}],nnn[0,3]]',
 'aaa[bbb, ccc[ddd, ddd],nnn[0,3]]',
 'aaa[bbb,ddd[0,3]]'
].join('');

// Show the pieces as one comma-separated list.
alert(customRx(s1).join(','));

Returned value (as shown by the alert):

aaa, bbb, ccc, ddd, {eee: 1, mmm: 999},

nnn, 0,3, aaa, bbb, ccc, ddd, {eee: 1, mmm: [123,555]},

NNN, 0,3, , , , , , NNN,

0,3, , , , 0,3

0

Alternatively, you can split on the following regex:

/ *[\[\],]+ *(?=[^{}]*(?:\{[^{}]*\}[^{}]*)*$)/

The first part, ` *[\[\],]+ *`, matches one or more `[`, `]` or `,` characters together with any surrounding spaces. The rest is a lookahead which asserts that if there are any curly braces after the matched characters, they come in balanced pairs. If the text is well-formed, this ensures that a match never occurs inside a pair of curly braces.

0
source

Source: https://habr.com/ru/post/1737564/


All Articles