Regular expression for parsing configuration file with comments

Edit: I'm really curious how I can get this regex to work. Please do not tell me that there are easier ways to do this. That is obvious! :P

I am writing a regular expression (using Python) to parse strings in a configuration file. The lines may look like this:

someoption1 = some value # some comment
# this line is only a comment
someoption2 = some value with an escaped \# hash
someoption3 = some value with a \# hash # some comment

The idea is that anything after a hash character is considered a comment, unless the hash is escaped with a backslash.

I am trying to use a regular expression to break each line into separate parts: leading whitespace, the left side of the assignment, the right side of the assignment, and the comment. For the first line in the example, the breakdown would be as follows:

  • Whitespace: ""
  • Assignment left: "someoption1 ="
  • Assignment right: " some value "
  • Comment: "# some comment"

This is the regular expression I have so far:

^(\s)?(\S+\s?=)?(([^\#]*(\\\#)*)*)?(\#.*)?$

, !

Python re.findAll(), :

  • 0- : ,
  • 1- :
  • 2- : , , ( )
  • 5- : , , - ( )

, - , . - , ...

+3
6

, , : , ​​

#:

  • [^\#]* ( # btw) ,
  • (\\\#)* , #
  • (\#.*)

, : (a*)(ab)?(b*) (ab)? -

, ( ) : ^\s*(\S+\s*=([^\\#]|\\#?)*)?(#.*)?$

+2

:

^\s*([a-zA-Z_][a-zA-Z_0-9]*)\s*=\s*((?:[^\\#]|\\.)+)

(\\.). #, \\#.

+2

5 Gumbo. , :

import re

def fn(line):
    """Split one configuration line into its syntactic parts.

    Args:
        line: A single line such as ``" option = value # comment"``.

    Returns:
        A 5-tuple ``(leading_ws, name, operator, value, comment)``.
        ``name``, ``operator`` and ``value`` are ``None`` when the line
        holds no assignment; ``comment`` is ``""`` when there is none.
        Backslash escapes inside ``value`` are kept verbatim.

    Raises:
        ValueError: If the pattern does not match, e.g. a bare word without
            ``=`` or a value that ends in a lone backslash.
    """
    match = re.search(
        r"""^          # Anchor to start of line
        (\s*)          # $1: Zero or more leading ws chars
        (?:            # Begin group for optional var=value.
          (\S+)        # $2: Variable name. One or more non-spaces.
          (\s*=\s*)    # $3: Assignment operator, optional ws
          (            # $4: Everything up to comment or EOL.
            [^#\\]*    # Unrolling the loop 1st normal*.
            (?:        # Begin (special normal*)* construct.
              \\.      # special is backslash-anything.
              [^#\\]*  # More normal*.
            )*         # End (special normal*)* construct.
          )            # End $4: Value.
        )?             # End group for optional var=value.
        ((?:\#.*)?)    # $5: Optional comment.
        $              # Anchor to end of line""",
        line, re.MULTILINE | re.VERBOSE)
    if match is None:
        # Without this guard the call below would be ``None.groups()`` and
        # fail with an AttributeError that does not mention the bad input.
        raise ValueError("cannot parse configuration line: %r" % (line,))
    return match.groups()

# Demo inputs: a comment-only line, a plain assignment, then values with
# zero to four backslashes directly in front of the hash.
for sample in (
    r" # just a comment",
    r" option1 = value",
    r" option2 = value # no escape == IS a comment",
    r" option3 = value \# 1 escape == NOT a comment",
    r" option4 = value \\# 2 escapes == IS a comment",
    r" option5 = value \\\# 3 escapes == NOT a comment",
    r" option6 = value \\\\# 4 escapes == IS a comment",
):
    print(fn(sample))

Output of the script (tested with Python 3.0.1):

(' ', None, None, None, '# just a comment')
(' ', 'option1', ' = ', 'value', '')
(' ', 'option2', ' = ', 'value ', '# no escape == IS a comment')
(' ', 'option3', ' = ', 'value \\# 1 escape == NOT a comment', '')
(' ', 'option4', ' = ', 'value \\\\', '# 2 escapes == IS a comment')
(' ', 'option5', ' = ', 'value \\\\\\# 3 escapes == NOT a comment', '')
(' ', 'option6', ' = ', 'value \\\\\\\\', '# 4 escapes == IS a comment')

, Jeffrey Friedl " ( ). . (3- ) - , , " " . ( " ", " -! ":)

+2

, , , , .

, , . , 2 .

  • - (.*?(?<!\\))#(.*): # \ (. lookbehind);
  • .
+1

, .

, , :

  • #, .
  • # \, .
  • \# #.

, . .

:

import re

def fn(line):
    """Split a configuration line into ``(key, value, comment)``.

    The comment starts at the first ``#`` that follows a character other
    than a backslash, or at column 0 when the line begins with ``#``.
    Only the single character before ``#`` is inspected. The remainder is
    split at the first ``=``; ``\\#`` in the value is unescaped to ``#``.

    Args:
        line: One line of the configuration file (may be empty).

    Returns:
        A 3-tuple of stripped strings ``(key, value, comment)``. Missing
        parts are returned as ``""``; the comment keeps its leading ``#``.
    """
    # Split line into non-comment and comment.
    comment = ""
    if line.startswith("#"):
        # startswith() also copes with an empty line, where ``line[0]``
        # would raise IndexError.
        comment = line
        line = ""
    else:
        found = re.search(r"[^\\]#", line)
        if found is not None:
            # The match begins one character before the hash itself.
            hash_pos = found.start() + 1
            comment = line[hash_pos:]
            line = line[:hash_pos]

    # Split non-comment into key and value at the first "=".
    # With no "=" present, partition() yields the whole text as the key
    # and an empty value.
    key, _, val = line.partition("=")
    val = val.replace("\\#", "#")

    return (key.strip(), val.strip(), comment.strip())

# Run the sample lines from the question through fn().  print() is written
# as a call with a single argument so the lines are valid on Python 3 and
# still print the same tuple on Python 2.
print(fn(r"someoption1 = some value # some comment"))
print(fn(r"# this line is only a comment"))
print(fn(r"someoption2 = some value with an escaped \# hash"))
print(fn(r"someoption3 = some value with a \# hash # some comment"))

Output:

('someoption1', 'some value', '# some comment')
('', '', '# this line is only a comment')
('someoption2', 'some value with an escaped # hash', '')
('someoption3', 'some value with a # hash', '# some comment')

( ), :

[^\#]

( , r"[^\\#]") , \ #, \# . , , , , , : -)


- ( ), :

def fn(line):
    """Split a configuration line into ``[key, value, comment]``.

    The line is stripped, then cut at the first ``#`` that is not
    immediately preceded by a backslash. ``\\#`` in the non-comment part is
    unescaped to ``#`` before that part is cut at the first ``=``.
    Whitespace around both separators is discarded, and the returned
    comment does not include the ``#``.

    Args:
        line: One line of the configuration file.

    Returns:
        A three-element list ``[key, value, comment]``; missing parts are
        ``""``.
    """
    line = line.strip()

    # maxsplit is passed by keyword: the positional form is deprecated
    # since Python 3.13.
    parts = re.split(r"\s*(?<!\\)#\s*", line, maxsplit=1)
    if len(parts) == 1:
        parts.append("")  # no comment on this line
    body, comment = parts
    body = body.replace("\\#", "#")  # unescape hashes in the non-comment

    result = re.split(r"\s*=\s*", body, maxsplit=1)
    if len(result) == 1:
        result.append("")  # no "=": the whole body is the key
    result.append(comment)
    return result

, . , :

['someoption1', 'some value', 'some comment']
['', '', 'this line is only a comment']
['someoption2', 'some value with an escaped # hash', '']
['someoption3', 'some value with a # hash', 'some comment']
0
source

Try breaking it into 2 steps:

  • Process escapes to recognize true comments (a true comment starts at the first # that is not preceded by a \ (hint: "negative lookbehind")), delete the true comments, and then replace r"\#" with "#".

  • Process comment without comment.

BIG HINT: use re.VERBOSE with comments

0
source

Source: https://habr.com/ru/post/1766308/


All Articles