Python: replacing multiple strings at once with support for backreferences

Question

Python: replacing multiple strings at once with support for backreferences

There are several useful ways to handle multiple string replacements at the same time in Python. However, I am unable to create an efficient function that can do this and also supports backreferences.

I would like to use a dictionary of expression / replacement terms, where replacement terms may contain backreferences to something matched by the expression.

E.g. (note the \1):

# The replacement is a raw string so that \1 stays a regex backreference;
# in a normal string literal '\1' is the single control character chr(1).
repdict = {'&&':'and', '||':'or', '!([a-zA-Z_])': r'not \1'}

I put the SO answer mentioned at the beginning into the function below, which works well for expression / replacement pairs that don't contain backreferences:

def replaceAll(repdict, text):
    """Replace every literal key of *repdict* found in *text* with its value.

    Each key is passed through ``re.escape`` before it is joined into one
    alternation, so keys are matched as literal text and any regex syntax
    inside them (groups included) loses its meaning.
    """
    escaped = {re.escape(key): value for key, value in repdict.items()}
    alternation = re.compile("|".join(escaped))

    def lookup(match):
        # Re-escape the matched text to get back to the dictionary key form.
        return escaped[re.escape(match.group(0))]

    return alternation.sub(lookup, text)

However, it does not work for a key whose replacement contains a backreference.

>>> replaceAll(repldict, "!newData.exists() || newData.val().length == 1")
'!newData.exists() or newData.val().length == 1'

If I do it manually, it works fine, e.g.:

# Compile the single pattern as-is (no re.escape), so the parentheses form a
# capturing group that \1 in the replacement can refer to.
pattern = re.compile("!([a-zA-Z_])")
pattern.sub(r'not \1', '!newData.exists()')

Works as expected:

'not newData.exists()'

, , , backref, -.

. , backrefs , replacer:

def replaceAll(repPat, text):
    """Substitute every match of the alternation built from *repPat*'s keys.

    A match that starts with "!" is rewritten by hand as "not " followed by
    the rest of the match. Any other match is converted back to its key form
    with ``naive_escaper`` and looked up in *repPat*.
    """
    def substitute(found):
        matched = found.group(0)
        # the exclamation-mark pattern is special-cased instead of looked up
        if matched.startswith("!"):
            return 'not ' + matched[1:]
        # everything else: naively rebuild the dictionary key from the match
        return repPat[naive_escaper(matched)]

    combined = re.compile("|".join(repPat.keys()))
    return combined.sub(substitute, text)

def naive_escaper(string):
    """Return *string* with '=' or '|' backslash-escaped.

    If the string contains '=', every '=' is escaped and '|' is left
    untouched; otherwise, if it contains '|', every '|' is escaped. A string
    with neither character is returned unchanged.
    """
    # Raw strings: a backslash followed by '=' or '|' is not a valid escape
    # sequence in a normal string literal and triggers a warning.
    if '=' in string:
        return string.replace('=', r'\=')
    if '|' in string:
        return string.replace('|', r'\|')
    return string

# The keys are regex patterns, so | and = are escaped by hand. Raw strings
# keep the backslashes without relying on invalid escape sequences.
repPat = {'!([a-zA-Z_])':'', '&&':'and', r'\|\|':'or', r'\=\=\=':'=='}
replaceAll(repPat, "(!this && !that) || !this && foo === bar")

:

'(not this and not that) or not this'

, - , , , .

+4

python python-3.x regex

fzzylogic 16 . '17 13:08

3

Rawing, , . , .

, , . , replacer ( match.expand).

import re
from collections import OrderedDict
from functools import partial

# Maps each regex pattern to its replacement template; a reference such as \1
# is written relative to its own pattern.
pattern_to_replacement = {'&&': 'and', '!([a-zA-Z_]+)': r'not \1'}


def build_replacer(cases):
    """Build a function that applies every pattern of *cases* in a single pass.

    *cases* maps regex patterns to replacement templates whose numeric
    back-references are relative to their own pattern. Each pattern is wrapped
    in its own group and all of them are joined into one alternation; the
    templates are rewritten with ``absolute_backreference`` so their
    references point at the right groups of the combined pattern.

    Returns ``partial(pattern.sub, replacer)``, i.e. a callable that takes the
    text (and optionally ``count``) and returns the substituted text. An empty
    *cases* yields a callable that returns its input unchanged.
    """
    # Snapshot the items so both loops below see the same order.
    ordered_cases = list(cases.items())
    replacements = {}

    # Absolute number of the wrapper group of the pattern being processed.
    next_group = 1
    for pattern, replacement in ordered_cases:
        group_index = next_group
        # Back-references in the template are relative to this wrapper group.
        replacements[group_index] = absolute_backreference(replacement, group_index)

        # Skip the wrapper itself plus the N subgroups the pattern contains
        # (determined by compiling the pattern on its own).
        next_group += 1 + re.compile(pattern).groups

    # With no cases the alternation would be "", which matches the empty
    # string everywhere and makes the replacer fail on lastindex == None.
    # "(?!)" is a pattern that never matches.
    catch_all = "|".join("({})".format(p) for p, _ in ordered_cases) or r"(?!)"
    pattern = re.compile(catch_all)

    def replacer(match):
        # The wrapper group is the last group to close, so lastindex is the
        # number of the wrapper of whichever pattern matched.
        return match.expand(replacements[match.lastindex])

    return partial(pattern.sub, replacer)


def absolute_backreference(text, n):
    r"""Shift every numeric back-reference in the template *text* by *n*.

    ``\0`` and references of one or two digits (``\1`` .. ``\99``) are
    rewritten to ``\g<number + n>``. The ``\g<...>`` form is used for the
    result because it stays unambiguous when the reference is followed by a
    digit or when the shifted number has three digits. An escaped backslash
    (``\\``) is copied through untouched, so a digit after it is not mistaken
    for a reference.

    Returns the rewritten template, suitable for ``Match.expand`` / ``re.sub``.
    """
    # Either an escaped backslash (consumed so it cannot start a reference)
    # or a back-reference; only the latter fills group 1.
    ref_pat = re.compile(r"\\(?:\\|(0|[1-9][0-9]?))")

    def replacer(match):
        number = match.group(1)
        if number is None:
            # escaped backslash: keep as is
            return match.group(0)
        return "\\g<{}>".format(int(number) + n)

    return ref_pat.sub(replacer, text)


# Build the combined replacer once, then apply it to a sample string;
# this prints "not this.exists()".
replacer = build_replacer(pattern_to_replacement)
print(replacer("!this.exists()"))

+2

Angus Hollands 16 . '17 20:12

Simple is better than complex; the code shown below is more readable (the reason yours doesn't work as expected is that ([a-zA-Z_]) should not be passed through re.escape):

# Keys are regex patterns: the literal operators are escaped with re.escape,
# and the surrounding whitespace is absorbed so each replacement supplies
# its own spacing.
repdict = {
    r'\s*' + re.escape('&&') + r'\s*': ' and ',
    r'\s*' + re.escape('||') + r'\s*': ' or ',
    re.escape('!') + r'([a-zA-Z_])': r'not \1',
}
def replaceAll(repdict, text):
    """Apply each pattern -> replacement pair of *repdict* to *text* in turn.

    The pairs are applied one after another in the dict's iteration order,
    each on the result of the previous substitution, and the final string is
    returned.
    """
    result = text
    for regex, replacement in repdict.items():
        result = re.compile(regex).sub(replacement, result)
    return result

0

williezh Jul 16 '17 at 13:55

source share

Aran-Fey · Accepted Answer · 2017-07-16T16:56:39+0000

: . .

, dict .

. a repldict :

# Two patterns with one capturing group each; both replacements refer to \1.
repldict = {r'(a)': r'\1a', r'(b)': r'\1b'}

, (a)|(b) - (b) 1, , .

, , . b, , \1b ? ; .

, dict :

(?P<group1>(a))|(?P<group2>(b))

, . \1b " 2".

:

def replaceAll(repldict, text):
    r"""Apply every pattern -> replacement pair of *repldict* to *text* in one pass.

    The keys are regex patterns and the values are replacement texts that may
    contain numeric backreferences (``\1``, ``\2``, ...) to groups of their
    own key; ``\0`` stands for the whole text matched by the key. All keys
    are combined into a single alternation, so the text is scanned only once.

    Returns the substituted text; an empty *repldict* returns *text* unchanged.

    Raises IndexError if a replacement that is used refers to a group its key
    does not define.
    """
    # nothing to replace; this also avoids unpacking an empty zip() below
    if not repldict:
        return text

    # split the dict into two sequences because we need the order to be reliable
    keys, repls = zip(*repldict.items())

    # number of capturing groups each key defines on its own, used to reject
    # references to groups that belong to a different key
    group_counts = [re.compile(k).groups for k in keys]

    # generate a regex pattern from the keys, putting each key in a named group
    # so that we can find out which one of them matched.
    # groups are named "_<idx>" where <idx> is the index of the corresponding
    # replacement text in the sequence above
    pattern = re.compile(
        '|'.join('(?P<_{}>{})'.format(i, k) for i, k in enumerate(keys)))
    backreference = re.compile(r'\\(\d+)')

    def repl(match):
        # Exactly one key matched, and its wrapper group is the last group to
        # close, so lastgroup is the wrapper's name and lastindex its number.
        key_index = int(match.lastgroup[1:])
        wrapper_number = match.lastindex

        # manually search for backreferences in the replacement text and
        # substitute them
        def repl_backreference(m):
            reference_index = int(m.group(1))
            if reference_index > group_counts[key_index]:
                raise IndexError(
                    'invalid group reference {} for pattern {!r}'.format(
                        reference_index, keys[key_index]))

            # The key's own groups are numbered right after its wrapper group
            # in the combined pattern, whatever groups earlier keys contain.
            value = match.group(wrapper_number + reference_index)
            # a group that did not take part in the match contributes nothing
            return '' if value is None else value

        return backreference.sub(repl_backreference, repls[key_index])

    return pattern.sub(repl, text)

:

# First example: the pattern that uses a backreference is the last key.
repldict = {'&&':'and', r'\|\|':'or', r'!([a-zA-Z_])':r'not \1'}
print( replaceAll(repldict, "!newData.exists() || newData.val().length == 1") )

# Second example: the backreferencing pattern comes first, and '===' is
# rewritten to '=='.
repldict = {'!([a-zA-Z_])':r'not \1', '&&':'and', r'\|\|':'or', r'\=\=\=':'=='}
print( replaceAll(repldict, "(!this && !that) || !this && foo === bar") )

# output: not newData.exists() or newData.val().length == 1
#         (not this and not that) or not this and foo == bar

:

; .
, {r'(a)': r'\2'}. ( , .)

Python: replacing multiple strings at once with support for backreferences

More articles: