Using regular-expression alternation ('or') together with capture groups

I am trying to match the expressions "ephname" below (depending on the file that is present), but I want the numbers to be captured:

entries = ['other data\nephdelay = 12\nephname = cfghjk78.comb\nother data', 'other data\nephdelay = 17\nephname = qwerty.s92\nother data']

I use the regular expression below, but it produces no matches (although each alternative works on its own if I keep only one of them and remove the `|`):

\s?ephname\s?=\s?.*?\.s(\d+)\s?|\s?ephname\s?=\s?.*?(\d+)\.comb\s?

I tested this on regex websites and I don't understand what the problem is. I want the output to be either "92" or "78", depending on the entry. Why am I not getting any matches?

Edit: In my code, I have this:

import re
# Table of [term name, regex pattern] pairs.
# NOTE(review): the "#term1pattern"/"#term3pattern" strings and the trailing
# `...` look like placeholders for content omitted from the post — confirm.
commonterms = (["term1", "#term1pattern"],
               ["ephsol", "\s?ephname\s?=\s?.*?\.s(\d+)\s?|\s?ephname\s?=\s?.*?(\d+)\.comb\s?"],
               ["term3", "#term3pattern"], ...)

# Split the table into two parallel lists: names in `terms`, regexes in `patterns`.
terms = [commonterms[i][0] for i in range(len(commonterms))]
patterns = [commonterms[i][1] for i in range(len(commonterms))]

# One empty result list per term name.
d = {t: [] for t in terms}

def getTerms(entry):
    """Search `entry` with every regex in `patterns` and return the module-level dict `d`.

    NOTE(review): the value extracted for each pattern is only bound to the
    local name `term`; nothing in this function stores it in `d`, so `d` is
    returned exactly as it was before the call.
    """
    for i in range(len(terms)):
        term = re.search(patterns[i], entry)
        # groups()[0] is the FIRST capture group only; for an alternation it is
        # None whenever a later alternative (with its own group) is the one
        # that matched. 'NULL' is used when the pattern did not match at all.
        term = term.groups()[0] if term else 'NULL'
    return d

# Call getTerms on each entry, rebinding `d` to the returned dict each time.
for entry in entries:
    d = getTerms(entry)

# Python 2 print statement: show the list stored under 'ephsol'.
print d['ephsol']

Then when I print d['ephsol'], I just get a bunch of NULL, but I know there should be matches.

+4
3

.

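The regex itself is fine. The problem is in getTerms: it only looks at the first group (index 0 of groups()), which is None when the second alternative matches, and it never stores anything in d, so d is returned unchanged.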

:

# anything above this line was not changed

# Build `d`: term name -> list of extracted values, exactly one item per entry.
# Relies on `re`, `terms`, `patterns` and `entries` defined earlier in the script.
d = {term: [] for term in terms}

for entry in entries:
    for term, pattern in zip(terms, patterns):
        m = re.search(pattern, entry)

        if m is None:
            # Pattern not found in this entry.
            d[term].append('NULL')
            continue

        # In an alternation such as `A(\d+)|B(\d+)` only the branch that
        # matched fills its group; the other groups are None.  Take the first
        # group that captured something instead of hard-coding groups 1 and 2,
        # so patterns with any number of groups (including none) are safe.
        captured = next((g for g in m.groups() if g is not None), None)

        # Fall back to the whole match when no group captured anything, so
        # every list keeps one item per entry.
        d[term].append(m.group(0) if captured is None else captured)

# Single-argument print(...) produces the same output on Python 2 and 3.
print(d)
print('')
print(d['ephsol'])

{'ephsol': ['78', '92'], 'term3': ['NULL', 'NULL'], 'term1': ['NULL', 'NULL']}

['78', '92']

https://repl.it/Ko2L/0

0

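The pattern does match both entries; depending on which alternative matched, the number ends up in group 1 or in group 2: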

# Two sample records: one names a ".comb" file, the other a ".s<digits>" file.
entries = ['other data\nephdelay = 12\nephname = cfghjk78.comb\nother data', 'other data\nephdelay = 17\nephname = qwerty.s92\nother data']

# Group 1 captures the digits after ".s"; group 2 captures the digits before
# ".comb".  Only the alternative that matched fills its group; the other
# group is None.
for e in entries:
    m = re.search(r'\s?ephname\s?=\s?.*?\.s(\d+)\s?|\s?ephname\s?=\s?.*?(\d+)\.comb\s?', e)
    if m:
        # Parenthesised single-argument print works on both Python 2 and 3.
        print("Group 1: {}, Group 2: {} {}".format(m.group(1), m.group(2), m.groups()))

Group 1: None, Group 2: 78 (None, '78')
Group 1: 92, Group 2: None ('92', None)

So print whichever group is not None:

# Print the captured number for every entry whose ephname line matches.
for e in entries:
    m = re.search(r'\s?ephname\s?=\s?.*?\.s(\d+)\s?|\s?ephname\s?=\s?.*?(\d+)\.comb\s?', e)
    if m:
        # Whichever alternative matched, exactly one of the two groups is set;
        # print that one.  print(...) with one argument works on Python 2 and 3.
        print(m.group(1) if m.group(1) is not None else m.group(2))

78
92

Alternatively, rewrite the pattern so the number is always in group 1:

# Single-group variant: the number is always available as group 1.
# re.M makes ^ match at the start of every line, so the "ephname" line is
# found inside the multi-line entry.  `[^0-9\n]*` skips the non-digit prefix
# of the file name without leaving the line; the captured digits must be
# followed by ".comb" or a whitespace character.
for e in entries:
    m = re.search(r'^ephname[ \t]*=[ \t]*[^0-9\n]*(\d+)(?:\.comb|\s)', e, flags=re.M)
    if m:
        # print(...) with one argument works on both Python 2 and 3.
        print(m.group(1))

78
92
+3

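I changed `\s?` to `\s*`, because '?' matches at most one occurrence. Also, with re.DOTALL, "." matches the newline character ('\n') as well.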

:

import re

# Two sample records: one names a ".comb" file, the other a ".s<digits>" file.
entries = ['other data\nephdelay = 12\nephname = cfghjk78.comb\nother data', 'other data\nephdelay = 17\nephname = qwerty.s92\nother data']

# Raw string so that \s, \. and \d reach the regex engine as written instead
# of being treated as (invalid) string escape sequences.
# NOTE(review): the first alternative matches a prefix of anything the second
# one can match, so the second alternative (and therefore group 2) is never
# used; the number is always found in group 1.
pattern = r'.*ephname\s*=\s*[a-zA-Z\.]*(\d+)\s*|.*ephname\s*=\s*[a-zA-Z\.]*(\d+)\.comb\s*'

# match() anchors at the start of the string; re.DOTALL lets the leading ".*"
# run across newlines to skip the lines that precede "ephname".
pObj = re.compile(pattern, re.DOTALL)

match = pObj.match(entries[0])

match2 = pObj.match(entries[1])

print(match.group(1))
print("**********divider")
print(match2.group(1))


print("\n\nReprinting the input date\n\n")
print(entries[0])
print(entries[1])
+2

Source: https://habr.com/ru/post/1685195/


All Articles