Split a string into a dictionary with several layers of key/value pairs

I have a file containing lines in this type of format.

Example 1: nextline = "DD:MM:YYYY INFO - 'WeeklyMedal: Hole = 1; Par = 4; Index = 2; Distance = 459; Score = { Player1 = 4 };"

Example 2: nextline = "DD:MM:YYYY INFO - 'WeeklyMedal: Hole = 1; Par = 4; Index = 2; Distance = 459; Score = { Player1 = 4; Player2 = 6; Player3 = 4 };"

First, I split the line on ':', which gives me a list of 2 entries. I would like to split this line into a dictionary of key/value pairs, but where the Score key holds several sub-keys, each with its own value.

 Hole 1
 Par 4
 Index 2
 Distance 459
 Score
     Player1 4
     Player2 6
     Player3 4

So, I'm using something like this ...

 # Attempt to turn the "key = value; ..." part of the log line into a dict.
 split_line_by_semicolon = nextline.split(":")
 # NOTE(review): split_line_by_semicolon is the list returned by str.split,
 # and lists have no .split method -- presumably one element of that list
 # was meant here; confirm against the asker's real code.
 dictionary_of_line = dict((k.strip(), v.strip()) for k,v in (item.split('=') for item in split_line_by_semicolon.split(';')))
 # Print every key/value pair of the resulting dictionary.
 for keys,values in dictionary_of_line.items():
     print("{0} {1}".format(keys,values))

However, I get an error in the score element of the line:

 ValueError: too many values to unpack (expected 2) 

I can limit the split on '=' to a single split, so that it stops after the first '=':

 # Same conversion as before, but split('=', 1) splits each item only at its
 # first '=', so everything after that first '=' stays in the value.
 dictionary_of_line = dict((k.strip(), v.strip()) for k,v in (item.split('=',1) for item in split_line_by_semicolon.split(';')))
 # Print every key/value pair of the resulting dictionary.
 for keys,values in dictionary_of_line.items():
     print("{0} {1}".format(keys,values))

However, I am losing the sub-values inside the braces. Does anyone know how I can build this multi-layered dictionary?

+5
source share
3 answers

The simplest way to do this (but I don't know if this is acceptable in your situation):

 import re

 nextline = "DD:MM:YYYY INFO - 'WeeklyMedal: Hole = 1; Par = 4; Index = 2; Distance = 459; Score = { Player1 = 4; Player2 = 6; Player3 = 4 };"

 # Compiled pattern matching every "key = value" pair whose key and value are
 # plain words; "Score = {" is skipped because "{" is not a word character.
 my_regex = re.compile(r'\w+ \= \w+')

 # Skeleton of the expected result: scalar fields plus a nested Score dict.
 final_dict = {'Hole': 0, 'Par': 0, 'Index': 0, 'Distance': 0, 'Score': {}}

 # Every "key = value" substring found in the line, in order of appearance.
 filtered_items = my_regex.findall(nextline)

 for item in filtered_items:
     # Split each "key = value" string once instead of once per use.
     key, _, value = item.partition(' = ')
     if key.startswith('Player'):
         # Player entries belong to the nested Score dictionary.
         final_dict['Score'][key] = int(value)
     else:
         final_dict[key] = int(value)
+2
source

I would use regular expressions in the same way as maccinza (I like his answer), with one slight difference - data with an internal dictionary in it can be processed recursively:

 import re

 # example strings:
 nextline1 = "DD:MM:YYYY INFO - 'WeeklyMedal: Hole = 1; Par = 4; Index = 2; Distance = 459; Score = { Player1 = 4 };"
 nextline2 = "DD:MM:YYYY INFO - 'WeeklyMedal: Hole = 1; Par = 4; Index = 2; Distance = 459; Score = { Player1 = 4; Player2 = 6; Player3 = 4 };"

 # Captures the WeeklyMedal record: everything after "'WeeklyMedal:".
 lineRegexp = re.compile(r'.+\'WeeklyMedal:(.+)\'?')
 # Matches "key = value" pairs where the value is either a single word or a
 # brace-delimited group (greedy, so it extends to the last closing brace).
 weeklyMedalRegexp = re.compile(r'(\w+) = (\{.+\}|\w+)')


 def parseWeeklyMedal(r, info):
     """Parse a WeeklyMedal record into a (possibly nested) dictionary.

     Args:
         r: compiled pattern yielding (key, value) tuples from ``findall``.
         info: text holding ``key = value`` pairs.

     Returns:
         A dict mapping each key to an int (all-digit values), a nested dict
         (brace-delimited values, parsed recursively) or the raw string
         (any other value).
     """
     parsed = {}
     for key, value in r.findall(info):
         if value.startswith('{'):
             # Brace group: parse the pairs inside it with the same pattern.
             parsed[key] = parseWeeklyMedal(r, value)
         elif value.isdigit():
             parsed[key] = int(value)
         else:
             # Keep non-numeric scalars instead of collapsing them to {}.
             parsed[key] = value
     return parsed


 parsedLines = []
 for line in [nextline1, nextline2]:
     info = lineRegexp.search(line)
     if info:
         # group(1) is the captured record; group(0) would be the whole line.
         parsedLines.append(parseWeeklyMedal(weeklyMedalRegexp, info.group(1)))
         # or do something with the parsed dictionary in place

 # do something here with the entire result, print for example
 print(parsedLines)
+1
source
 import json
 import pprint
 import re

 lines = (
     "DD:MM:YYYY INFO - 'WeeklyMedal: Hole = 1; Par = 4; Index = 2; Distance = 459; Score = { Player1 = 4 };",
     "DD:MM:YYYY INFO - 'WeeklyMedal: Hole = 1; Par = 4; Index = 2; Distance = 459; Score = { Player1 = 4; Player2 = 6; Player3 = 4 };",
 )


 def lines_to_dict(nextline):
     """Convert one WeeklyMedal log line into a nested dictionary.

     The text from "Hole" onwards is rewritten into the body of a JSON object
     and decoded in a single pass, so the brace-delimited Score group becomes
     a nested dict. All values are returned as strings.

     Args:
         nextline: log line containing "Hole = ...; ..." pairs.

     Returns:
         Dict of the parsed pairs, with nested dicts for brace groups.

     Raises:
         ValueError: if "Hole" is not in the line, or the rewritten text is
             not valid JSON (json.JSONDecodeError is a ValueError subclass).
     """
     # cut up to Hole
     string_ = nextline[nextline.index("Hole"):]
     # convert to dict format: "key = value;" -> "key: value,"
     string_ = re.sub(r'\s+=', ':', string_)
     string_ = string_.replace(';', ',')
     # json likes double quotes around every key and value
     string_ = re.sub(r'(\b\w+)', r'"\1"', string_)
     # drop the trailing comma left over from the final ';'
     string_ = re.sub(r',$', r'', string_)
     # The text is now the inside of one JSON object (Score included), so a
     # single decode yields the combined dictionary.
     return json.loads("{" + string_ + "}")


 for d in lines:
     pprint.pprint(lines_to_dict(d))

 # Output:
 # {'Distance': '459', 'Hole': '1', 'Index': '2', 'Par': '4', 'Score': {'Player1': '4'}}
 # {'Distance': '459', 'Hole': '1', 'Index': '2', 'Par': '4', 'Score': {'Player1': '4', 'Player2': '6', 'Player3': '4'}}
0
source

Source: https://habr.com/ru/post/1233163/


All Articles