You can (ab)use the Python tokenizer to parse a list of key=value pairs:
#!/usr/bin/env python
"""(Ab)use the Python tokenizer to parse a simple key=value list."""
from tokenize import generate_tokens, NAME, NUMBER, OP, STRING, ENDMARKER

# Token-type -> value converter. Built once at module level; the original
# rebuilt this dict on every single token inside the loop.
_CONVERTERS = {
    NAME: lambda s: s,            # bare words stay strings
    NUMBER: int,                  # numeric literals become ints
    STRING: lambda s: s[1:-1],    # strip the surrounding quotes
}


def parse_key_value_list(text):
    """Yield (key, value) pairs parsed from *text*.

    *text* looks like ``age=12,name=bob,phrase="I'm cool!"``.  Values may
    be bare names, integers, or quoted strings; commas inside quoted
    strings are preserved because the tokenizer sees them as one STRING
    token.  Malformed input is not validated here (see the FSM version).
    """
    key = value = None
    # generate_tokens() wants a readline callable; serve the whole text
    # as a single "line" (the iterator's StopIteration signals EOF).
    readline = lambda it=iter([text]): next(it)
    for tok_type, tok_str, _, _, _ in generate_tokens(readline):
        if tok_type == NAME and key is None:
            key = tok_str                          # first NAME is the key
        elif tok_type in _CONVERTERS:
            value = _CONVERTERS[tok_type](tok_str)
        elif ((tok_type == OP and tok_str == ',')
              or (tok_type == ENDMARKER and key is not None)):
            yield key, value                       # pair done at ',' or EOF
            key = value = None


text = '''age=12,name=bob,hobbies="games,reading",phrase="I'm cool!"'''
print(dict(parse_key_value_list(text)))
Output:
{'phrase': "I'm cool!", 'age': 12, 'name': 'bob', 'hobbies': 'games,reading'}
You can use a finite-state machine (FSM) to implement a more robust parser. The parser needs only the current state and the next token to validate the input syntax:
#!/usr/bin/env python
"""Parse a key=value list with an explicit finite-state machine (FSM)."""
from tokenize import (generate_tokens, NAME, NUMBER, OP, STRING,
                      NEWLINE, NL, ENDMARKER)


def parse_key_value_list(text):
    """Yield (key, value) pairs parsed from *text*; validate the syntax.

    Grammar: ``pair ("," pair)*`` where ``pair = NAME "=" (NAME|NUMBER|STRING)``.
    Empty input yields nothing.

    Raises:
        ValueError: on any token that the current state does not accept;
            the exception argument is ``(state, offending_token)``.
    """
    def check(condition):
        # Fail loudly with the state and the offending token.
        if not condition:
            raise ValueError((state, token))

    converters = {NAME: lambda s: s, NUMBER: int, STRING: lambda s: s[1:-1]}
    KEY, EQ, VALUE, SEP = range(4)  # FSM states
    state = KEY
    for token in generate_tokens(lambda it=iter([text]): next(it)):
        tok_type, tok_str = token[:2]
        # Python 3 emits an implicit NEWLINE token before ENDMARKER even
        # when the text has no newline; neither NEWLINE nor NL is part of
        # our grammar, so skip them (the original raised ValueError here).
        if tok_type in {NEWLINE, NL}:
            continue
        if state == KEY:
            if tok_type == ENDMARKER:
                return  # empty input: a valid, empty list
            check(tok_type == NAME)
            key = tok_str
            state = EQ
        elif state == EQ:
            check(tok_type == OP and tok_str == '=')
            state = VALUE
        elif state == VALUE:
            check(tok_type in converters)
            value = converters[tok_type](tok_str)
            state = SEP
        elif state == SEP:
            check(tok_type == OP and tok_str == ',' or tok_type == ENDMARKER)
            yield key, value
            state = KEY


text = '''age=12,name=bob,hobbies="games,reading",phrase="I'm cool!"'''
print(dict(parse_key_value_list(text)))