Create dynamic level of nested dict from list of objects?

I am trying to turn a list of objects into a nested dict that can be accessed using indexes.

The following code works for a two-level nested dictionary. I would like to extend it so that it works for any number of levels.

from collections import namedtuple
import pprint

Holding = namedtuple('holding', ['portfolio', 'ticker', 'shares'])

lst = [
    Holding('Large Cap', 'TSLA', 100),
    Holding('Large Cap', 'MSFT', 200),
    Holding('Small Cap', 'UTSI', 500),
]


def indexer(lst, indexes):
    """Group ``lst`` into a two-level dict keyed by two attributes.

    Only ``indexes[0]`` and ``indexes[1]`` are used: the first names the
    attribute that supplies the outer keys, the second the attribute that
    supplies the inner keys.  Each inner value is the list of items that
    share both key values, in input order.
    """
    grouped = {}
    for entry in lst:
        outer_key = getattr(entry, indexes[0])
        inner_key = getattr(entry, indexes[1])
        # setdefault creates the missing level and hands it back, so the
        # whole path can be built and appended to in one chain.
        grouped.setdefault(outer_key, {}).setdefault(inner_key, []).append(entry)
    return grouped


d = indexer(lst, ['portfolio', 'ticker'])
pp = pprint.PrettyPrinter()
pp.pprint(d)

Outputs:

 {'Large Cap': {'MSFT': [holding(portfolio='Large Cap', ticker='MSFT', shares=200)], 'TSLA': [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}, 'Small Cap': {'UTSI': [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}} 
+5
source share
5 answers

You can try something along the following lines. Just collect the attribute values named by the indexes and keep descending into the nested dict that has been built so far:

 def indexer(lst, indexes):
     """Group ``lst`` into a nested dict with one level per name in ``indexes``.

     For every item the attribute values named by ``indexes`` form a path of
     keys.  All but the last key lead through nested dicts; the last key maps
     to the list of items that share the whole path, in input order.
     """
     tree = {}
     for record in lst:
         path = [getattr(record, name) for name in indexes]
         # The final key addresses a list of items rather than another dict.
         leaf_key = path.pop()
         node = tree
         for key in path:
             # Step one level down, creating the sub-dict on first visit.
             node = node.setdefault(key, {})
         node.setdefault(leaf_key, []).append(record)
     return tree

This leads to the following outputs:

 >>> d = indexer(lst, ['portfolio', 'ticker']) {'Large Cap': {'MSFT': [holding(portfolio='Large Cap', ticker='MSFT', shares=200)], 'TSLA': [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}, 'Small Cap': {'UTSI': [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}} >>> d = indexer(lst, ['portfolio', 'ticker', 'shares']) {'Large Cap': {'MSFT': {200: [holding(portfolio='Large Cap', ticker='MSFT', shares=200)]}, 'TSLA': {100: [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}}, 'Small Cap': {'UTSI': {500: [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}} 
+1
source

One of the best implementations of nested dictionaries I've ever seen is Aaron Hall's answer to the question "What is the best way to implement nested dictionaries?". It is an example implementation of a type that performs what the Perl programming language calls "autovivification".

In any case, using one here is helpful, because it means you only need to call setdefault() on the "leaves" of your data tree structure (which are lists, not sub-dictionaries).

So here is the answer to your question that uses it:

 from collections import namedtuple
 from functools import reduce
 from operator import attrgetter
 from pprint import pprint

 Holding = namedtuple('Holding', ['portfolio', 'ticker', 'shares'])

 lst = [
     Holding('Large Cap', 'TSLA', 100),
     Holding('Large Cap', 'MSFT', 200),
     Holding('Small Cap', 'UTSI', 500),
 ]


 def indexer(lst, indexes):
     """Create a dynamic nested dictionary based on ``indexes``.

     Args:
         lst: Iterable of objects to group.
         indexes: Iterable of attribute names, one per nesting level.  A
             single name is supported and yields a flat ``{value: [items]}``
             mapping.

     Returns:
         A ``Vividict`` (a ``dict`` subclass) nested one level per index.
         The innermost values are lists of the items sharing that key path,
         in input order.

     Raises:
         TypeError: If ``indexes`` is empty (raised by ``attrgetter``).
     """

     class Vividict(dict):
         """dict subclass that creates sub-dictionaries on first access.

         Looking up a missing key stores and returns a new, empty instance
         of the same type (the "autovivification" pattern from Aaron Hall's
         Stack Overflow answer on implementing nested dictionaries).
         """

         def __missing__(self, key):
             value = self[key] = type(self)()
             return value

     indexes = tuple(indexes)  # allow any iterable; we need its length below
     result = Vividict()
     index_getters = attrgetter(*indexes)
     # attrgetter returns the bare attribute value -- not a 1-tuple -- when it
     # is given exactly one name.  Unpacking that value directly would iterate
     # over e.g. the characters of a string, so it is wrapped first.
     single_index = len(indexes) == 1
     for item in lst:
         keys = index_getters(item)
         if single_index:
             keys = (keys,)
         *indices, leaf = keys  # Leaves are lists, not dicts.
         # Walk down the tree; Vividict creates missing levels on the way.
         target = reduce(lambda x, y: x[y], indices, result)
         target.setdefault(leaf, []).append(item)
     return result


 d = indexer(lst, ['portfolio', 'ticker'])
 pprint(d)
 print()
 d = indexer(lst, ['portfolio', 'ticker', 'shares'])
 pprint(d)

Output:

 {'Large Cap': {'MSFT': [Holding(portfolio='Large Cap', ticker='MSFT', shares=200)], 'TSLA': [Holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}, 'Small Cap': {'UTSI': [Holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}} {'Large Cap': {'MSFT': {200: [Holding(portfolio='Large Cap', ticker='MSFT', shares=200)]}, 'TSLA': {100: [Holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}}, 'Small Cap': {'UTSI': {500: [Holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}} 
+1
source

Your code was actually a good attempt. The small addition I made was to keep track of the current mapping (the one reached through the previous index) and let the next index create a mapping inside it. Thus, with each index (and therefore each iteration of the inner loop) you go one level deeper. At the last level a list is created instead of a dictionary, and after the inner loop the item is simply appended to that list.

 def indexer(lst, indexes):
     """Group ``lst`` into a nested dict with one level per name in ``indexes``.

     Every attribute named in ``indexes`` contributes one level of keys.  The
     container stored under the last key is a list, every earlier one is a
     dict; each item is appended to the list at the end of its key path.
     """
     tree = {}
     for item in lst:
         node = tree
         for depth, name in enumerate(indexes):
             at_leaf = depth == len(indexes) - 1
             # A fresh container is only stored if the key is new; either
             # way setdefault returns whatever now sits under the key.
             fresh = [] if at_leaf else {}
             node = node.setdefault(getattr(item, name), fresh)
         node.append(item)
     return tree
0
source

This question may be more appropriate for CodeReview .

Since your code works, here are some tips:

  • namedtuple returns a class, so the typename 'holding' should be written 'Holding'.
  • lst is too generic. It is a list of Holding instances, so it could be called holdings.
  • index0 is not a list index, but a dict key.
  • You can use the nested defaultdict instead of calling setdefault again.

Here is an example:

 from collections import namedtuple, defaultdict
 import pprint

 Holding = namedtuple('holding', ['portfolio', 'ticker', 'shares'])

 holdings = [
     Holding('Large Cap', 'TSLA', 100),
     Holding('Large Cap', 'MSFT', 200),
     Holding('Small Cap', 'UTSI', 500),
 ]


 def default_tree(depth, leaf):
     """Return a defaultdict that grows into a tree ``depth`` levels deep.

     At depth 1 missing keys are filled by calling ``leaf``; at any other
     depth they are filled with a new tree that is one level shallower.
     """
     make_child = leaf if depth == 1 else (lambda: default_tree(depth - 1, leaf))
     return defaultdict(make_child)


 def indexer(lst, attributes):
     """Group ``lst`` into nested defaultdicts, one level per attribute name.

     The values of the named attributes form each item's key path; the item
     is appended to the list found at the end of that path.
     """
     tree = default_tree(len(attributes), list)
     for record in lst:
         cursor = tree
         for name in attributes:
             # Indexing a defaultdict creates the next level when missing.
             cursor = cursor[getattr(record, name)]
         cursor.append(record)
     return tree


 d = indexer(holdings, ['portfolio', 'ticker', 'shares'])
 pp = pprint.PrettyPrinter()
 pp.pprint(d)
0
source

I started using QueryList instead of a nested dict, and it made my life easier.

eg:

ql.filter(ticker="MSFT") will return a QueryList of all MSFT entries.

 from collections import defaultdict


 class QueryList(list):
     """A list whose items can be grouped and filtered by attribute value."""

     def group_by(self, attrs) -> dict:
         """Group the items by one or more attributes, like a database GROUP BY.

         Args:
             attrs: A single attribute name (``str``) or an iterable of
                 attribute names.

         Returns:
             A ``defaultdict`` mapping each key to a ``QueryList`` of the
             items that produced it, in list order.  The key is the bare
             attribute value when ``attrs`` is a ``str``::

                 {attr_val: QueryList(), attr_val: QueryList(), ...}

             and otherwise a tuple of values in ``attrs`` order::

                 {(attr_val0, attr_val1, ...): QueryList(), ...}

         Raises:
             AttributeError: If an item lacks one of the attributes.
             TypeError: If an attribute value is not hashable.
         """
         result = defaultdict(QueryList)
         if isinstance(attrs, str):
             for item in self:
                 result[getattr(item, attrs)].append(item)
         else:
             # Materialise once so a one-shot iterable of names is not
             # exhausted after the first item.
             names = tuple(attrs)
             for item in self:
                 key = tuple(getattr(item, name) for name in names)
                 result[key].append(item)
         return result

     def filter(self, **kwargs):
         """Return the items whose attributes equal all the given values.

         Args:
             **kwargs: Attribute name/value pairs.  For example
                 ``foo.filter(portfolio='123', account='ABC')`` returns the
                 items matching both.  With no pairs, every item matches.

         Returns:
             A new ``QueryList`` with the matching items in list order.
         """
         wanted = tuple(kwargs.items())
         # Values are compared with ==, never hashed, so unhashable attribute
         # values (lists, dicts, ...) work for any number of criteria.
         return QueryList(
             item
             for item in self
             if all(getattr(item, attr) == val for attr, val in wanted)
         )

     def scalar(self, default=None, attr=None):
         """Return the first item, or one attribute of it.

         Args:
             default: Returned when the list is empty, or when ``attr`` is
                 given and the first item has no such attribute.
             attr: If not ``None``, return ``getattr(item, attr)`` instead of
                 the item itself.
         """
         if not self:
             # Return the default as-is; never look ``attr`` up on it.
             return default
         item = self[0]
         if attr is None:
             return item
         return getattr(item, attr, default)
0
source

Source: https://habr.com/ru/post/1274336/


All Articles