Python - summarize a value in a dictionary list based on the same key

I have a list of dictionaries that looks like this:

data = [{'stat3': '5', 'stat2': '4', 'player': '1'}, {'stat3': '8', 'stat2': '1', 'player': '1'}, {'stat3': '6', 'stat2': '1', 'player': '3'}, {'stat3': '3', 'stat2': '7', 'player': '3'}] 

And I want to get a nested dictionary whose keys are the values of the 'player' key and whose values are dictionaries of aggregated statistics.

The output should be:

 {'3': {'stat3': 9, 'stat2': 8, 'player': '3'}, '1': {'stat3': 13, 'stat2': 5, 'player': '1'}} 

Below is my code:

 from collections import defaultdict

 result = {}

 # One integer counter per player key; stat3 and stat2 are both added to it.
 total_stat = defaultdict(int)
 for row in data:
     player = row['player']
     total_stat[player] += int(row['stat3'])
     total_stat[player] += int(row['stat2'])

 # Rebind total_stat to a list of per-player records, ordered by player key
 # in descending order. Both stat fields are read from the same counter.
 total_stat = [
     {'player': info, 'stat3': total_stat[info], 'stat2': total_stat[info]}
     for info in sorted(total_stat, reverse=True)
 ]

 # Index the records by their player key.
 for item in total_stat:
     result[item['player']] = item

 print(result)

However, I got the following:

 {'3': {'player': '3', 'stat3': 17, 'stat2': 17}, '1': {'player': '1', 'stat3': 18, 'stat2': 18}} 

How could I fix this? Or are there other approaches?

+5
source share
7 answers

Your data looks more like a DataFrame, so the natural solution with pandas is:

 In [34]: pd.DataFrame.from_records(data).astype(int).groupby('player').sum().T.to_dict()
 Out[34]: {1: {'stat2': 5, 'stat3': 13}, 3: {'stat2': 8, 'stat3': 9}}
+11
source

Just use a nested defaultdict, i.e. a default factory that itself returns a defaultdict:

 >>> total_stat = defaultdict(lambda : defaultdict(int))
 >>> value_fields = 'stat2', 'stat3'
 >>> for datum in data:
 ...     player_data = total_stat[datum['player']]
 ...     for k in value_fields:
 ...         player_data[k] += int(datum[k])
 ...
 >>> from pprint import pprint
 >>> pprint(total_stat)
 defaultdict(<function <lambda> at 0x1023490d0>, {'1': defaultdict(<class 'int'>, {'stat2': 5, 'stat3': 13}), '3': defaultdict(<class 'int'>, {'stat2': 8, 'stat3': 9})})
+5
source

This solution uses a nested dictionary: out is a dictionary {player: Counter}, where each Counter is itself another dictionary {stat: score}.

 import collections


 def split_player_stat(dict_object):
     """Split one row of data into its player key and its integer stats.

     The 'player' entry is returned as the key. Every other entry is
     converted with int() and collected into a new dictionary, so the
     input row is not modified.

     Raises KeyError if the row has no 'player' entry, and ValueError if
     a stat value cannot be converted by int().

     >>> split_player_stat({'stat3': '5', 'stat2': '4', 'player': '1'})
     ('1', {'stat3': 5, 'stat2': 4})
     """
     key = dict_object['player']
     value = {k: int(v) for k, v in dict_object.items() if k != 'player'}
     return key, value


 data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
         {'stat3': '8', 'stat2': '1', 'player': '1'},
         {'stat3': '6', 'stat2': '1', 'player': '3'},
         {'stat3': '3', 'stat2': '7', 'player': '3'}]

 # A player seen for the first time starts with an empty Counter;
 # Counter.update() adds the incoming counts to the stored ones instead of
 # replacing them, which is what performs the per-player summation.
 out = collections.defaultdict(collections.Counter)
 for player_stat in data:
     player, stat = split_player_stat(player_stat)
     out[player].update(stat)

 print(out)

The magic of this solution is performed by the collections.defaultdict and collections.Counter classes, both of which behave like dictionaries.

+3
source

Most of the solutions here make the task difficult. Let me make it simple and straightforward. Here you are:

 result = {}
 req_key = 'player'

 for dct in data:
     # Read the grouping key by lookup rather than pop(): popping would remove
     # 'player' from the dictionaries inside data, so a second pass over the
     # same data would fail with KeyError.
     player_val = dct[req_key]
     result.setdefault(player_val, {req_key: player_val})
     for k, v in dct.items():
         if k == req_key:
             # The grouping key is already stored; only the stats are summed.
             continue
         result[player_val][k] = result[player_val].get(k, 0) + int(v)

 print(result)
 # result == {'1': {'player': '1', 'stat2': 5, 'stat3': 13}, '3': {'player': '3', 'stat2': 8, 'stat3': 9}}

There you go: simple and clean. No imports are needed for this simple problem. Now let's walk through the program:

 result.setdefault(player_val, {'player': player_val}) 

It sets the default value to {"player": "3"} or {"player": "1"} if there is no such key in result yet.

 result[player_val][k] = result[player_val].get(k, 0) + int(v) 

This adds the value to the running total stored under the same key.

+1
source

Not the best code, nor the most Pythonic, but I think you should step through it to find out where your code went wrong.

 def sum_stats_by_player(data):
     """Total 'stat3' and 'stat2' per player, printing each step taken.

     data is an iterable of dictionaries with 'player', 'stat3' and 'stat2'
     entries; the two stat values are converted with int().  Returns a
     dictionary mapping each player key to a dictionary holding 'player',
     'stat3' and 'stat2', where the stats are the integer sums.
     """
     result = {}
     for dictionary in data:
         print(f"evaluating dictionary {dictionary}")
         player = dictionary["player"]
         stat3 = int(dictionary["stat3"])
         stat2 = int(dictionary["stat2"])

         # First sighting of this player: start its record with the key only.
         if player not in result:
             print(f"\tfirst time player {player}")
             result[player] = {"player": player}

         # stat3: accumulate when a total exists, otherwise seed it.
         if "stat3" in result[player]:
             print(f"\tupdating stat3 { result[player]['stat3'] + stat3}")
             result[player]["stat3"] += stat3
         else:
             print(f"\tfirst time stat3 {stat3}")
             result[player]["stat3"] = stat3

         # stat2: same treatment as stat3.
         if "stat2" in result[player]:
             print(f"\tupdating stat2 { result[player]['stat2'] + stat2}")
             result[player]["stat2"] += stat2
         else:
             print(f"\tfirst time stat2 {stat2}")
             result[player]["stat2"] = stat2

     return result


 data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
         {'stat3': '8', 'stat2': '1', 'player': '1'},
         {'stat3': '6', 'stat2': '1', 'player': '3'},
         {'stat3': '3', 'stat2': '7', 'player': '3'}]

 print(sum_stats_by_player(data))
+1
source

Another version using a counter

 import itertools
 from collections import Counter
 from operator import itemgetter


 def count_group(group):
     """Sum the stats of every row in group and return them as a plain dict.

     group is an iterable of row dictionaries.  The 'player' entry of each
     row is skipped; every other value is converted with int() and added to
     the running total kept for its key.
     """
     c = Counter()
     for g in group:
         # Counter.update() adds counts to the existing ones.
         c.update({k: int(v) for k, v in g.items() if k != 'player'})
     return dict(c)


 # groupby() only merges adjacent rows with equal keys, so the rows are sorted
 # first, using the very same key function that is used for grouping.
 by_player = itemgetter('player')
 sorted_data = sorted(data, key=by_player)
 results = [(k, count_group(g))
            for k, g in itertools.groupby(sorted_data, by_player)]
 print(results)

To give

 [('1', {'stat3': 13, 'stat2': 5}), ('3', {'stat3': 9, 'stat2': 8})] 
0
source

Two loops allow you to:

  • group data using a primary key
  • aggregate all secondary information

These two tasks are performed in the aggregate_statistics function, shown below.

 from collections import Counter
 from pprint import pprint


 def main():
     """Aggregate a small sample table by 'player' and pretty-print it."""
     data = [{'player': 1, 'stat2': 4, 'stat3': 5},
             {'player': 1, 'stat2': 1, 'stat3': 8},
             {'player': 3, 'stat2': 1, 'stat3': 6},
             {'player': 3, 'stat2': 7, 'stat3': 3}]
     pprint(aggregate_statistics(data, 'player'))


 def aggregate_statistics(table, key):
     """Group the records of table by key and sum their remaining fields.

     Each record is copied before key is removed from it, so the records in
     table are left untouched.  Returns a list with one dictionary per
     distinct key value, holding the summed fields plus the key itself.
     """
     # First loop: bucket one Counter per record under its key value.
     grouped = {}
     for row in table:
         fields = row.copy()
         group_id = fields.pop(key)
         grouped.setdefault(group_id, []).append(Counter(fields))

     # Second loop: fold every bucket into its first Counter.
     summary = []
     for group_id, counters in grouped.items():
         totals = counters[0]
         for extra in counters[1:]:
             totals.update(extra)
         summary.append(dict(totals, **{key: group_id}))
     return summary


 if __name__ == '__main__':
     main()
0
source

Source: https://habr.com/ru/post/1273981/


All Articles