Unique list of dicts based on keys

I have a list of dicts:

     # Each record is a plain dict carrying these five fields.
     data = {}
     data['key'] = pointer_key
     data['timestamp'] = timestamp
     data['action'] = action
     data['type'] = type
     data['id'] = id

     # The individual records are then collected into one list.
     list = [data1, data2, data3, ... ]

How can I guarantee that the list contains only one item for each "key"? If two items share the same key, as shown below, the one with the most recent timestamp should win:

    # Input: two records share key 1; the one with the later timestamp should win.
    # Note the quoted 'id' key: a bare `id` would use the builtin function as the key.
    list = [{'key':1,'timestamp':1234567890,'action':'like','type':'photo','id':245},
            {'key':2,'timestamp':2345678901,'action':'like','type':'photo','id':252},
            {'key':1,'timestamp':3456789012,'action':'like','type':'photo','id':212}]

    unique(list)

    # Desired result: exactly one record per key.
    list = [{'key':2,'timestamp':2345678901,'action':'like','type':'photo','id':252},
            {'key':1,'timestamp':3456789012,'action':'like','type':'photo','id':212}]

Thanks.

+3
source share
10 answers

Here is my solution:

def uniq(list_dicts):
    """Return copies of the distinct dicts in ``list_dicts``.

    Two dicts count as duplicates when they hold the same key/value pairs,
    regardless of the order in which the keys were inserted. The first
    occurrence of each distinct dict is kept and input order is preserved.

    Args:
        list_dicts: iterable of dicts whose values are hashable.

    Returns:
        A new list of new dict objects, one per distinct input dict.

    Raises:
        TypeError: if any dict value is unhashable.
    """
    seen = set()
    result = []
    for d in list_dicts:
        # A frozenset of the items ignores key insertion order, unlike
        # tuple(d.items()), which treats reordered equal dicts as different.
        fingerprint = frozenset(d.items())
        if fingerprint not in seen:
            seen.add(fingerprint)
            result.append(dict(d))
    return result

hope this helps someone.

+4
source

I needed this, but I did not like any of the answers here, so I made this simple and efficient version.

def list_of_seq_unique_by_key(seq, key):
    """Return the items of ``seq`` with duplicates of ``item[key]`` removed.

    For every distinct ``item[key]`` value only the first item encountered is
    kept; the relative order of the kept items matches ``seq``.

    Args:
        seq: iterable of subscriptable items (dicts, tuples, ...).
        key: subscript used to read the de-duplication value from each item.

    Returns:
        A new list holding the surviving items.
    """
    observed = set()
    kept = []
    for item in seq:
        value = item[key]
        if value in observed:
            continue
        observed.add(value)
        kept.append(item)
    return kept

# Usage
# To make the most recent timestamp win, sort newest-first before de-duplicating:
# list_of_seq_unique_by_key keeps the first item it meets for each value.
list = sorted(list, key=lambda k: k['timestamp'], reverse=True)
# Drop every later item whose value for key 'key' has already been seen
list = list_of_seq_unique_by_key(list, 'key')
+3
source

, , ['key']? , data1 ['key'] = 'hello', , data2 ['key'] = 'hello' ? , ? , . ( "list" , python)

# Validate that no two data dicts carry the same 'key' value.
datalist = [datadict1, datadict2, datadict3]
big_key_list = []
for datadict in datalist:
    curkey = datadict.get('key')
    # Guard clause: abort on the first key that was already recorded.
    if curkey in big_key_list:
        raise Exception("Key %s in two data dicts" % curkey)
    big_key_list.append(curkey)

- , dict, , . , , , , ( , , - , ).

EDIT: , , , , , . , , . .

:

class MyDataObject(object):
    """Record whose attributes are taken from arbitrary keyword arguments."""

    def __init__(self, **kwargs):
        """Store every keyword argument as an instance attribute of the same name."""
        # Iterating a dict directly yields only its keys, so .items() is
        # required to unpack (name, value) pairs.
        for k, v in kwargs.items():
            self.__dict__[k] = v

4 :

class MyDataObject(object):
    """Record with four fixed fields: timestamp, action, type and id."""

    def __init__(self, timestamp, action, obj_type, obj_id):
        """Store the arguments; obj_type / obj_id land on the attributes type / id."""
        self.timestamp, self.action = timestamp, action
        self.type, self.id = obj_type, obj_id

.

# Store one record object per key; assigning to an existing key replaces its value.
# NOTE(review): the keywords `type=` / `id=` fit the **kwargs constructor; the
# explicit-signature variant names these parameters obj_type / obj_id -- confirm
# which class definition is in effect.
data = {}
data['key1'] = MyDataObject(timestamp='some timestamp', action='some action', type='some type', id = 1234)
data['key2'] = MyDataObject(timestamp='some timestamp2', action='some action2', type='some type2', id = 1235)

, :

data['key1'].timestamp # plain attribute access; returns 'some timestamp'
data['key2'].action # plain attribute access; returns 'some action2'

dict() (, , x = 'action ", ).

# __dict__ is a mapping, not a callable: index it with [] (calling it raises TypeError).
data['key1'].__dict__['action'] # returns 'some action'
data['key2'].__dict__['timestamp'] # returns 'some timestamp2'

, , , , ( MyDataObject).

+1

, , 'key' .

, , , , : , .

, :

def unique_keys(items):
    """Lazily yield the items whose ``item['key']`` has not been yielded yet.

    The first item seen for each ``'key'`` value is passed through; any later
    item carrying the same value is silently dropped.
    """
    emitted = set()
    for entry in items:
        current = entry['key']
        if current in emitted:
            # Duplicate key: dropped. This is the place to hook in other handling.
            continue
        emitted.add(current)
        yield entry

# Call form of print: valid on Python 3 and prints the same on Python 2.
print(list(unique_keys(data_list)))

, , , ...?

+1

, .

# One entry per key; each value is [timestamp, action, type, id].
data = {}
data[pointer_key] = [timestamp, action, type, id]
# Store the incoming record when its key is new, or when it is more recent
# than the record already stored under that key.
if new_pointer_key not in data or new_timestamp > data[new_pointer_key][0]:   ## first element of list=timestamp
    data[new_pointer_key] = [new_timestamp, new_action, new_type, new_id]
+1

. dict .

0
>>> d = {'a': 1, 'b': 2, 'a': 3}
>>> d
{'a': 3, 'b': 2}

, dict .

: ( )

, , dict, :

>>> class custom_dict(dict):
      def __setitem__(self, key, value):
        self.setdefault(key, []).append(value)

>>> m = custom_dict()
>>> m['key'] = 1
>>> m['key'] = 2
>>> m
{'key': [1, 2]}

.

0

, , - .
:

from operator import itemgetter

def unique(list_of_dicts):
    """Return one dict per ``'key'`` value, preferring the largest ``'timestamp'``.

    The dicts are ordered by descending ``'timestamp'`` and the first dict
    met for each ``'key'`` is kept, so the result is newest-first.

    Args:
        list_of_dicts: iterable of dicts that each have ``'key'`` and
            ``'timestamp'`` entries.

    Returns:
        A new list of the surviving dicts (the same objects, not copies).
    """
    newest_first = list(list_of_dicts)
    newest_first.sort(key=itemgetter('timestamp'), reverse=True)
    claimed = set()
    winners = []
    for record in newest_first:
        if record['key'] not in claimed:
            claimed.add(record['key'])
            winners.append(record)
    return winners

(: ):

[{'action': 'like', 'timestamp': 3456789012, 'type': 'photo', 'id': 212, 'key': 1},
{'action': 'like', 'timestamp': 2345678901, 'type': 'photo', 'id': 252, 'key': 2}]

, ( , ), , , jimbob:

class MyDataObject(object):
    """Value holder exposing timestamp, action, type and id attributes."""

    def __init__(self, timestamp, action, obj_type, obj_id):
        """Copy the four constructor arguments onto the instance."""
        # obj_type and obj_id are exposed under the shorter names type and id.
        self.__dict__.update(
            timestamp=timestamp,
            action=action,
            type=obj_type,
            id=obj_id,
        )

# Build a key -> MyDataObject mapping from the de-duplicated records.
# NOTE(review): `_list` is not defined in the visible snippet; presumably it is
# the input list of dicts -- confirm.
data = {}
for action in unique(_list):
    key = action['key']
    # Positional order matches MyDataObject(timestamp, action, obj_type, obj_id).
    data[key] = MyDataObject(action['timestamp'], action['action'],
        action['type'], action['id'])
0

Using groupby from itertools:

def unique(items, key, order=None):
    """Return one item per group, where groups are defined by ``key(item)``.

    Items are sorted by ``key`` (and, within equal keys, by ``order`` when it
    is given) and the first item of each group is returned. With ``order``
    the winner of a group is therefore the item with the smallest
    ``order(item)``; without it, the first item in input order wins because
    the sort is stable.

    Args:
        items: iterable of items.
        key: one-argument function returning the sortable grouping value.
        order: optional one-argument function ranking items within a group.

    Returns:
        A list with one item per distinct key, sorted by key.
    """
    # Function-scope import: the snippet never imports itertools itself.
    import itertools

    # The grouping key comes first in the sort tuple so that equal keys stay
    # adjacent, which groupby requires.
    sort_func = (lambda v: (key(v), order(v))) if order else key
    groups = itertools.groupby(sorted(items, key=sort_func), key)
    # next(group) works on Python 2.6+ and 3; group.next() exists only on Python 2.
    return [next(group) for unused_key, group in groups]

def unique(items, key, order=None):
    """Return one item per group, where groups are defined by ``key(item)``.

    Items are sorted by ``key`` only; the winner of each group is then picked
    in a single pass instead of sorting by ``order`` as well. The winner is
    the item with the smallest ``order(item)``, so an ``order`` of negated
    timestamp keeps the newest item. Without ``order`` the first item of the
    group in input order wins.

    Args:
        items: iterable of items.
        key: one-argument function returning the sortable grouping value.
        order: optional one-argument function ranking items within a group.

    Returns:
        A list with one item per distinct key, sorted by key.
    """
    # Function-scope import: the snippet never imports itertools itself.
    import itertools

    groups = itertools.groupby(sorted(items, key=key), key)
    if not order:
        # No ranking supplied: comparing the items themselves would fail for
        # dicts on Python 3, so simply keep the first item of every group.
        return [next(group) for unused_key, group in groups]
    # min (not max) picks the item that an ascending sort by `order` would put first.
    return [min(group, key=order) for unused_key, group in groups]

, . , , . . " ", , , . , :

data = [{'key':1, 'timestamp':1234567890, 'action':'like', 'type':'photo', 'id':245},
        {'key':2, 'timestamp':2345678901, 'action':'like', 'type':'photo', 'id':252},
        {'key':1, 'timestamp':3456789012, 'action':'like', 'type':'photo', 'id':212}]

# unique(data)
key = lambda d: d['key']  # Group by key
order = lambda d: -d['timestamp']  # Sort by descending order timestamp
# The keyword must match unique()'s parameter name, which is `order`.
data = unique(data, key, order=order)

# Expected: one record per key, the newest one for key 1.
data == [{'key':1, 'timestamp':3456789012, 'action':'like', 'type':'photo', 'id':212},
         {'key':2, 'timestamp':2345678901, 'action':'like', 'type':'photo', 'id':252}]

We force the key to be the first in the sort function to ensure the correct grouping, regardless of order.

This solution changes the order of your items, although it has the advantage of reasonable memory use and time complexity.

0
source
>>> def unique(l):
...     return {k['key']:k for k in l}.values()
...
>>> print(unique([ {'key':1,'timestamp':1234567890,'action':'like','type':'photo',id:245},
...                {'key':2,'timestamp':2345678901,'action':'like','type':'photo',id:252},
...                {'key':1,'timestamp':3456789012,'action':'like','type':'photo',id:212} ]))
dict_values([{<built-in function id>: 212, 'type': 'photo', 'key': 1, 'timestamp': 3456789012, 'action': 'like'}, {<built-in function id>: 252, 'type': 'photo', 'key': 2, 'timestamp': 2345678901, 'action': 'like'}])
0
source

Source: https://habr.com/ru/post/1778827/


All Articles