Python - something faster than 2 nested for loops

def fancymatching(fname1, fname2):
    """Report whether two first names match.

    Stand-in for a smarter, fuzzier comparison; for now two names match
    only when they compare equal.

    Returns 1 on a match and 0 otherwise.
    """
    return 1 if fname1 == fname2 else 0

# Sample records to compare. Every record carries the same four string
# fields: pid, fname, mname and lname.
personlist = [
    {
        'pid': '1',
        'fname': 'john',
        'mname': 'a',
        'lname': 'smyth',
    },
    {
        'pid': '2',
        'fname': 'john',
        'mname': 'a',  # key was misspelled 'mnane'
        'lname': 'smith',
    },
    {
        'pid': '3',
        'fname': 'bob',
        'mname': 'b',
        'lname': 'nope',
    },
]

# Compare people pairwise, visiting each pair of distinct pids only once.
for person1 in personlist:
    for person2 in personlist:
        # Don't check a person against itself, or a pair that has already
        # been seen in the opposite order.
        # NOTE: pids are strings here, so this ordering is lexicographic.
        if person1['pid'] >= person2['pid']:
            continue
        if fancymatching(person1['fname'], person2['fname']):
            print(person1['pid'] + " matched " + person2['pid'])

I am trying to improve on the idea in the code above. It works, but if personlist gets very large (like millions of entries), I feel like there should be something faster than 2 nested for loops.

What the code does is take a list of dictionaries and run a fancy fuzzy-matching function on the values of each dictionary against those of every other dictionary. It is not as simple as comparing all the dictionaries with each other for equality. I would like to run a function on every pair of dictionaries; are 2 for loops the right way to do this? Any suggestions would be helpful!

+4
source share
2 answers

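You can use itertools.combinations. It is essentially the same double loop, but it iterates faster because it is written in C (this only reduces the constant factor, the runtime is still O(n**2)), and you no longer need the if person1['pid'] >= person2['pid']: continue check (that is already built into combinations).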

from itertools import combinations

# combinations(personlist, 2) yields each unordered pair once, in list order.
for pair in combinations(personlist, 2):
    left, right = pair
    print(left['fname'], right['fname'])

which prints:

('john', 'john')
('john', 'bob')
('john', 'bob')

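However, if your fancymatching allows it, you could also group your values instead (O(n) runtime). For example, in your case only identical 'fname' values match, so you can collect the records in a dictionary keyed by 'fname'.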

>>> matches = {}
>>> for person in personlist:
...     matches.setdefault(person['fname'], []).append(person)
>>> matches
{'bob': [{'fname': 'bob', 'lname': 'nope', 'mname': 'b', 'pid': '3'}],
 'john': [{'fname': 'john', 'lname': 'smyth', 'mname': 'a', 'pid': '1'}, 
          {'fname': 'john', 'lname': 'smith', 'mnane': 'a', 'pid': '2'}]}

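However, this is only possible if fancymatching allows such a grouping. That is true for the example above, but if the comparison is more complicated (truly fuzzy), grouping by an exact key may not work.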

+6

As MSeifert suggested, if the match condition really is fname1 == fname2, you can group the records by that key first, i.e.:

from itertools import combinations, groupby

def keyfunc(person):
    """Return the field used both to sort and to group the records."""
    return person['fname']


# groupby only merges adjacent items, so the data is sorted by the same
# key function before grouping.
data = sorted(personlist, key=keyfunc)
for key, group in groupby(data, keyfunc):
    # Every element in this group has the same fname, so each pair matches.
    for person1, person2 in combinations(group, 2):
        print(person1['fname'], person2['fname'])

, , , , , . , , .

0

Source: https://habr.com/ru/post/1669984/


All Articles