Python with Itertools?

I have a .csv file that contains 3 columns: PersonX, PersonY and PersonZ. There are 7,000 name strings in various combinations. My goal is to see which pairs and which groups of 3 occur most often. I could not find a formula in Excel that would achieve this. I am sure Python can do it, perhaps with itertools, but I have not made any progress yet. The names can be in any order; I just want to see how many times the same 2 or 3 people appear on the same line. Any suggestions would be a big help, thanks!

A small example of the data:

PersonX         PersonY             PersonZ
Aaron Ekblad    Keith Yandle        Vincent Trocheck
Aaron Ekblad    Denis Malgin        Mike Matheson
Aaron Ekblad    Denis Malgin        Mike Matheson
Aaron Ekblad    Jonathan Huberdeau  Keith Yandle
Aaron Ekblad    Jonathan Huberdeau  Keith Yandle
Aaron Ekblad    Jamie McGinn        Keith Yandle
Aaron Ekblad    Aleksander Barkov   Jonathan Huberdeau
Aaron Ekblad        
Adam Erne       Andrej Sustr        Vladislav Namestnikov
Adam Erne       Anthony Cirelli 
Adam Erne       
Adam Henrique   Rickard Rakell      Ryan Getzlaf
Adam Henrique   Brandon Montour     Ryan Getzlaf
Adam Henrique   Corey Perry         Brandon Montour
Adam Henrique   Corey Perry         Brandon Montour
Adam Henrique   Brian Gibbons       Andy Greene
Adam Henrique   Ryan Getzlaf    
Adam Henrique   Ondrej Kase 
Adam Henrique   Josh Manson 
Adam Henrique   Brian Gibbons   
Adam Henrique       
Adam Henrique   

Start of the script:

import csv
from itertools import combinations, product

#Header = PersonX PersonY PersonZ

#Import Game
with open('1718_All_Goals_&_Assists.csv', newline='') as f:
    next(f)
    skaters = '\n'.join(' '.join(row) for row in csv.reader(f))
    print(skaters)
+4
source share
1 answer

You can simply use a collections.Counter on the csv.reader:

from collections import Counter

>>> cnt = Counter(frozenset(item.strip() for item in line if item.strip()) for line in csv.reader(f))

To get the groups ordered from most to least common, use the .most_common() method of the Counter:

>>> cnt.most_common()
[(frozenset({'Aaron Ekblad', 'Denis Malgin', 'Mike Matheson'}), 2),
 (frozenset({'Aaron Ekblad', 'Jonathan Huberdeau', 'Keith Yandle'}), 2),
 (frozenset({'Adam Henrique', 'Brandon Montour', 'Corey Perry'}), 2),
 (frozenset({'Adam Henrique'}), 2),
 (frozenset({'Aaron Ekblad', 'Keith Yandle', 'Vincent Trocheck'}), 1),
 (frozenset({'Aaron Ekblad', 'Jamie McGinn', 'Keith Yandle'}), 1),
 (frozenset({'Aaron Ekblad', 'Aleksander Barkov', 'Jonathan Huberdeau'}), 1),
 (frozenset({'Aaron Ekblad'}), 1),
 (frozenset({'Adam Erne', 'Andrej Sustr', 'Vladislav Namestnikov'}), 1),
 (frozenset({'Adam Erne', 'Anthony Cirelli'}), 1),
 (frozenset({'Adam Erne'}), 1),
 (frozenset({'Adam Henrique', 'Rickard Rakell', 'Ryan Getzlaf'}), 1),
 (frozenset({'Adam Henrique', 'Brandon Montour', 'Ryan Getzlaf'}), 1),
 (frozenset({'Adam Henrique', 'Andy Greene', 'Brian Gibbons'}), 1),
 (frozenset({'Adam Henrique', 'Ryan Getzlaf'}), 1),
 (frozenset({'Adam Henrique', 'Ondrej Kase'}), 1),
 (frozenset({'Adam Henrique', 'Josh Manson'}), 1),
 (frozenset({'Adam Henrique', 'Brian Gibbons'}), 1)]

If you are only interested in the single most common group, pass 1 as the argument:

>>> cnt.most_common(1)
[(frozenset({'Aaron Ekblad', 'Denis Malgin', 'Mike Matheson'}), 2)]

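If several groups are tied for the highest count and you want all of them, find the highest count with max and select every group that has it: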

>>> maximum_occurences = max(cnt.values())
>>> [group for group, occurences in cnt.items() if occurences == maximum_occurences]
[frozenset({'Aaron Ekblad', 'Denis Malgin', 'Mike Matheson'}),
 frozenset({'Aaron Ekblad', 'Jonathan Huberdeau', 'Keith Yandle'}),
 frozenset({'Adam Henrique', 'Brandon Montour', 'Corey Perry'}),
 frozenset({'Adam Henrique'})]

How it works:

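Counter counts how many times each item occurs in an iterable, and the items must be hashable because they are used as dictionary keys. The rows produced by csv.reader are lists of strings (and lists are not hashable), so each row has to be converted to a hashable type first.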

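Tuples are hashable, but they are ordered, so the same names in a different order would be counted as different groups. Because the order of the names should not matter, each row is converted to a frozenset instead. Note: a frozenset also discards duplicates, so a name that appears more than once in the same row is only kept once.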

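Because the csv also contains rows with fewer than three names, the empty ("") and whitespace-only fields have to be removed, and the surrounding whitespace has to be stripped from the names. That is what the following generator expression does: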

item.strip() for item in line if item.strip()

The expression above is what gets passed to frozenset.

That expression strips every item twice. To strip each item only once, use a nested generator expression or map:

frozenset(item for item in (item.strip() for item in line) if item)
frozenset(item for item in map(lambda x: x.strip(), line) if item)
frozenset(item for item in map(str.strip, line) if item)  # if all items are really of type str

Or, shorter still, use filter with bool to drop the empty strings:

frozenset(filter(bool, map(str.strip, line)))

So the standard library's "batteries included" are enough for this. There is no need for itertools.

Note: if you want to ignore the rows that contain only a single name, filter the Counter before calling most_common:

cnt = Counter({k: v for k, v in cnt.items() if len(k) > 1})

This creates a new Counter that contains only the groups with more than one name.


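If a name can appear more than once in the same row and those duplicates should be kept, count the names within each row with an inner Counter and use its contents, converted to a tuple, as the key: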

Counter(tuple(Counter(filter(bool, map(str.strip, line))).most_common()) for line in csv.reader(f))

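In that case the keys are tuples of (name, count) pairs rather than frozensets. Be aware that most_common() keeps names with equal counts in first-seen order, so rows with the same names in a different order produce different keys; sort the pairs before building the tuple if the order should be ignored.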

+5

Source: https://habr.com/ru/post/1696129/


All Articles