https://en.m.wikipedia.org/wiki/Jaccard_index
And now a cleaned-up code sample.
from itertools import permutations


def jac(s1, s2):
    """Return the Jaccard index of two sets.

    The index is ``len(s1 & s2) / len(s1 | s2)``, a float in [0.0, 1.0].
    When both sets are empty the union is empty too; 0.0 is returned in
    that case instead of dividing by zero.
    """
    len_union = len(s1.union(s2))
    if not len_union:
        return 0.0
    # float() keeps true division under Python 2 as well as Python 3.
    return float(len(s1.intersection(s2))) / len_union


# Sample data: user name -> {movie title -> rating}.
ratings = {
    'Shane': {'127 Hours': 5.0, 'Avatar': 4.0, 'Nonstop': 5.0},
    'Joe': {'127 Hours': 5.0, 'Taken 3': 4.0, 'Avatar': 5.0, 'Nonstop': 3.0},
    'Bob': {'Panic Room': 5.0, 'Nonstop': 5.0},
}


def common_movie(dict0, dict1):
    """Return the Jaccard index over (movie, rating) pairs of two users.

    Despite the name, a movie only counts as shared when both users gave
    it the same rating, because the comparison is made on ``dict.items()``
    rather than on the keys alone.
    """
    return jac(set(dict0.items()), set(dict1.items()))


def movies_and_ratings(dict0, dict1):
    """Return a weighted blend of title overlap and (title, rating) overlap.

    ``key_commonality`` is the Jaccard index of the movie titles alone and
    ``item_commonality`` is the Jaccard index of the (movie, rating) pairs.
    """
    key_commonality = jac(set(dict0), set(dict1))
    item_commonality = jac(set(dict0.items()), set(dict1.items()))
    # NOTE(review): the snippet as pasted stopped before its return
    # statement. The 0.3 / 0.7 weights below are inferred from the sample
    # output that accompanies this code (e.g. Shane vs Joe ->
    # 0.341666666667, Bob vs Joe -> 0.06); confirm against the original.
    key_weight = 0.3
    item_weight = 0.7
    return key_weight * key_commonality + item_weight * item_commonality
Output:
Shane vs Bob [('127 Hours', 5.0), ('Avatar', 4.0), ('Nonstop', 5.0)] [('Nonstop', 5.0), ('Panic Room', 5.0)] common_movie :0.25 movies_and_ratings:0.25 common_movie_ratings :1.0 Shane vs Joe [('127 Hours', 5.0), ('Avatar', 4.0), ('Nonstop', 5.0)] [('127 Hours', 5.0), ('Avatar', 5.0), ('Nonstop', 3.0), ('Taken 3', 4.0)] common_movie :0.166666666667 movies_and_ratings:0.341666666667 common_movie_ratings :0.333333333333 Bob vs Shane [('Nonstop', 5.0), ('Panic Room', 5.0)] [('127 Hours', 5.0), ('Avatar', 4.0), ('Nonstop', 5.0)] common_movie :0.25 movies_and_ratings:0.25 common_movie_ratings :1.0 Bob vs Joe [('Nonstop', 5.0), ('Panic Room', 5.0)] [('127 Hours', 5.0), ('Avatar', 5.0), ('Nonstop', 3.0), ('Taken 3', 4.0)] common_movie :0.0 movies_and_ratings:0.06 common_movie_ratings :0.0 Joe vs Shane [('127 Hours', 5.0), ('Avatar', 5.0), ('Nonstop', 3.0), ('Taken 3', 4.0)] [('127 Hours', 5.0), ('Avatar', 4.0), ('Nonstop', 5.0)] common_movie :0.166666666667 movies_and_ratings:0.341666666667 common_movie_ratings :0.333333333333 Joe vs Bob [('127 Hours', 5.0), ('Avatar', 5.0), ('Nonstop', 3.0), ('Taken 3', 4.0)] [('Nonstop', 5.0), ('Panic Room', 5.0)] common_movie :0.0 movies_and_ratings:0.06 common_movie_ratings :0.0