This is what I use so far. This is part of a function that matches data from one encoding system to another encoding system.
# Build a diagnostic report comparing the match keys of `data` (the "left"
# side) with those of `concord` (the "right" side). The result, `report_df`,
# has one 'left' and one 'right' column and these rows:
#   * the describe() statistics common to both sides,
#   * 'subset'       - whether that side's key set is a subset of the other's,
#   * 'differences'  - number of keys present on that side only,
#   * 'diff1'..'diff5' - up to five example keys present on that side only.
# NOTE(review): assumes `match_on` is a single column label, so that
# data[match_on] / concord[match_on] are Series - confirm against the caller.
if report:
    # describe() returns a Series named after the match column; to_frame()
    # renames it explicitly. (DataFrame(series, columns=[...]) *selects* a
    # column by name instead of renaming and yields no data on modern pandas.)
    left_stats = data[match_on].describe().to_frame('left')
    right_stats = concord[match_on].describe().to_frame('right')
    # Inner join on the statistic name: only statistics present on both
    # sides are kept.
    report_df = left_stats.merge(right_stats, left_index=True, right_index=True)

    set_left = set(data[match_on])
    set_right = set(concord[match_on])
    only_left = set_left.difference(set_right)
    only_right = set_right.difference(set_left)

    subset_row = pd.DataFrame(
        {
            'left': set_left.issubset(set_right),
            'right': set_right.issubset(set_left),
        },
        index=['subset'],
    )
    count_row = pd.DataFrame(
        {'left': len(only_left), 'right': len(only_right)},
        index=['differences'],
    )

    # Up to five example differences per side, padded with NaN so both
    # columns always have exactly five entries. The examples follow set
    # iteration order, i.e. they are arbitrary, not a true random sample.
    diff_labels = ['diff1', 'diff2', 'diff3', 'diff4', 'diff5']
    n_examples = len(diff_labels)
    left_diff = (list(only_left) + [np.nan] * n_examples)[:n_examples]
    right_diff = (list(only_right) + [np.nan] * n_examples)[:n_examples]
    example_rows = pd.DataFrame(
        {'left': left_diff, 'right': right_diff},
        index=diff_labels,
    )

    # DataFrame.append was removed in pandas 2.0; concat stacks all the
    # pieces in one step and preserves the row order of the original code.
    report_df = pd.concat([report_df, subset_row, count_row, example_rows])
Report Example

source share