groupby, ( , ) , "Identifier", :
, columns = ["Identifier", "Category1", "Category2", "Category3", "Category4", "Category5"]
:
# Split the frame into one sub-frame per "Identifier" value and build two
# parallel lists (same order, same length):
#   pure_groups - the sub-frames exactly as groupby yields them, still
#                 carrying the "Identifier" column
#   groups      - the same rows without "Identifier", sorted by columns[1:]
#                 and re-indexed from 0, so that DataFrame.equals compares
#                 row content rather than original row order or index labels
groups = []
pure_groups = []
for _identifier, group in df.groupby("Identifier"):
    pure_groups.append(group)
    # Index.difference returns the remaining column labels in sorted order.
    g_idfless = group[group.columns.difference(["Identifier"])]
    # reset_index(drop=True) discards the old index outright. The earlier
    # reset_index().drop("index", axis=1) raised KeyError whenever the index
    # was named or multi-level, because the restored column(s) are then not
    # called "index".
    groups.append(g_idfless.sort_values(columns[1:]).reset_index(drop=True))
:
# Pair each identifier (as text) with its normalised, identifier-less frame.
# pure_groups and groups are walked in lockstep, so entry k of both lists is
# taken to describe the same identifier.
labelled = [
    (str(raw["Identifier"].iloc[0]), normalised)
    for raw, normalised in zip(pure_groups, groups)
]
# Visit every unordered pair exactly once: each entry is compared only with
# the entries that come after it.
for pos, (left_id, left_rows) in enumerate(labelled):
    for right_id, right_rows in labelled[pos + 1:]:
        verdict = left_rows.equals(right_rows)
        print(f"{left_id} and {right_id} equal?: {verdict}")
# Output reported for the example data:
#-->1000 and 1001 equal?: False
#-->1000 and 1002 equal?: True
#-->1001 and 1002 equal?: False
EDIT: ,