Setting performance considerations aside, this will find the duplicate rows, drop them, and give you a dictionary keyed by the original row index (similar to the one in the question here).
import numpy as np
import pandas as pd

# Define data. Mixing strings and booleans in one NumPy array coerces every
# element to a string, so the booleans become 'False' / 'True'.
d = np.array([["D", "T", "D", "T", "U"],
              ["F", "F", "F", "J", "K"],
              [False, False, False, False, True]])

# Transpose so that each observation is a row (5 rows x 3 columns).
df = pd.DataFrame(d.T)

# Find and remove duplicate rows; the first occurrence of each row is kept
# and the original row labels are preserved.
df_nodupe = df[~df.duplicated()]

# Make a dictionary mapping each remaining row label to its list of values.
df_nodupe.T.to_dict('list')
# Result:
# {0: ['D', 'F', 'False'],
#  1: ['T', 'F', 'False'],
#  3: ['T', 'J', 'False'],
#  4: ['U', 'K', 'True']}
Otherwise, if you already know the mapping you want, you can use `Series.map`, for example:
import numpy as np
import pandas as pd

# Define data. Mixing strings and booleans in one NumPy array coerces every
# element to a string, so the booleans become 'False' / 'True'.
d = np.array([["D", "T", "D", "T", "U"],
              ["F", "F", "F", "J", "K"],
              [False, False, False, False, True]])

# Transpose so that each observation is a row (5 rows x 3 columns).
df = pd.DataFrame(d.T)
df.columns = ['x', 'y', 'z']

# Define your dictionary of interest: row contents -> category id.
dd = {('D', 'F', 'False'): 0,
      ('T', 'F', 'False'): 1,
      ('T', 'J', 'False'): 2,
      ('U', 'K', 'True'): 3}

# Create a tuple of the rows of interest. On Python 3 `zip` returns a lazy
# iterator, so it has to be materialized into a list before assignment.
df['tupe'] = list(zip(df.x, df.y, df.z))

# Create a new column based on the row values; tuples that are not keys of
# `dd` would map to NaN.
df['new_category'] = df.tupe.map(dd)