answer
option number 6 is my best attempt.
change
For option 6, instead of overwriting each cell with 1, you can increment it. This little tweak should give you a count.
import numpy as np

# Counting variant of option 6; expects df2, row_indexers and col_indexers
# from that snippet.
# ``arr[rows, cols] += 1`` applies only one increment per distinct position
# when the same (row, col) pair occurs more than once, so it cannot count
# duplicates. np.add.at performs the unbuffered, accumulating add instead.
counts = df2.values.copy()  # private writable copy; .values may be read-only
np.add.at(counts, (row_indexers, col_indexers), 1)
df2.iloc[:, :] = counts  # write the accumulated counts back into df2
option 1
# Option 1: count each (A, B) pair with groupby, then reshape the flags onto
# df2's grid (B labels as rows, A labels as columns).
df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6], [1, 6]], columns=['A', 'B'])
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)

result = (
    df1.groupby(['A', 'B']).size()  # occurrences of every observed (A, B) pair
    .gt(0).mul(1)                   # collapse the counts to a 0/1 flag
    # df2.unstack() is a Series indexed by every (column, row) label pair of
    # df2, so reindexing on it adds the unobserved pairs with a 0.
    .reindex(df2.unstack().index, fill_value=0)
    .unstack(0)                     # level 0 (A) becomes the columns again
)
result
option 2
# Option 2: build a B-by-A frame of 1s for the observed pairs and merge it
# into df2 in place with DataFrame.update.
df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6], [1, 6]], columns=['A', 'B'])
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)

# One entry per distinct (A, B) pair: level 0 holds A, level 1 holds B.
mux = pd.MultiIndex.from_arrays(df1.values.T).drop_duplicates()

# Unstacking level 0 yields 1 at observed pairs and NaN elsewhere; update()
# ignores NaN, so only the observed cells of df2 are overwritten.
df2.update(pd.Series(1, index=mux).unstack(0))
df2
option 3
# Option 3: DataFrame.where with a "keep" mask that is False exactly at the
# observed (B, A) cells, so those cells are replaced by 1. df2 is not modified.
df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6], [1, 6]], columns=['A', 'B'])
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)

# One entry per distinct (A, B) pair: level 0 holds A, level 1 holds B.
mux = pd.MultiIndex.from_arrays(df1.values.T).drop_duplicates()

# False at observed pairs; every other cell is filled with True (keep as is).
keep_mask = pd.Series(False, index=mux).unstack(0, fill_value=True)

result = df2.where(keep_mask, 1)
result
option 4
# Option 4: boolean-mask assignment; set df2 to 1 wherever the mask is True.
df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6], [1, 6]], columns=['A', 'B'])
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)

# One entry per distinct (A, B) pair: level 0 holds A, level 1 holds B.
mux = pd.MultiIndex.from_arrays(df1.values.T).drop_duplicates()

# True at observed pairs; every other cell is filled with False.
hit_mask = pd.Series(True, index=mux).unstack(0, fill_value=False)

df2[hit_mask] = 1
df2
option 5
# Option 5: plain Python loop, one scalar write per row of df1.
df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6], [1, 6]], columns=['A', 'B'])
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)

for a, b in zip(df1['A'], df1['B']):
    # DataFrame.set_value was removed in pandas 1.0; .at is the label-based
    # scalar setter that replaces it (row label first, then column label).
    df2.at[b, a] = 1
df2
option 6
inspired by @ayhan and @Divakar
# Option 6: translate labels to integer positions once, then set all cells
# with a single NumPy pointwise assignment.
df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6], [1, 6]], columns=['A', 'B'])
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)

# get_indexer does a label lookup, so it stays correct when the labels are
# not sorted. unique() returns labels in first-seen order, and searchsorted
# would silently return wrong positions for such an index.
row_indexers = df2.index.get_indexer(df1['B'])
col_indexers = df2.columns.get_indexer(df1['A'])

# Assign on a private copy and write it back: mutating df2.values directly
# only works when .values happens to be a writable view of the frame.
grid = df2.values.copy()
grid[row_indexers, col_indexers] = 1
df2.iloc[:, :] = grid
df2
time
given sample
the code:
# Shared inputs for the timing functions below. Every function reads the
# module-level df1/df2; the ones that return None modify df2 in place.
df1 = pd.DataFrame([[1, 2], [3, 4], [5, 6], [1, 6]], columns=['A', 'B'])
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)


def _mark_cells(row_indexers, col_indexers):
    """Set df2 to 1 at the paired integer positions, modifying df2 in place."""
    # Work on a private copy and write it back: mutating df2.values directly
    # only works when .values happens to be a writable view of the frame.
    grid = df2.values.copy()
    grid[row_indexers, col_indexers] = 1
    df2.iloc[:, :] = grid


def pir1():
    """Option 1: groupby/size flags reshaped onto df2's grid; returns a new frame."""
    return (
        df1.groupby(['A', 'B']).size()
        .gt(0).mul(1)
        # df2.unstack() is indexed by every (column, row) label pair of df2.
        .reindex(df2.unstack().index, fill_value=0)
        .unstack(0)
    )


def pir2():
    """Option 2: merge a frame of 1s into df2 in place with DataFrame.update."""
    mux = pd.MultiIndex.from_arrays(df1.values.T).drop_duplicates()
    # NaN cells produced by unstack are ignored by update().
    df2.update(pd.Series(1, index=mux).unstack(0))


def pir3():
    """Option 3: DataFrame.where with a keep-mask; returns a new frame."""
    mux = pd.MultiIndex.from_arrays(df1.values.T).drop_duplicates()
    keep_mask = pd.Series(False, index=mux).unstack(0, fill_value=True)
    return df2.where(keep_mask, 1)


def pir4():
    """Option 4: boolean-mask assignment into df2, in place."""
    mux = pd.MultiIndex.from_arrays(df1.values.T).drop_duplicates()
    hit_mask = pd.Series(True, index=mux).unstack(0, fill_value=False)
    df2[hit_mask] = 1


def pir5():
    """Option 5: one scalar write into df2 per row of df1, in place."""
    for a, b in zip(df1['A'], df1['B']):
        # DataFrame.set_value was removed in pandas 1.0; .at replaces it.
        df2.at[b, a] = 1


def pir6():
    """Option 6: vectorised label-to-position lookup; returns df2 itself."""
    # get_indexer does not require sorted labels, unlike searchsorted, so it
    # is also correct for the first-seen ordering that unique() produces.
    row_indexers = df2.index.get_indexer(df1['B'])
    col_indexers = df2.columns.get_indexer(df1['A'])
    _mark_cells(row_indexers, col_indexers)
    return df2


def ayhan1():
    """@ayhan's variant: per-label get_loc lookups, then one pointwise write."""
    row_indexers = [df2.index.get_loc(r_label) for r_label in df1['B']]
    col_indexers = [df2.columns.get_loc(c_label) for c_label in df1['A']]
    _mark_cells(row_indexers, col_indexers)


def jez1():
    """get_dummies variant: one-hot encode A per B row, then take the max per B."""
    return pd.get_dummies(df1.set_index('B')['A']).groupby(level=0).max()
much larger sample
the code:
from itertools import combinations
from string import ascii_letters

# Every unordered pair of distinct ASCII letters, joined into a two-character
# label (52 letters -> 1,326 labels).
letter_pairs = [first + second for first, second in combinations(ascii_letters, 2)]

# 10,000 random rows: A is an integer in [0, 100), B is a random label.
df1 = pd.DataFrame({
    'A': np.random.randint(0, 100, 10000),
    'B': np.random.choice(letter_pairs, 10000),
})

# Zero-filled grid with the distinct B values as rows and the distinct A
# values as columns. unique() keeps first-seen order, so neither axis is sorted.
df2 = pd.DataFrame(
    0,
    index=list(df1['B'].unique()),
    columns=list(df1['A'].unique()),
)