Comprehension
# For each (Make, Model) row, look the model up in dic and compare with the make.
df.assign(
    match=[dic.get(model, '') == make for make, model in df.values]
)
Make Model match
0 Toyota Corolla True
1 Honda Civic True
2 Toyota Accord False
3 Ford F-150 True
dict.items and in
# A dict items view supports `(key, value) in view` membership tests, so each
# row is reversed into (Model, Make) order and tested against it directly.
items = dic.items()
df.assign(match=[tuple(row[::-1]) in items for row in df.values])
Make Model match
0 Toyota Corolla True
1 Honda Civic True
2 Toyota Accord False
3 Ford F-150 True
isin
# Build (Model, Make) tuples and test them against the dict's (key, value) pairs.
# The helper Series reuses df.index: assign() aligns on index, so a fresh
# RangeIndex would give NaN/misaligned results whenever df is not 0..n-1 indexed.
df.assign(
    match=pd.Series(
        list(map(tuple, df.values[:, ::-1])), index=df.index, dtype=object
    ).isin(list(dic.items()))
)
Make Model match
0 Toyota Corolla True
1 Honda Civic True
2 Toyota Accord False
3 Ford F-150 True
Numpy Structured Arrays
# Normalise both sides to (Make, Model) string pairs; dic stores model -> make.
rows = [(str(mk), str(md)) for mk, md in zip(df['Make'], df['Model'])]
refs = [(str(mk), str(md)) for md, mk in dic.items()]
# Size the string fields from the data so no value is silently truncated
# (fixed '<U6'/'<U7' widths would make 'Chevrolet' and 'Chevrolux' compare equal).
width = max([len(s) for pair in rows + refs for s in pair] + [1])
# One shared dtype for both arrays: same field names, same field order.
dtype = [('Make', f'<U{width}'), ('Model', f'<U{width}')]
a = np.array(rows, dtype)
b = np.array(refs, dtype)
# np.isin supersedes the deprecated np.in1d.
df.assign(match=np.isin(a, b))
Make Model match
0 Toyota Corolla True
1 Honda Civic True
2 Toyota Accord False
3 Ford F-150 True
Time comparison
Conclusions
The @wen method is much better!
Functions
def wen(df, dic):
    """Flag rows whose Make is the make that ``dic`` records for the row's Model.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Make`` and ``Model`` columns.
    dic : dict
        Mapping of model -> make.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added boolean ``match`` column.
    """
    # With a plain dict, models that are not keys map to NaN, which never
    # compares equal to a make, so those rows come out False.
    expected_make = df['Model'].map(dic)
    return df.assign(match=expected_make.eq(df['Make']))
def maxu(df, dic):
return df.assign(match=df[['Make', 'Model']].sum(axis=1).isin(set([v+k for k, v in dic.items()])))
def pir1(df, dic):
    """Flag matching rows with a list comprehension over ``dict.get`` lookups.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Make`` and ``Model`` columns; other columns are ignored.
    dic : dict
        Mapping of model -> make.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added boolean ``match`` column.
    """
    # A private sentinel as the default means an unknown model can never equal
    # a real make (a '' default would falsely match a row whose Make is '').
    missing = object()
    # Columns are selected by name so extra columns or a different column
    # order do not break the (make, model) unpacking.
    pairs = zip(df['Make'], df['Model'])
    return df.assign(match=[dic.get(md, missing) == mk for mk, md in pairs])
def pir2(df, dic):
    """Flag matching rows by testing (Model, Make) against ``dic.items()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Make`` and ``Model`` columns; other columns are ignored.
    dic : dict
        Mapping of model -> make.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added boolean ``match`` column.
    """
    # A dict items view supports `(key, value) in view` membership tests.
    items = dic.items()
    # Columns are taken by name, already in (key, value) = (Model, Make) order,
    # so extra columns in df cannot turn every row into a silent non-match.
    pairs = zip(df['Model'], df['Make'])
    return df.assign(match=[pair in items for pair in pairs])
def pir3(df, dic):
    """Flag matching rows with ``Series.isin`` over (Model, Make) tuples.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Make`` and ``Model`` columns.
    dic : dict
        Mapping of model -> make.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added boolean ``match`` column.
    """
    pairs = list(zip(df['Model'], df['Make']))
    # The helper Series must carry df's own index: assign() aligns on index,
    # so a default RangeIndex would produce NaN/misaligned values whenever df
    # is not indexed 0..n-1. dtype=object keeps each tuple as one element.
    as_series = pd.Series(pairs, index=df.index, dtype=object)
    return df.assign(match=as_series.isin(list(dic.items())))
def pir4(df, dic):
    """Flag matching rows by comparing NumPy structured arrays with ``np.isin``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Make`` and ``Model`` columns.
    dic : dict
        Mapping of model -> make.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added boolean ``match`` column.

    Notes
    -----
    Values are converted with ``str`` before comparison.
    """
    # Normalise both sides to (Make, Model) string pairs; dic is model -> make.
    rows = [(str(mk), str(md)) for mk, md in zip(df['Make'], df['Model'])]
    refs = [(str(mk), str(md)) for md, mk in dic.items()]
    # Size the string fields from the data so no value is silently truncated
    # (fixed '<U6'/'<U7' widths make 'Chevrolet' and 'Chevrolux' compare equal).
    width = max([len(s) for pair in rows + refs for s in pair] + [1])
    # One shared dtype for both arrays: same field names, same field order.
    dtype = [('Make', f'<U{width}'), ('Model', f'<U{width}')]
    a = np.array(rows, dtype)
    b = np.array(refs, dtype)
    # np.isin supersedes the deprecated np.in1d.
    return df.assign(match=np.isin(a, b))
Reverse test
# Timing table: one row per replication factor, one column per method,
# pre-filled with NaN until each cell is measured.
res = pd.DataFrame(
    np.nan, [10, 30, 100, 300, 1000, 3000, 10000, 30000],
    'wen maxu pir1 pir2 pir3 pir4'.split()
)

for i in res.index:
    # Work on a copy of the lookup dict.
    m = dict(dic.items())
    # Stack i copies of df, renumbering the index 0..n-1.
    d = pd.concat([df] * i, ignore_index=True)
    for j in res.columns:
        # timeit runs the statement in its own namespace, so the function and
        # its two arguments are imported from __main__ in the setup string.
        stmt = f'{j}(d, m)'
        setp = f'from __main__ import {j}, m, d'
        res.at[i, j] = timeit(stmt, setp, number=200)
results
# Plot the timings with logarithmic scaling on both axes.
res.plot(loglog=True)

# Divide each row by its own minimum, so the fastest method at every size
# reads 1.0 and the others read as multiples of it.
res.div(res.min(axis=1), axis=0)
wen maxu pir1 pir2 pir3 pir4
10 2.041111 2.799885 1.000000 1.032221 1.432887 1.174196
30 1.544264 2.417550 1.000000 1.043218 1.336503 1.003284
100 1.037501 1.843029 1.000000 1.066310 1.319942 1.191763
300 1.000000 2.373917 1.726667 2.009198 2.193276 2.424844
1000 1.000000 3.962928 3.764808 3.932539 4.099261 4.971527
3000 1.000000 6.250289 6.311701 6.740862 6.258989 7.791234
10000 1.000000 9.014925 10.110949 10.964482 10.347168 13.407998
30000 1.000000 10.410604 11.682759 13.113974 11.877862 16.000993