PROJECT/KILL <-- (i.e.: overkill)
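Pandas factorize. I use pd.factorize to turn every distinct value into an integer code, and it also returns the distinct values themselves.
Then, to count how often each combination of codes occurs, I use Numpy bincount.
Think of every cell of the result table as a "bin". np.bincount counts how many times each integer occurs, with the bins numbered starting at 0: every row of the data adds one to its own bin. Which bin? The row code i and the column code j are combined into a single "flat" position, i * m + j, where m is the number of columns. Once the counts are collected, the flat array is reshaped back into the n-by-m "table".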
# Build a count table: one row per distinct (x, y) pair, one column per
# distinct z value, each cell holding how many rows of df fall in that cell.
tups = list(zip(df.x, df.y))
# i: integer code of each row's (x, y) pair; r: the distinct pairs.
i, r = pd.factorize(tups)
# j: integer code of each row's z value; c: the distinct z values.
j, c = pd.factorize(df.z)
n, m = len(r), len(c)
# i * m + j is the flat (row-major) index of cell (i, j) in an n-by-m table;
# minlength keeps trailing never-seen cells so the reshape always fits.
b = np.bincount(i * m + j, minlength=n * m).reshape(n, m)
# Assemble keys and counts as separate frames. Stacking them into a single
# NumPy array (np.column_stack) forces one common dtype, which turns the
# integer counts into strings whenever x or y holds text.
pd.concat(
    [
        pd.DataFrame(r.tolist(), columns=['x', 'y']),
        pd.DataFrame(b, columns=c.tolist()),
    ],
    axis=1,
)
x y 1 3 2 4
0 a 2 2 1 0 0
1 b 5 0 1 4 0
2 c 7 0 1 2 2
z
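Note that I used Pandas factorize. I could have used Numpy unique instead. However, np.unique sorts its result (there is no way to turn that off), and sorting costs O(n * log(n)), whereas factorize does not need to sort. For large data, the Numpy version will therefore be slower.
The z columns above are not sorted, unlike the output the OP asked for. That is because factorize keeps the values in the order in which they first appear. To get sorted columns without switching to Numpy, pass the sort flag to pd.factorize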
# Build a count table: one row per distinct (x, y) pair, one column per
# distinct z value (columns in sorted order), each cell holding how many rows
# of df fall in that cell.
tups = list(zip(df.x, df.y))
# i: integer code of each row's (x, y) pair; r: the distinct pairs.
i, r = pd.factorize(tups)
# sort=True orders the distinct z values, so the count columns come out sorted.
j, c = pd.factorize(df.z, sort=True)
n, m = len(r), len(c)
# i * m + j is the flat (row-major) index of cell (i, j) in an n-by-m table;
# minlength keeps trailing never-seen cells so the reshape always fits.
b = np.bincount(i * m + j, minlength=n * m).reshape(n, m)
# Assemble keys and counts as separate frames. Stacking them into a single
# NumPy array (np.column_stack) forces one common dtype, which turns the
# integer counts into strings whenever x or y holds text.
pd.concat(
    [
        pd.DataFrame(r.tolist(), columns=['x', 'y']),
        pd.DataFrame(b, columns=c.tolist()),
    ],
    axis=1,
)
x y 1 2 3 4
0 a 2 2 0 1 0
1 b 5 0 4 1 0
2 c 7 0 2 1 2