Approach #1
A NumPy way, using np.unique -
# np.unique labels every row with the index of its group (tags) and
# counts how many rows each distinct group has (c).
_, tags, c = np.unique(df.group.values, return_inverse=True, return_counts=True)
# Look up each row's group count and divide by the total number of rows.
df['group_incidence'] = c[tags] / c.sum()
Sample run -
In [584]: df
Out[584]:
group
170 64.22-1-00
72 64.22-1-00
121 35.12-3-00
99 64.22-1-00
19 35.12-3-00
In [585]: _,tags, c = np.unique(df.group.values, return_counts=1, return_inverse=1)
In [586]: df['group_incidence'] = (c/c.sum())[tags]
In [587]: df
Out[587]:
group group_incidence
170 64.22-1-00 0.6
72 64.22-1-00 0.6
121 35.12-3-00 0.4
99 64.22-1-00 0.6
19 35.12-3-00 0.4
Approach #2
A lower-level NumPy way, based on sorting -
def argsort_unique(idx):
    """Return the inverse of the permutation ``idx``.

    Parameters
    ----------
    idx : array-like of int
        A permutation of ``0 .. n-1`` (for example the output of
        ``argsort``). Every value must appear exactly once; with repeated
        or missing values some output slots are left uninitialised.

    Returns
    -------
    numpy.ndarray
        Integer array ``sidx`` such that ``sidx[idx[i]] == i``. For a
        permutation this equals ``idx.argsort()``, but it is built with a
        single scatter instead of a sort.
    """
    idx = np.asarray(idx)
    n = idx.size
    # intp is NumPy's native indexing type; plain ``int`` can be 32-bit on
    # some platforms/NumPy versions.
    sidx = np.empty(n, dtype=np.intp)
    # Position i of the input names the slot that must hold i.
    sidx[idx] = np.arange(n)
    return sidx
def group_ratios_tagged(a):
    """Return, for every element of ``a``, the share of its group in ``a``.

    Elements that compare equal form a group; the value returned for an
    element is ``(number of elements in its group) / a.size``.

    Parameters
    ----------
    a : array-like
        1-D collection of sortable, comparable values (e.g. strings).

    Returns
    -------
    numpy.ndarray
        Float array of the same length as ``a``, aligned with the input
        order. An empty input yields an empty float array.
    """
    a = np.asarray(a)
    n = a.size
    if n == 0:
        # Nothing to count; also avoids a 0/0 division below.
        return np.empty(0, dtype=float)

    # Sort so that equal values become contiguous runs.
    sidx = a.argsort()
    b = a[sidx]

    # True at the first element of every run except the very first one.
    m = np.concatenate(([False], b[1:] != b[:-1]))

    # Run boundaries in sorted order -> run lengths (group counts).
    sep_idx = np.concatenate(([0], np.flatnonzero(m), [n]))
    c = sep_idx[1:] - sep_idx[:-1]

    # Running count of boundaries gives the group id (0, 1, 2, ...) of each
    # sorted element. A running maximum of the 0/1 mask would stop at 1 and
    # mislabel every group after the second.
    idx = m.cumsum()

    # Per-element ratio in sorted order (the counts sum to n).
    h = (c / n)[idx]

    # Scatter back to the original order; equivalent to indexing ``h`` with
    # the inverse permutation of ``sidx``.
    out = np.empty(n, dtype=h.dtype)
    out[sidx] = h
    return out
Sample run -
In [659]: df = pd.read_clipboard()
In [660]: df
Out[660]:
group
170 64.22-1-00
72 64.22-1-00
121 35.12-3-00
99 64.22-1-00
19 35.12-3-00
In [661]: df['group_incidence'] = group_ratios_tagged(df.group.values)
In [662]: df
Out[662]:
group group_incidence
170 64.22-1-00 0.6
72 64.22-1-00 0.6
121 35.12-3-00 0.4
99 64.22-1-00 0.6
19 35.12-3-00 0.4