Here's a NumPy-based solution -
import numpy as np
import pandas as pd

def groupby_count(df):
    # factorize TERM into integer labels (unq holds the unique terms)
    unq, t = np.unique(df.TERM, return_inverse=True)
    ids = df.ID.values
    # sort by ID first, then by TERM label
    sidx = np.lexsort([t, ids])
    ts = t[sidx]
    idss = ids[sidx]
    # flag the start of each (ID, TERM) group, with sentinels at both ends
    m0 = (idss[1:] != idss[:-1]) | (ts[1:] != ts[:-1])
    m = np.concatenate(([True], m0, [True]))
    ids_out = idss[m[:-1]]
    t_out = unq[ts[m[:-1]]]
    # group sizes = distances between consecutive group starts
    x_out = np.diff(np.flatnonzero(m) + 1)
    out_ar = np.column_stack((ids_out, t_out, x_out))
    return pd.DataFrame(out_ar, columns=['ID', 'TERM', 'X'])
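The counting step is worth unpacking: once the rows are sorted, each (ID, TERM) group occupies a contiguous run, m flags the start of every run (plus a trailing sentinel), and the gaps between consecutive flagged positions are the run lengths. A minimal sketch of that trick on a toy array (the array and variable names here are illustrative only, not part of the answer):

import numpy as np

# toy sorted keys; each run of equal values is one "group"
keys = np.array([1, 1, 1, 2, 2, 3])

# True at every run start, plus a sentinel past the end
m = np.concatenate(([True], keys[1:] != keys[:-1], [True]))

# positions of run starts (and the end sentinel)
starts = np.flatnonzero(m)

# distances between consecutive starts are the group sizes
counts = np.diff(starts)
print(counts)   # [3 2 1]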
A little easier -
def groupby_count_v2(df):
    a = df.values
    # sort rows on the two key columns
    sidx = np.lexsort(a[:, :2].T)
    b = a[sidx, :2]
    # flag rows where either key changes, with sentinels at both ends
    m = np.concatenate(([True], (b[1:] != b[:-1]).any(1), [True]))
    out_ar = np.column_stack((b[m[:-1], :2], np.diff(np.flatnonzero(m) + 1)))
    return pd.DataFrame(out_ar, columns=['ID', 'TERM', 'X'])
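For cross-checking, the same counts can be produced with a plain pandas groupby; the helper below is only a verification aid I'm adding here, not part of the NumPy approach. Note that groupby_count_v2 may return its rows in a different order, since np.lexsort treats its last key as the primary sort key.

import pandas as pd

# reference implementation: group sizes per (ID, TERM), sorted by ID then TERM
def groupby_count_pandas(df):
    return df.groupby(['ID', 'TERM'], sort=True).size().reset_index(name='X')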
Sample run -
In [332]: df
Out[332]:
   ID TERM   X
0   1    A   0
1   1    A   4
2   1    A   6
3   1    B   0
4   1    B  10
5   2    A   1
6   2    B   1
7   2    F   1

In [333]: groupby_count(df)
Out[333]:
  ID TERM  X
0  1    A  3
1  1    B  2
2  2    A  1
3  2    B  1
4  2    F  1
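One caveat: np.column_stack promotes the mixed integer/string columns to a common dtype, so the ID and X columns of the returned frame typically come back as strings rather than integers (they print the same, as above). If numeric columns are needed downstream, a cast restores them (a post-processing sketch, not part of the original answer):

out = groupby_count(df)
# ID and X are strings after column_stack's type promotion; cast back to int
out[['ID', 'X']] = out[['ID', 'X']].astype(int)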
Let's randomly shuffle the rows and verify that the solution still works -
In [339]: df1 = df.iloc[np.random.permutation(len(df))]

In [340]: df1
Out[340]:
   ID TERM   X
7   2    F   1
6   2    B   1
0   1    A   0
3   1    B   0
5   2    A   1
2   1    A   6
1   1    A   4
4   1    B  10

In [341]: groupby_count(df1)
Out[341]:
  ID TERM  X
0  1    A  3
1  1    B  2
2  2    A  1
3  2    B  1
4  2    F  1
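The eyeball check above can also be made programmatic by comparing against the pandas reference defined earlier; casting both frames to strings sidesteps the dtype differences introduced by column_stack (a sketch, assuming the groupby_count_pandas helper from above):

result = groupby_count(df1)
expected = groupby_count_pandas(df1)

# both sort on (ID, TERM), so the rows should line up one for one
assert (result.astype(str).values == expected.astype(str).values).all()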