Here is mainly based on NumPy -
def group_dup_rowids(a): sidx = np.lexsort(aT) b = a[sidx] m = np.concatenate(([False], (b[1:] == b[:-1]).all(1), [False] )) idx = np.flatnonzero(m[1:] != m[:-1]) C = sidx.tolist() return [C[i:j] for i,j in zip(idx[::2],idx[1::2]+1)] out = group_dup_rowids(np.abs(a))
Run Example -
In [175]: a Out[175]: array([[ 0, 1, 2], [ 3, 4, 5], [ 0, -1, -2], [ 9, 5, 6], [-3, -4, -5]]) In [176]: group_dup_rowids(np.abs(a)) Out[176]: [[0, 2], [1, 4]]
The exact case of denial
In the case when you are looking for paired matches with exact negation, we just need a little modification -
def group_dup_rowids_negation(ar): a = np.abs(ar) sidx = np.lexsort(aT) b = ar[sidx] m = np.concatenate(([False], (b[1:] == -b[:-1]).all(1), [False] )) idx = np.flatnonzero(m[1:] != m[:-1]) C = sidx.tolist() return [(C[i:j]) for i,j in zip(idx[::2],idx[1::2]+1)]
Run Example -
In [354]: a Out[354]: array([[ 0, 1, 2], [ 3, 4, 5], [ 0, -1, -2], [ 9, 5, 6], [-3, -4, -5]]) In [355]: group_dup_rowids_negation(a) Out[355]: [[0, 2], [1, 4]] In [356]: a[-1] = [-3,4,-5] In [357]: group_dup_rowids_negation(a) Out[357]: [[0, 2]]
Runtime test
Other work approaches -
# @Joe Iddon soln def for_for_if_listcompr(a): return [(i, j) for i in range(len(a)) for j in range(i+1, len(a)) if all(a[i] == -a[j])]
Dates -
In [492]:
Further improvements
def group_dup_rowids_negation_mod1(ar): a = np.abs(ar) sidx = np.lexsort(aT) b = ar[sidx] dp = view1D(b) dn = view1D(-b) m = np.concatenate(([False], dp[1:] == dn[:-1], [False] )) return zip(sidx[m[1:]], sidx[m[:-1]]) def group_dup_rowids_negation_mod2(ar): a = np.abs(ar) sidx = lexsort_cols_posnum(a) b = ar[sidx] dp = view1D(b) dn = view1D(-b) m = np.concatenate(([False], dp[1:] == dn[:-1], [False] )) return zip(sidx[m[1:]], sidx[m[:-1]])
Secondary functions:
# https:
Runtime Test (borrowed from @Paul Panzer benchmarking) -
In [628]: N = 50000