Python version of Matlab's ismember with 'rows' and index

Question

Python version of Matlab's ismember with 'rows' and index

A similar question was asked before, but none of the answers do what I need: some of them allow row-wise (multi-column) queries (like the 'rows' option in Matlab) but do not return an index, while others return an index but do not support rows. My arrays are very large (1M x 2). I have managed to write a loop that works, but it is obviously very slow. In Matlab, the built-in ismember function takes about 10 seconds.

Here is what I am looking for:

a=np.array([[4, 6],[2, 6],[5, 2]])

b=np.array([[1, 7],[1, 8],[2, 6],[2, 1],[2, 4],[4, 6],[4, 7],[5, 9],[5, 2],[5, 1]])

Exact matlab function doing the trick:

[~,index] = ismember(a,b,'rows')

Where

index = [6, 3, 9]

+4

python numpy matlab

claudiaann1 Mar 27 '14 at 21:37

source share

3 answers

unutbu · Answer 1 · 2014-03-27T22:06:25+0000

import numpy as np

def asvoid(arr):
    """
    Reinterpret every row of an array as one opaque ``np.void`` item.

    The input is first made C-contiguous, then its whole last axis is viewed
    as a single block of raw bytes, so that two rows can be compared with one
    element-wise test.
    Technique from http://stackoverflow.com/a/16840350/190597 (Jaime, 2013-05).

    Caution: the comparison is byte-wise, so values that are numerically equal
    but stored differently do not match -- for example the float zeros in
    ``asvoid([-0.])`` and ``asvoid([0.])`` compare as unequal.
    """
    contiguous = np.ascontiguousarray(arr)
    # One void item has to span the complete last axis.
    bytes_per_row = contiguous.dtype.itemsize * contiguous.shape[-1]
    row_dtype = np.dtype((np.void, bytes_per_row))
    return contiguous.view(row_dtype)


def in1d_index(a, b):
    """
    Return the positions of the rows of ``b`` that also occur as rows of ``a``.

    Both inputs are passed through ``asvoid`` so that every row becomes a
    single item, which reduces row matching to a 1-D membership test.

    Parameters
    ----------
    a : array of rows to look for.
    b : array of rows to search in.
        NOTE(review): the byte-wise comparison presumably needs ``a`` and
        ``b`` to share dtype and row length -- confirm against callers.

    Returns
    -------
    1-D integer array of 0-based positions into ``b``, in increasing order
    (not in the order in which the rows appear in ``a``).
    """
    voida, voidb = map(asvoid, (a, b))
    # np.isin supersedes np.in1d, which is deprecated since NumPy 2.0.
    # The mask keeps voidb's (n, 1) shape; flatnonzero flattens it, so the
    # flat positions are exactly the row numbers.
    return np.flatnonzero(np.isin(voidb, voida))

# Example data from the question: the rows of `a` are looked up in `b`.
a = np.array([
    [4, 6],
    [2, 6],
    [5, 2],
])
b = np.array([
    [1, 7],
    [1, 8],
    [2, 6],
    [2, 1],
    [2, 4],
    [4, 6],
    [4, 7],
    [5, 9],
    [5, 2],
    [5, 1],
])

# Show the 0-based positions in `b` whose rows are also present in `a`.
print(in1d_index(a, b))

prints

[2 5 8]

This will be equivalent to Matlab [3, 6, 9], since Python uses 0-based indexing.

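Some caveats:

1. The indices are returned in increasing order; they do not follow the order in which the rows of a appear in b.
2. asvoid works for integer dtypes, but be careful when using asvoid on float dtypes, since asvoid([-0.]) == asvoid([0.]) returns array([False]).
3. asvoid works best on contiguous arrays. If the arrays are not contiguous, the data is first copied into a contiguous array, which costs extra time and memory.

Despite these caveats, one might still choose in1d_index for the sake of speed: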

def ismember_rows(a, b):
    """Return, for each match, the position in ``b`` of a row equal to a row of ``a``.

    Every row of ``a`` is broadcast against every row of ``b``; positions are
    reported row of ``a`` by row of ``a``.
    """
    # http://stackoverflow.com/a/22705773/190597 (ashg)
    # Shape (len(a), len(b), ncol): element-wise equality of every row pair.
    pairwise_equal = b == a[:, np.newaxis]
    # A pair of rows matches only if all of its columns agree.
    whole_row_match = np.all(pairwise_equal, axis=2)
    # Index 1 of the nonzero tuple holds the positions along `b`.
    return np.nonzero(whole_row_match)[1]

In [41]: a2 = np.tile(a,(2000,1))
In [42]: b2 = np.tile(b,(2000,1))

In [46]: %timeit in1d_index(a2, b2)
100 loops, best of 3: 8.49 ms per loop

In [47]: %timeit ismember_rows(a2, b2)
1 loops, best of 3: 5.55 s per loop

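So in1d_index is ~650x faster (for arrays of this size), but note that the comparison is not quite apples-to-apples: in1d_index returns the indices in increasing order, while ismember_rows returns them in the order in which the rows of a show up in b.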

ashg · Answer 2 · 2014-03-28T06:32:26+0000

import numpy as np 
def ismember_rows(a, b):
    '''Row-wise counterpart of Matlab's 'ismember'.
    a.shape = (nRows_a, nCol)
    b.shape = (nRows_b, nCol)
    Returns idx, the positions in b of the rows that equal a row of a,
    listed row of a by row of a (so b[idx] == a when every row of a occurs
    exactly once in b).
    '''
    # Compare each row of a with each row of b: (nRows_a, nRows_b, nCol).
    a_vs_b = a[:, np.newaxis] == b
    # Keep only the pairs whose columns all agree.
    row_hits = a_vs_b.all(axis=2)
    # The second index array addresses the rows of b.
    return row_hits.nonzero()[1]

# Demo: locate every row of `a` inside `b`, then verify the round trip.
a = np.array([[4, 6],[2, 6],[5, 2]])
b = np.array([[1, 7],[1, 8],[2, 6],[2, 1],[2, 4],[4, 6],[4, 7],[5, 9],[5, 2],[5, 1]])
idx = ismember_rows(a, b)
# Call print() as a function: the bare `print x` statement is a SyntaxError
# on Python 3, while the single-argument call prints the same on Python 2.
print(idx)
print(np.all(b[idx] == a))

array([5, 2, 8])
True

e...

-------------------------- [update] ------------------------------

def ismember(a, b):
    """Return the positions of the rows of ``b`` that also occur as rows of ``a``.

    Rows are compared as a whole. Testing each column on its own is not
    enough: with a = [[1, 2], [3, 4]] the row [1, 4] would pass both column
    tests although it is not a row of ``a``.

    Parameters
    ----------
    a : 2-D array of rows to look for.
    b : 2-D array of rows to search in; same number of columns as ``a``.

    Returns
    -------
    1-D integer array of 0-based positions into ``b``, in increasing order.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    n_a = a.shape[0]
    if n_a == 0 or b.shape[0] == 0:
        # Nothing can match; keep the integer index dtype of flatnonzero.
        return np.empty(0, dtype=np.intp)
    # Give every distinct row of the stacked input one integer label, so that
    # row equality becomes label equality.
    _, labels = np.unique(np.concatenate((a, b), axis=0), axis=0,
                          return_inverse=True)
    # Some NumPy releases return the inverse with an extra axis; flatten it.
    labels = labels.reshape(-1)
    return np.flatnonzero(np.isin(labels[n_a:], labels[:n_a]))

a = np.array([[4, 6],[2, 6],[5, 2]])
b = np.array([[1, 7],[1, 8],[2, 6],[2, 1],[2, 4],[4, 6],[4, 7],[5, 9],[5, 2],[5, 1]])
a2 = np.tile(a,(2000,1))
b2 = np.tile(b,(2000,1))

%timeit timeit in1d_index(a2, b2)
# 100 loops, best of 3: 8.74 ms per loop
%timeit ismember(a2, b2)
# 100 loops, best of 3: 8.5 ms per loop

np.all(in1d_index(a2, b2) == ismember(a2, b2))
# True

unutbu,

Ting on chan · Answer 3 · 2017-06-13T23:05:24+0000

The function first merges the columns of each row into a single-column array, so that numpy.in1d can then be used to find the matching rows. Try the following code:

import numpy as np

def ismemberRow(A,B):
    '''
    Report, for every row of A, whether an identical row exists in B.

    Each distinct row of the stacked input gets one integer label (np.unique
    along axis 0), which turns row matching into a 1-D membership test on the
    labels. This avoids np.str, which was removed in NumPy 1.24, and mixing a
    bytes np.chararray with str arrays, which fails on Python 3.

    Input: m x n numpy array (A), and p x q array (B)
    Output: boolean numpy array of length m; True where the row of A can also
            be found in B. Rows are compared by value; if n != q no row can
            match and every entry is False.
    '''
    A = np.asarray(A)
    B = np.asarray(B)
    n_rows = A.shape[0]

    # Nothing to look up, nothing to search in, or rows of different width:
    # no row of A can be a member of B.
    if n_rows == 0 or B.shape[0] == 0 or A.shape[1] != B.shape[1]:
        return np.zeros(n_rows, dtype=bool)

    # Label every distinct row of [A; B] so equal rows share one integer.
    _, labels = np.unique(np.concatenate((A, B), axis=0), axis=0,
                          return_inverse=True)
    # Some NumPy releases return the inverse with an extra axis; flatten it.
    labels = labels.reshape(-1)

    return np.isin(labels[:n_rows], labels[n_rows:])

# Demo: keep only the rows of A that also appear in B.
A = np.array([[1, 3, 4],[2, 4, 3],[7, 4, 3],[1, 1, 1],[1, 3, 4],[5, 3, 4],[1, 1, 1],[2, 4, 3]])

B = np.array([[1, 3, 4],[1, 1, 1]])

d = ismemberRow(A,B)

# Call print() as a function: the bare `print x` statement is a SyntaxError
# on Python 3, while the single-argument call prints the same on Python 2.
print(A[np.where(d)[0],:])

#results:
#[[1 3 4]
# [1 1 1]
# [1 3 4]
# [1 1 1]]

Python version of Matlab's ismember with 'rows' and index

More articles: