(Python) Find indices of values in two arrays that are equal to values in two other arrays

Question

(Python) Find indices of values in two arrays that are equal to values in two other arrays

I have the following 4 arrays, and I want to get indexes of values that are equal in arrays A and X, corresponding to values at the same position in B and Y. So, for the next example

    import numpy as np
    A = np.asarray([400.5, 100,  700,   200,  15, 900])
    B = np.asarray([500.5, 200,  500, 600.5,   8, 999])
    X = np.asarray([400.5, 700,  100,   300,  15, 555, 900])
    Y = np.asarray([500.5, 500,600.5,   100,   8, 555, 999])

I want to get two arrays with indexes:

indAB = [0 2 4 5]

0, because 400.5 and 500.5, at position 0 in A & B, are also a pair in X & Y
2, because 700 and 500, at position 2 in A & B, are also a pair in X & Y
4, because 15 and 8, at position 4 in A & B, are also a pair in X & Y
5, because 900 and 999, at position 5 in A & B, are also a pair in X & Y

indXY = [0 1 4 6]

0, 1, 4, and 6 are found in the same way as for indAB, but for X & Y instead.

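Here indAB holds the indices of the value pairs in A and B that are equal to value pairs in X and Y, and indXY holds the indices of the value pairs in X and Y that are equal to value pairs in A and B.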

This is my attempt so far:

    def indices(a,b):
        """Return the positions in `a` whose value also occurs anywhere in `b`.

        The result is a list of integer indices into `a`, in increasing order.
        Membership is tested against a set built from `b`, so the elements of
        `b` must be hashable.
        """
        lookup = set(b)
        matches = []
        for position, value in enumerate(a):
            if value in lookup:
                matches.append(position)
        return matches

    # Positions in A whose value occurs somewhere in X.
    iA = np.asarray(indices(A,X))
    # Positions in X whose value occurs somewhere in A (despite the name iB).
    iB = np.asarray(indices(X,A))
    # Positions in B whose value occurs somewhere in Y (despite the name iX).
    iX = np.asarray(indices(B,Y))
    # Positions in Y whose value occurs somewhere in B.
    iY = np.asarray(indices(Y,B))

    def CommonIndices(a,b):
        """Return the values that occur in both `a` and `b` as a NumPy array.

        Duplicates are collapsed, and the result is sorted in increasing
        order. `a` and `b` may be any iterables of hashable, mutually
        comparable values (for example arrays of integer indices).
        """
        # Set iteration order is an implementation detail, so sort the
        # intersection to make the returned order deterministic.
        return np.asarray(sorted(set(a) & set(b)))

    # Indices that appear in both iA and iX.
    indAB = CommonIndices(iA,iX)
    # Indices that appear in both iB and iY.
    indXY = CommonIndices(iB,iY)

    print(indAB) # prints [0 2 4 5], which is the wanted result
    print(indXY) # prints [0 1 2 4 6], but the wanted result is [0 1 4 6]

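This code returns [0 1 2 4 6] for indXY, which is wrong. Index 2 is included because 600.5 from Y is also found in B and 100 from X is also found in A, but not at the same position: in A & B, 100 is paired with 200, not with 600.5.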

Any help would be appreciated. Thanks!

+4

python equals arrays numpy

TimeExplorer 24 . '17 9:52

3

:

import numpy as np

# Sample data: (A[i], B[i]) and (X[j], Y[j]) are the value pairs to be matched.
A = np.asarray([400.5, 100,  700,   200,  15, 900])
B = np.asarray([500.5, 200,  500, 600.5,   8, 999])
X = np.asarray([400.5, 700,  100,   300,  15, 555, 900])
Y = np.asarray([500.5, 500,600.5,   100,   8, 555, 999])

# Stack the inputs column-wise: AB[i] is (A[i], B[i]) and XY[j] is (X[j], Y[j]).
AB = np.stack([A, B], axis=-1)
XY = np.stack([X, Y], axis=-1)

# Broadcast so that every row of AB is compared with every row of XY,
# component by component; the result has shape (len(AB), len(XY), 2).
eq = AB[:, np.newaxis, :] == XY[np.newaxis, :, :]
# eq[i, j] is True only when both components of AB[i] and XY[j] are equal.
eq = np.logical_and.reduce(eq, axis=-1)

# A row of AB is selected if it equals any row of XY, and vice versa.
indAB, = np.where(np.logical_or.reduce(eq, axis=1))
indXY, = np.where(np.logical_or.reduce(eq, axis=0))

print("indAB", indAB)
print("indXY", indXY)

Output:

indAB [0 2 4 5]
indXY [0 1 4 6]

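AB and XY are just A and B, and X and Y, respectively, "stacked" into two-column arrays of pairs. eq holds the all-against-all comparison of the elements of AB and XY; np.newaxis is used to add dimensions to AB and XY (AB gets a new dimension at position 1 and XY at position 0) so that the == operator broadcasts them against each other. The first np.logical_and.reduce ensures that both "components" are equal (A to X and B to Y), and the np.logical_or.reduce operations check whether there is any full match from AB to XY and from XY to AB. Finally, np.where extracts the indices.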

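Note that this builds a temporary boolean array of size len(A) x len(X) x 2, so it may not be feasible if the input arrays are very large.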

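As said, very large arrays may not fit in memory this way. In that case the comparison can be done "in pieces" (at the cost of an explicit Python loop), comparing a limited number of pairs at a time, like this: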

import numpy as np

# Upper bound on the chunk length along each axis; the largest temporary
# comparison array therefore has shape MAX_SIZE x MAX_SIZE x 2.
MAX_SIZE = 2

A = np.asarray([400.5, 100,  700,   200,  15, 900])
B = np.asarray([500.5, 200,  500, 600.5,   8, 999])
X = np.asarray([400.5, 700,  100,   300,  15, 555, 900])
Y = np.asarray([500.5, 500,600.5,   100,   8, 555, 999])

# Row i of AB is the pair (A[i], B[i]); row j of XY is the pair (X[j], Y[j]).
AB = np.stack([A, B], axis=-1)
XY = np.stack([X, Y], axis=-1)

# One flag per row, set as soon as a matching row is found on the other side.
maskAB = np.zeros(len(AB), dtype=bool)
maskXY = np.zeros(len(XY), dtype=bool)

for iAB in range(0, len(AB), MAX_SIZE):
    rows = slice(iAB, iAB + MAX_SIZE)
    # Shape (chunk, 1, 2) so it broadcasts against the XY chunk below.
    pAB = AB[rows, np.newaxis, :]
    for iXY in range(0, len(XY), MAX_SIZE):
        cols = slice(iXY, iXY + MAX_SIZE)
        # Shape (1, chunk, 2).
        pXY = XY[np.newaxis, cols, :]
        # eq[i, j] is True when both components of the two pairs are equal.
        eq = (pAB == pXY).all(axis=-1)
        maskAB[rows] |= eq.any(axis=1)
        maskXY[cols] |= eq.any(axis=0)

# Convert the boolean masks into index arrays.
indAB = np.flatnonzero(maskAB)
indXY = np.flatnonzero(maskXY)

print("indAB", indAB)
print("indXY", indXY)

Output:

indAB [0 2 4 5]
indXY [0 1 4 6]

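MAX_SIZE is set to 2 here only to show that the chunking works on this small example; in practice you would choose a much larger value (for example, MAX_SIZE = 10000). The larger MAX_SIZE is, the more memory each comparison needs, but the fewer loop iterations are required.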

+1

jdehesa 24 . '17 10:22

Here is an alternative method. I dare say that it is relatively clear, it should be efficient thanks to the use of sets, and it requires only O(len(A) + len(X)) memory.

numpy is not even required, but the code also works with numpy arrays.

from collections import defaultdict

A = [400.5, 100, 700, 200, 15, 900]
B = [500.5, 200, 500, 600.5, 8, 999]
X = [400.5, 700, 100, 300, 15, 555, 900]
Y = [500.5, 500, 600.5, 100, 8, 555, 999]

def get_indices(values):
    """Map each distinct value in `values` to the set of positions it occupies.

    Returns a ``defaultdict(set)``, so looking up a value that never occurred
    yields an empty set instead of raising ``KeyError``.
    """
    positions_by_value = defaultdict(set)
    position = 0
    for item in values:
        positions_by_value[item].add(position)
        position += 1
    return positions_by_value

# Build one value -> positions map per input sequence.
iA, iB, iX, iY = map(get_indices, (A, B, X, Y))
print(iA)
# {400.5: {0}, 100: {1}, 200: {3}, 900: {5}, 700: {2}, 15: {4}}
print(iX)
# {400.5: {0}, 100: {2}, 300: {3}, 900: {6}, 555: {5}, 700: {1}, 15: {4}}

# For every (A[i], B[i]) pair, look for a position j where X[j] equals A[i]
# and Y[j] equals B[i]; report the first one found.
for ab_index, (a_value, b_value) in enumerate(zip(A, B)):
    shared = iX[a_value] & iY[b_value]
    if not shared:
        continue
    print("A B : %d" % ab_index)
    print("X Y : %d" % shared.pop())
    print()

#   A B : 0
#   X Y : 0

#   A B : 2
#   X Y : 1

#   A B : 4
#   X Y : 4

#   A B : 5
#   X Y : 6

+1

Eric Duminil Jul 24 '17 at 13:30

source share

Eelco Hoogendoorn · Accepted Answer · 2017-07-24T12:40:05+0000

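The numpy_indexed package (disclaimer: I am its author) contains functionality to do this kind of thing efficiently and elegantly. Its memory requirements are linear, and its computational cost is N log N. For the arrays in the question, it can be used as follows: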

import numpy as np
import numpy_indexed as npi

A = np.asarray([400.5, 100,  700,   200,  15, 900])
B = np.asarray([500.5, 200,  500, 600.5,   8, 999])
X = np.asarray([400.5, 700,  100,   300,  15, 555, 900])
Y = np.asarray([500.5, 500,600.5,   100,   8, 555, 999])

# Pair up the inputs: AB[i] is (A[i], B[i]) and XY[j] is (X[j], Y[j]).
AB = np.stack([A, B], axis=-1)
XY = np.stack([X, Y], axis=-1)

# casting the AB and XY arrays to npi.index first is not required, but a performance optimization; without this each call to npi.indices would have to re-index the arrays, which is the expensive part
AB = npi.as_index(AB)
XY = npi.as_index(XY)
# npi.indices(list, items) is a vectorized nd-equivalent of list.index(item)
# NOTE(review): missing='mask' presumably returns a masked array in which
# items that are not found are masked, so .compressed() keeps only the
# indices of the matches -- confirm against the numpy_indexed documentation.
indAB = npi.indices(AB, XY, missing='mask').compressed()
indXY = npi.indices(XY, AB, missing='mask').compressed()

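Note that this returns indices. If you are only interested in the rows that the two arrays have in common, you can use npi.intersection(XY, AB) instead; how duplicates should be treated depends on your use case.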

(Python) Find indices of values in two arrays that are equal to values in two other arrays

indAB = [0 2 4 5]

indXY = [0 1 4 6]

More articles:

(Python) Find indices of values in two arrays that are equal to values in two other arrays