Intersection of two arrays, maintaining order in a larger array

I have a numpy array `a` of length `n` that contains the numbers from 0 to n-1, shuffled in some way. I also have a numpy array `mask` of length <= n, containing some subset of the elements of `a`, in a different order.

The query I want to compute is: "give me the elements of `a` that are also in `mask`, in the order in which they appear in `a`."

I asked a similar question here, but the difference was that `mask` was a Boolean mask, not a mask of individual elements.

I have outlined and tested 4 methods below:

import timeit
import numpy as np
import matplotlib.pyplot as plt

# Number of executions per timing measurement (passed to timeit as `number`).
n_test = 100
# Number of evenly spaced coverage values sampled between 0 and 1.
n_coverages = 10

# Seed NumPy's global RNG so the shuffle and the random masks are reproducible.
np.random.seed(0)


def method1():
    """Naive membership test against the raw ``mask`` array.

    Reads the module-level arrays ``a`` and ``mask`` and returns a new array
    holding the elements of ``a`` that occur in ``mask``, in ``a``'s order.
    """
    kept = []
    for value in a:
        # ``in`` on a NumPy array compares ``value`` against every entry.
        if value in mask:
            kept.append(value)
    return np.array(kept)


def method2():
    """Membership test through a Python ``set`` built from ``mask``.

    Reads the module-level arrays ``a`` and ``mask`` and returns a new array
    holding the elements of ``a`` that occur in ``mask``, in ``a``'s order.
    """
    members = set(mask)
    # ``members.__contains__`` is exactly what ``x in members`` invokes.
    kept = list(filter(members.__contains__, a))
    return np.array(kept)


def method3():
    """Vectorized membership test using NumPy's set routines.

    Reads the module-level arrays ``a`` and ``mask`` and returns the elements
    of ``a`` that occur in ``mask``, in ``a``'s order.

    ``np.isin`` replaces ``np.in1d``, which is deprecated since NumPy 2.0;
    for 1-D inputs the two return the same boolean array.
    """
    # assume_unique=True: neither array contains duplicates, which lets NumPy
    # skip its de-duplication step.
    keep = np.isin(a, mask, assume_unique=True)
    return a[keep]


def method4():
    """Membership test through a boolean lookup table indexed by value.

    Reads the module-level ``a``, ``mask`` and ``n_samples``. The entries of
    ``mask`` and ``a`` are used directly as indices into a table of length
    ``n_samples``. Returns the elements of ``a`` whose table entry is set,
    in ``a``'s order.
    """
    is_selected = np.zeros(n_samples, dtype=bool)
    # Flag every value that appears in mask.
    is_selected[mask] = True
    # Look each element of a up in the table, then filter a with the result.
    keep = is_selected[a]
    return a[keep]


# Each entry pairs a legend label with the zero-argument callable to time.
methods = [
    ('naive membership', method1),
    ('python set', method2),
    ('in1d', method3),
    ('binary mask', method4),
]

# Fractions of `a` that the mask covers, from an empty mask (0) to all of a (1).
p_space = np.linspace(0, 1, n_coverages)
for n_samples in [1000]:
    # A shuffled permutation of 0..n_samples-1; the methods read it as a global.
    a = np.arange(n_samples)
    np.random.shuffle(a)

    for label, method in methods:
        # The naive method is skipped at the larger problem size.
        if n_samples == 10000 and method is method1:
            continue
        times = []
        for coverage in p_space:
            mask_size = int(n_samples * coverage)
            # `mask` is also read as a global by the timed method.
            mask = np.random.choice(a, size=mask_size, replace=False)
            # timeit returns total seconds for n_test runs; store milliseconds.
            times.append(timeit.timeit(method, number=n_test) * 1e3)
        plt.plot(p_space, times, label=label)

    plt.xlabel(r'Coverage ($\frac{|\mathrm{mask}|}{|\mathrm{a}|}$)')
    plt.ylabel('Time (ms)')
    plt.title('Comparison of 1-D Intersection Methods for $n = {}$ samples'.format(n_samples))
    plt.legend()
    plt.show()

This produced the following results:

enter image description here

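So the binary mask is, without a doubt, the fastest of these 4 methods for any mask size.

My question is: is there a faster way?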

+4
2

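So the binary mask is, without a doubt, the fastest of these 4 methods for any mask size.

My question is: is there a faster way?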

, . , , , .

:

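  • Naive membership - T = O(|a| * |mask|). Every element of a is tested for membership in mask. Each test scans the array, so it costs O(|mask|), and the total is the product of the two sizes.
    |mask| = coverage * |a|
    T = O(|a|^2 * coverage)
    So the time grows linearly with coverage for a fixed |a|. Since |mask| ≤ |a| = n, T = O(n^2)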

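  • Python set. A set is a container whose insert/lookup cost is O(log(n)), where n is the number of elements in the set. s = set(mask) takes O(|mask| * log(|mask|)), because it performs |mask| insertions.

    x in s is a lookup. It is performed once per element of a, so the comprehension costs O(|a| * log(|mask|))

    The total is O(|mask| * log(|mask|) + |a| * log(|mask|)). Since |mask| ≤ |a| = n, T = O(n * log(n)). As a function of coverage, the plot therefore looks like f(x) = log(x).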

  • in1d also works in O(|mask| * log(|mask|) + |a| * log(|mask|)). Again T = O(n * log(n)), and the plot looks like f(x) = log(x).

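  • Binary mask - O(|a| + |mask|), so T = O(n), i.e. linear. Filling the lookup table touches each element of mask once, and indexing it touches each element of a once.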

In short, for an input of size n the binary mask achieves T = O(n). That is why it is the fastest of the 4 methods.

P.S. To plot f(n), vary |a| while keeping |mask| = 0.9 * |a|.

EDIT: As it turns out, Python sets perform lookup/insertion in O(1), using a hash table.

+2

, a .

def with_searchsorted(a, b):
    """Return the elements of ``a`` that also occur in ``b``, in ``a``'s order.

    Parameters
    ----------
    a : array_like, 1-D
        Values to filter. Unlike the earlier version, ``a`` does not have to
        be a permutation of ``0..len(a)-1``.
    b : array_like, 1-D
        Values to keep. May be empty.

    Returns
    -------
    numpy.ndarray
        A new array with the entries of ``a`` that are present in ``b``.
    """
    a = np.asarray(a)
    b = np.asarray(b)

    # With nothing to match against, the modulo below would divide by zero
    # and the lookup would index an empty array; the answer is simply empty.
    if b.size == 0:
        return a[np.zeros(a.shape, dtype=bool)]

    # searchsorted needs a sorted array to search in.
    bs = np.sort(b)

    # Leftmost insertion point of every element of a within bs.
    pos = bs.searchsorted(a)
    # Values larger than everything in b get index b.size; wrap those to 0.
    # bs[0] is the minimum of b, so it can never equal such a value.
    pos %= b.size

    # An element of a is in b exactly when the value at its insertion point
    # is the element itself.
    return a[bs[pos] == a]

# Build a shuffled permutation of 0..9 and draw 5 distinct elements from it.
a = np.arange(10)
np.random.shuffle(a)
b = np.random.choice(a, 5, False)  # third positional argument is `replace`

# Show both inputs before intersecting them.
print(a)
print(b)

[7 2 9 3 0 4 8 5 6 1]
[0 8 5 4 6]

# Print the elements of a that also occur in b, in a's order.
print(with_searchsorted(a, b))

[0 4 8 5 6]

# Sort b: searchsorted needs a sorted array to search in.
# (sb is the sorting permutation; only the sorted values bs are used below.)
sb = b.argsort()
bs = b[sb]

# Sort a so that the lookups into bs are made in increasing order.
sa = a.argsort()
# This stands in for the sorted a. It is only valid because a is known to be
# a permutation of 0..len(a)-1, so sorting it must give exactly this range.
ia = np.arange(len(a))

# Build the inverse of the sort permutation: ra[i] is the position of a[i]
# within the sorted order.
ra = np.empty_like(sa)
ra[sa] = ia

# Find where each sorted value would be inserted in bs. Values larger than
# everything in b get index b.size, so the modulo wraps them back to 0 to
# keep bs[ac] in bounds.
# NOTE(review): an empty b makes b.size zero here - confirm callers never
# pass one.
ac = bs.searchsorted(ia) % b.size

# bs[ac] == ia flags the values present in b (in sorted order); [ra] puts the
# flags back into a's original order before filtering a.
return a[(bs[ac] == ia)[ra]]
0

Source: https://habr.com/ru/post/1673070/


All Articles