Is a vectorized min and max over slices possible?

Suppose I have a NumPy array of integers.

arr = np.random.randint(0, 1000, 1000)

And I have two arrays, lowers and uppers, which represent the lower and upper bounds, respectively, of slices of arr. These intervals are overlapping and of variable length; however, lowers and uppers are both guaranteed to be nondecreasing.

lowers = np.array([0, 5, 132, 358, 566, 822])
uppers = np.array([45, 93, 189, 533, 800, 923])

I want to find the min and max of each slice of arr defined by lowers and uppers, and store them in another array.

out_arr = np.empty((lowers.size, 2))

What is the most efficient way to do this? I am worried that there is no vectorized approach, since I do not see how to get around indexing in a loop.


My current approach is just

# Baseline: visit each interval, slice it out of arr, and record the
# minimum (column 0) and maximum (column 1) of that slice.
for row in range(lowers.size):
    window = arr[lowers[row]:uppers[row]]
    out_arr[row] = window.min(), window.max()

which leaves me with the desired result, for example,

In [304]: out_arr
Out[304]: 

array([[  26.,  908.],
       [  18.,  993.],
       [   0.,  968.],
       [   3.,  999.],
       [   1.,  998.],
       [   0.,  994.]])

.

+6
3

One option is to use np.minimum.reduceat:

# Pool every interval boundary into one array: all lowers first, then all uppers.
lu = np.r_[lowers, uppers]
# so: permutation that sorts the pooled boundaries.
so = np.argsort(lu)
# iso: inverse of that permutation, i.e. iso[k] is the sorted position of lu[k].
iso = np.empty_like(so)
iso[so] = np.arange(len(so))
# Entries [0, cut) of lu/iso come from lowers, entries [cut, 2*cut) from uppers.
cut = len(lowers)
# Minimum of arr over each stretch between consecutive sorted boundaries
# (the final entry runs from the last boundary to the end of arr).
# NOTE(review): where two consecutive boundaries are equal, reduceat yields
# arr[boundary] instead of an empty reduction, so repeated boundary values
# may pull an out-of-interval element into the result -- confirm for your data.
lmin = np.minimum.reduceat(arr, lu[so])
# Cross-check: for interval i, combine the per-stretch minima lying between its
# lower and upper boundary and print that next to the minimum of the direct slice.
for i in range(cut):
    print(min(lmin[iso[i]:iso[cut+i]]), min(arr[lowers[i]:uppers[i]]))

# 33 33
# 7 7
# 5 5
# 0 0
# 3 3
# 7 7

, , - , 1000 12 .

Update:

@Eric Hansen . , , . numba, , Eric pure numpy, onepass:

import numpy as np
from timeit import timeit

def twopass(lowers, uppers, arr):
    """Return the minimum of ``arr[lowers[i]:uppers[i]]`` for every interval i.

    Works in two ``reduceat`` passes: first the minimum of every elementary
    stretch between consecutive *distinct* boundaries is computed, then the
    stretch minima covered by each interval are combined.  The total work is
    therefore not proportional to the summed interval lengths, which pays off
    when the intervals overlap heavily.

    Parameters
    ----------
    lowers, uppers : 1-D integer arrays of equal length
        Inclusive start / exclusive stop of each interval.  Upper bounds
        greater than or equal to ``len(arr)`` are treated like Python slicing
        treats them (clipped to ``len(arr)``).  Every interval must be
        non-empty (``lowers[i] < uppers[i]``); results for empty intervals
        are unspecified.
    arr : 1-D array
        Data to reduce.

    Returns
    -------
    ndarray
        One minimum per interval, in the order of ``lowers``.
    """
    cut = len(lowers)
    if cut == 0:
        return np.asarray(arr)[:0]

    n = len(arr)
    # Clip so that at most one distinct boundary equals n (slice semantics).
    lu = np.minimum(np.r_[lowers, uppers], n)

    # Deduplicate the boundaries.  Repeated boundary values would create
    # zero-length stretches, for which reduceat returns arr[boundary] -- an
    # element that lies *outside* any interval ending at that boundary.
    bounds, rank = np.unique(lu, return_inverse=True)

    # A boundary equal to n is not a valid reduceat index; the stretch that
    # starts at the previous boundary already runs to the end of arr.
    ends_at_n = bounds[-1] == n
    starts = bounds[:-1] if ends_at_n else bounds
    seg_min = np.minimum.reduceat(arr, starts)
    if ends_at_n:
        # Pad so that rank values stay valid indices into seg_min; the padded
        # slot is only ever used as an exclusive stop, never reduced over.
        seg_min = np.append(seg_min, seg_min[-1])

    # Interleave (rank of lower, rank of upper) pairs; the even-numbered
    # reduceat outputs are then the minima over each interval's stretches.
    pairs = np.column_stack((rank[:cut], rank[cut:])).ravel()
    return np.minimum.reduceat(seg_min, pairs)[::2]

def onepass(lowers, uppers, arr):
    """Return the minimum of ``arr[lowers[i]:uppers[i]]`` for every interval i.

    Uses a single ``np.minimum.reduceat`` call over the interleaved sequence
    ``lowers[0], uppers[0], lowers[1], uppers[1], ...`` and keeps every other
    output.  Each interval is reduced independently, so the work grows with
    the summed interval lengths.

    Parameters
    ----------
    lowers, uppers : 1-D integer arrays of equal length
        Inclusive start / exclusive stop of each interval.  Upper bounds
        greater than or equal to ``len(arr)`` are accepted and mean "up to the
        end of arr", as in Python slicing.  Every interval must be non-empty.
    arr : array
        Data to reduce along its first axis.

    Returns
    -------
    ndarray
        One minimum per interval, in the order of ``lowers``.
    """
    n = len(arr)
    mixture = np.empty((lowers.size*2,), dtype=lowers.dtype)
    mixture[::2] = lowers
    # reduceat rejects an index equal to len(arr); stop one element early and
    # fold the final element back in below for the affected intervals.
    mixture[1::2] = np.minimum(uppers, n - 1)
    out = np.minimum.reduceat(arr, mixture)[::2]

    touches_end = uppers >= n
    if touches_end.any():
        out[touches_end] = np.minimum(out[touches_end], arr[-1])
    return out

# ---- Small case: six fixed intervals over a 1000-element array ----
arr = np.random.randint(0, 1000, 1000)
lowers = np.array([0, 5, 132, 358, 566, 822])
uppers = np.array([45, 93, 189, 533, 800, 923])

print('small')
for f in (twopass, onepass):
    # timeit reports the total seconds for 10 calls; multiplying by 100
    # turns that into milliseconds per call.
    elapsed = timeit(lambda: f(lowers, uppers, arr), number=10)
    print('{:18s} {:9.6f} ms'.format(f.__name__, elapsed*100))

# ---- Large case: 10**4 random intervals over a 10**6-element array ----
arr = np.random.randint(0, 1000, 1_000_000)
lowers = np.random.randint(0, 800_000, 10_000)
uppers = np.random.randint(200_000, 1_000_000, 10_000)
# Wherever a drawn lower bound exceeds its upper bound, exchange the two.
swap = lowers > uppers
lowers[swap], uppers[swap] = uppers[swap], lowers[swap]


print('large')
for f in (twopass, onepass):
    elapsed = timeit(lambda: f(lowers, uppers, arr), number=10)
    print('{:18s} {:10.4f} ms'.format(f.__name__, elapsed*100))

Sample output:

small
twopass             0.030880 ms
onepass             0.005723 ms
large
twopass               74.4962 ms
onepass             3153.1575 ms
+3

, Paul Panzer reduceat,

# Interleave the bounds as lowers[0], uppers[0], lowers[1], uppers[1], ...
mixture = np.empty((lowers.size*2,), dtype=lowers.dtype)
mixture[::2] = lowers
mixture[1::2] = uppers

# Every other reduceat output covers one [lower, upper) interval; stack the
# per-interval minima and maxima side by side as two columns.
np.column_stack([ufunc.reduceat(arr, mixture)[::2]
                 for ufunc in (np.minimum, np.maximum)])

, , 4.22 , 73 .

Alternatively, with Numba:

from numba import jit

@jit
def get_res():
    """Return a (lowers.size, 2) array of per-interval minima and maxima.

    Row k holds the minimum (column 0) and maximum (column 1) of
    ``arr[lowers[k]:uppers[k]]``.  The function takes no arguments and reads
    ``arr``, ``lowers`` and ``uppers`` from the enclosing module scope.

    NOTE(review): Numba is documented to treat module-level globals as
    compile-time constants, so later changes to those arrays may not be
    picked up after the first call -- confirm before reusing this.
    """
    n_slices = lowers.size
    result = np.empty((n_slices, 2))
    for k in range(n_slices):
        window = arr[lowers[k]:uppers[k]]
        result[k, 0] = window.min()
        result[k, 1] = window.max()
    return result

100 .

+1

Execution will be slow because, inside the loop, each sub-array is copied into a temporary array before the operation is performed. You can avoid writing the explicit loop with a single line of code:

# Build one (min, max) pair per interval, slicing arr once per interval.
out_array = np.array([
    (window.min(), window.max())
    for window in (arr[lowers[i]:uppers[i]] for i in range(lowers.size))
])
-one
source

Source: https://habr.com/ru/post/1016451/


All Articles