Replace values in a NumPy array based on a dictionary and avoid overlap between new values and keys

I want to replace the values in a 2D numpy array based on the following dictionary in Python:

code    region
334     0
4       22
8       31
12      16
16      17
24      27
28      18
32      21
36       1

I want to find the cells in the 2D numpy array that match a code and replace them with the corresponding value from the region column. The problem is that this will replace code = 12 with region = 16, and then, on the next line, all cells with a value of 16 (including those that only just received the value 16) will be replaced with 17. How can I prevent this?

+2
source share
2 answers

Here is one approach based on np.searchsorted (assuming every value in the array is a key of the dictionary) -

def replace_with_dict(ar, dic):
    """Map every element of ``ar`` through ``dic`` in one vectorised pass.

    The lookup runs once against the original contents of ``ar``, so a
    replacement value that equals another key is not replaced again.

    No membership check is performed: elements of ``ar`` that are not keys
    of ``dic`` are not detected (use a variant with a presence check for
    such input).

    Parameters
    ----------
    ar : array_like
        Values to translate.
    dic : dict
        Mapping from old value (key) to new value.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``ar`` holding the mapped values.
    """
    keys = np.array(list(dic.keys()))
    values = np.array(list(dic.values()))

    # The keys are left unsorted; ``order`` tells searchsorted how to
    # traverse them in ascending order.
    order = keys.argsort()

    # Positions of each element of ``ar`` within the sorted view of the keys.
    sorted_positions = np.searchsorted(keys, ar, sorter=order)

    # Convert sorted-view positions back to positions in the unsorted key
    # array, which line up one-to-one with ``values``.
    key_positions = order[sorted_positions]
    return values[key_positions]

Sample run -

In [82]: dic ={334:0, 4:22, 8:31, 12:16, 16:17, 24:27, 28:18, 32:21, 36:1}
    ...: 
    ...: np.random.seed(0)
    ...: a = np.random.choice(dic.keys(), 20)
    ...: 

In [83]: a
Out[83]: 
array([ 28,  16,  32,  32, 334,  32,  28,   4,   8, 334,  12,  36,  36,
        24,  12, 334, 334,  36,  24,  28])

In [84]: replace_with_dict(a, dic)
Out[84]: 
array([18, 17, 21, 21,  0, 21, 18, 22, 31,  0, 16,  1,  1, 27, 16,  0,  0,
        1, 27, 18])

We can optimize further by sorting the keys and values up front, so that searchsorted no longer needs the sorter argument:

def replace_with_dict2(ar, dic):
    """Map every element of ``ar`` through ``dic`` using pre-sorted keys.

    Keys and values are reordered together so the keys are ascending; a
    plain ``searchsorted`` then yields indices that can be used directly on
    the reordered values.

    No membership check is performed: elements of ``ar`` that are not keys
    of ``dic`` are not detected.

    Parameters
    ----------
    ar : array_like
        Values to translate.
    dic : dict
        Mapping from old value (key) to new value.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``ar`` holding the mapped values.
    """
    keys = np.array(list(dic.keys()))
    values = np.array(list(dic.values()))

    # One permutation sorts the keys and keeps each value next to its key.
    order = np.argsort(keys)
    sorted_keys, sorted_values = keys[order], values[order]

    return sorted_values[np.searchsorted(sorted_keys, ar)]

Sample run and timings -

In [91]: dic ={334:0, 4:22, 8:31, 12:16, 16:17, 24:27, 28:18, 32:21, 36:1}
    ...: 
    ...: np.random.seed(0)
    ...: a = np.random.choice(dic.keys(), 20000)

In [92]: out1 = replace_with_dict(a, dic)
    ...: out2 = replace_with_dict2(a, dic)
    ...: print np.allclose(out1, out2)
True

In [93]: %timeit replace_with_dict(a, dic)
1000 loops, best of 3: 453 µs per loop

In [95]: %timeit replace_with_dict2(a, dic)
1000 loops, best of 3: 341 µs per loop

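General case

If not every value in the array is guaranteed to be a key of the dictionary, a little more work is needed, as shown below -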

def replace_with_dict2_generic(ar, dic, assume_all_present=True):
    """Map elements of ``ar`` through ``dic``, optionally tolerating misses.

    The lookup runs once against the original contents of ``ar``, so a
    replacement value that equals another key is not replaced again.

    Parameters
    ----------
    ar : array_like
        Values to translate. Scalars and nested sequences are accepted.
    dic : dict
        Mapping from old value (key) to new value.
    assume_all_present : bool, optional
        When true (the default) no membership check is made, which is
        faster but only meaningful if every element of ``ar`` is a key of
        ``dic``. When false, elements that are not keys are passed through
        unchanged.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``ar`` holding the mapped values (and, when
        ``assume_all_present`` is false, the untouched unmatched values).
    """
    # Extract out keys and values
    k = np.array(list(dic.keys()))
    v = np.array(list(dic.values()))

    # Sort keys and values together so searchsorted can be used directly.
    sidx = k.argsort()
    ks = k[sidx]
    vs = v[sidx]

    if assume_all_present:
        return vs[np.searchsorted(ks, ar)]

    ar = np.asarray(ar)
    if ks.size == 0:
        # Nothing can match an empty mapping; hand back the input values.
        return ar.copy()

    # searchsorted returns len(ks) for elements larger than every key.
    # Clamp those to the last valid index; the equality test below then
    # rejects them because the largest key is still smaller than the
    # element. np.minimum (rather than in-place assignment) also works when
    # searchsorted returns a scalar.
    idx = np.minimum(np.searchsorted(ks, ar), ks.size - 1)

    # Only positions whose located key really equals the element are hits.
    mask = ks[idx] == ar
    return np.where(mask, vs[idx], ar)

Sample run -

In [163]: dic ={334:0, 4:22, 8:31, 12:16, 16:17, 24:27, 28:18, 32:21, 36:1}
     ...: 
     ...: np.random.seed(0)
     ...: a = np.random.choice(dic.keys(), (20))
     ...: a[-1] = 400

In [165]: a
Out[165]: 
array([ 28,  16,  32,  32, 334,  32,  28,   4,   8, 334,  12,  36,  36,
        24,  12, 334, 334,  36,  24, 400])

In [166]: replace_with_dict2_generic(a, dic, assume_all_present=False)
Out[166]: 
array([ 18,  17,  21,  21,   0,  21,  18,  22,  31,   0,  16,   1,   1,
        27,  16,   0,   0,   1,  27, 400])
+3

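Alternatively, a simple loop-based approach: first collect the indices of every code in the original array, and only then assign the replacement values, so that a newly written value is never matched against a later code.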

# Demo: replace each value listed in `code` with the matching entry of
# `region`, without a fresh replacement being picked up by a later code.
arr = np.array([1, 2, 3, 1, 2, 3])
code = np.array([1, 2])
region = np.array([2, 3])

# Record where every code occurs *before* anything is overwritten.
index_list = [np.flatnonzero(arr == val) for val in code]

# Now write the replacements at the positions recorded above.
for indexes, replace_val in zip(index_list, region):
    arr[indexes] = replace_val
0

Source: https://habr.com/ru/post/1692370/


All Articles