Python: filter a 2D array using a data fragment

import numpy as np

data = np.array([
    [20,  0,  5,  1],
    [20,  0,  5,  1],
    [20,  0,  5,  0],
    [20,  1,  5,  0],
    [20,  1,  5,  0],
    [20,  2,  5,  1],
    [20,  3,  5,  0],
    [20,  3,  5,  0],
    [20,  3,  5,  1],
    [20,  4,  5,  0],
    [20,  4,  5,  0],
    [20,  4,  5,  0]
])

I have the 2D array above. Let's name the columns a, b, c, d in that order, where column b acts as an identifier (id). I want to delete every row whose group (all rows sharing the same value in column b, i.e. the same identifier) contains no "1" in column d, so after filtering I should get the following result:

[[20  0  5  1]
 [20  0  5  1]
 [20  0  5  0]
 [20  2  5  1]
 [20  3  5  0]
 [20  3  5  0]
 [20  3  5  1]]

All rows with b = 1 and b = 4 were deleted from the data: those groups ("id" = 1 and "id" = 4) contain no "1" in column d.

+4
5 answers

Here's one approach with np.unique and np.bincount -

# label each row with a compact group index based on its b value
unq, tags = np.unique(data[:,1], return_inverse=1)
# a group is "good" if at least one of its rows has d == 1
goodIDs = np.flatnonzero(np.bincount(tags, data[:,3]==1) >= 1)
# keep only the rows belonging to good groups
out = data[np.in1d(tags, goodIDs)]

Sample run -

In [15]: data
Out[15]: 
array([[20, 10,  5,  1],
       [20, 73,  5,  0],
       [20, 73,  5,  1],
       [20, 31,  5,  0],
       [20, 10,  5,  1],
       [20, 10,  5,  0],
       [20, 42,  5,  1],
       [20, 54,  5,  0],
       [20, 73,  5,  0],
       [20, 54,  5,  0],
       [20, 54,  5,  0],
       [20, 31,  5,  0]])

In [16]: out
Out[16]: 
array([[20, 10,  5,  1],
       [20, 73,  5,  0],
       [20, 73,  5,  1],
       [20, 10,  5,  1],
       [20, 10,  5,  0],
       [20, 42,  5,  1],
       [20, 73,  5,  0]])

Note that if the values in column b are always non-negative integers, we can skip np.unique and bin on the b values directly, like so:

# bin directly on the raw b values (requires them to be non-negative integers)
goodIDs = np.flatnonzero(np.bincount(data[:,1], data[:,3]==1) >= 1)
out = data[np.in1d(data[:,1], goodIDs)]

Sample run -

In [44]: data
Out[44]: 
array([[20,  0,  5,  1],
       [20,  0,  5,  1],
       [20,  0,  5,  0],
       [20,  1,  5,  0],
       [20,  1,  5,  0],
       [20,  2,  5,  1],
       [20,  3,  5,  0],
       [20,  3,  5,  0],
       [20,  3,  5,  1],
       [20,  4,  5,  0],
       [20,  4,  5,  0],
       [20,  4,  5,  0]])

In [45]: out
Out[45]: 
array([[20,  0,  5,  1],
       [20,  0,  5,  1],
       [20,  0,  5,  0],
       [20,  2,  5,  1],
       [20,  3,  5,  0],
       [20,  3,  5,  0],
       [20,  3,  5,  1]])

Also, since data[:,3] contains only 0s and 1s, we can use data[:,3] directly instead of data[:,3]==1 in the solutions above.
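
For example, the simplified variant then reduces to the following (a minimal sketch of the same call with the comparison dropped; it still assumes non-negative integer b values and a d column of 0s and 1s):

# sum the raw d values per b bin; a non-zero sum means the group has a 1
goodIDs = np.flatnonzero(np.bincount(data[:,1], data[:,3]) >= 1)
out = data[np.in1d(data[:,1], goodIDs)]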


Runtime test -

In [69]: def logical_or_based(data): #@ Eric soln
    ...:     b_vals = data[:,1]
    ...:     d_vals = data[:,3]
    ...:     is_ok = np.zeros(np.max(b_vals) + 1, dtype=np.bool_)
    ...:     np.logical_or.at(is_ok, b_vals, d_vals)
    ...:     return is_ok[b_vals]
    ...: 
    ...: def in1d_based(data):
    ...:     goodIDs = np.flatnonzero(np.bincount(data[:,1],data[:,3])!=0)
    ...:     out = np.in1d(data[:,1],goodIDs)
    ...:     return out
    ...: 

In [70]: # Setup input
    ...: data = np.random.randint(0,100,(10000,4))
    ...: data[:,1] = np.sort(np.random.randint(0,100,(10000)))
    ...: data[:,3] = np.random.randint(0,2,(10000))
    ...: 

In [71]: %timeit logical_or_based(data) #@ Eric soln
1000 loops, best of 3: 1.44 ms per loop

In [72]: %timeit in1d_based(data)
1000 loops, best of 3: 528 µs per loop
+3

Here's another way to do it:

import numpy as np

my_list = [[20,0,5,1],
    [20,0,5,1],
    [20,0,5,0],
    [20,1,5,0],
    [20,1,5,0],
    [20,2,5,1],
    [20,3,5,0],
    [20,3,5,0],
    [20,3,5,1],
    [20,4,5,0],
    [20,4,5,0],
    [20,4,5,0]]

all_ids = np.array(my_list)[:,1]
unique_ids = np.unique(all_ids)
indices = [np.where(all_ids==ui)[0][0] for ui in unique_ids ]

final = []
# assumes the rows are already grouped (sorted) by the b column
for i in range(len(unique_ids)):
    start = indices[i]
    end = indices[i + 1] if i + 1 < len(indices) else len(my_list)
    tmp_group = my_list[start:end]
    if 1 in np.array(tmp_group)[:, 3]:
        final.extend(tmp_group)

print(np.array(final))

Output:

[[20  0  5  1]
 [20  0  5  1]
 [20  0  5  0]
 [20  2  5  1]
 [20  3  5  0]
 [20  3  5  0]
 [20  3  5  1]]
+1

To remove every sublist whose second element (b) is equal to 1:

[sublist for sublist in list_ if sublist[1] != 1]

To remove sublists with b equal to 1 only when their d value is not 1:

[sublist for sublist in list_ if not (sublist[1] == 1 and sublist[3] != 1) ]
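
These comprehensions filter each row on its own values. To get the group-based filter from the question in the same style, first collect the b values that have at least one d == 1 and then keep only the rows from those groups; a sketch, assuming list_ holds the question's rows as a plain nested list:

# b values ("ids") whose group contains at least one row with d == 1
good_b = {row[1] for row in list_ if row[3] == 1}
# keep only the rows belonging to those groups
filtered = [row for row in list_ if row[1] in good_b]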
+1

Assuming that:

  • b >= 0
  • b is an integer
  • b is dense, i.e. max(b) ~= len(unique(b))

you can use np.ufunc.at:

# unpack for clarity - this costs nothing in numpy
b_vals = data[:,1]
d_vals = data[:,3]

# build an array indexed by b values
is_ok = np.zeros(np.max(b_vals) + 1, dtype=np.bool_)
np.logical_or.at(is_ok, b_vals, d_vals)
# is_ok == array([ True, False,  True,  True, False], dtype=bool)

# take the rows which have a b value that was deemed OK
result = data[is_ok[b_vals]]

Here, np.logical_or.at(is_ok, b_vals, d_vals) is roughly equivalent to:

for idx, val in zip(b_vals, d_vals):
    is_ok[idx] = np.logical_or(is_ok[idx], val)
+1

Grouping problems like this can also be solved compactly with the numpy_indexed package:

import numpy_indexed as npi
g = npi.group_by(data[:, 1])      # group the rows by column b
ids, valid = g.any(data[:, 3])    # per-group flag: does any row have a non-zero d?
result = data[valid[g.inverse]]   # broadcast the flag back to rows and filter
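
The key step is valid[g.inverse], which broadcasts the per-group boolean back onto the individual rows. Roughly the same pattern can be written in plain NumPy (a sketch of the idea, essentially what the np.unique/np.bincount answer above does):

# per-group "contains a 1" flag, broadcast back to rows via the inverse index
uniq_b, inverse = np.unique(data[:, 1], return_inverse=True)
group_has_one = np.bincount(inverse, weights=data[:, 3]) > 0
result = data[group_has_one[inverse]]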
+1

Source: https://habr.com/ru/post/1658210/

