Find groups of values that are != 0 in a list

I am looking for an easy way to find "plateau" or groups in python lists. As input, I have something like this:

mydata = [0.0, 0.0, 0.0, 0.0, 0.0, 0.143, 0.0, 0.22, 0.135, 0.44, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.65, 0.22, 0.0, 0.0, 0.0, 0.0, 0.0]

I want to extract the middle position of each "group". A group is defined in this case as a run of data that is != 0 and spans, for example, at least 3 positions. Enclosed single zeros (e.g. at position 6) should be ignored.

Basically I want to get the following output:

myoutput = [8, 20]

In my use case, it is not very important to get a very accurate output; [10, 21] would be just as good.

To summarize: the first group is [0.143, 0.0, 0.22, 0.135, 0.44, 0.1]; the second group is [0.33, 0.65, 0.22]. I need the position of the middle element of each group (either to the left or to the right of the middle if there is no true middle element). Therefore, the output 8 would be the middle of the first group, and 20 the middle of the second group.

I have already tried some approaches, but they are not as stable as I would like them to be (for example, enclosed zeros can cause problems). Therefore, before investing more time in this idea, I wanted to ask whether there is a better way to implement this function. I suspect this is a common problem; maybe there is already some standard code that solves it?

There are other questions that describe roughly the same problem, but I also need to “smooth out” the data before processing.

1.) Smooth the data with a box filter so that enclosed zeros disappear:

import numpy as np
def smooth(y, box_pts):
    """Return ``y`` convolved with a uniform box kernel of ``box_pts`` points.

    Every kernel weight is ``1 / box_pts``, so each output sample is the
    average of the input over the window. The output length follows
    ``np.convolve(..., mode='same')``.
    """
    kernel = np.ones(box_pts) / box_pts
    return np.convolve(y, kernel, mode='same')

# Smooth the input list with a 20-point box (moving-average) kernel.
y_smooth = smooth(mydata, 20)

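2.) Detect the start points (the current value is != 0 and the value before it is 0, so remember the position). Detect the end points in the same way: when a group is long enough, store its middle position in a deque.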

from collections import deque  # needed for myoutput below

laststart = 0
lastend = 0
myoutput = deque()

# Walk the interior samples; each one is compared with its left and right
# neighbour, which is why the first and last index are skipped.
for i in range(1, len(y_smooth) - 1):
    # detect start: a non-zero sample directly after a zero
    if y_smooth[i] != 0 and y_smooth[i - 1] == 0:
        laststart = i
    # detect end: a non-zero sample directly before a zero, more than two
    # positions after the remembered start (shorter runs are dropped)
    elif y_smooth[i] != 0 and y_smooth[i + 1] == 0 and laststart + 2 < i:
        lastend = i
        # Integer division keeps the midpoint usable as a list index.
        # appendleft means the most recently found group comes first.
        myoutput.appendleft(laststart + (lastend - laststart) // 2)

EDIT: , . - , . ;

+4
4

, , , 0 , , 1 -1 .

:

import numpy as np

mydata = [0.0, 0.0, 0.0, 0.0, 0.0, 0.143, 0.0, 0.22, 0.135, 0.44, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.65, 0.22, 0.0, 0.0, 0.0, 0.0, 0.0]
arr = np.array(mydata)

# 1 for every non-zero value, 0 otherwise. The builtin int is used as dtype
# because the np.int alias no longer exists in current NumPy releases.
mask = (arr != 0).astype(int)
# Add a zero at the start and at the end so that groups touching either end
# of the data still produce both a start and a stop.
padded_mask = np.pad(mask, (1,), "constant")
# Difference between each padded value and the one before it:
# +1 marks a group start, -1 marks a group stop.
edge_mask = padded_mask[1:] - padded_mask[:-1]

# np.where gives the indices of those starts and stops. A stop is the index
# of the first zero after the group, i.e. it is exclusive.
starts = np.where(edge_mask == 1)[0]
stops = np.where(edge_mask == -1)[0]
print(starts, stops)

# Pair every start with its stop and drop groups that are too small.
# tolist() yields plain Python ints so the tuples print as (7, 11).
groups = [
    (start, stop)
    for start, stop in zip(starts.tolist(), stops.tolist())
    if start + 2 < stop
]

for group in groups:
    print("start,stop : {}  middle : {}".format(group, (group[0] + group[1]) / 2))

Output:

[ 5  7 19] [ 6 11 22]
start,stop : (7, 11)  middle : 9.0
start,stop : (19, 22)  middle : 20.5
+2

:

import numpy as np

def smooth(y, box_pts):
    """Box-filter ``y`` with ``box_pts`` equal weights, printing the kernel.

    The kernel (each weight ``1 / box_pts``) is printed before the
    convolution so its values can be inspected. The result comes from
    ``np.convolve(..., mode='same')``.
    """
    kernel = np.ones(box_pts) / box_pts
    print(kernel)
    return np.convolve(y, kernel, mode='same')

# Same 27-sample series as in the question, except that index 4 holds -0.2.
mydata = [0.0, 0.0, 0.0, 0.0,-0.2, 0.143, 
          0.0, 0.22, 0.135, 0.44, 0.1, 0.0, 
          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
          0.33, 0.65, 0.22, 0.0, 0.0, 0.0, 
          0.0, 0.0]

# The window is as wide as the whole series (27 points), so every smoothed
# sample averages over a large part of the data.
y_smooth = smooth(mydata, 27)
print(y_smooth)

Output:

[ 0.0469   0.0519   0.0519   0.0519   0.0519   0.0519   
  0.0519   0.0519  0.0519   0.0519   0.0684   0.1009   
  0.1119   0.1119   0.1119   0.1119  0.10475  0.10475  
  0.09375  0.087    0.065    0.06     0.06     0.06     
  0.06   0.06     0.06   ]

:

def findGroups(data, minGrpSize=1):
    """Locate runs of non-zero values in ``data``.

    A run opens at the first value greater than zero and closes at the first
    zero that is not immediately followed by a non-zero value, so a single
    zero enclosed by non-zero values does not split a run. Values that are
    neither positive nor zero (e.g. negatives) never open a run, but they do
    not close one either.

    Args:
        data: Indexable sequence of numbers to scan.
        minGrpSize: Minimum ``end - start`` span a run needs to be reported.

    Returns:
        List of ``(start, end)`` tuples. ``start`` is the index of the first
        value of the run; ``end`` is the index just past the run (the closing
        zero, or ``len(data)`` when the run reaches the end of the data).
    """
    pospos = []
    startpos = -1
    last_idx = len(data) - 1

    for idx, v in enumerate(data):
        if startpos == -1:
            # Not inside a run: only a positive value can open one.
            if v > 0:
                startpos = idx
            continue

        if v == 0.0:
            if idx < last_idx and data[idx + 1] != 0.0:
                continue  # ignore one 0.0 in a run
            # This zero closes the run; report it when it is long enough.
            if (idx - startpos) >= minGrpSize:
                pospos.append((startpos, idx))
            startpos = -1

    # A run that is still open here extends to the last element, so no
    # closing zero was ever seen; close it at len(data).
    if startpos != -1 and (len(data) - startpos) >= minGrpSize:
        pospos.append((startpos, len(data)))

    return pospos

# For each minimum group size, print the integer midpoint of every
# (start, end) pair that findGroups reports, all on one line.
for min_size in (1, 3, 5):
    pos = findGroups(mydata, min_size)
    print(*(sum(bounds) // len(bounds) for bounds in pos))

Output:

8 20
8 20
8 
+2

Part 2 - find the midpoint of the group:

mydata = [0.0, 0.0, 0.0, 0.0, 0.0, 0.143, 0.0, 0.22, 0.135, 0.44, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
          0.0, 0.0, 0.33, 0.65, 0.22, 0.0, 0.0, 0.0, 0.0, 0.0]


def group_midpoints(data, min_size=3):
    """Return the middle index of every run of consecutive non-zero values.

    A run is a maximal stretch of truthy (non-zero) values; any zero ends it.
    Runs with fewer than ``min_size`` values are skipped. For a run with an
    even number of values the index left of the centre is returned.

    Args:
        data: Sequence of numbers to scan.
        min_size: Minimum number of values a run must contain.

    Returns:
        List of integer midpoint indices, in order of appearance.
    """
    midpoints = []
    run_start = None
    # The appended zero acts as a sentinel that closes a run reaching the
    # end of the data, so runs touching either edge are handled like others.
    for i, value in enumerate(list(data) + [0]):
        if value:
            if run_start is None:
                run_start = i
        elif run_start is not None:
            run_end = i - 1  # last non-zero index of the run
            if run_end - run_start + 1 >= min_size:
                midpoints.append(run_start + (run_end - run_start) // 2)
            run_start = None
    return midpoints


groups = group_midpoints(mydata)

print(groups)

Output:

[8, 20]
+1
source

The full numpy solution would be something like this: (not fully optimized)

import numpy as np

input_data = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.143,
                       0.0, 0.22, 0.135, 0.44, 0.1, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.33, 0.65, 0.22, 0.0, 0.0, 0.0,
                       0.0, 0.0])

# Find transitions between zero and nonzero.
non_zeros = input_data > 0
# changes[k] is True where sample k differs from sample k-1. The extra first
# and last entries mark the array edges as boundaries when the data begins
# or ends with a zero, so every run of zeros gets a start and an end.
# The comparison uses != because NumPy does not support subtracting
# boolean arrays.
changes = np.concatenate((
    [not non_zeros[0]],
    non_zeros[1:] != non_zeros[:-1],
    [not non_zeros[-1]],
))
change_idxs = np.nonzero(changes)[0]

# Filter out small holes: consecutive boundaries pair up as [start, end) of
# each run of zeros; holes of a single zero are dropped.
holes = change_idxs.reshape(change_idxs.size // 2, 2)
hole_sizes = holes[:, 1] - holes[:, 0]
big_holes = holes[hole_sizes > 1]

# The spans between the remaining holes (plus both array edges) are the
# candidate groups.
kept_change_idxs = np.r_[0, big_holes.flatten(), input_data.size]

# Get midpoints of big intervals (at least 3 positions wide).
intervals = kept_change_idxs.reshape(kept_change_idxs.size // 2, 2)
big_intervals = intervals[intervals[:, 1] - intervals[:, 0] >= 3]
print((big_intervals[:, 0] + big_intervals[:, 1]) // 2)
+1
source

Source: https://habr.com/ru/post/1693357/


All Articles