Finding contiguous, non-unique runs in a pandas Series without iterating

I am trying to parse the log file of our production process. Most of the time the process runs automatically, but occasionally an engineer needs to switch to manual mode to make some changes and then switch back to automatic control by the reactor software. While the process is in manual mode, the log file records the step as "MAN.OP." instead of a number. The following is a representative example.

# Representative step column: numeric recipe steps with "MAN.OP." marking manual mode.
steps = [
    1, 2, 2,
    'MAN.OP.', 'MAN.OP.',
    2, 2, 3, 3,
    'MAN.OP.', 'MAN.OP.',
    4, 4,
]
ser_orig = pd.Series(data=steps)

that leads to

0           1
1           2
2           2
3     MAN.OP.
4     MAN.OP.
5           2
6           2
7           3
8           3
9     MAN.OP.
10    MAN.OP.
11          4
12          4
dtype: object

I need to find the "MAN.OP." runs and make them distinct from each other. In this example, the two regions with value == 2 should still be the same region (step 2) once the manual-mode sections have been relabelled, as follows:

0                 1
1                 2
2                 2
3     Manual_Mode_0
4     Manual_Mode_0
5                 2
6                 2
7                 3
8                 3
9     Manual_Mode_1
10    Manual_Mode_1
11                4
12                4
dtype: object

My current implementation does this by iterating over the series in a property setter:

@step_series.setter
def step_series(self, ss):
    """
    Relabel manual-mode entries when the series is assigned.

    Every contiguous run of "MAN.OP." values in ``ss`` is rewritten in
    place to "Manual_Mode_<n>", where <n> starts at 0 and goes up by one
    for each run. All other steps are left untouched. The (mutated)
    series is then stored on ``self._step_series``.
    """
    marker = "MAN.OP."
    label_template = "Manual_Mode_{}"
    run_number = 0
    inside_run = False
    for label in ss.index:
        if ss.at[label] == marker:
            # First or subsequent row of a manual run: same label either way.
            inside_run = True
            ss.at[label] = label_template.format(str(run_number))
        elif inside_run:
            # First non-manual row after a run: close it and move to the next id.
            inside_run = False
            run_number += 1

    self._step_series = ss

This works, but it iterates over every element of the series.

Is there a way to do this without iteration?

:

@step_series.setter
def step_series(self, ss):
    """
    On assignment, give each contiguous run of manual mode steps a
    unique name ("Manual_Mode_0", "Manual_Mode_1", ...) without
    iterating. Leave the steps done on recipe the same.

    The series passed in is modified in place and then stored on
    ``self._step_series``.
    """
    manual_mode = "MAN.OP."
    new_manual_mode_text = "Manual_Mode_{}"

    is_manual = ss == manual_mode
    # True only on the first row of each contiguous manual-mode run.
    run_starts = is_manual & (ss != ss.shift())
    # The running count of run starts gives every row a zero-based run id;
    # only the ids on manual rows are used below.
    labels = (run_starts.cumsum() - 1).map(new_manual_mode_text.format)

    # Silence the chained-assignment warning for this one write only,
    # instead of switching it off for the whole process.
    with pd.option_context("mode.chained_assignment", None):
        ss[is_manual] = labels

    self._step_series = ss
+4
2

:

steps = [1, 2, 2, 'MAN.OP.', 'MAN.OP.', 2, 2, 3, 3, 'MAN.OP.', 'MAN.OP.', 4, 4]
steps = pd.Series(steps)

# Rows that are in manual mode.
is_manual = steps == 'MAN.OP.'
# True only on the first row of each contiguous "MAN.OP." run.
newManOp = is_manual & (steps != steps.shift())
# The running count of run starts numbers the runs 1, 2, ...; append that
# number to the manual rows only (selecting them keeps the indexes aligned).
steps[is_manual] += newManOp.cumsum()[is_manual].astype(str)

>>> steps
0            1
1            2
2            2
3     MAN.OP.1
4     MAN.OP.1
5            2
6            2
7            3
8            3
9     MAN.OP.2
10    MAN.OP.2
11           4
12           4
dtype: object

Or, to get exactly the requested output (replacing "MAN.OP." with "Manual_Mode_" and numbering from 0), use:

# newManOp marks the first row of each "MAN.OP." run, so cumsum() - 1 is a
# zero-based run number; only the manual rows receive the new label.
steps[steps == 'MAN.OP.'] = 'Manual_Mode_' + (newManOp.cumsum() - 1).astype(str)

>>> steps
0                 1
1                 2
2                 2
3     Manual_Mode_0
4     Manual_Mode_0
5                 2
6                 2
7                 3
8                 3
9     Manual_Mode_1
10    Manual_Mode_1
11                4
12                4
dtype: object

a pandas groupby, .

+2

matplotlib has a helper that returns the (start, end) index pairs of the contiguous regions where a boolean mask is True.

# contiguous_regions is no longer available from matplotlib.mlab; it is
# provided by matplotlib.cbook.
from matplotlib import cbook

manual_mode = "MAN.OP."
new_manual_mode_text = "Manual_Mode_{}"

# Each (start, end) pair is a half-open positional range where the mask is True.
regions = cbook.contiguous_regions(ser_orig == manual_mode)
for i, (start, end) in enumerate(regions):
    # .iloc makes the positional slicing explicit regardless of the index labels.
    ser_orig.iloc[start:end] = new_manual_mode_text.format(i)
ser_orig

0                 1
1                 2
2                 2
3     Manual_Mode_0
4     Manual_Mode_0
5                 2
6                 2
7                 3
8                 3
9     Manual_Mode_1
10    Manual_Mode_1
11                4
12                4
dtype: object
+1

Source: https://habr.com/ru/post/1629448/


All Articles