How can I vectorize a function that uses lagging values ​​of its own output?

I apologize for the poor wording of the question, but that was the best I could do. I know exactly what I want, but not quite exactly how to ask him.

Here is the logic demonstrated by example:

Two conditions that take values ​​1 or 0 cause a signal that also takes values ​​1 or 0. Condition A triggers a signal (if A = 1, then signal = 1, otherwise signal = 0) no matter what. Condition B DOES NOT trigger the signal, but the signal remains on if condition B remains equal to 1 after the signal was previously called by condition A. The signal returns to 0 only after both A and B return to 0.

  • Input signal:

enter image description here

  1. Required output (signal_d) and confirmation that the for loop can solve it (signal_l):

enter image description here

  1. numpy.where():

enter image description here

  1. :

    # Settings
    import numpy as np
    import pandas as pd
    import datetime
    
    # Data frame with input and desired output i column signal_d
    df = pd.DataFrame({'condition_A':list('00001100000110'),
                       'condition_B':list('01110011111000'),
                       'signal_d':list('00001111111110')})
    
    colnames = list(df)
    df[colnames] = df[colnames].apply(pd.to_numeric)
    datelist = pd.date_range(pd.datetime.today().strftime('%Y-%m-%d'), periods=14).tolist()
    df['dates'] = datelist
    df = df.set_index(['dates']) 
    
    # Solution using a for loop with nested ifs in column signal_l
    df['signal_l'] = df['condition_A'].copy(deep = True)
    i=0
    for observations in df['signal_l']:
        if df.ix[i,'condition_A'] == 1:
            df.ix[i,'signal_l'] = 1
        else:
            # Signal previously triggered by condition_A
            # AND kept "alive" by condition_B:                
            if df.ix[i - 1,'signal_l'] & df.ix[i,'condition_B'] == 1:
                 df.ix[i,'signal_l'] = 1
            else:
                df.ix[i,'signal_l'] = 0          
        i = i + 1
    
    
    
    # My attempt with np.where in column signal_v1
    df['Signal_v1'] = df['condition_A'].copy()
    df['Signal_v1'] = np.where(df.condition_A == 1, 
          1, 
          np.where( (df.shift(1).Signal_v1 == 1) & (df.condition_B == 1), 
                    1, 
                    0)
          )
    
    print(df)
    

, for , , numpy.where(). , . .

. , , .

!

+4
1

, , , Python. ( , Python, pandas numpy.)

, . if DataFrame. .

script, : add_signal_l(df) add_lagged(df). - , . - Python, numpy .

import numpy as np
import pandas as pd
import datetime

#-----------------------------------------------------------------------
# Create the test DataFrame

# Data frame with input and desired output i column signal_d
df = pd.DataFrame({'condition_A':list('00001100000110'),
                   'condition_B':list('01110011111000'),
                   'signal_d':list('00001111111110')})

colnames = list(df)
df[colnames] = df[colnames].apply(pd.to_numeric)
datelist = pd.date_range(pd.datetime.today().strftime('%Y-%m-%d'), periods=14).tolist()
df['dates'] = datelist
df = df.set_index(['dates']) 
#-----------------------------------------------------------------------

def add_signal_l(df):
    # Solution using a for loop with nested ifs in column signal_l
    df['signal_l'] = df['condition_A'].copy(deep = True)
    i=0
    for observations in df['signal_l']:
        if df.ix[i,'condition_A'] == 1:
            df.ix[i,'signal_l'] = 1
        else:
            # Signal previously triggered by condition_A
            # AND kept "alive" by condition_B:                
            if df.ix[i - 1,'signal_l'] & df.ix[i,'condition_B'] == 1:
                 df.ix[i,'signal_l'] = 1
            else:
                df.ix[i,'signal_l'] = 0          
        i = i + 1

def compute_lagged_signal(a, b):
    x = np.empty_like(a)
    x[0] = a[0]
    for i in range(1, len(a)):
        x[i] = a[i] | (x[i-1] & b[i])
    return x

def add_lagged(df):
    df['lagged'] = compute_lagged_signal(df['condition_A'].values, df['condition_B'].values)

, IPython:

In [85]: df
Out[85]: 
            condition_A  condition_B  signal_d
dates                                         
2017-06-09            0            0         0
2017-06-10            0            1         0
2017-06-11            0            1         0
2017-06-12            0            1         0
2017-06-13            1            0         1
2017-06-14            1            0         1
2017-06-15            0            1         1
2017-06-16            0            1         1
2017-06-17            0            1         1
2017-06-18            0            1         1
2017-06-19            0            1         1
2017-06-20            1            0         1
2017-06-21            1            0         1
2017-06-22            0            0         0

In [86]: %timeit add_signal_l(df)
8.45 ms ± 177 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In [87]: %timeit add_lagged(df)
137 µs ± 581 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)

, add_lagged(df) .

+2

Source: https://habr.com/ru/post/1678866/


All Articles