Compute DataFrame values recursively

I am trying to compute the column values of a pandas DataFrame "recursively".

Suppose there is data for two different days, each of which has 10 observations, and you want to calculate some variable r. Only the first value of r is given for each day, and you want to calculate the remaining 2 * 9 entries, where each subsequent value depends on the previous value of r and on one additional "contemporaneous" variable "x".

enter image description here

The first problem is that I want to perform the calculations for each day separately, that is, I would like to use pandas.groupby() for all my calculations ... but when I try to multiply the grouped data and use shift(1), I get only "NaN" entries:

data.groupby(data.index)['r'] =   ( (1+data.groupby(data.index)['x']*0.25) * (1+data.groupby(data.index)['r'].shift(1)))

My second attempt used a for loop:

for i in range(2,21):
    data[data['rank'] == i]['r'] =  ( (1+data[data['rank'] == i]['x']*0.25) * (1+data[data['rank'] == i]['r'].shift(1))

This does not work either. Is there a way to do such recursive calculations on DataFrames? If so, how?

Sample data:

# Sample data: two dates in the index with ten rows each ('rank' runs 1..10
# within each date).
# NOTE: only the rank-1 rows carry a numeric 'r'; the remaining entries are
# the string literal 'NaN' (not a float NaN) - presumably placeholders for
# the values that still have to be computed.
df = pd.DataFrame({
  'rank' : [1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10],
  'x' : [0.00275,0.00285,0.0031,0.0036,0.0043,0.0052,0.0063,0.00755,0.00895,0.0105,0.0027,0.00285,0.0031,0.00355,0.00425,0.0051,0.00615,0.00735,0.00875,0.0103],
  'r' : [0.00158,'NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN',0.001485,'NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN']
  },index=['2014-01-02', '2014-01-02', '2014-01-02', '2014-01-02',
           '2014-01-02', '2014-01-02', '2014-01-02', '2014-01-02',
           '2014-01-02', '2014-01-02', '2014-01-03', '2014-01-03',
           '2014-01-03', '2014-01-03', '2014-01-03', '2014-01-03',
           '2014-01-03', '2014-01-03', '2014-01-03', '2014-01-03'])
+4
2

This can be done with pandas.groupby().apply(). Something like scipy.signal.lfilter might also work, but the apply approach is shown here.

Here is the code:

def rolling_apply(group):
    """Fill column ``r`` of one group with a running recurrence over ``x``.

    The first entry of ``group.r`` is used as the seed.  For every row the
    value ``(1 + previous) * (1 + 0.25 * x)`` is computed, where ``x`` comes
    from that row and ``previous`` is the value computed for the row before
    (the seed for the first row).  The seed itself is not part of the
    output: the result has one computed value per row.

    Args:
        group: DataFrame with columns ``x`` and ``r``.  It must contain at
            least one row, because the seed is read with ``iloc[0]``.

    Returns:
        A copy of ``group`` whose ``r`` column holds the computed values.
        The frame that was passed in is left unchanged, since functions
        handed to ``groupby().apply()`` should not mutate their argument.
    """
    previous = group.r.iloc[0]
    values = []
    for x in group.x:
        previous = (1 + previous) * (1 + x * 0.25)
        values.append(previous)
    # assign() works on a copy, so the caller's group is not modified.
    return group.assign(r=values)

# Run rolling_apply once per date (rows are grouped by their index label) and
# store the computed column as 'R'.  group_keys=False keeps the original index
# on the result; with the pandas >= 2.0 default the group key is prepended as
# an extra index level and the assignment back to df no longer lines up.
df['R'] = df.groupby(df.index, group_keys=False).apply(rolling_apply)['r']

Output:

                   r  rank        x          R
2014-01-02   0.00158     1  0.00275   1.002269
2014-01-02       NaN     2  0.00285   2.003695
2014-01-02       NaN     3  0.00310   3.006023
2014-01-02       NaN     4  0.00360   4.009628
2014-01-02       NaN     5  0.00430   5.015014
2014-01-02       NaN     6  0.00520   6.022833
2014-01-02       NaN     7  0.00630   7.033894
2014-01-02       NaN     8  0.00755   8.049058
2014-01-02       NaN     9  0.00895   9.069306
2014-01-02       NaN    10  0.01050  10.095737
2014-01-03  0.001485     1  0.00270   1.002161
2014-01-03       NaN     2  0.00285   2.003588
2014-01-03       NaN     3  0.00310   3.005915
2014-01-03       NaN     4  0.00355   4.009471
2014-01-03       NaN     5  0.00425   5.014793
2014-01-03       NaN     6  0.00510   6.022462
2014-01-03       NaN     7  0.00615   7.033259
2014-01-03       NaN     8  0.00735   8.048020
2014-01-03       NaN     9  0.00875   9.067813
2014-01-03       NaN    10  0.01030  10.093737

The sample data:

# Sample data: two dates in the index with ten rows each ('rank' runs 1..10
# within each date).
# NOTE: only the rank-1 rows carry a numeric 'r'; the remaining entries are
# the string literal 'NaN' (not a float NaN) - presumably placeholders for
# the values that still have to be computed.
df = pd.DataFrame({
    'rank': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'x': [0.00275, 0.00285, 0.0031, 0.0036, 0.0043, 0.0052, 0.0063, 0.00755,
          0.00895, 0.0105, 0.0027, 0.00285, 0.0031, 0.00355, 0.00425,
          0.0051, 0.00615, 0.00735, 0.00875, 0.0103],
    'r': [0.00158, 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN',
          'NaN', 0.001485, 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN',
          'NaN', 'NaN']
}, index=['2014-01-02', '2014-01-02', '2014-01-02', '2014-01-02',
          '2014-01-02', '2014-01-02', '2014-01-02', '2014-01-02',
          '2014-01-02', '2014-01-02', '2014-01-03', '2014-01-03',
          '2014-01-03', '2014-01-03', '2014-01-03', '2014-01-03',
          '2014-01-03', '2014-01-03', '2014-01-03', '2014-01-03'])

Update:

To keep the given first value and get the expected result, change the function passed to apply:

def rolling_apply(group):
    """Fill column ``r`` of one group recursively, keeping the given seed.

    The first entry of ``group.r`` is kept as the first value.  Every later
    row gets ``(1 + previous) * (1 + 0.25 * x) - 1``, where ``previous`` is
    the value of the row before and ``x`` is taken from that preceding row
    as well.  The last ``x`` of the group is therefore never used.

    Args:
        group: DataFrame with columns ``x`` and ``r``.  It must contain at
            least one row, because the seed is read with ``iloc[0]``.

    Returns:
        A copy of ``group`` whose ``r`` column holds the seed followed by
        the computed values.  The frame that was passed in is left
        unchanged, since functions handed to ``groupby().apply()`` should
        not mutate their argument.
    """
    values = [group.r.iloc[0]]
    # .iloc makes the "all but the last row" slice positional for any index.
    for x in group.x.iloc[:-1]:
        values.append((1 + values[-1]) * (1 + x * 0.25) - 1)
    # assign() works on a copy, so the caller's group is not modified.
    return group.assign(r=values)

# Run rolling_apply once per date (rows are grouped by their index label) and
# overwrite 'r' with the result.  group_keys=False keeps the original index on
# the result; with the pandas >= 2.0 default the group key is prepended as an
# extra index level and the assignment back to df no longer lines up.
df['r'] = df.groupby(df.index, group_keys=False).apply(rolling_apply)['r']
+3

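The first value of "r" on each day (0.00158, 0.001485) is given and is kept as is (it is not computed). R[0] holds the value for r[1], and so on, so the computed column has to be shifted by 1.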

# Sample data: two dates in the index with ten rows each ('rank' runs 1..10
# within each date).
# NOTE: only the rank-1 rows carry a numeric 'r'; the remaining entries are
# the string literal 'NaN' (not a float NaN) - presumably placeholders for
# the values that still have to be computed.
df = pd.DataFrame({
'rank': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'x': [0.00275, 0.00285, 0.0031, 0.0036, 0.0043, 0.0052, 0.0063, 0.00755,
      0.00895, 0.0105, 0.0027, 0.00285, 0.0031, 0.00355, 0.00425,
      0.0051, 0.00615, 0.00735, 0.00875, 0.0103],
'r': [0.00158, 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN',
      'NaN', 0.001485, 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN',
      'NaN', 'NaN'] }, index=['2014-01-02', '2014-01-02', '2014-01-02', '2014-01-02',
      '2014-01-02', '2014-01-02', '2014-01-02', '2014-01-02',
      '2014-01-02', '2014-01-02', '2014-01-03', '2014-01-03',
      '2014-01-03', '2014-01-03', '2014-01-03', '2014-01-03',
      '2014-01-03', '2014-01-03', '2014-01-03', '2014-01-03'])

 def rolling_apply(group):
     """Fill column ``r`` of one group with a running recurrence over ``x``.

     The first entry of ``group.r`` is used as the seed.  For every row the
     value ``(1 + previous) * (1 + 0.25 * x) - 1`` is computed, where ``x``
     comes from that row and ``previous`` is the value computed for the row
     before (the seed for the first row).  The seed itself is not part of
     the output: the result has one computed value per row.

     Args:
         group: DataFrame with columns ``x`` and ``r``.  It must contain at
             least one row, because the seed is read with ``iloc[0]``.

     Returns:
         A copy of ``group`` whose ``r`` column holds the computed values.
         The frame that was passed in is left unchanged, since functions
         handed to ``groupby().apply()`` should not mutate their argument.
     """
     previous = group.r.iloc[0]
     values = []
     for x in group.x:
         previous = (1 + previous) * (1 + x * 0.25) - 1
         values.append(previous)
     # assign() works on a copy, so the caller's group is not modified.
     return group.assign(r=values)

# Run rolling_apply once per date (rows are grouped by their index label) and
# store the computed column as 'R'.  group_keys=False keeps the original index
# on the result; with the pandas >= 2.0 default the group key is prepended as
# an extra index level and the assignment back to df no longer lines up.
df['R'] = df.groupby(df.index, group_keys=False).apply(rolling_apply)['r']

# Rank-1 rows keep their given 'r'; every other row takes the value computed
# for the row before it, hence the shift by one.
df['r'] = np.where(df['rank'] == 1, df['r'], df['R'].shift(1))

# The helper column is no longer needed.  The axis is passed by keyword
# because pandas 2.0 no longer accepts it as a positional argument.
df = df.drop('R', axis=1)

                     r  rank        x
2014-01-02     0.00158     1  0.00275
2014-01-02  0.00226859     2  0.00285
2014-01-02   0.0029827     3  0.00310
2014-01-02  0.00376001     4  0.00360
2014-01-02   0.0046634     5  0.00430
2014-01-02  0.00574341     6  0.00520
2014-01-02  0.00705088     7  0.00630
2014-01-02  0.00863698     8  0.00755
2014-01-02   0.0105408     9  0.00895
2014-01-02   0.0128019    10  0.01050
2014-01-03    0.001485     1  0.00270
2014-01-03    0.002161     2  0.00285
2014-01-03  0.00287504     3  0.00310
2014-01-03  0.00365227     4  0.00355
2014-01-03  0.00454301     5  0.00425
2014-01-03  0.00561034     6  0.00510
2014-01-03  0.00689249     7  0.00615
2014-01-03  0.00844059     8  0.00735
2014-01-03   0.0102936     9  0.00875
2014-01-03   0.0125036    10  0.01030
0

Source: https://habr.com/ru/post/1676031/


All Articles