Assign sum() or mean() to df.groupby inside a function

I suspect that this is very simple functionality in Python, and I have looked at the suggestions under "Questions that may already have your answer", but I don't think this is a duplicate. I will delete the question if it turns out to be one.

Task:

I would like to wrap df.groupby(pd.TimeGrouper(freq='M')).sum() in a function so that I can pass sum(), mean() or count() as arguments to that function. I asked a similar question earlier here, but I don't think I can use the same technique in this particular case.

Here is a snippet with reproducible input:

# Imports
import pandas as pd
import numpy as np

# Dataframe with 1 or zero
# 100 rows and 4 columns
# Indexed by dates
# Seed the generator so the 0/1 values are reproducible from run to run.
np.random.seed(12345678)
df = pd.DataFrame(np.random.randint(0, 2, size=(100, 4)), columns=list('ABCD'))
# Index the rows by 100 consecutive days starting 2017-01-01.
# pd.date_range accepts the ISO date string directly, so the pd.datetime
# helper (no longer available in current pandas) and the detour through a
# temporary 'dates' column are not needed.
df.index = pd.date_range('2017-01-01', periods=100, name='dates')
print(df.head(10))

Which gives:

enter image description here

With this we can do:

# Monthly column sums. pd.Grouper replaces pd.TimeGrouper, which current
# pandas releases no longer provide.
df2 = df.groupby(pd.Grouper(freq='M')).sum()
print(df2)

And we get:

enter image description here

Or we can do:

# Monthly column means. pd.Grouper replaces pd.TimeGrouper, which current
# pandas releases no longer provide.
df3 = df.groupby(pd.Grouper(freq='M')).mean()
print(df3)

And we get:

enter image description here

Here is the part of the procedure enclosed in the function:

# My function
def function1(df, freq='M'):
    """Return the per-period column sums of a datetime-indexed frame.

    Args:
        df: DataFrame (or Series) whose index holds datetimes.
        freq: pandas frequency alias that defines the buckets. Defaults to
            'M' (month end), the value that was previously hard-coded;
            pandas 2.2+ prefers the spelling 'ME' for the same alias.

    Returns:
        A new object with one row per period holding the column sums.
    """
    # pd.Grouper replaces pd.TimeGrouper, which current pandas releases no
    # longer provide.
    return df.groupby(pd.Grouper(freq=freq)).sum()

# Function1 call
df4 = function1(df = df)
print(df4)

And it works great:

enter image description here

But when I try to pass sum() or mean() as an argument to Function2, like this:

# My function with sum() as an argument
def function2(df, fun):
    # NOTE: this is the attempt the question is about, and it does not work.
    # `.fun` is a literal attribute lookup for the name "fun" on the groupby
    # object; the `fun` parameter itself is never used.
    df = df.groupby(pd.TimeGrouper(freq='M')).fun
    return df

I get a TypeError with this test:

# Function2 test 1
df5 = function2(df = df, fun = sum())

enter image description here

And this test fails as well:

# Function2 test 2
df6 = function2(df = df, fun = 'sum()')

enter image description here

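Is it possible to make this work with a few small tweaks? (I would like to do the same for the "M" in the freq argument, too.) Or do I have to go about this in a completely different way?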

Thank you for any suggestions!

Here is the whole thing for an easy copy-and-paste:

#%%

# Imports
import pandas as pd
import numpy as np

# Dataframe with 1 or zero
# 100 rows across 4 columns
# Indexed by dates
# Seed the generator so the 0/1 values are reproducible from run to run.
np.random.seed(12345678)
df = pd.DataFrame(np.random.randint(0, 2, size=(100, 4)), columns=list('ABCD'))
# Index the rows by 100 consecutive days starting 2017-01-01.
# pd.date_range accepts the ISO date string directly, so the pd.datetime
# helper (no longer available in current pandas) and the detour through a
# temporary 'dates' column are not needed.
df.index = pd.date_range('2017-01-01', periods=100, name='dates')
print(df.head(10))

# Calculate sum per month
# (pd.Grouper replaces pd.TimeGrouper, which current pandas releases no
# longer provide)
df2 = df.groupby(pd.Grouper(freq='M')).sum()
print(df2)

# Or calculate average per month
df3 = df.groupby(pd.Grouper(freq='M')).mean()
print(df3)

# My function
def function1(df, freq='M'):
    """Return the per-period column sums of a datetime-indexed frame.

    Args:
        df: DataFrame (or Series) whose index holds datetimes.
        freq: pandas frequency alias that defines the buckets. Defaults to
            'M' (month end), the value that was previously hard-coded;
            pandas 2.2+ prefers the spelling 'ME' for the same alias.

    Returns:
        A new object with one row per period holding the column sums.
    """
    # pd.Grouper replaces pd.TimeGrouper, which current pandas releases no
    # longer provide.
    return df.groupby(pd.Grouper(freq=freq)).sum()

# Function1 test
df4 = function1(df = df)
print(df4)
# So far so good
#%%
# My function with sum() as argument
def function2(df, fun):
    # Debug output: shows what was actually passed in as `fun`.
    print(fun)
    # NOTE: this is the attempt the question is about, and it does not work.
    # `.fun` is a literal attribute lookup for the name "fun" on the groupby
    # object; the `fun` parameter is only printed, never applied.
    df = df.groupby(pd.TimeGrouper(freq='M')).fun
    return df

# Function2 test 1
# df5 = function2(df = df, fun = sum())

# Function2 test 2
# df6 = function2(df = df, fun = 'sum()')

# Function2 test 3
# df7 = function2(df = df, fun = sum)
+4
2

You need to use apply:

def function2(df, fun, freq='M'):
    """Apply ``fun`` to every time bucket of a datetime-indexed frame.

    Args:
        df: DataFrame whose index holds datetimes.
        fun: Callable that receives the sub-frame of one bucket and returns
            the result for that bucket.
        freq: pandas frequency alias that defines the buckets. Defaults to
            'M' (month end), the value that was previously hard-coded;
            pandas 2.2+ prefers the spelling 'ME' for the same alias.

    Returns:
        Whatever ``GroupBy.apply`` assembles from the per-bucket results.
    """
    # pd.Grouper replaces pd.TimeGrouper, which current pandas releases no
    # longer provide.
    return df.groupby(pd.Grouper(freq=freq)).apply(fun)

In this case, fun needs to be a callable that accepts a pd.DataFrame.


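That said, depending on what you want, it is usually better to use agg. Its argument can be a function such as fun, the name of a built-in aggregation such as 'sum' or 'mean', or a list that mixes both.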

# pd.Grouper replaces pd.TimeGrouper, which current pandas releases no longer provide
df.groupby(pd.Grouper(freq='M')).agg(['sum', 'mean', fun])
+4

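As @BlackJack suggested, you can use getattr(gb, foo) to look up the method named foo on the groupby object gb. If no such method exists, getattr raises an AttributeError. Note that foo must be passed as a string (e.g. 'sum').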

def function(df, foo, freq='M'):
    """Run the groupby method named ``foo`` on each time bucket of ``df``.

    Args:
        df: DataFrame (or Series) whose index holds datetimes.
        foo: Name of the groupby method to call, as a string
            (for example 'sum', 'mean' or 'count').
        freq: pandas frequency alias that defines the buckets. Defaults to
            'M' (month end), the value that was previously hard-coded;
            pandas 2.2+ prefers the spelling 'ME' for the same alias.

    Returns:
        The result of calling the named method without arguments.

    Raises:
        AttributeError: If the groupby object has no callable attribute
            named ``foo``.
    """
    # pd.Grouper replaces pd.TimeGrouper, which current pandas releases no
    # longer provide.
    gb = df.groupby(pd.Grouper(freq=freq))
    message = '{} cannot be performed on this object'.format(foo)
    try:
        method = getattr(gb, foo)
    except AttributeError as err:
        # Raise a real exception instance: `raise('...')` tries to raise a
        # plain string, which itself fails with a TypeError.
        raise AttributeError(message) from err
    if not callable(method):
        # The lookup can also resolve to something that is not a method
        # (for example a column selection); report that the same way.
        raise AttributeError(message)
    return method()

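Alternatively, you could use eval, although it is generally considered unsafe. To limit the risk, check that foo is one of a known set of safe names and that gb is a pd.core.groupby.SeriesGroupBy or a pd.core.groupby.DataFrameGroupBy.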

def function2(df, foo, freq='M'):
    """Run a whitelisted groupby aggregation on each time bucket of ``df``.

    Args:
        df: DataFrame (or Series) whose index holds datetimes.
        foo: Name of the aggregation; must be 'sum', 'mean' or 'count'.
        freq: pandas frequency alias that defines the buckets. Defaults to
            'M' (month end), the value that was previously hard-coded;
            pandas 2.2+ prefers the spelling 'ME' for the same alias.

    Returns:
        The result of calling the named aggregation without arguments.

    Raises:
        ValueError: If ``foo`` is not whitelisted, or if the groupby call
            did not return a Series/DataFrame groupby object.
    """
    safe_functions = ('sum', 'mean', 'count')
    # Validate before anything else so that no unchecked text reaches eval.
    if foo not in safe_functions:
        raise ValueError('foo is not safe')
    # pd.Grouper replaces pd.TimeGrouper, which current pandas releases no
    # longer provide.
    gb = df.groupby(pd.Grouper(freq=freq))
    if not isinstance(gb, (pd.core.groupby.SeriesGroupBy, pd.core.groupby.DataFrameGroupBy)):
        raise ValueError('Unexpected groupby result')
    # NOTE(security): eval executes arbitrary code. It is tolerable here only
    # because `foo` was checked against the whitelist above;
    # `getattr(gb, foo)()` gives the same result without eval.
    return eval('gb.{}()'.format(foo))

>>> function(df, 'sum')
             A   B   C   D
dates                     
2017-01-31  18  15  14  14
2017-02-28  15  15  12  17
2017-03-31  18  17  16  17
2017-04-30   8   3   3   7
+2

Source: https://habr.com/ru/post/1683261/


All Articles