:
, :
%timeit (1+sample_data_df).resample('BM', axis=0).prod()-1
21.7 ms ± 170 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
%timeit pd.date_range(start="20000101", periods=500, freq='BM')
21.4 ms ± 272 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
22 , , , , 150'000 .
1000, ( ).
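One way to avoid repeating the expensive resample is memoisation (lru_cache), but lru_cache cannot take unhashable arguments (DataFrames, lists...). A workaround is to pass a hash of the DataFrame's contents as the cache key and keep the frame itself on the instance: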
from functools import lru_cache
class Sampler():
    """Draw bootstrap samples from a per-period compounded version of a frame.

    ``self.df`` is compounded per ``freq`` bucket as ``prod(1 + x) - 1`` and
    rows of that result are sampled with replacement. The compounded frame is
    memoised per instance (single entry) and recomputed whenever the frame's
    values, index, column labels or the requested ``freq`` change.
    """

    def __init__(self, df):
        """Store ``df``; it must carry an index that ``DataFrame.resample`` accepts."""
        self.df = df
        # Single-entry memo: ((fingerprint, freq), compounded frame) or None.
        # Kept on the instance so samplers neither evict each other's entry nor
        # stay alive through a class-level cache.
        self._resample_cache = None

    def get_resampled_sample(self, n, freq):
        """Return ``n`` rows drawn with replacement from the compounded frame.

        Args:
            n: Number of rows to draw.
            freq: Resampling frequency passed to ``DataFrame.resample``.

        Returns:
            A new DataFrame of ``n`` sampled rows; its index holds the bucket
            labels of the rows that were drawn.
        """
        resampled = self._wraper_resample_prod(freq)
        return resampled.sample(n, replace=True)

    def _wraper_resample_prod(self, freq):
        """Fingerprint the current frame and return its (memoised) compounding."""
        return self._resample_prod(self._fingerprint(), freq)

    def _fingerprint(self):
        """Return a hash of everything in ``self.df`` the resample depends on.

        Hashing the values alone is not enough: the bucket assignment is driven
        by the index, and the result carries the column labels.
        """
        frame = self.df
        index = frame.index
        # NOTE: for object-dtype buffers ``tobytes()`` serialises object
        # pointers rather than contents; the fingerprint is content-based for
        # numeric values and datetime-like indexes.
        return hash((
            frame.values.tobytes(),
            index.values.tobytes(),
            str(index.dtype),  # distinguishes e.g. time zones sharing UTC instants
            tuple(frame.columns),
        ))

    def _resample_prod(self, hash_df, freq):
        """Return ``prod(1 + df) - 1`` per ``freq`` bucket, reusing the last result.

        Args:
            hash_df: Fingerprint of the current frame; used only as memo key.
            freq: Resampling frequency.
        """
        key = (hash_df, freq)
        cached = getattr(self, "_resample_cache", None)
        if cached is not None and cached[0] == key:
            return cached[1]
        # ``axis`` is left at its default (rows); the explicit keyword is
        # deprecated in recent pandas releases.
        result = (self.df + 1).resample(freq).prod() - 1
        self._resample_cache = (key, result)
        return result
, df . , .
%timeit [sampler.get_resampled_sample(500, 'BM') for i in range(1000)]
881 ms ± 10.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
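Next, the same caching idea is applied to the pd.date_range call, so that the sampled rows receive a fresh, regular date index.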
class Sampler():
    """Bootstrap sampler that re-labels its samples with a regular date index.

    ``self.df`` is compounded per ``freq`` bucket as ``prod(1 + x) - 1``; rows
    of that result are drawn with replacement and then given a fresh
    ``pd.date_range`` index starting at the first label of ``self.df``. Both
    the compounded frame and the date range are memoised per instance (one
    entry each).
    """

    def __init__(self, df):
        """Store ``df``; it must carry an index that ``DataFrame.resample`` accepts."""
        self.df = df
        # Single-entry memos, kept on the instance so samplers neither evict
        # each other's entries nor stay alive through a class-level cache.
        self._resample_cache = None    # ((fingerprint, freq), compounded frame)
        self._date_range_cache = None  # ((start, periods, freq), DatetimeIndex)

    def update_df(self, df):
        """Replace the source frame and drop the compounded result of the old one."""
        self.df = df
        self._resample_cache = None

    def get_resampled_sample(self, n, freq):
        """Return ``n`` sampled compounded rows indexed by a regular date range.

        Args:
            n: Number of rows to draw (with replacement).
            freq: Frequency used both for resampling and for the new index.

        Returns:
            A new DataFrame of ``n`` rows whose index is
            ``pd.date_range(start=self.df.index[0], periods=n, freq=freq)``.
        """
        resampled = self._wraper_resample_prod(freq)
        sampled = resampled.sample(n, replace=True)
        sampled.index = self._create_date_range(self.df.index[0], n, freq)
        return sampled

    def _wraper_resample_prod(self, freq):
        """Fingerprint the current frame and return its (memoised) compounding."""
        return self._resample_prod(self._fingerprint(), freq)

    def _fingerprint(self):
        """Return a hash of everything in ``self.df`` the resample depends on.

        Hashing the values alone is not enough: the bucket assignment is driven
        by the index, and the result carries the column labels.
        """
        frame = self.df
        index = frame.index
        # NOTE: for object-dtype buffers ``tobytes()`` serialises object
        # pointers rather than contents; the fingerprint is content-based for
        # numeric values and datetime-like indexes.
        return hash((
            frame.values.tobytes(),
            index.values.tobytes(),
            str(index.dtype),  # distinguishes e.g. time zones sharing UTC instants
            tuple(frame.columns),
        ))

    def _resample_prod(self, hash_df, freq):
        """Return ``prod(1 + df) - 1`` per ``freq`` bucket, reusing the last result.

        Args:
            hash_df: Fingerprint of the current frame; used only as memo key.
            freq: Resampling frequency.
        """
        key = (hash_df, freq)
        cached = getattr(self, "_resample_cache", None)
        if cached is not None and cached[0] == key:
            return cached[1]
        # ``axis`` is left at its default (rows); the explicit keyword is
        # deprecated in recent pandas releases.
        result = (self.df + 1).resample(freq).prod() - 1
        self._resample_cache = (key, result)
        return result

    def _create_date_range(self, start, periods, freq):
        """Return ``pd.date_range(start, periods, freq)``, reusing the last result.

        The returned index object is shared between calls with equal
        arguments; pandas indexes are immutable, so sharing is safe.
        """
        key = (start, periods, freq)
        cached = getattr(self, "_date_range_cache", None)
        if cached is not None and cached[0] == key:
            return cached[1]
        dates = pd.date_range(start=start, periods=periods, freq=freq)
        self._date_range_cache = (key, dates)
        return dates
:
%timeit [sampler.get_resampled_sample(500, 'BM') for i in range(1000)]
1.11 s ± 43.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)