Chain memoizer in Python

I already have a memoizer that works very well. It uses pickle dumps to serialize the input and creates an MD5 hash as the key. The results of the functions are quite large and are stored as pickle files, with the MD5 hash as the file name. When I call two memoized functions one after the other, the memoizer loads the output of the first function and passes it to the second function. The second function then serializes it, computes the MD5 and loads its own result. Here is a very simple example:

# Both functions are wrapped by the same caching decorator.
@memoize
def f(x):
    # Body elided in the example.
    ...
    return y

@memoize
def g(x):
    # Body elided in the example.
    ...
    return y

# The output of f is passed straight on as the input of g.
y1 = f(x1)
y2 = g(y1)

y1 is loaded from disk during the evaluation of f, and then serialized again during the evaluation of g. Is it possible to somehow bypass this step and pass the key of y1 (i.e. its MD5 hash) to g? If g already has this key, it loads y2 from disk. If not, it “requests” the full y1 in order to evaluate g.

EDIT:

import cPickle as pickle
import inspect
import hashlib

class memoize(object):
    """Disk-backed memoizing decorator.

    The result of every call is pickled to a file in the current working
    directory. The file name combines the MD5 of the wrapped function's
    source code with the MD5 of the pickled arguments, so changing either
    the function or its arguments selects a different cache file.
    """

    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        """Return the cached result for these arguments, computing it on a miss.

        A missing or unreadable cache file is treated as a miss: the wrapped
        function is called and its result is written to the cache file.
        """
        file_name = self._get_key(*args, **kwargs)
        try:
            # Pickle data is binary, so the file must be opened in "rb" mode.
            # NOTE: unpickling executes code from the file; only use this with
            # a cache directory you trust.
            with open(file_name, "rb") as f:
                out = pickle.load(f)
        except Exception:
            # A missing file raises IOError/OSError and a corrupt one can
            # raise almost anything, so any ordinary error means "recompute".
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            out = self.func(*args, **kwargs)
            with open(file_name, "wb") as f:
                pickle.dump(out, f, 2)

        return out

    def _arg_hash(self, *args, **kwargs):
        """Return the MD5 hex digest of the pickled call arguments."""
        _str = pickle.dumps(args, 2) + pickle.dumps(kwargs, 2)
        return hashlib.md5(_str).hexdigest()

    def _src_hash(self):
        """Return the MD5 hex digest of the wrapped function's source code."""
        _src = inspect.getsource(self.func)
        # hashlib needs bytes; encode only when the source is text so that
        # byte-string sources are hashed unchanged.
        if not isinstance(_src, bytes):
            _src = _src.encode("utf-8")
        return hashlib.md5(_src).hexdigest()

    def _get_key(self, *args, **kwargs):
        """Return the cache file name: ``<source md5>_<argument md5>.pkl``."""
        arg = self._arg_hash(*args, **kwargs)
        src = self._src_hash()
        return src + '_' + arg + '.pkl'
+4
source share
1 answer

I think you could do it automatically, but I generally think it's better to be explicit about "lazy" evaluation. Therefore, I'll present a way that adds an additional argument, lazy, to your memoized functions. But instead of files, pickle and md5 I’ll simplify the helpers a bit:

# In-memory stand-in for the on-disk cache: maps a key ("md5") to stored data.
storage = {}


def calculate_md5(obj):
    """Return a key for *obj*.

    Simplified for the example: this is the built-in ``hash``, not a real
    MD5 digest.
    """
    print('calculating md5 of', obj)
    key = hash(obj)
    return key


def create_file(md5, data):
    """Store *data* under *md5* in ``storage`` (stands in for pickling to a file)."""
    print('creating file for md5', md5)
    storage.update({md5: data})


def load_file(md5):
    """Return the entry stored under *md5* (stands in for unpickling a file).

    Raises ``KeyError`` when nothing is stored under *md5*.
    """
    print('loading file with md5 of', md5)
    entry = storage[md5]
    return entry

I use a custom class as an intermediate object:

class MemoizedObject(object):
    """Lazy placeholder for a stored result, identified by its key ("md5")."""

    def __init__(self, md5):
        # Store the key that was passed in; the original assigned the
        # undefined name ``result_md5`` and raised NameError on construction.
        self.md5 = md5

    def get_real_data(self):
        """Load and return the stored result this placeholder refers to."""
        print('load...')
        return load_file(self.md5)

    def __repr__(self):
        return '{self.__class__.__name__}(md5={self.md5})'.format(self=self)

And finally, the modified Memoize class, which assumes that the functions take a single argument:

class Memoize(object):
    """Memoizing decorator for one-argument functions with optional lazy results.

    Results are stored through ``create_file`` under their own key. The
    instance additionally remembers which argument key maps to which result
    key, so a known result can be returned as a ``MemoizedObject``
    placeholder without loading the data.
    """

    def __init__(self, func):
        self.func = func
        # The md5 to md5 storage is needed to find the result file
        # or result md5 for lazy evaluation.
        self.md5_to_md5_storage = {}

    def __call__(self, x, lazy=False):
        """Return the result of ``func(x)``, using the stored result if known.

        ``x`` may be a real value or a ``MemoizedObject`` placeholder. With
        ``lazy=True`` and a known argument, a ``MemoizedObject`` is returned
        instead of the data. On a miss the real result is always returned,
        because it has to be computed anyway.
        """
        # If the argument is a memoized object there is no need to
        # calculate the hash, we can just look it up.
        is_placeholder = isinstance(x, MemoizedObject)
        key = x.md5 if is_placeholder else calculate_md5(x)

        if key not in self.md5_to_md5_storage:
            # Unknown argument: the function needs the real data, so resolve
            # a placeholder first instead of passing it to the function.
            if is_placeholder:
                x = x.get_real_data()
            result = self.func(x)
            result_md5 = calculate_md5(result)
            create_file(result_md5, result)
            self.md5_to_md5_storage[key] = result_md5
            return result

        result_md5 = self.md5_to_md5_storage[key]
        if lazy:
            # Known result and laziness requested: hand out only the key.
            return MemoizedObject(result_md5)
        # Not lazy but we know the result.
        return load_file(result_md5)

Now, with two "simple" (memoized) functions:

@Memoize
def f(x):
    """Demo function: return ``x`` plus one."""
    incremented = x + 1
    return incremented

@Memoize
def g(x):
    """Demo function: return ``x`` plus two."""
    incremented = x + 2
    return incremented

First run (nothing is stored yet):

>>> x1 = 10
>>> y1 = f(x1)
calculating md5 of 10
calculating md5 of 11
creating file for md5 11
>>> y2 = g(y1)
calculating md5 of 11
calculating md5 of 13
creating file for md5 13

Second run, without lazy:

>>> x1 = 10
>>> y1 = f(x1)
calculating md5 of 10
loading file with md5 of 11
>>> y2 = g(y1)
calculating md5 of 11
loading file with md5 of 13

Second run, with lazy=True:

>>> x1 = 10
>>> y1 = f(x1, lazy=True)
calculating md5 of 10
>>> y2 = g(y1)
loading file with md5 of 13

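Note that in the lazy case the "md5" of the intermediate result is not calculated again and the intermediate result is not loaded. However, this only works if the results have already been stored, i.e. not on the first run.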

+3

Source: https://habr.com/ru/post/1667567/


All Articles