Apply a function to each cell in a DataFrame in place in pandas

Is it possible to apply a function to every cell in a DataFrame in place in pandas?

I know about pandas.DataFrame.applymap, but it does not seem to support in-place application:

import numpy as np
import pandas as pd
# Seed NumPy's global RNG so the random frame below is reproducible.
np.random.seed(1)

# 4x3 frame of random draws, labelled by state (rows) and by letter (columns).
frame = pd.DataFrame(
    np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=list('bde'),
)
print(frame)

# Per-cell formatter: render a number with two decimal places.
# NOTE: this name shadows the builtin `format`; it is kept because the
# surrounding text refers to it.
format = lambda x: '%.2f' % x

# applymap returns a new frame, which is then bound back to the same name.
frame = frame.applymap(format)
print(frame)

returns:

               b         d         e
Utah    1.624345 -0.611756 -0.528172
Ohio   -1.072969  0.865408 -2.301539
Texas   1.744812 -0.761207  0.319039
Oregon -0.249370  1.462108 -2.060141

            b      d      e
Utah     1.62  -0.61  -0.53
Ohio    -1.07   0.87  -2.30
Texas    1.74  -0.76   0.32
Oregon  -0.25   1.46  -2.06

`frame = frame.applymap(format)` temporarily holds 2 copies of `frame` in memory, which I don't want.

I know that it is possible to apply a function to each cell in place with a NumPy array (see "Mapping a NumPy array in place").

+4
source share
2 answers

If this matters a lot to you, you can try writing your own Cython function.

I looked up applymap in the pandas source:

def applymap(self, func):
    """Map ``func`` over the data by handing an element-wise mapper to ``self.apply``."""
    # ...
    def infer(x):
        # An empty input is mapped as-is; anything else is mapped through
        # its ``asobject`` form.
        target = x if x.empty else x.asobject
        return lib.map_infer(target, func)

    return self.apply(infer)

which leads us to lib.map_infer.

lib.map_infer is a Cython function which, as far as I can see, allocates a new array for the result: `result = np.empty(n, dtype=object)`, as found in its source:

def map_infer(ndarray arr, object f, bint convert=1):
    """
    Substitute for np.vectorize with pandas-friendly dtype inference

    Parameters
    ----------
    arr : ndarray
    f : function
    convert : bint, default 1
        When true, the object-dtype result is passed through
        maybe_convert_objects before being returned.

    Returns
    -------
    mapped : ndarray
    """
    cdef:
        Py_ssize_t i, n
        ndarray[object] result
        object val

    n = len(arr)
    # The mapped values are collected in a freshly allocated object array.
    result = np.empty(n, dtype=object)
    for i in range(n):
        val = f(util.get_value_at(arr, i))

        # unbox 0-dim arrays, GH #690
        if is_array(val) and PyArray_NDIM(val) == 0:
            # is there a faster way to unbox?
            val = val.item()

        result[i] = val

    if convert:
        return maybe_convert_objects(result,
                                     try_float=0,
                                     convert_datetime=0,
                                     convert_timedelta=0)

    # convert is false: hand back the raw object array.
    return result

... . , op - cython inplace,

- , : (

+1

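As far as I understand, pandas in-place operations are performed by calling .update_inplace() with the new object returned by the operation; for example, .replace() first creates a new object and then updates the original with it.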

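.applymap() is a wrapper around .apply(); neither has an inplace option, and even if they did, they would still need to hold the whole result in memory before updating the original.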

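.applymap() calls .apply(), which calls .aggregate(), which calls _aggregate(), which calls ._agg(), which is nothing more than a for loop running in Python (i.e. not in Cython - I think).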

You can, of course, modify the underlying NumPy array directly, for example:

# 100x100 frame of random draws; no index/columns are given, so the default
# integer labels are used.
frame = pd.DataFrame(np.random.randn(100, 100))

# Variant 1: round cell by cell, writing each result straight back through
# frame.values. The labels from frame.index / frame.columns are used as
# positions into the array.
for i in frame.index:
    for j in frame.columns:
        val = round(frame.values[i,j])
        frame.values[i,j] = val

# Variant 2: collect one rounded row in a scratch buffer, then write the
# whole row back in a single assignment once the inner loop has finished.
newvals = np.zeros(frame.shape[1])
for i in frame.index:
    for j in frame.columns:
        val = round(frame.values[i,j])
        newvals[j] = val
    frame.values[i] = newvals

1 , - 100 ; .applymap(round) 20 .

, frame = pd.DataFrame(np.random.randn(1, 10000)), .applymap(round) 1,2 , 100 .

, frame = pd.DataFrame(np.random.randn(10000,1)) , 1s (), .applymap(round) 10 .

, .applymap .

frame.applymap(round) : (10000,1), (100,100) (1,10000). , ; , .applymap() . , .applymap(), :

# Scratch buffer with one slot per column of the frame.
newvals = np.zeros(frame.shape[1])
# Row/column labels are used as positions into frame.values, so this
# assumes the frame carries the default 0..n-1 labels.
for i in frame.index:
    for j in frame.columns:
        val = round(frame.values[i,j])
        newvals[j] = val
    # Write the fully rounded row back in one assignment.
    frame.values[i] = newvals

NumPy:

# Same row-buffer approach, but the array is fetched from the frame once up
# front instead of going through frame.values on every access.
newvals = np.zeros(frame.shape[1])
arr = frame.values
# Row/column labels are used as positions into arr, so this assumes the
# frame carries the default 0..n-1 labels.
for i in frame.index:
    for j in frame.columns:
        val = round(arr[i,j])
        newvals[j] = val
    # Write the row back only after every column has been rounded; doing it
    # inside the inner loop would overwrite cells of arr[i] before they
    # have been read.
    arr[i] = newvals

100x100 300 , 60 - , .values !

Cython 34 , .applymap(round) - 24 . , .applymap() .

: , .applymap(); , , "" .

.applymap() , NumPy. - , , : . arr=df.values[i], arr, df.values[i] = arr i.

+1

Source: https://habr.com/ru/post/1680849/


All Articles