Broken symmetry of operations between boolean pandas.Series with unequal index

The implicit index matching that pandas performs for operations between different DataFrame/Series objects is great, and most of the time it just works.

However, I came across an example that does not work properly:

import pandas as pd # 0.21.0
import numpy as np # 1.13.3

# Two boolean Series whose indexes only partly overlap (labels 2 and 3).
x = pd.Series([True, False, True, True], index=range(4))
y = pd.Series([False, True, True, False], index=[2, 4, 3, 5])

# Logical AND gives the same column for both operand orders, as expected.
pd.concat(
    [x, y, x & y, y & x],
    keys=['x', 'y', 'x&y', 'y&x'],
    axis=1,
)
#        x      y    x&y    y&x
# 0   True    NaN  False  False
# 1  False    NaN  False  False
# 2   True  False  False  False
# 3   True   True   True   True
# 4    NaN   True  False  False
# 5    NaN  False  False  False

# Logical OR depends on the operand order (the same holds for XOR:
# x^y vs. y^x).
pd.concat(
    [x, y, x | y, y | x],
    keys=['x', 'y', 'x|y', 'y|x'],
    axis=1,
)
#        x      y    x|y    y|x
# 0   True    NaN   True  False <-- INCONSISTENT!
# 1  False    NaN  False  False
# 2   True  False   True   True
# 3   True   True   True   True
# 4    NaN   True  False   True <-- INCONSISTENT!
# 5    NaN  False  False  False

After some research, I found two points that seem relevant:

- bool(np.nan) evaluates to True.
- | resolves to np.bitwise_or rather than np.logical_or.

But ultimately, the kicker seems to be that pandas casts nan to False at some point. Judging from the output above, this appears to happen after the call to np.bitwise_or, whereas I think it should happen before it.

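In particular, using np.logical_or does not help, because it skips the index alignment that pandas performs, and besides, I do not want np.nan or False to evaluate to True. (In other words, the answer at qaru.site/questions/838270/... does not help.)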

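I think that if this syntactic sugar is provided, it should be as consistent as possible*, so | should be symmetric. It is really hard to debug (as happened to me) when something that is always symmetric suddenly is not.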

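So, finally, the question: is there any feasible workaround (for example, overloading something) that restores x|y == y|x, ideally in such a way that (loosely speaking) nan | True == True == True | nan and nan | False == False == False | nan?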

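*Even then, De Morgan's law breaks down regardless: ~(x&y) cannot be fully matched to ~y|~x, because the NaNs only appear during index alignment (and are therefore unaffected by a preceding negation).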

+4
1

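After some digging in pandas, I found a function called pandas.core.ops._bool_method_SERIES, one of several factory functions that wrap the boolean operators for Series objects.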

>>> f = pandas.Series.__or__
>>> f #the actual function you call when you do x|y
<function _bool_method_SERIES.<locals>.wrapper at 0x107436bf8>
>>> f.__closure__[0].cell_contents
    #it holds a reference to the other function defined in this factory na_op
<function _bool_method_SERIES.<locals>.na_op at 0x107436b70>
>>> f.__closure__[0].cell_contents.__closure__[0].cell_contents
    #and na_op has a reference to the built-in function or_
<built-in function or_>

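To see what the wrapper actually passes to the underlying operator, I wrapped a custom function with the same factory; it prints the arguments it receives and then raises a TypeError: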

def test_logical_or(a, b):
    """Probe operator: report both operands, then refuse to compute.

    Prints the type and value of ``a`` and of ``b`` between two banner
    lines and then always raises ``TypeError``.
    """
    opening_banner = "**** calling logical_or with ****"
    closing_banner = "******"

    print(opening_banner)
    for operand in (a, b):
        print(type(operand), operand)
    print(closing_banner)

    raise TypeError("my_logical_or isn't implemented")

#make the wrapper method
wrapper = pd.core.ops._bool_method_SERIES(test_logical_or, None,None)
pd.Series.logical_or = wrapper #insert method


x = pd.Series([True, False, True, True], index = range(4))
y = pd.Series([False, True, True, False], index = [2,4,3,5])

z = x.logical_or(y) #lets try it out!

print(x,y,z, sep="\n")

Output (for me, with pandas 0.19.1):

**** calling logical_or with ****
<class 'numpy.ndarray'> [True False True True nan nan]
<class 'numpy.ndarray'> [False False False  True  True False]
******
**** calling logical_or with ****
<class 'bool'> True
<class 'bool'> False
******
Traceback (most recent call last):
   ...

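So the method is called with two numpy arrays, but for some reason the second one already has its nan values replaced with False while the first one does not, which is probably why the result is not symmetric. I cannot think of a reason why it would be done this way.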

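As a workaround, you can define a function that, when given two numpy arrays, replaces nan with False in both and then returns np.logical_or(a,b). For any other input, it simply raises an error.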

def my_logical_or(a, b):
    """Element-wise logical OR of two arrays, treating NaN as ``False``.

    Parameters
    ----------
    a, b : numpy.ndarray
        Operands. Their elements must be convertible to ``float`` so that
        NaN entries can be located.

    Returns
    -------
    numpy.ndarray
        ``np.logical_or`` of the operands after every NaN entry has been
        replaced by ``False``.

    Raises
    ------
    TypeError
        If either operand is not a ``numpy.ndarray``.

    Notes
    -----
    NaN replacement is done on copies, so the caller's arrays are never
    modified.
    """
    if not (isinstance(a, np.ndarray) and isinstance(b, np.ndarray)):
        raise TypeError("custom logical or is only implemented for numpy arrays")

    def _nan_as_false(values):
        # Work on a copy: writing into ``values`` would silently change the
        # caller's data.
        filled = values.copy()
        filled[np.isnan(values.astype(float))] = False
        return filled

    return np.logical_or(_nan_as_false(a), _nan_as_false(b))

# Wrap my_logical_or with pandas' private _bool_method_SERIES factory and
# expose the result as Series.logical_or.
# NOTE(review): private pandas API; the three-argument call presumably matches
# the pandas version quoted in the article -- confirm before running elsewhere.
wrapper = pd.core.ops._bool_method_SERIES(my_logical_or, None,None)
pd.Series.logical_or = wrapper


# Two boolean Series whose indexes only partly overlap (labels 2 and 3).
x = pd.Series([True, False, True, True], index = range(4))
y = pd.Series([False, True, True, False], index = [2,4,3,5])

# Put both operand orders side by side to compare them row by row.
z = pd.concat([x, y, x.logical_or(y), y.logical_or(x)], keys = ['x', 'y', 'x|y', 'y|x'], axis = 1)
print(z)
# Output reported in the article; rows 0 and 4 are the ones that differed
# between x|y and y|x with the built-in operator.
#        x      y    x|y    y|x
# 0   True    NaN   True   True <-- same!
# 1  False    NaN  False  False
# 2   True  False   True   True
# 3   True   True   True   True
# 4    NaN   True   True   True <-- same!
# 5    NaN  False  False  False

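This may serve as a workaround, but I would not recommend overwriting Series.__or__ with it, since we do not know what else relies on that method and changing it could break other things.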


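As far as I can tell, the problem is in pandas.core.ops at line 943: nan is replaced with False (or 0) in other, but the values of self are passed to na_op unfilled: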

    return filler(self._constructor(na_op(self.values, other.values),
                                    index=self.index, name=name))

The fix is to use filler(self).values instead of self.values:

    return filler(self._constructor(na_op(filler(self).values, other.values),
                                    index=self.index, name=name))

With this change, or and xor become symmetric; since the problem lies in pandas itself, it should really be fixed there.

+2

Source: https://habr.com/ru/post/1690282/


All Articles