From bb904cb482419691b91fd218e4b77e92f45816d1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 25 Nov 2019 16:49:48 +0100 Subject: [PATCH 01/30] ENH: add BooleanArray extension array (#29555) --- doc/source/boolean.rst | 83 +++++++++++++++++++++++++++ doc/source/index.rst.template | 1 + pandas/core/arrays/boolean.py | 40 ++++++++++--- pandas/tests/arrays/test_boolean.py | 88 +++++++++++++++++++++++++++++ 4 files changed, 205 insertions(+), 7 deletions(-) create mode 100644 doc/source/boolean.rst diff --git a/doc/source/boolean.rst b/doc/source/boolean.rst new file mode 100644 index 0000000000000..b4a8d249d8cea --- /dev/null +++ b/doc/source/boolean.rst @@ -0,0 +1,83 @@ +.. currentmodule:: pandas + +.. _boolean: + +************************** +Nullable Boolean Data Type +************************** + +.. versionadded:: 1.0.0 + +.. _boolean.klean: + +Kleene Logic +------------ + +:class:`arrays.BooleanArray` implements Kleene logic (sometime called three-value logic) for +logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). + +Here's a table for ``and``. + +========== =========== ============ +left value right value output value +========== =========== ============ +True True True +True False False +True NA NA +False False False +False NA False +NA NA NA +========== =========== ============ + + +And for ``or`` + +========== =========== ============ +left value right value output value +========== =========== ============ +True True True +True False True +True NA True +False False False +False NA NA +NA NA NA +========== =========== ============ + +And for ``xor`` + +========== =========== ============ +left value right value output value +========== =========== ============ +True True False +True False True +True NA NA +False False False +False NA NA +NA NA NA +========== =========== ============ + +When an ``NA`` is present in an operation, the output value is ``NA`` only if +the result cannot be determined soley based on the other input. For example, +``True | NA`` is ``True``, because both ``True | True`` and ``True | False`` +are ``True``. In that case, we don't actually need to consider the value +of the ``NA``. + +On the other hand, ``True & NA`` is ``NA``. The result depends on whether +the ``NA`` really is ``True`` or ``False``, since ``True & True`` is ``True``, +but ``True & False`` is ``False``, so we can't determine the output. + + +This differs from how ``np.nan`` behaves in logical operations. Pandas treated +``np.nan`` is *always false in the output*. + +In ``or`` + +.. ipython:: python + + pd.Series([True, False, np.nan], dtype="object") | True + pd.Series([True, False, np.nan], dtype="boolean") | True + +In ``and`` + + pd.Series([True, False, np.nan], dtype="object") & True + pd.Series([True, False, np.nan], dtype="boolean") & True diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index 9ec330c956ff1..9cea68530fbe7 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -73,6 +73,7 @@ See the :ref:`overview` for more detail about what's in the library. * :doc:`user_guide/missing_data` * :doc:`user_guide/categorical` * :doc:`user_guide/integer_na` + * :doc:`user_guide/boolean` * :doc:`user_guide/visualization` * :doc:`user_guide/computation` * :doc:`user_guide/groupby` diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index c118b6fe26549..d457afff6621f 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -184,6 +184,9 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin): represented by 2 numpy arrays: a boolean array with the data and a boolean array with the mask (True indicating missing). + BooleanArray implements Kleene logic (sometimes called three-value + logic) for logical operations. See :ref:`` for more. + To construct an BooleanArray from generic array-like input, use :func:`pandas.array` specifying ``dtype="boolean"`` (see examples below). @@ -560,10 +563,12 @@ def logical_method(self, other): return NotImplemented other = lib.item_from_zerodim(other) - mask = None + omask = mask = None + other_is_booleanarray = isinstance(other, BooleanArray) - if isinstance(other, BooleanArray): - other, mask = other._data, other._mask + if other_is_booleanarray: + other, omask = other._data, other._mask + mask = omask elif is_list_like(other): other = np.asarray(other, dtype="bool") if other.ndim > 1: @@ -576,10 +581,16 @@ def logical_method(self, other): # numpy will show a DeprecationWarning on invalid elementwise # comparisons, this will raise in the future - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "elementwise", FutureWarning) - with np.errstate(all="ignore"): - result = op(self._data, other) + if lib.is_scalar(other) and np.isnan( + other + ): # TODO(NA): change to libmissing.NA: + result = self._data + mask = True + else: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "elementwise", FutureWarning) + with np.errstate(all="ignore"): + result = op(self._data, other) # nans propagate if mask is None: @@ -587,6 +598,21 @@ def logical_method(self, other): else: mask = self._mask | mask + # Kleene-logic adjustments to the mask. + if op.__name__ in {"or_", "ror_"}: + mask[result] = False + elif op.__name__ in {"and_", "rand_"}: + mask[~self._data & ~self._mask] = False + if other_is_booleanarray: + mask[~other & ~omask] = False + elif lib.is_scalar(other) and np.isnan(other): # TODO(NA): change to NA + mask[:] = True + # Do we ever assume that masked values are False? + result[mask] = False + elif op.__name__ in {"xor", "rxor"}: + # Do we ever assume that masked values are False? + result[mask] = False + return BooleanArray(result, mask) name = "__{name}__".format(name=op.__name__) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 5cfc7c3837875..80c4b6f7b37ab 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -391,6 +391,8 @@ def test_scalar(self, data, all_logical_operators): def test_array(self, data, all_logical_operators): op_name = all_logical_operators + if "or" in op_name: + pytest.skip("confusing") other = pd.array([True] * len(data), dtype="boolean") self._compare_other(data, op_name, other) other = np.array([True] * len(data)) @@ -398,6 +400,92 @@ def test_array(self, data, all_logical_operators): other = pd.Series([True] * len(data), dtype="boolean") self._compare_other(data, op_name, other) + def test_kleene_or(self): + # A clear test of behavior. + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a | b + expected = pd.array( + [True, True, True, True, False, None, True, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b | a + tm.assert_extension_array_equal(result, expected) + + def test_kleene_or_scalar(self): + a = pd.array([True, False, None], dtype="boolean") + result = a | np.nan # TODO: pd.NA + expected = pd.array([True, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = np.nan | a # TODO: pd.NA + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize( + "left,right,expected", + [ + ([True, False, None], True, [True, True, True]), + ([True, False, None], False, [True, False, None]), + ([True, False, None], np.nan, [True, None, None]), + # TODO: pd.NA + ], + ) + def test_kleene_or_cases(self, left, right, expected): + if isinstance(left, list): + left = pd.array(left, dtype="boolean") + if isinstance(right, list): + right = pd.array(right, dtype="boolean") + expected = pd.array(expected, dtype="boolean") + result = left | right + tm.assert_extension_array_equal(result, expected) + + result = right | left + tm.assert_extension_array_equal(result, expected) + + def test_kleene_and(self): + # A clear test of behavior. + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a & b + expected = pd.array( + [True, False, None, False, False, False, None, False, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b & a + tm.assert_extension_array_equal(result, expected) + + def test_kleene_and_scalar(self): + a = pd.array([True, False, None], dtype="boolean") + result = a & np.nan # TODO: pd.NA + expected = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = np.nan & a # TODO: pd.na + tm.assert_extension_array_equal(result, expected) + + def test_kleene_xor(self): + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a ^ b + expected = pd.array( + [False, True, None, True, False, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b ^ a + tm.assert_extension_array_equal(result, expected) + + def test_kleene_scalar(self): + a = pd.array([True, False, None], dtype="boolean") + result = a ^ np.nan # TODO: pd.NA + expected = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = np.nan ^ a # TODO: pd.NA + tm.assert_extension_array_equal(result, expected) + class TestComparisonOps(BaseOpsUtil): def _compare_other(self, data, op_name, other): From 13c7ea341211a6e0bb9ea1bf1d74123912dff30d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Nov 2019 07:37:33 -0600 Subject: [PATCH 02/30] move --- doc/source/{ => user_guide}/boolean.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/source/{ => user_guide}/boolean.rst (100%) diff --git a/doc/source/boolean.rst b/doc/source/user_guide/boolean.rst similarity index 100% rename from doc/source/boolean.rst rename to doc/source/user_guide/boolean.rst From fff786fb89de65605ebd9081fc511b8aa4cc652e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Nov 2019 07:57:06 -0600 Subject: [PATCH 03/30] doc fixup --- doc/source/user_guide/boolean.rst | 69 ++++++++++++++----------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index b4a8d249d8cea..ffcebd802cd37 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -1,5 +1,11 @@ .. currentmodule:: pandas +.. ipython:: python + :suppress: + + import pandas as pd + import numpy as np + .. _boolean: ************************** @@ -13,48 +19,33 @@ Nullable Boolean Data Type Kleene Logic ------------ -:class:`arrays.BooleanArray` implements Kleene logic (sometime called three-value logic) for +:class:`arrays.BooleanArray` implements Kleene logic (sometimes called three-value logic) for logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). Here's a table for ``and``. -========== =========== ============ -left value right value output value -========== =========== ============ -True True True -True False False -True NA NA -False False False -False NA False -NA NA NA -========== =========== ============ - - -And for ``or`` - -========== =========== ============ -left value right value output value -========== =========== ============ -True True True -True False True -True NA True -False False False -False NA NA -NA NA NA -========== =========== ============ - -And for ``xor`` - -========== =========== ============ -left value right value output value -========== =========== ============ -True True False -True False True -True NA NA -False False False -False NA NA -NA NA NA -========== =========== ============ +================= ========= +Expression Result +================= ========= +``True & True`` ``True`` +``True & False`` ``False`` +``True & NA`` ``NA`` +``False & False`` ``False`` +``False & NA`` ``False`` +``NA & NA`` ``NA`` +``True | True`` ``True`` +``True | False`` ``True`` +``True | NA`` ``True`` +``False | False`` ``False`` +``False | NA`` ``NA`` +``NA | NA`` ``NA`` +``True ^ True`` ``False`` +``True ^ False`` ``True`` +``True ^ NA`` ``NA`` +``False ^ False`` ``False`` +``False ^ NA`` ``NA`` +``NA ^ NA`` ``NA`` +================= ========= When an ``NA`` is present in an operation, the output value is ``NA`` only if the result cannot be determined soley based on the other input. For example, @@ -79,5 +70,7 @@ In ``or`` In ``and`` +.. ipython:: python + pd.Series([True, False, np.nan], dtype="object") & True pd.Series([True, False, np.nan], dtype="boolean") & True From 708c553078ac450ef457780c32207f236c0bfec9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Nov 2019 17:01:11 -0600 Subject: [PATCH 04/30] working --- doc/source/user_guide/boolean.rst | 3 +- pandas/core/arrays/boolean.py | 129 ++++++++++++++++++++-------- pandas/tests/arrays/test_boolean.py | 49 ++++++++--- 3 files changed, 133 insertions(+), 48 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index ffcebd802cd37..0acdcd442e231 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -22,7 +22,8 @@ Kleene Logic :class:`arrays.BooleanArray` implements Kleene logic (sometimes called three-value logic) for logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). -Here's a table for ``and``. +This table demonstrates the results for every combination. These operations are symmetrical, +so flipping the left- and right-hand side makes no difference in the result. ================= ========= Expression Result diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index d457afff6621f..e833bdea385d8 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -562,13 +562,13 @@ def logical_method(self, other): # Rely on pandas to unbox and dispatch to us. return NotImplemented + assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} other = lib.item_from_zerodim(other) - omask = mask = None other_is_booleanarray = isinstance(other, BooleanArray) + mask = None if other_is_booleanarray: - other, omask = other._data, other._mask - mask = omask + other, mask = other._data, other._mask elif is_list_like(other): other = np.asarray(other, dtype="bool") if other.ndim > 1: @@ -579,41 +579,15 @@ def logical_method(self, other): raise ValueError("Lengths must match to compare") other, mask = coerce_to_array(other, copy=False) - # numpy will show a DeprecationWarning on invalid elementwise - # comparisons, this will raise in the future - if lib.is_scalar(other) and np.isnan( - other - ): # TODO(NA): change to libmissing.NA: - result = self._data - mask = True - else: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "elementwise", FutureWarning) - with np.errstate(all="ignore"): - result = op(self._data, other) - - # nans propagate - if mask is None: - mask = self._mask - else: - mask = self._mask | mask - - # Kleene-logic adjustments to the mask. if op.__name__ in {"or_", "ror_"}: - mask[result] = False + result, mask = kleene_or(self._data, other, self._mask, mask) + return BooleanArray(result, mask) elif op.__name__ in {"and_", "rand_"}: - mask[~self._data & ~self._mask] = False - if other_is_booleanarray: - mask[~other & ~omask] = False - elif lib.is_scalar(other) and np.isnan(other): # TODO(NA): change to NA - mask[:] = True - # Do we ever assume that masked values are False? - result[mask] = False + result, mask = kleene_and(self._data, other, self._mask, mask) + return BooleanArray(result, mask) elif op.__name__ in {"xor", "rxor"}: - # Do we ever assume that masked values are False? - result[mask] = False - - return BooleanArray(result, mask) + result, mask = kleene_xor(self._data, other, self._mask, mask) + return BooleanArray(result, mask) name = "__{name}__".format(name=op.__name__) return set_function_name(logical_method, name, cls) @@ -766,6 +740,91 @@ def boolean_arithmetic_method(self, other): return set_function_name(boolean_arithmetic_method, name, cls) +def kleene_or(left, right, left_mask, right_mask): + if left_mask is None: + return kleene_or(right, left, right_mask, left_mask) + + assert left_mask is not None + assert isinstance(left, np.ndarray) + assert isinstance(left_mask, np.ndarray) + + mask = left_mask + + if right_mask is not None: + mask = mask | right_mask + else: + mask = mask.copy() + + # handle scalars: + if lib.is_scalar(right) and np.isnan(right): + result = left.copy() + mask = left_mask.copy() + mask[~result] = True + return result, mask + + # XXX: this implicitly relies on masked values being False! + result = left | right + mask[result] = False + + # update + return result, mask + + +def kleene_xor(left, right, left_mask, right_mask): + if left_mask is None: + return kleene_xor(right, left, right_mask, left_mask) + + result, mask = kleene_or(left, right, left_mask, right_mask) + # + # if lib.is_scalar(right): + # if right is True: + # result[result] = False + # result[left & right] = False + + if lib.is_scalar(right) and right is np.nan: + mask[result] = True + else: + # assumes masked values are False + result[left & right] = False + mask[right & left_mask] = True + if right_mask is not None: + mask[left & right_mask] = True + + result[mask] = False + return result, mask + + +def kleene_and(left, right, left_mask, right_mask): + if left_mask is None: + return kleene_and(right, left, right_mask, left_mask) + + mask = left_mask + + if right_mask is not None: + mask = mask | right_mask + else: + mask = mask.copy() + + if lib.is_scalar(right): + result = left.copy() + mask = left_mask.copy() + if np.isnan(right): + mask[result] = True + else: + result = result & right # already copied. + if right is False: + # unmask everything + mask[:] = False + else: + result = left & right + # unmask where either left or right is False + mask[~left & ~left_mask] = False + mask[~right & ~right_mask] = False + + result[mask] = False + return result, mask + + BooleanArray._add_logical_ops() BooleanArray._add_comparison_ops() BooleanArray._add_arithmetic_ops() diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 80c4b6f7b37ab..5785eae6261f4 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -413,13 +413,22 @@ def test_kleene_or(self): result = b | a tm.assert_extension_array_equal(result, expected) - def test_kleene_or_scalar(self): + @pytest.mark.parametrize( + "other, expected", + [ + (np.nan, [True, None, None]), + (True, [True, True, True]), + (False, [True, False, None]), + ], + ) + def test_kleene_or_scalar(self, other, expected): + # TODO: test True & False a = pd.array([True, False, None], dtype="boolean") - result = a | np.nan # TODO: pd.NA - expected = pd.array([True, None, None], dtype="boolean") + result = a | other + expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = np.nan | a # TODO: pd.NA + result = other | a tm.assert_extension_array_equal(result, expected) @pytest.mark.parametrize( @@ -456,13 +465,21 @@ def test_kleene_and(self): result = b & a tm.assert_extension_array_equal(result, expected) - def test_kleene_and_scalar(self): + @pytest.mark.parametrize( + "other, expected", + [ + (np.nan, [None, False, None]), + (True, [True, False, None]), + (False, [False, False, False]), + ], + ) + def test_kleene_and_scalar(self, other, expected): a = pd.array([True, False, None], dtype="boolean") - result = a & np.nan # TODO: pd.NA - expected = pd.array([None, None, None], dtype="boolean") + result = a & other + expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = np.nan & a # TODO: pd.na + result = other & a tm.assert_extension_array_equal(result, expected) def test_kleene_xor(self): @@ -477,13 +494,21 @@ def test_kleene_xor(self): result = b ^ a tm.assert_extension_array_equal(result, expected) - def test_kleene_scalar(self): + @pytest.mark.parametrize( + "other, expected", + [ + (np.nan, [None, None, None]), + (True, [False, True, None]), + (False, [True, False, None]), + ], + ) + def test_kleene_xor_scalar(self, other, expected): a = pd.array([True, False, None], dtype="boolean") - result = a ^ np.nan # TODO: pd.NA - expected = pd.array([None, None, None], dtype="boolean") + result = a ^ other + expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = np.nan ^ a # TODO: pd.NA + result = other ^ a tm.assert_extension_array_equal(result, expected) From 2e9d5473362c9460ef17349ccf2508c8f1d5dcc8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 08:38:45 -0600 Subject: [PATCH 05/30] updates --- pandas/core/arrays/boolean.py | 5 ++- pandas/tests/arrays/test_boolean.py | 67 +++++------------------------ 2 files changed, 13 insertions(+), 59 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index e833bdea385d8..86ddc80ddb0e4 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -575,10 +575,11 @@ def logical_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) - if len(self) != len(other): - raise ValueError("Lengths must match to compare") other, mask = coerce_to_array(other, copy=False) + if not lib.is_scalar(other) and len(self) != len(other): + raise ValueError("Lengths must match to compare") + if op.__name__ in {"or_", "ror_"}: result, mask = kleene_or(self._data, other, self._mask, mask) return BooleanArray(result, mask) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 5785eae6261f4..a026cdf1dbd2f 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -360,45 +360,19 @@ def get_op_from_name(self, op_name): return op - def _compare_other(self, data, op_name, other): - op = self.get_op_from_name(op_name) - - # array - result = pd.Series(op(data, other)) - expected = pd.Series(op(data._data, other), dtype="boolean") - - # fill the nan locations - expected[data._mask] = np.nan - - tm.assert_series_equal(result, expected) - - # series - s = pd.Series(data) - result = op(s, other) - - expected = pd.Series(data._data) - expected = op(expected, other) - expected = pd.Series(expected, dtype="boolean") - - # fill the nan locations - expected[data._mask] = np.nan + def test_logical_length_mismatch_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Lengths must match to compare" - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)([True, False]) - def test_scalar(self, data, all_logical_operators): - op_name = all_logical_operators - self._compare_other(data, op_name, True) + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(np.array([True, False])) - def test_array(self, data, all_logical_operators): - op_name = all_logical_operators - if "or" in op_name: - pytest.skip("confusing") - other = pd.array([True] * len(data), dtype="boolean") - self._compare_other(data, op_name, other) - other = np.array([True] * len(data)) - self._compare_other(data, op_name, other) - other = pd.Series([True] * len(data), dtype="boolean") - self._compare_other(data, op_name, other) + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(pd.array([True, False], dtype="boolean")) def test_kleene_or(self): # A clear test of behavior. @@ -431,27 +405,6 @@ def test_kleene_or_scalar(self, other, expected): result = other | a tm.assert_extension_array_equal(result, expected) - @pytest.mark.parametrize( - "left,right,expected", - [ - ([True, False, None], True, [True, True, True]), - ([True, False, None], False, [True, False, None]), - ([True, False, None], np.nan, [True, None, None]), - # TODO: pd.NA - ], - ) - def test_kleene_or_cases(self, left, right, expected): - if isinstance(left, list): - left = pd.array(left, dtype="boolean") - if isinstance(right, list): - right = pd.array(right, dtype="boolean") - expected = pd.array(expected, dtype="boolean") - result = left | right - tm.assert_extension_array_equal(result, expected) - - result = right | left - tm.assert_extension_array_equal(result, expected) - def test_kleene_and(self): # A clear test of behavior. a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") From 373aaabb157c5083b9ff406768910825320ed6c6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 08:55:15 -0600 Subject: [PATCH 06/30] updates --- pandas/core/arrays/boolean.py | 102 +++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 86ddc80ddb0e4..8996fcfaeca43 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,5 +1,5 @@ import numbers -from typing import TYPE_CHECKING, Type +from typing import TYPE_CHECKING, Optional, Type, Union import warnings import numpy as np @@ -565,6 +565,7 @@ def logical_method(self, other): assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} other = lib.item_from_zerodim(other) other_is_booleanarray = isinstance(other, BooleanArray) + other_is_scalar = lib.is_scalar(other) mask = None if other_is_booleanarray: @@ -577,7 +578,7 @@ def logical_method(self, other): ) other, mask = coerce_to_array(other, copy=False) - if not lib.is_scalar(other) and len(self) != len(other): + if not other_is_scalar and len(self) != len(other): raise ValueError("Lengths must match to compare") if op.__name__ in {"or_", "ror_"}: @@ -741,13 +742,38 @@ def boolean_arithmetic_method(self, other): return set_function_name(boolean_arithmetic_method, name, cls) -def kleene_or(left, right, left_mask, right_mask): +def kleene_or( + left: Union[bool, np.nan, np.ndarray], + right: Union[bool, np.nan, np.ndarary], + left_mask: Optional[np.ndarary], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``or`` using Kleene logic. + + Values are NA where we have ``NA | NA`` or ``NA | False``. + ``NA | True`` is considered True. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. When + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical or, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since because + # A | B == B | A if left_mask is None: return kleene_or(right, left, right_mask, left_mask) assert left_mask is not None - assert isinstance(left, np.ndarray) - assert isinstance(left_mask, np.ndarray) + right_is_scalar = right_mask is None mask = left_mask @@ -757,13 +783,13 @@ def kleene_or(left, right, left_mask, right_mask): mask = mask.copy() # handle scalars: - if lib.is_scalar(right) and np.isnan(right): + if right_is_scalar and np.isnan(right): # TODO(pd.NA): change to NA result = left.copy() mask = left_mask.copy() mask[~result] = True return result, mask - # XXX: this implicitly relies on masked values being False! + # XXX: verify that this doesn't assume masked values are False! result = left | right mask[result] = False @@ -771,21 +797,45 @@ def kleene_or(left, right, left_mask, right_mask): return result, mask -def kleene_xor(left, right, left_mask, right_mask): +def kleene_xor( + left: Union[bool, np.nan, np.ndarray], + right: Union[bool, np.nan, np.ndarary], + left_mask: Optional[np.ndarary], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``xor`` using Kleene logic. + + This is the same as ``or``, with the following adjustments + + * True, True -> False + * True, NA -> NA + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. When + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ if left_mask is None: return kleene_xor(right, left, right_mask, left_mask) + # Re-use or, and update with adustments. result, mask = kleene_or(left, right, left_mask, right_mask) - # - # if lib.is_scalar(right): - # if right is True: - # result[result] = False - # result[left & right] = False + # TODO(pd.NA): change to pd.NA if lib.is_scalar(right) and right is np.nan: + # True | NA == True + # True ^ NA == NA mask[result] = True else: - # assumes masked values are False + # XXX: verify that this doesn't assume masked values are False! result[left & right] = False mask[right & left_mask] = True if right_mask is not None: @@ -795,7 +845,29 @@ def kleene_xor(left, right, left_mask, right_mask): return result, mask -def kleene_and(left, right, left_mask, right_mask): +def kleene_and( + left: Union[bool, np.nan, np.ndarray], + right: Union[bool, np.nan, np.ndarary], + left_mask: Optional[np.ndarary], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``and`` using Kleene logic. + + Values are ``NA`` for ``NA & NA`` or ``True & NA``. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. When + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ if left_mask is None: return kleene_and(right, left, right_mask, left_mask) From 7f78a64faee7d9be2c88990f3f930fa89fae2d11 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 09:42:23 -0600 Subject: [PATCH 07/30] Raise for NaN --- pandas/core/arrays/boolean.py | 65 ++++++++++++++++------------- pandas/tests/arrays/test_boolean.py | 14 +++++-- 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 8996fcfaeca43..7c954c5925a1c 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -286,7 +286,7 @@ def __getitem__(self, item): def _coerce_to_ndarray(self, force_bool: bool = False): """ - Coerce to an ndarary of object dtype or bool dtype (if force_bool=True). + Coerce to an ndarray of object dtype or bool dtype (if force_bool=True). Parameters ---------- @@ -743,9 +743,9 @@ def boolean_arithmetic_method(self, other): def kleene_or( - left: Union[bool, np.nan, np.ndarray], - right: Union[bool, np.nan, np.ndarary], - left_mask: Optional[np.ndarary], + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], right_mask: Optional[np.ndarray], ): """ @@ -772,8 +772,7 @@ def kleene_or( if left_mask is None: return kleene_or(right, left, right_mask, left_mask) - assert left_mask is not None - right_is_scalar = right_mask is None + raise_for_nan(right, method="or") mask = left_mask @@ -783,11 +782,11 @@ def kleene_or( mask = mask.copy() # handle scalars: - if right_is_scalar and np.isnan(right): # TODO(pd.NA): change to NA - result = left.copy() - mask = left_mask.copy() - mask[~result] = True - return result, mask + # if right_is_scalar and right is libmissing.NA: + # result = left.copy() + # mask = left_mask.copy() + # mask[~result] = True + # return result, mask # XXX: verify that this doesn't assume masked values are False! result = left | right @@ -798,9 +797,9 @@ def kleene_or( def kleene_xor( - left: Union[bool, np.nan, np.ndarray], - right: Union[bool, np.nan, np.ndarary], - left_mask: Optional[np.ndarary], + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], right_mask: Optional[np.ndarray], ): """ @@ -826,29 +825,30 @@ def kleene_xor( if left_mask is None: return kleene_xor(right, left, right_mask, left_mask) - # Re-use or, and update with adustments. + raise_for_nan(right, method="xor") + # Re-use or, and update with adjustments. result, mask = kleene_or(left, right, left_mask, right_mask) - # TODO(pd.NA): change to pd.NA - if lib.is_scalar(right) and right is np.nan: - # True | NA == True - # True ^ NA == NA - mask[result] = True - else: - # XXX: verify that this doesn't assume masked values are False! - result[left & right] = False - mask[right & left_mask] = True - if right_mask is not None: - mask[left & right_mask] = True + # # TODO(pd.NA): change to pd.NA + # if lib.is_scalar(right) and right is libmissing.NA: + # # True | NA == True + # # True ^ NA == NA + # mask[result] = True + + # XXX: verify that this doesn't assume masked values are False! + result[left & right] = False + mask[right & left_mask] = True + if right_mask is not None: + mask[left & right_mask] = True result[mask] = False return result, mask def kleene_and( - left: Union[bool, np.nan, np.ndarray], - right: Union[bool, np.nan, np.ndarary], - left_mask: Optional[np.ndarary], + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], right_mask: Optional[np.ndarray], ): """ @@ -871,6 +871,7 @@ def kleene_and( if left_mask is None: return kleene_and(right, left, right_mask, left_mask) + raise_for_nan(right, method="and") mask = left_mask if right_mask is not None: @@ -882,6 +883,7 @@ def kleene_and( result = left.copy() mask = left_mask.copy() if np.isnan(right): + # TODO(pd.NA): change to NA mask[result] = True else: result = result & right # already copied. @@ -898,6 +900,11 @@ def kleene_and( return result, mask +def raise_for_nan(value, method): + if lib.is_scalar(value) and isinstance(value, float) and np.isnan(value): + raise ValueError(f"Cannot perform logical '{method}' with NaN") + + BooleanArray._add_logical_ops() BooleanArray._add_comparison_ops() BooleanArray._add_arithmetic_ops() diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index a026cdf1dbd2f..5d0375045aa29 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -374,6 +374,14 @@ def test_logical_length_mismatch_raises(self, all_logical_operators): with pytest.raises(ValueError, match=msg): getattr(a, op_name)(pd.array([True, False], dtype="boolean")) + def test_logical_nan_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "" + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(np.nan) + def test_kleene_or(self): # A clear test of behavior. a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") @@ -390,7 +398,7 @@ def test_kleene_or(self): @pytest.mark.parametrize( "other, expected", [ - (np.nan, [True, None, None]), + # (pd.NA, [True, None, None]), (True, [True, True, True]), (False, [True, False, None]), ], @@ -421,7 +429,7 @@ def test_kleene_and(self): @pytest.mark.parametrize( "other, expected", [ - (np.nan, [None, False, None]), + # (pd.NA, [None, False, None]), (True, [True, False, None]), (False, [False, False, False]), ], @@ -450,7 +458,7 @@ def test_kleene_xor(self): @pytest.mark.parametrize( "other, expected", [ - (np.nan, [None, None, None]), + # (pd.NA, [None, None, None]), (True, [False, True, None]), (False, [True, False, None]), ], From 36b171b2ff1f01563367c13313572d257d7d67fc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 09:47:06 -0600 Subject: [PATCH 08/30] added tests for empty --- pandas/tests/arrays/test_boolean.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 5d0375045aa29..ffbe8641b6c14 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -360,6 +360,19 @@ def get_op_from_name(self, op_name): return op + def test_empty_ok(self, all_logical_operators): + a = pd.array([], dtype="boolean") + op_name = all_logical_operators + result = getattr(a, op_name)(True) + tm.assert_extension_array_equal(a, result) + + result = getattr(a, op_name)(False) + tm.assert_extension_array_equal(a, result) + + # TODO: pd.NA + # result = getattr(a, op_name)(pd.NA) + # tm.assert_extension_array_equal(a, result) + def test_logical_length_mismatch_raises(self, all_logical_operators): op_name = all_logical_operators a = pd.array([True, False, None], dtype="boolean") From 747e046ed54866498260f5922fdbc44eb0e4a85f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 09:51:30 -0600 Subject: [PATCH 09/30] added tests for inplace mutation --- pandas/tests/arrays/test_boolean.py | 39 +++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index ffbe8641b6c14..772c805fa7f78 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -408,6 +408,14 @@ def test_kleene_or(self): result = b | a tm.assert_extension_array_equal(result, expected) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + @pytest.mark.parametrize( "other, expected", [ @@ -426,6 +434,11 @@ def test_kleene_or_scalar(self, other, expected): result = other | a tm.assert_extension_array_equal(result, expected) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + def test_kleene_and(self): # A clear test of behavior. a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") @@ -439,6 +452,14 @@ def test_kleene_and(self): result = b & a tm.assert_extension_array_equal(result, expected) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + @pytest.mark.parametrize( "other, expected", [ @@ -456,6 +477,11 @@ def test_kleene_and_scalar(self, other, expected): result = other & a tm.assert_extension_array_equal(result, expected) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + def test_kleene_xor(self): a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") b = pd.array([True, False, None] * 3, dtype="boolean") @@ -468,6 +494,14 @@ def test_kleene_xor(self): result = b ^ a tm.assert_extension_array_equal(result, expected) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + @pytest.mark.parametrize( "other, expected", [ @@ -485,6 +519,11 @@ def test_kleene_xor_scalar(self, other, expected): result = other ^ a tm.assert_extension_array_equal(result, expected) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + class TestComparisonOps(BaseOpsUtil): def _compare_other(self, data, op_name, other): From d0a8cca3d8499e62c5488612a527756e7ad9057e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 10:14:02 -0600 Subject: [PATCH 10/30] Do not assume masked values are False --- pandas/core/arrays/boolean.py | 8 +++++--- pandas/tests/arrays/test_boolean.py | 31 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 7c954c5925a1c..401b1a9a88230 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -788,9 +788,12 @@ def kleene_or( # mask[~result] = True # return result, mask - # XXX: verify that this doesn't assume masked values are False! result = left | right - mask[result] = False + mask[left & ~left_mask] = False + if right_mask is not None: + mask[right & ~right_mask] = False + elif right is True: + mask[:] = False # update return result, mask @@ -835,7 +838,6 @@ def kleene_xor( # # True ^ NA == NA # mask[result] = True - # XXX: verify that this doesn't assume masked values are False! result[left & right] = False mask[right & left_mask] = True if right_mask is not None: diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 772c805fa7f78..7a13b9cfa06e5 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -524,6 +524,37 @@ def test_kleene_xor_scalar(self, other, expected): a, pd.array([True, False, None], dtype="boolean") ) + @pytest.mark.parametrize( + "other", + [ + True, + False, + # pd.NA + [True, False, None] * 3, + ], + ) + def test_no_masked_assumptions(self, other, all_logical_operators): + # The logical operations should not assume that masked values are False! + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = a.copy() + if isinstance(other, list): + other = pd.array(other, dtype="boolean") + + # mutate the data inplace + a._data[a._mask] = True + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + if isinstance(other, BooleanArray): + other._data[other._mask] = True + a._data[a._mask] = False + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + class TestComparisonOps(BaseOpsUtil): def _compare_other(self, data, op_name, other): From 9f9e44ccdd8a6fc67d5f179f5565ccb9fd8e7d88 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 11:02:38 -0600 Subject: [PATCH 11/30] mypy --- pandas/core/arrays/boolean.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 401b1a9a88230..ee939dd9a2e1f 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -873,6 +873,7 @@ def kleene_and( if left_mask is None: return kleene_and(right, left, right_mask, left_mask) + assert isinstance(left, np.ndarray) raise_for_nan(right, method="and") mask = left_mask From 0a34257c5b76d63ad3f4fe34223727e8dac1c155 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 11:05:19 -0600 Subject: [PATCH 12/30] doc fixups --- doc/source/user_guide/boolean.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index 0acdcd442e231..f8fef9239d6ac 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -16,8 +16,8 @@ Nullable Boolean Data Type .. _boolean.klean: -Kleene Logic ------------- +Kleene Logical Operations +------------------------- :class:`arrays.BooleanArray` implements Kleene logic (sometimes called three-value logic) for logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). @@ -49,7 +49,7 @@ Expression Result ================= ========= When an ``NA`` is present in an operation, the output value is ``NA`` only if -the result cannot be determined soley based on the other input. For example, +the result cannot be determined solely based on the other input. For example, ``True | NA`` is ``True``, because both ``True | True`` and ``True | False`` are ``True``. In that case, we don't actually need to consider the value of the ``NA``. From 2ba00345c7dcf724a9f7951e6d6efcbf6b149ff1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 13:00:39 -0600 Subject: [PATCH 13/30] Added benchmarks --- asv_bench/benchmarks/boolean.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 asv_bench/benchmarks/boolean.py diff --git a/asv_bench/benchmarks/boolean.py b/asv_bench/benchmarks/boolean.py new file mode 100644 index 0000000000000..71c422c641775 --- /dev/null +++ b/asv_bench/benchmarks/boolean.py @@ -0,0 +1,32 @@ +import numpy as np + +import pandas as pd + + +class TimeLogicalOps: + def setup(self): + N = 10_000 + left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool") + self.left = pd.arrays.BooleanArray(left, lmask) + self.right = pd.arrays.BooleanArray(right, rmask) + + def time_or_scalar(self): + self.left | True + self.left | False + + def time_or_array(self): + self.left | self.right + + def time_and_scalar(self): + self.left & True + self.left & False + + def time_and_array(self): + self.left & self.right + + def time_xor_scalar(self): + self.left ^ True + self.left ^ False + + def time_xor_array(self): + self.left ^ self.right From 2d1129a222d9afad23700ac7a6f296afeffcad8d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 13:03:34 -0600 Subject: [PATCH 14/30] update tests --- doc/source/user_guide/boolean.rst | 5 +++- pandas/core/arrays/boolean.py | 45 ++++++++++------------------- pandas/tests/arrays/test_boolean.py | 10 +++---- 3 files changed, 25 insertions(+), 35 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index f8fef9239d6ac..855b3b20ae762 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -19,7 +19,7 @@ Nullable Boolean Data Type Kleene Logical Operations ------------------------- -:class:`arrays.BooleanArray` implements Kleene logic (sometimes called three-value logic) for +:class:`arrays.BooleanArray` implements `Kleene Logic`_ (sometimes called three-value logic) for logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). This table demonstrates the results for every combination. These operations are symmetrical, @@ -75,3 +75,6 @@ In ``and`` pd.Series([True, False, np.nan], dtype="object") & True pd.Series([True, False, np.nan], dtype="boolean") & True + + +.. _Kleene Logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index ee939dd9a2e1f..6ef3c23bdf477 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -583,13 +583,12 @@ def logical_method(self, other): if op.__name__ in {"or_", "ror_"}: result, mask = kleene_or(self._data, other, self._mask, mask) - return BooleanArray(result, mask) elif op.__name__ in {"and_", "rand_"}: result, mask = kleene_and(self._data, other, self._mask, mask) - return BooleanArray(result, mask) elif op.__name__ in {"xor", "rxor"}: result, mask = kleene_xor(self._data, other, self._mask, mask) - return BooleanArray(result, mask) + + return BooleanArray(result, mask) name = "__{name}__".format(name=op.__name__) return set_function_name(logical_method, name, cls) @@ -781,13 +780,6 @@ def kleene_or( else: mask = mask.copy() - # handle scalars: - # if right_is_scalar and right is libmissing.NA: - # result = left.copy() - # mask = left_mask.copy() - # mask[~result] = True - # return result, mask - result = left | right mask[left & ~left_mask] = False if right_mask is not None: @@ -795,7 +787,6 @@ def kleene_or( elif right is True: mask[:] = False - # update return result, mask @@ -870,34 +861,30 @@ def kleene_and( result, mask: ndarray[bool] The result of the logical xor, and the new mask. """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since because + # A | B == B | A if left_mask is None: return kleene_and(right, left, right_mask, left_mask) assert isinstance(left, np.ndarray) raise_for_nan(right, method="and") - mask = left_mask - if right_mask is not None: - mask = mask | right_mask - else: - mask = mask.copy() + if right_mask is None: + # Scalar `right` + # TODO(pd.NA): handle NA here. + result = left & right - if lib.is_scalar(right): - result = left.copy() mask = left_mask.copy() - if np.isnan(right): - # TODO(pd.NA): change to NA - mask[result] = True - else: - result = result & right # already copied. - if right is False: - # unmask everything - mask[:] = False + if right is False: + # unmask everything + mask[:] = False else: result = left & right # unmask where either left or right is False - mask[~left & ~left_mask] = False - mask[~right & ~right_mask] = False + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = (left_mask & ~right_false) | (right_mask & ~left_false) result[mask] = False return result, mask @@ -905,7 +892,7 @@ def kleene_and( def raise_for_nan(value, method): if lib.is_scalar(value) and isinstance(value, float) and np.isnan(value): - raise ValueError(f"Cannot perform logical '{method}' with NaN") + raise ValueError(f"Cannot perform logical '{method}' with floating NaN") BooleanArray._add_logical_ops() diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 7a13b9cfa06e5..52756ca242781 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -535,14 +535,14 @@ def test_kleene_xor_scalar(self, other, expected): ) def test_no_masked_assumptions(self, other, all_logical_operators): # The logical operations should not assume that masked values are False! - a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") - b = a.copy() + a = pd.arrays.BooleanArray( + np.array([True, True, True, False, False, False, True, False, True]), + np.array([False] * 6 + [True, True, True]), + ) + b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") if isinstance(other, list): other = pd.array(other, dtype="boolean") - # mutate the data inplace - a._data[a._mask] = True - result = getattr(a, all_logical_operators)(other) expected = getattr(b, all_logical_operators)(other) tm.assert_extension_array_equal(result, expected) From 77dd1fc212605c3f603247539b570a23677a56a0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 13:55:19 -0600 Subject: [PATCH 15/30] remove unneded setitem --- pandas/core/arrays/boolean.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 6ef3c23bdf477..61cefc1e30ae1 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -834,7 +834,6 @@ def kleene_xor( if right_mask is not None: mask[left & right_mask] = True - result[mask] = False return result, mask @@ -886,7 +885,6 @@ def kleene_and( right_false = ~(right | right_mask) mask = (left_mask & ~right_false) | (right_mask & ~left_false) - result[mask] = False return result, mask From 7b9002cf52e93ba49bb425da6abf09b3fb0cf529 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 15:34:06 -0600 Subject: [PATCH 16/30] optimize --- pandas/core/arrays/boolean.py | 36 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 61cefc1e30ae1..6f331a22d0250 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -773,19 +773,20 @@ def kleene_or( raise_for_nan(right, method="or") - mask = left_mask + result = left | right if right_mask is not None: - mask = mask | right_mask + # output is unknown where (False & NA), (NA & False), (NA & NA) + mask = ( + ((~left & ~left_mask) & right_mask) + | ((~right & ~right_mask) & left_mask) # F & NA + | (left_mask & right_mask) # NA & F # NA & NA + ) else: - mask = mask.copy() - - result = left | right - mask[left & ~left_mask] = False - if right_mask is not None: - mask[right & ~right_mask] = False - elif right is True: - mask[:] = False + if right is True: + mask = np.zeros_like(left_mask) + else: # mask is False + mask = left_mask.copy() return result, mask @@ -822,17 +823,14 @@ def kleene_xor( raise_for_nan(right, method="xor") # Re-use or, and update with adjustments. result, mask = kleene_or(left, right, left_mask, right_mask) + result[left & right] = False - # # TODO(pd.NA): change to pd.NA - # if lib.is_scalar(right) and right is libmissing.NA: - # # True | NA == True - # # True ^ NA == NA - # mask[result] = True + if right_mask is None: + mask = left_mask.copy() - result[left & right] = False - mask[right & left_mask] = True - if right_mask is not None: - mask[left & right_mask] = True + else: + # breakpoint() + mask = left_mask | right_mask return result, mask From c18046b57013f5bf971a1ca5e4c50e897868da3a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 15:35:26 -0600 Subject: [PATCH 17/30] comments --- pandas/core/arrays/boolean.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 6f331a22d0250..79f3eddc5c34b 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -779,8 +779,8 @@ def kleene_or( # output is unknown where (False & NA), (NA & False), (NA & NA) mask = ( ((~left & ~left_mask) & right_mask) - | ((~right & ~right_mask) & left_mask) # F & NA - | (left_mask & right_mask) # NA & F # NA & NA + | ((~right & ~right_mask) & left_mask) + | (left_mask & right_mask) ) else: if right is True: @@ -829,7 +829,6 @@ def kleene_xor( mask = left_mask.copy() else: - # breakpoint() mask = left_mask | right_mask return result, mask From 1237caaa1eee0e5167ca66a81343b1a9b98750c5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Nov 2019 15:38:18 -0600 Subject: [PATCH 18/30] just do the xor --- pandas/core/arrays/boolean.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 79f3eddc5c34b..98593dc4704c0 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -822,8 +822,7 @@ def kleene_xor( raise_for_nan(right, method="xor") # Re-use or, and update with adjustments. - result, mask = kleene_or(left, right, left_mask, right_mask) - result[left & right] = False + result = left ^ right if right_mask is None: mask = left_mask.copy() From 87aeb09808a10a7f3d5e3f9bb6df1485a608aa54 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 2 Dec 2019 06:28:32 -0600 Subject: [PATCH 19/30] fixup docstring --- doc/source/user_guide/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst index b86961a71433b..30b1c0b4eac0d 100644 --- a/doc/source/user_guide/index.rst +++ b/doc/source/user_guide/index.rst @@ -30,6 +30,7 @@ Further information on any specific method can be obtained in the missing_data categorical integer_na + boolean visualization computation groupby From 969b6dc7cc287bdc3c64ec8f32db98e44886da14 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 2 Dec 2019 06:28:58 -0600 Subject: [PATCH 20/30] fix label --- doc/source/user_guide/boolean.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index 855b3b20ae762..9931a435e40ca 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -14,7 +14,7 @@ Nullable Boolean Data Type .. versionadded:: 1.0.0 -.. _boolean.klean: +.. _boolean.kleene: Kleene Logical Operations ------------------------- From 1c9ba495adb62d7836745cb59a949b02ad394e0e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 2 Dec 2019 06:30:33 -0600 Subject: [PATCH 21/30] PERF: faster or --- pandas/core/arrays/boolean.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 98593dc4704c0..b0e85fa7d0d6d 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -777,9 +777,11 @@ def kleene_or( if right_mask is not None: # output is unknown where (False & NA), (NA & False), (NA & NA) + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) mask = ( - ((~left & ~left_mask) & right_mask) - | ((~right & ~right_mask) & left_mask) + (left_false & right_mask) + | (right_false & left_mask) | (left_mask & right_mask) ) else: From cb47b6a11a27a9fa20c9bb63967db606f464560a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 4 Dec 2019 14:01:40 -0600 Subject: [PATCH 22/30] handle pd.NA --- pandas/core/arrays/boolean.py | 41 ++++++++++++++++++++--------- pandas/tests/arrays/test_boolean.py | 6 ++--- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index f204493c587c3..1416a430680fa 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -782,7 +782,10 @@ def kleene_or( raise_for_nan(right, method="or") - result = left | right + if right is libmissing.NA: + result = left.copy() + else: + result = left | right if right_mask is not None: # output is unknown where (False & NA), (NA & False), (NA & NA) @@ -796,7 +799,10 @@ def kleene_or( else: if right is True: mask = np.zeros_like(left_mask) - else: # mask is False + elif right is libmissing.NA: + mask = (~left & ~left_mask) | left_mask + else: + # False mask = left_mask.copy() return result, mask @@ -832,12 +838,16 @@ def kleene_xor( return kleene_xor(right, left, right_mask, left_mask) raise_for_nan(right, method="xor") - # Re-use or, and update with adjustments. - result = left ^ right + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left ^ right if right_mask is None: - mask = left_mask.copy() - + if right is libmissing.NA: + mask = np.ones_like(left_mask) + else: + mask = left_mask.copy() else: mask = left_mask | right_mask @@ -876,17 +886,22 @@ def kleene_and( assert isinstance(left, np.ndarray) raise_for_nan(right, method="and") + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left & right + if right_mask is None: # Scalar `right` - # TODO(pd.NA): handle NA here. - result = left & right + if right is libmissing.NA: + mask = (left & ~left_mask) | left_mask - mask = left_mask.copy() - if right is False: - # unmask everything - mask[:] = False + else: + mask = left_mask.copy() + if right is False: + # unmask everything + mask[:] = False else: - result = left & right # unmask where either left or right is False left_false = ~(left | left_mask) right_false = ~(right | right_mask) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index b4f401825cab7..c48534d8bc4dc 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -427,7 +427,7 @@ def test_kleene_or(self): @pytest.mark.parametrize( "other, expected", [ - # (pd.NA, [True, None, None]), + (pd.NA, [True, None, None]), (True, [True, True, True]), (False, [True, False, None]), ], @@ -471,7 +471,7 @@ def test_kleene_and(self): @pytest.mark.parametrize( "other, expected", [ - # (pd.NA, [None, False, None]), + (pd.NA, [None, False, None]), (True, [True, False, None]), (False, [False, False, False]), ], @@ -513,7 +513,7 @@ def test_kleene_xor(self): @pytest.mark.parametrize( "other, expected", [ - # (pd.NA, [None, None, None]), + (pd.NA, [None, None, None]), (True, [False, True, None]), (False, [True, False, None]), ], From 2a946b9f7d570cb27b00599db9e8bd7cc60f4a3d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 4 Dec 2019 14:09:36 -0600 Subject: [PATCH 23/30] validate --- pandas/core/arrays/boolean.py | 7 +++++++ pandas/tests/arrays/test_boolean.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 1416a430680fa..2b5fc17909429 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -584,6 +584,13 @@ def logical_method(self, other): ) other, mask = coerce_to_array(other, copy=False) + if other_is_scalar and not isinstance(other, (type(libmissing.NA), bool)): + raise TypeError( + "'other' should be pandas.NA or a bool. Got {} instead".format( + type(other).__name__ + ) + ) + if not other_is_scalar and len(self) != len(other): raise ValueError("Lengths must match to compare") diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index c48534d8bc4dc..8ee496523c1fc 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -356,6 +356,13 @@ def test_ufunc_reduce_raises(values): class TestLogicalOps(BaseOpsUtil): + def test_numpy_scalars_ok(self, all_logical_operators): + a = pd.array([True, False, None], dtype="boolean") + op = getattr(a, all_logical_operators) + + tm.assert_extension_array_equal(op(True), op(np.bool(True))) + tm.assert_extension_array_equal(op(False), op(np.bool(False))) + def get_op_from_name(self, op_name): short_opname = op_name.strip("_") short_opname = short_opname if "xor" in short_opname else short_opname + "_" @@ -403,6 +410,12 @@ def test_logical_nan_raises(self, all_logical_operators): with pytest.raises(ValueError, match=msg): getattr(a, op_name)(np.nan) + @pytest.mark.parametrize("other", ["a", 1]) + def test_non_bool_or_na_other_raises(self, other, all_logical_operators): + a = pd.array([True, False], dtype="boolean") + with pytest.raises(TypeError, match=str(type(other).__name__)): + getattr(a, all_logical_operators)(other) + def test_kleene_or(self): # A clear test of behavior. a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") From efb6f8bbf575579dc21e6038f8c3d2d7a61ed07d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 4 Dec 2019 15:03:34 -0600 Subject: [PATCH 24/30] please mypy --- pandas/core/arrays/boolean.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 2b5fc17909429..c3904a3385b69 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -787,6 +787,8 @@ def kleene_or( if left_mask is None: return kleene_or(right, left, right_mask, left_mask) + assert isinstance(left, np.ndarray) + raise_for_nan(right, method="or") if right is libmissing.NA: From 004238e3a7658f01bbf3d847e12d4cece8f48128 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 4 Dec 2019 15:06:26 -0600 Subject: [PATCH 25/30] move to nanops --- pandas/core/arrays/boolean.py | 175 +--------------------------------- pandas/core/nanops.py | 169 +++++++++++++++++++++++++++++++- 2 files changed, 172 insertions(+), 172 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index c3904a3385b69..3e1dd8c092eb6 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,5 +1,5 @@ import numbers -from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, Union +from typing import TYPE_CHECKING, Any, Tuple, Type import warnings import numpy as np @@ -595,11 +595,11 @@ def logical_method(self, other): raise ValueError("Lengths must match to compare") if op.__name__ in {"or_", "ror_"}: - result, mask = kleene_or(self._data, other, self._mask, mask) + result, mask = nanops.kleene_or(self._data, other, self._mask, mask) elif op.__name__ in {"and_", "rand_"}: - result, mask = kleene_and(self._data, other, self._mask, mask) + result, mask = nanops.kleene_and(self._data, other, self._mask, mask) elif op.__name__ in {"xor", "rxor"}: - result, mask = kleene_xor(self._data, other, self._mask, mask) + result, mask = nanops.kleene_xor(self._data, other, self._mask, mask) return BooleanArray(result, mask) @@ -757,173 +757,6 @@ def boolean_arithmetic_method(self, other): return set_function_name(boolean_arithmetic_method, name, cls) -def kleene_or( - left: Union[bool, np.ndarray], - right: Union[bool, np.ndarray], - left_mask: Optional[np.ndarray], - right_mask: Optional[np.ndarray], -): - """ - Boolean ``or`` using Kleene logic. - - Values are NA where we have ``NA | NA`` or ``NA | False``. - ``NA | True`` is considered True. - - Parameters - ---------- - left, right : ndarray, NA, or bool - The values of the array. - left_mask, right_mask : ndarray, optional - The masks. When - - Returns - ------- - result, mask: ndarray[bool] - The result of the logical or, and the new mask. - """ - # To reduce the number of cases, we ensure that `left` & `left_mask` - # always come from an array, not a scalar. This is safe, since because - # A | B == B | A - if left_mask is None: - return kleene_or(right, left, right_mask, left_mask) - - assert isinstance(left, np.ndarray) - - raise_for_nan(right, method="or") - - if right is libmissing.NA: - result = left.copy() - else: - result = left | right - - if right_mask is not None: - # output is unknown where (False & NA), (NA & False), (NA & NA) - left_false = ~(left | left_mask) - right_false = ~(right | right_mask) - mask = ( - (left_false & right_mask) - | (right_false & left_mask) - | (left_mask & right_mask) - ) - else: - if right is True: - mask = np.zeros_like(left_mask) - elif right is libmissing.NA: - mask = (~left & ~left_mask) | left_mask - else: - # False - mask = left_mask.copy() - - return result, mask - - -def kleene_xor( - left: Union[bool, np.ndarray], - right: Union[bool, np.ndarray], - left_mask: Optional[np.ndarray], - right_mask: Optional[np.ndarray], -): - """ - Boolean ``xor`` using Kleene logic. - - This is the same as ``or``, with the following adjustments - - * True, True -> False - * True, NA -> NA - - Parameters - ---------- - left, right : ndarray, NA, or bool - The values of the array. - left_mask, right_mask : ndarray, optional - The masks. When - - Returns - ------- - result, mask: ndarray[bool] - The result of the logical xor, and the new mask. - """ - if left_mask is None: - return kleene_xor(right, left, right_mask, left_mask) - - raise_for_nan(right, method="xor") - if right is libmissing.NA: - result = np.zeros_like(left) - else: - result = left ^ right - - if right_mask is None: - if right is libmissing.NA: - mask = np.ones_like(left_mask) - else: - mask = left_mask.copy() - else: - mask = left_mask | right_mask - - return result, mask - - -def kleene_and( - left: Union[bool, np.ndarray], - right: Union[bool, np.ndarray], - left_mask: Optional[np.ndarray], - right_mask: Optional[np.ndarray], -): - """ - Boolean ``and`` using Kleene logic. - - Values are ``NA`` for ``NA & NA`` or ``True & NA``. - - Parameters - ---------- - left, right : ndarray, NA, or bool - The values of the array. - left_mask, right_mask : ndarray, optional - The masks. When - - Returns - ------- - result, mask: ndarray[bool] - The result of the logical xor, and the new mask. - """ - # To reduce the number of cases, we ensure that `left` & `left_mask` - # always come from an array, not a scalar. This is safe, since because - # A | B == B | A - if left_mask is None: - return kleene_and(right, left, right_mask, left_mask) - - assert isinstance(left, np.ndarray) - raise_for_nan(right, method="and") - - if right is libmissing.NA: - result = np.zeros_like(left) - else: - result = left & right - - if right_mask is None: - # Scalar `right` - if right is libmissing.NA: - mask = (left & ~left_mask) | left_mask - - else: - mask = left_mask.copy() - if right is False: - # unmask everything - mask[:] = False - else: - # unmask where either left or right is False - left_false = ~(left | left_mask) - right_false = ~(right | right_mask) - mask = (left_mask & ~right_false) | (right_mask & ~left_false) - - return result, mask - - -def raise_for_nan(value, method): - if lib.is_scalar(value) and isinstance(value, float) and np.isnan(value): - raise ValueError(f"Cannot perform logical '{method}' with floating NaN") - - BooleanArray._add_logical_ops() BooleanArray._add_comparison_ops() BooleanArray._add_arithmetic_ops() diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index a2a40bbf93604..71c3bbcb35cb3 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -7,7 +7,7 @@ from pandas._config import get_option -from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib +from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib, missing as libmissing from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -1417,3 +1417,170 @@ def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): return result else: return np.percentile(values, q, axis=axis, interpolation=interpolation) + + +def kleene_or( + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``or`` using Kleene logic. + + Values are NA where we have ``NA | NA`` or ``NA | False``. + ``NA | True`` is considered True. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. When + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical or, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since because + # A | B == B | A + if left_mask is None: + return kleene_or(right, left, right_mask, left_mask) + + assert isinstance(left, np.ndarray) + + raise_for_nan(right, method="or") + + if right is libmissing.NA: + result = left.copy() + else: + result = left | right + + if right_mask is not None: + # output is unknown where (False & NA), (NA & False), (NA & NA) + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = ( + (left_false & right_mask) + | (right_false & left_mask) + | (left_mask & right_mask) + ) + else: + if right is True: + mask = np.zeros_like(left_mask) + elif right is libmissing.NA: + mask = (~left & ~left_mask) | left_mask + else: + # False + mask = left_mask.copy() + + return result, mask + + +def kleene_xor( + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``xor`` using Kleene logic. + + This is the same as ``or``, with the following adjustments + + * True, True -> False + * True, NA -> NA + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. When + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + if left_mask is None: + return kleene_xor(right, left, right_mask, left_mask) + + raise_for_nan(right, method="xor") + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left ^ right + + if right_mask is None: + if right is libmissing.NA: + mask = np.ones_like(left_mask) + else: + mask = left_mask.copy() + else: + mask = left_mask | right_mask + + return result, mask + + +def kleene_and( + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``and`` using Kleene logic. + + Values are ``NA`` for ``NA & NA`` or ``True & NA``. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. When + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since because + # A | B == B | A + if left_mask is None: + return kleene_and(right, left, right_mask, left_mask) + + assert isinstance(left, np.ndarray) + raise_for_nan(right, method="and") + + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left & right + + if right_mask is None: + # Scalar `right` + if right is libmissing.NA: + mask = (left & ~left_mask) | left_mask + + else: + mask = left_mask.copy() + if right is False: + # unmask everything + mask[:] = False + else: + # unmask where either left or right is False + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = (left_mask & ~right_false) | (right_mask & ~left_false) + + return result, mask + + +def raise_for_nan(value, method): + if lib.is_scalar(value) and isinstance(value, float) and np.isnan(value): + raise ValueError(f"Cannot perform logical '{method}' with floating NaN") From 70323181de1658dc34f13a9f80c8e273e252b60b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 08:15:15 -0600 Subject: [PATCH 26/30] move --- pandas/core/nanops.py | 169 +--------------------------------- pandas/core/ops/__init__.py | 1 + pandas/core/ops/mask_ops.py | 178 ++++++++++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 168 deletions(-) create mode 100644 pandas/core/ops/mask_ops.py diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 9dc661a9a6b5f..9e436ec4c6c13 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -7,7 +7,7 @@ from pandas._config import get_option -from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib, missing as libmissing +from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -1416,170 +1416,3 @@ def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): return result else: return np.percentile(values, q, axis=axis, interpolation=interpolation) - - -def kleene_or( - left: Union[bool, np.ndarray], - right: Union[bool, np.ndarray], - left_mask: Optional[np.ndarray], - right_mask: Optional[np.ndarray], -): - """ - Boolean ``or`` using Kleene logic. - - Values are NA where we have ``NA | NA`` or ``NA | False``. - ``NA | True`` is considered True. - - Parameters - ---------- - left, right : ndarray, NA, or bool - The values of the array. - left_mask, right_mask : ndarray, optional - The masks. When - - Returns - ------- - result, mask: ndarray[bool] - The result of the logical or, and the new mask. - """ - # To reduce the number of cases, we ensure that `left` & `left_mask` - # always come from an array, not a scalar. This is safe, since because - # A | B == B | A - if left_mask is None: - return kleene_or(right, left, right_mask, left_mask) - - assert isinstance(left, np.ndarray) - - raise_for_nan(right, method="or") - - if right is libmissing.NA: - result = left.copy() - else: - result = left | right - - if right_mask is not None: - # output is unknown where (False & NA), (NA & False), (NA & NA) - left_false = ~(left | left_mask) - right_false = ~(right | right_mask) - mask = ( - (left_false & right_mask) - | (right_false & left_mask) - | (left_mask & right_mask) - ) - else: - if right is True: - mask = np.zeros_like(left_mask) - elif right is libmissing.NA: - mask = (~left & ~left_mask) | left_mask - else: - # False - mask = left_mask.copy() - - return result, mask - - -def kleene_xor( - left: Union[bool, np.ndarray], - right: Union[bool, np.ndarray], - left_mask: Optional[np.ndarray], - right_mask: Optional[np.ndarray], -): - """ - Boolean ``xor`` using Kleene logic. - - This is the same as ``or``, with the following adjustments - - * True, True -> False - * True, NA -> NA - - Parameters - ---------- - left, right : ndarray, NA, or bool - The values of the array. - left_mask, right_mask : ndarray, optional - The masks. When - - Returns - ------- - result, mask: ndarray[bool] - The result of the logical xor, and the new mask. - """ - if left_mask is None: - return kleene_xor(right, left, right_mask, left_mask) - - raise_for_nan(right, method="xor") - if right is libmissing.NA: - result = np.zeros_like(left) - else: - result = left ^ right - - if right_mask is None: - if right is libmissing.NA: - mask = np.ones_like(left_mask) - else: - mask = left_mask.copy() - else: - mask = left_mask | right_mask - - return result, mask - - -def kleene_and( - left: Union[bool, np.ndarray], - right: Union[bool, np.ndarray], - left_mask: Optional[np.ndarray], - right_mask: Optional[np.ndarray], -): - """ - Boolean ``and`` using Kleene logic. - - Values are ``NA`` for ``NA & NA`` or ``True & NA``. - - Parameters - ---------- - left, right : ndarray, NA, or bool - The values of the array. - left_mask, right_mask : ndarray, optional - The masks. When - - Returns - ------- - result, mask: ndarray[bool] - The result of the logical xor, and the new mask. - """ - # To reduce the number of cases, we ensure that `left` & `left_mask` - # always come from an array, not a scalar. This is safe, since because - # A | B == B | A - if left_mask is None: - return kleene_and(right, left, right_mask, left_mask) - - assert isinstance(left, np.ndarray) - raise_for_nan(right, method="and") - - if right is libmissing.NA: - result = np.zeros_like(left) - else: - result = left & right - - if right_mask is None: - # Scalar `right` - if right is libmissing.NA: - mask = (left & ~left_mask) | left_mask - - else: - mask = left_mask.copy() - if right is False: - # unmask everything - mask[:] = False - else: - # unmask where either left or right is False - left_false = ~(left | left_mask) - right_false = ~(right | right_mask) - mask = (left_mask & ~right_false) | (right_mask & ~left_false) - - return result, mask - - -def raise_for_nan(value, method): - if lib.is_scalar(value) and isinstance(value, float) and np.isnan(value): - raise ValueError(f"Cannot perform logical '{method}' with floating NaN") diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d14fb040c4e30..f3c01efed6d43 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -39,6 +39,7 @@ _op_descriptions, ) from pandas.core.ops.invalid import invalid_comparison # noqa:F401 +from pandas.core.ops.mask_ops import kleene_and, kleene_or, kleene_xor # noqa: F401 from pandas.core.ops.methods import ( # noqa:F401 add_flex_arithmetic_methods, add_special_arithmetic_methods, diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py new file mode 100644 index 0000000000000..fd91e78451da9 --- /dev/null +++ b/pandas/core/ops/mask_ops.py @@ -0,0 +1,178 @@ +""" +Ops for masked ararys. +""" +from typing import Optional, Union + +import numpy as np + +from pandas._libs import lib, missing as libmissing + + +def kleene_or( + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``or`` using Kleene logic. + + Values are NA where we have ``NA | NA`` or ``NA | False``. + ``NA | True`` is considered True. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical or, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since because + # A | B == B | A + if left_mask is None: + return kleene_or(right, left, right_mask, left_mask) + + assert isinstance(left, np.ndarray) + + raise_for_nan(right, method="or") + + if right is libmissing.NA: + result = left.copy() + else: + result = left | right + + if right_mask is not None: + # output is unknown where (False & NA), (NA & False), (NA & NA) + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = ( + (left_false & right_mask) + | (right_false & left_mask) + | (left_mask & right_mask) + ) + else: + if right is True: + mask = np.zeros_like(left_mask) + elif right is libmissing.NA: + mask = (~left & ~left_mask) | left_mask + else: + # False + mask = left_mask.copy() + + return result, mask + + +def kleene_xor( + left: Union[bool, np.ndarray], + right: Union[bool, np.ndarray], + left_mask: Optional[np.ndarray], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``xor`` using Kleene logic. + + This is the same as ``or``, with the following adjustments + + * True, True -> False + * True, NA -> NA + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + if left_mask is None: + return kleene_xor(right, left, right_mask, left_mask) + + raise_for_nan(right, method="xor") + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left ^ right + + if right_mask is None: + if right is libmissing.NA: + mask = np.ones_like(left_mask) + else: + mask = left_mask.copy() + else: + mask = left_mask | right_mask + + return result, mask + + +def kleene_and( + left: Union[bool, libmissing.NAType, np.ndarray], + right: Union[bool, libmissing.NAType, np.ndarray], + left_mask: Optional[np.ndarray], + right_mask: Optional[np.ndarray], +): + """ + Boolean ``and`` using Kleene logic. + + Values are ``NA`` for ``NA & NA`` or ``True & NA``. + + Parameters + ---------- + left, right : ndarray, NA, or bool + The values of the array. + left_mask, right_mask : ndarray, optional + The masks. Only one of these may be None, which implies that + the associated `left` or `right` value is a scalar. + + Returns + ------- + result, mask: ndarray[bool] + The result of the logical xor, and the new mask. + """ + # To reduce the number of cases, we ensure that `left` & `left_mask` + # always come from an array, not a scalar. This is safe, since because + # A | B == B | A + if left_mask is None: + return kleene_and(right, left, right_mask, left_mask) + + assert isinstance(left, np.ndarray) + raise_for_nan(right, method="and") + + if right is libmissing.NA: + result = np.zeros_like(left) + else: + result = left & right + + if right_mask is None: + # Scalar `right` + if right is libmissing.NA: + mask = (left & ~left_mask) | left_mask + + else: + mask = left_mask.copy() + if right is False: + # unmask everything + mask[:] = False + else: + # unmask where either left or right is False + left_false = ~(left | left_mask) + right_false = ~(right | right_mask) + mask = (left_mask & ~right_false) | (right_mask & ~left_false) + + return result, mask + + +def raise_for_nan(value, method): + if lib.is_float(value) and np.isnan(value): + raise ValueError(f"Cannot perform logical '{method}' with floating NaN") From bbb7f9b91f50a11f884e8ee122a8f3281ffbdcf4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 08:15:24 -0600 Subject: [PATCH 27/30] numpy scalars --- pandas/core/arrays/boolean.py | 12 ++++--- pandas/tests/arrays/test_boolean.py | 54 ++++++++++++++++++----------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 3e1dd8c092eb6..4cf43d1750c30 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -583,10 +583,12 @@ def logical_method(self, other): "can only perform ops with 1-d structures" ) other, mask = coerce_to_array(other, copy=False) + elif isinstance(other, np.bool_): + other = other.item() - if other_is_scalar and not isinstance(other, (type(libmissing.NA), bool)): + if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)): raise TypeError( - "'other' should be pandas.NA or a bool. Got {} instead".format( + "'other' should be pandas.NA or a bool. Got {} instead.".format( type(other).__name__ ) ) @@ -595,11 +597,11 @@ def logical_method(self, other): raise ValueError("Lengths must match to compare") if op.__name__ in {"or_", "ror_"}: - result, mask = nanops.kleene_or(self._data, other, self._mask, mask) + result, mask = ops.kleene_or(self._data, other, self._mask, mask) elif op.__name__ in {"and_", "rand_"}: - result, mask = nanops.kleene_and(self._data, other, self._mask, mask) + result, mask = ops.kleene_and(self._data, other, self._mask, mask) elif op.__name__ in {"xor", "rxor"}: - result, mask = nanops.kleene_xor(self._data, other, self._mask, mask) + result, mask = ops.kleene_xor(self._data, other, self._mask, mask) return BooleanArray(result, mask) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 6ed7a7a281719..c71b30dc0e3dc 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -405,9 +405,9 @@ def test_logical_length_mismatch_raises(self, all_logical_operators): def test_logical_nan_raises(self, all_logical_operators): op_name = all_logical_operators a = pd.array([True, False, None], dtype="boolean") - msg = "" + msg = "Got float instead" - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match=msg): getattr(a, op_name)(np.nan) @pytest.mark.parametrize("other", ["a", 1]) @@ -442,7 +442,9 @@ def test_kleene_or(self): [ (pd.NA, [True, None, None]), (True, [True, True, True]), + (np.bool_(True), [True, True, True]), (False, [True, False, None]), + (np.bool_(False), [True, False, None]), ], ) def test_kleene_or_scalar(self, other, expected): @@ -452,13 +454,16 @@ def test_kleene_or_scalar(self, other, expected): expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = other | a - tm.assert_extension_array_equal(result, expected) + if not isinstance(other, np.bool_): + # NumPy scalars don't dispatch, so they give the wrong answer. - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) + result = other | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) def test_kleene_and(self): # A clear test of behavior. @@ -487,6 +492,8 @@ def test_kleene_and(self): (pd.NA, [None, False, None]), (True, [True, False, None]), (False, [False, False, False]), + (np.bool_(True), [True, False, None]), + (np.bool_(False), [False, False, False]), ], ) def test_kleene_and_scalar(self, other, expected): @@ -495,13 +502,15 @@ def test_kleene_and_scalar(self, other, expected): expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = other & a - tm.assert_extension_array_equal(result, expected) + if not isinstance(other, np.bool_): + # NumPy scalars don't dispatch, so they give the wrong answer. + result = other & a + tm.assert_extension_array_equal(result, expected) - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) def test_kleene_xor(self): a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") @@ -528,7 +537,8 @@ def test_kleene_xor(self): [ (pd.NA, [None, None, None]), (True, [False, True, None]), - (False, [True, False, None]), + (np.bool_(True), [False, True, None]), + (np.bool_(False), [True, False, None]), ], ) def test_kleene_xor_scalar(self, other, expected): @@ -537,13 +547,15 @@ def test_kleene_xor_scalar(self, other, expected): expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - result = other ^ a - tm.assert_extension_array_equal(result, expected) + if not isinstance(other, np.bool_): + # NumPy scalars don't dispatch, so they give the wrong answer. + result = other ^ a + tm.assert_extension_array_equal(result, expected) - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) @pytest.mark.parametrize( "other", From ce763b4e67840effe6502e2476e6e251d2a66627 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 08:18:16 -0600 Subject: [PATCH 28/30] doc note --- doc/source/user_guide/boolean.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index 9931a435e40ca..e9f94dac6326b 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -77,4 +77,18 @@ In ``and`` pd.Series([True, False, np.nan], dtype="boolean") & True +.. warning:: + + At the moment, NumPy's boolean scalars ``np.bool_(False)`` + and ``np.bool_(True)`` do not defer to :class:`arrays.BooleanArray` + for binary operations. If the first argument of a logical operation + is a ``np.bool_``, you'll get the wrong result. + + .. ipython:: python + + a = pd.array([True, False, pd.NA], dtype="boolean") + a | np.bool_(True) # correct + np.bool_(True) | a # incorrect! + + .. _Kleene Logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics From 5bc53285b3922364c19c7e17d908367b36188d14 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 08:39:56 -0600 Subject: [PATCH 29/30] handle numpy bool --- doc/source/user_guide/boolean.rst | 15 ---------- pandas/core/ops/dispatch.py | 6 ++++ pandas/tests/arrays/test_boolean.py | 43 ++++++++++++----------------- 3 files changed, 24 insertions(+), 40 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index e9f94dac6326b..e0f676d3072fc 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -76,19 +76,4 @@ In ``and`` pd.Series([True, False, np.nan], dtype="object") & True pd.Series([True, False, np.nan], dtype="boolean") & True - -.. warning:: - - At the moment, NumPy's boolean scalars ``np.bool_(False)`` - and ``np.bool_(True)`` do not defer to :class:`arrays.BooleanArray` - for binary operations. If the first argument of a logical operation - is a ``np.bool_``, you'll get the wrong result. - - .. ipython:: python - - a = pd.array([True, False, pd.NA], dtype="boolean") - a | np.bool_(True) # correct - np.bool_(True) | a # incorrect! - - .. _Kleene Logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py index c39f4d6d9698d..016a89eb56da3 100644 --- a/pandas/core/ops/dispatch.py +++ b/pandas/core/ops/dispatch.py @@ -189,6 +189,9 @@ def maybe_dispatch_ufunc_to_dunder_op( "ge", "remainder", "matmul", + "or", + "xor", + "and", } aliases = { "subtract": "sub", @@ -204,6 +207,9 @@ def maybe_dispatch_ufunc_to_dunder_op( "less_equal": "le", "greater": "gt", "greater_equal": "ge", + "bitwise_or": "or", + "bitwise_and": "and", + "bitwise_xor": "xor", } # For op(., Array) -> Array.__r{op}__ diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index c71b30dc0e3dc..6cf6233ff10cf 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -454,16 +454,13 @@ def test_kleene_or_scalar(self, other, expected): expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - if not isinstance(other, np.bool_): - # NumPy scalars don't dispatch, so they give the wrong answer. - - result = other | a - tm.assert_extension_array_equal(result, expected) + result = other | a + tm.assert_extension_array_equal(result, expected) - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) def test_kleene_and(self): # A clear test of behavior. @@ -502,15 +499,13 @@ def test_kleene_and_scalar(self, other, expected): expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - if not isinstance(other, np.bool_): - # NumPy scalars don't dispatch, so they give the wrong answer. - result = other & a - tm.assert_extension_array_equal(result, expected) + result = other & a + tm.assert_extension_array_equal(result, expected) - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) def test_kleene_xor(self): a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") @@ -547,15 +542,13 @@ def test_kleene_xor_scalar(self, other, expected): expected = pd.array(expected, dtype="boolean") tm.assert_extension_array_equal(result, expected) - if not isinstance(other, np.bool_): - # NumPy scalars don't dispatch, so they give the wrong answer. - result = other ^ a - tm.assert_extension_array_equal(result, expected) + result = other ^ a + tm.assert_extension_array_equal(result, expected) - # ensure we haven't mutated anything inplace - tm.assert_extension_array_equal( - a, pd.array([True, False, None], dtype="boolean") - ) + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) @pytest.mark.parametrize( "other", From 31c2bc6dc3c8d7ef7cbf7aea41d6d686f7ff58ca Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 15:40:06 -0600 Subject: [PATCH 30/30] cleanup --- pandas/core/arrays/boolean.py | 2 +- pandas/tests/arrays/test_boolean.py | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 4cf43d1750c30..31dc656eb4b25 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -185,7 +185,7 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin): a boolean array with the mask (True indicating missing). BooleanArray implements Kleene logic (sometimes called three-value - logic) for logical operations. See :ref:`` for more. + logic) for logical operations. See :ref:`boolean.kleene` for more. To construct an BooleanArray from generic array-like input, use :func:`pandas.array` specifying ``dtype="boolean"`` (see examples diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 6cf6233ff10cf..d2f1a4a60ada8 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -551,13 +551,7 @@ def test_kleene_xor_scalar(self, other, expected): ) @pytest.mark.parametrize( - "other", - [ - True, - False, - # pd.NA - [True, False, None] * 3, - ], + "other", [True, False, pd.NA, [True, False, None] * 3], ) def test_no_masked_assumptions(self, other, all_logical_operators): # The logical operations should not assume that masked values are False!