From 68e6bd5628e7e8f07cc76e7b1a356d58c0e53275 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Sat, 25 Feb 2023 20:08:50 -0500
Subject: [PATCH 1/7] API: ArrowExtensionArray._cmp_method to return
 pyarrow.bool_ type

---
 pandas/core/arrays/arrow/array.py             | 131 ++++++++++++++++--
 pandas/tests/arrays/string_/test_string.py    |  14 +-
 .../tests/arrays/string_/test_string_arrow.py |   2 +-
 pandas/tests/extension/test_arrow.py          |  18 +--
 pandas/tests/extension/test_string.py         |   3 +-
 5 files changed, 133 insertions(+), 35 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index bb4bdae188fd2..2985c115015d5 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -429,8 +429,6 @@ def __setstate__(self, state) -> None:
         self.__dict__.update(state)
 
     def _cmp_method(self, other, op):
-        from pandas.arrays import BooleanArray
-
         pc_func = ARROW_CMP_FUNCS[op.__name__]
         if isinstance(other, ArrowExtensionArray):
             result = pc_func(self._data, other._data)
@@ -444,20 +442,13 @@ def _cmp_method(self, other, op):
                 valid = ~mask
                 result = np.zeros(len(self), dtype="bool")
                 result[valid] = op(np.array(self)[valid], other)
-                return BooleanArray(result, mask)
+                result = pa.array(result, type=pa.bool_())
+                result = pc.if_else(valid, result, None)
         else:
             raise NotImplementedError(
                 f"{op.__name__} not implemented for {type(other)}"
             )
-
-        if result.null_count > 0:
-            # GH50524: avoid conversion to object for better perf
-            values = pc.fill_null(result, False).to_numpy()
-            mask = result.is_null().to_numpy()
-        else:
-            values = result.to_numpy()
-            mask = np.zeros(len(values), dtype=np.bool_)
-        return BooleanArray(values, mask)
+        return ArrowExtensionArray(result)
 
     def _evaluate_op_method(self, other, op, arrow_funcs):
         pa_type = self._data.type
@@ -566,6 +557,122 @@ def isna(self) -> npt.NDArray[np.bool_]:
 
         return self._data.is_null().to_numpy()
 
+    def any(self, *, skipna: bool = True, **kwargs):
+        """
+        Return whether any element is truthy.
+
+        Returns False unless there is at least one element that is truthy.
+        By default, NAs are skipped. If ``skipna=False`` is specified and
+        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
+        is used as for logical operations.
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Exclude NA values. If the entire array is NA and `skipna` is
+            True, then the result will be False, as for an empty array.
+            If `skipna` is False, the result will still be True if there is
+            at least one element that is truthy, otherwise NA will be returned
+            if there are NA's present.
+
+        Returns
+        -------
+        bool or :attr:`pandas.NA`
+
+        See Also
+        --------
+        ArrowExtensionArray.all : Return whether all elements are truthy.
+
+        Examples
+        --------
+        The result indicates whether any element is truthy (and by default
+        skips NAs):
+
+        >>> pd.array([True, False, True], dtype="boolean[pyarrow]").any()
+        True
+        >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").any()
+        True
+        >>> pd.array([False, False, pd.NA], dtype="boolean[pyarrow]").any()
+        False
+        >>> pd.array([], dtype="boolean[pyarrow]").any()
+        False
+        >>> pd.array([pd.NA], dtype="boolean[pyarrow]").any()
+        False
+        >>> pd.array([pd.NA], dtype="float64[pyarrow]").any()
+        False
+
+        With ``skipna=False``, the result can be NA if this is logically
+        required (whether ``pd.NA`` is True or False influences the result):
+
+        >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
+        True
+        >>> pd.array([1, 0, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
+        True
+        >>> pd.array([False, False, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
+        <NA>
+        >>> pd.array([0, 0, pd.NA], dtype="boolean[pyarrow]").any(skipna=False)
+        <NA>
+        """
+        return self._reduce("any", skipna=skipna, **kwargs)
+
+    def all(self, *, skipna: bool = True, **kwargs):
+        """
+        Return whether all elements are truthy.
+
+        Returns True unless there is at least one element that is falsey.
+        By default, NAs are skipped. If ``skipna=False`` is specified and
+        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
+        is used as for logical operations.
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Exclude NA values. If the entire array is NA and `skipna` is
+            True, then the result will be True, as for an empty array.
+            If `skipna` is False, the result will still be False if there is
+            at least one element that is falsey, otherwise NA will be returned
+            if there are NA's present.
+
+        Returns
+        -------
+        bool or :attr:`pandas.NA`
+
+        See Also
+        --------
+        ArrowExtensionArray.any : Return whether any element is truthy.
+
+        Examples
+        --------
+        The result indicates whether all elements are truthy (and by default
+        skips NAs):
+
+        >>> pd.array([True, True, pd.NA], dtype="boolean[pyarrow]").all()
+        True
+        >>> pd.array([1, 1, pd.NA], dtype="boolean[pyarrow]").all()
+        True
+        >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").all()
+        False
+        >>> pd.array([], dtype="boolean[pyarrow]").all()
+        True
+        >>> pd.array([pd.NA], dtype="boolean[pyarrow]").all()
+        True
+        >>> pd.array([pd.NA], dtype="float64[pyarrow]").all()
+        True
+
+        With ``skipna=False``, the result can be NA if this is logically
+        required (whether ``pd.NA`` is True or False influences the result):
+
+        >>> pd.array([True, True, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
+        <NA>
+        >>> pd.array([1, 1, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
+        <NA>
+        >>> pd.array([True, False, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
+        False
+        >>> pd.array([1, 0, pd.NA], dtype="boolean[pyarrow]").all(skipna=False)
+        False
+        """
+        return self._reduce("all", skipna=skipna, **kwargs)
+
     def argsort(
         self,
         *,
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 0b41abc3b3a73..469221a306186 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -196,8 +196,9 @@ def test_comparison_methods_scalar(comparison_op, dtype):
     a = pd.array(["a", None, "c"], dtype=dtype)
     other = "a"
     result = getattr(a, op_name)(other)
+    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
     expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
-    expected = pd.array(expected, dtype="boolean")
+    expected = pd.array(expected, dtype=expected_dtype)
     tm.assert_extension_array_equal(result, expected)
 
 
@@ -205,7 +206,8 @@ def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
     op_name = f"__{comparison_op.__name__}__"
     a = pd.array(["a", None, "c"], dtype=dtype)
     result = getattr(a, op_name)(pd.NA)
-    expected = pd.array([None, None, None], dtype="boolean")
+    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+    expected = pd.array([None, None, None], dtype=expected_dtype)
     tm.assert_extension_array_equal(result, expected)
 
 
@@ -225,7 +227,8 @@ def test_comparison_methods_scalar_not_string(comparison_op, dtype):
     expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
         op_name
     ]
-    expected = pd.array(expected_data, dtype="boolean")
+    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+    expected = pd.array(expected_data, dtype=expected_dtype)
     tm.assert_extension_array_equal(result, expected)
 
 
@@ -235,13 +238,14 @@ def test_comparison_methods_array(comparison_op, dtype):
     a = pd.array(["a", None, "c"], dtype=dtype)
     other = [None, None, "c"]
     result = getattr(a, op_name)(other)
+    expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
     expected = np.empty_like(a, dtype="object")
     expected[-1] = getattr(other[-1], op_name)(a[-1])
-    expected = pd.array(expected, dtype="boolean")
+    expected = pd.array(expected, dtype=expected_dtype)
     tm.assert_extension_array_equal(result, expected)
 
     result = getattr(a, op_name)(pd.NA)
-    expected = pd.array([None, None, None], dtype="boolean")
+    expected = pd.array([None, None, None], dtype=expected_dtype)
     tm.assert_extension_array_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index 07c6bca67311b..45098e12ccb38 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -24,7 +24,7 @@
 def test_eq_all_na():
     a = pd.array([pd.NA, pd.NA], dtype=StringDtype("pyarrow"))
     result = a == a
-    expected = pd.array([pd.NA, pd.NA], dtype="boolean")
+    expected = pd.array([pd.NA, pd.NA], dtype="boolean[pyarrow]")
     tm.assert_extension_array_equal(result, expected)
 
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index f9af3a3063386..f48bf35f317d7 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1211,14 +1211,7 @@ def test_add_series_with_extension_array(self, data, request):
 
 
 class TestBaseComparisonOps(base.BaseComparisonOpsTests):
-    def assert_series_equal(self, left, right, *args, **kwargs):
-        # Series.combine for "expected" retains bool[pyarrow] dtype
-        # While "result" return "boolean" dtype
-        right = pd.Series(right._values.to_numpy(), dtype="boolean")
-        super().assert_series_equal(left, right, *args, **kwargs)
-
     def test_compare_array(self, data, comparison_op, na_value, request):
-        pa_dtype = data.dtype.pyarrow_dtype
         ser = pd.Series(data)
         # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray
         # since ser.iloc[0] is a python scalar
@@ -1233,7 +1226,7 @@ def test_compare_array(self, data, comparison_op, na_value, request):
             expected = ser.combine(other, comparison_op)
             expected[8] = na_value
             expected[97] = na_value
-            self.assert_series_equal(result, expected)
+            tm.assert_series_equal(result, expected)
 
         else:
             exc = None
@@ -1244,15 +1237,8 @@ def test_compare_array(self, data, comparison_op, na_value, request):
 
             if exc is None:
                 # Didn't error, then should match point-wise behavior
-                if pa.types.is_temporal(pa_dtype):
-                    # point-wise comparison with pd.NA raises TypeError
-                    assert result[8] is na_value
-                    assert result[97] is na_value
-                    result = result.drop([8, 97]).reset_index(drop=True)
-                    ser = ser.drop([8, 97])
-                    other = other.drop([8, 97])
                 expected = ser.combine(other, comparison_op)
-                self.assert_series_equal(result, expected)
+                tm.assert_series_equal(result, expected)
             else:
                 with pytest.raises(type(exc)):
                     ser.combine(other, comparison_op)
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index a2e438b858e59..11ac6a151324a 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -221,7 +221,8 @@ class TestComparisonOps(base.BaseComparisonOpsTests):
     def _compare_other(self, ser, data, op, other):
         op_name = f"__{op.__name__}__"
         result = getattr(ser, op_name)(other)
-        expected = getattr(ser.astype(object), op_name)(other).astype("boolean")
+        dtype = "boolean[pyarrow]" if ser.dtype.storage == "pyarrow" else "boolean"
+        expected = getattr(ser.astype(object), op_name)(other).astype(dtype)
         self.assert_series_equal(result, expected)
 
     def test_compare_scalar(self, data, comparison_op):

From 772a4f109c0c5730ee0365deab853ad4f7d75d59 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Sat, 25 Feb 2023 20:21:50 -0500
Subject: [PATCH 2/7] whatsnew

---
 doc/source/whatsnew/v2.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 45b5c16415f9d..be2c34bb7a0d8 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -84,7 +84,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
 
 Other API changes
 ^^^^^^^^^^^^^^^^^
--
+- :class:`~arrays.ArrowExtensionArray` comparison methods now return data with :class:`ArrowDtype` with ``pyarrow.bool_`` type instead of ``"boolean"`` dtype (:issue:`51643`)
 -
 
 .. ---------------------------------------------------------------------------

From 106a1c028ccdf394e672f293e09c3613105d868e Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Sun, 26 Feb 2023 08:24:57 -0500
Subject: [PATCH 3/7] try removing asv parallel build

---
 asv_bench/asv.conf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index 16f8f28b66d31..8b81c6ea03bbb 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -126,6 +126,6 @@
     },
     "build_command":
     ["python -m pip install versioneer[toml]",
-     "python setup.py build -j4",
+     "python setup.py build -j1",
      "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"],
 }

From 096374b696e85536c0cb445fe4a41339928a123e Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Sun, 26 Feb 2023 13:34:42 -0500
Subject: [PATCH 4/7] fix logical func keys; restore asv parallel build

---
 asv_bench/asv.conf.json           | 2 +-
 pandas/core/arrays/arrow/array.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index 8b81c6ea03bbb..16f8f28b66d31 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -126,6 +126,6 @@
     },
     "build_command":
     ["python -m pip install versioneer[toml]",
-     "python setup.py build -j1",
+     "python setup.py build -j4",
      "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"],
 }
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 2985c115015d5..708c8e9ff4e09 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -81,10 +81,10 @@
     }
 
     ARROW_LOGICAL_FUNCS = {
-        "and": pc.and_kleene,
-        "rand": lambda x, y: pc.and_kleene(y, x),
-        "or": pc.or_kleene,
-        "ror": lambda x, y: pc.or_kleene(y, x),
+        "and_": pc.and_kleene,
+        "rand_": lambda x, y: pc.and_kleene(y, x),
+        "or_": pc.or_kleene,
+        "ror_": lambda x, y: pc.or_kleene(y, x),
         "xor": pc.xor,
         "rxor": lambda x, y: pc.xor(y, x),
     }

From d7cc4f698c681cbf88adc2a95781fa5ce197e48b Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Mon, 6 Mar 2023 19:32:41 -0500
Subject: [PATCH 5/7] cleanup: use self.assert_series_equal in test_compare_array

---
 pandas/tests/extension/test_arrow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 22c8d68310e80..1e9e2a298b452 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1222,7 +1222,7 @@ def test_compare_array(self, data, comparison_op, na_value, request):
             expected = ser.combine(other, comparison_op)
             expected[8] = na_value
             expected[97] = na_value
-            tm.assert_series_equal(result, expected)
+            self.assert_series_equal(result, expected)
 
         else:
             exc = None
@@ -1234,7 +1234,7 @@ def test_compare_array(self, data, comparison_op, na_value, request):
             if exc is None:
                 # Didn't error, then should match point-wise behavior
                 expected = ser.combine(other, comparison_op)
-                tm.assert_series_equal(result, expected)
+                self.assert_series_equal(result, expected)
             else:
                 with pytest.raises(type(exc)):
                     ser.combine(other, comparison_op)

From 9aa9ec7559c88ee815c6b8b369b8a5f9bb94f0fc Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Tue, 7 Mar 2023 19:28:02 -0500
Subject: [PATCH 6/7] fix test: build expected with np.full(None) instead of np.empty_like

---
 pandas/tests/arrays/string_/test_string.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 469221a306186..dd0b43c116266 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -239,7 +239,7 @@ def test_comparison_methods_array(comparison_op, dtype):
     other = [None, None, "c"]
     result = getattr(a, op_name)(other)
     expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
-    expected = np.empty_like(a, dtype="object")
+    expected = np.full(len(a), fill_value=None, dtype="object")
     expected[-1] = getattr(other[-1], op_name)(a[-1])
     expected = pd.array(expected, dtype=expected_dtype)
     tm.assert_extension_array_equal(result, expected)

From a700417429601c71394a03181ab3cf20db87f47f Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 8 Mar 2023 20:28:09 -0500
Subject: [PATCH 7/7] subclass ExtensionArraySupportsAnyAll

---
 pandas/core/arrays/arrow/array.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index deae6148ee069..d8b0b53331229 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -52,7 +52,10 @@
 
 from pandas.core import roperator
 from pandas.core.arraylike import OpsMixin
-from pandas.core.arrays.base import ExtensionArray
+from pandas.core.arrays.base import (
+    ExtensionArray,
+    ExtensionArraySupportsAnyAll,
+)
 import pandas.core.common as com
 from pandas.core.indexers import (
     check_array_indexer,
@@ -170,7 +173,9 @@ def to_pyarrow_type(
     return None
 
 
-class ArrowExtensionArray(OpsMixin, ExtensionArray, BaseStringArrayMethods):
+class ArrowExtensionArray(
+    OpsMixin, ExtensionArraySupportsAnyAll, BaseStringArrayMethods
+):
     """
     Pandas ExtensionArray backed by a PyArrow ChunkedArray.