From 293a0ef8f1ef66097939809c86b45091758a1cda Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 10 May 2023 17:49:57 -0400
Subject: [PATCH 01/10] BUG/REF: ArrowExtensionArray non-nanosecond units

---
 doc/source/whatsnew/v2.1.0.rst       |   1 +
 pandas/core/arrays/arrow/array.py    | 204 +++++++++++++++++----------
 pandas/tests/extension/test_arrow.py |  72 +++++++++-
 3 files changed, 200 insertions(+), 77 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 010773b2806a2..779ca50e1ac41 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -434,6 +434,7 @@ ExtensionArray
 - Bug in :meth:`Series.quantile` for pyarrow temporal types raising ArrowInvalid (:issue:`52678`)
 - Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`)
 - Bug where the ``__from_arrow__`` method of masked ExtensionDtypes(e.g. :class:`Float64Dtype`, :class:`BooleanDtype`) would not accept pyarrow arrays of type ``pyarrow.null()`` (:issue:`52223`)
+- Bug in :class:`~arrays.ArrowExtensionArray` converting pandas non-nanosecond temporal objects from non-zero values to zero values (:issue:`#####`)
 -
 
 Styler
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index d842e49589c4d..6cb839fe5ada4 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -243,36 +243,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
         """
         Construct a new ExtensionArray from a sequence of scalars.
         """
-        pa_dtype = to_pyarrow_type(dtype)
-        if (
-            isinstance(scalars, np.ndarray)
-            and isinstance(dtype, ArrowDtype)
-            and (
-                pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype)
-            )
-        ):
-            # See https://github.com/apache/arrow/issues/35289
-            scalars = scalars.tolist()
-
-        if isinstance(scalars, cls):
-            scalars = scalars._pa_array
-        elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)):
-            if copy and is_array_like(scalars):
-                # pa array should not get updated when numpy array is updated
-                scalars = scalars.copy()
-            try:
-                scalars = pa.array(scalars, type=pa_dtype, from_pandas=True)
-            except pa.ArrowInvalid:
-                # GH50430: let pyarrow infer type, then cast
-                scalars = pa.array(scalars, from_pandas=True)
-        if pa_dtype and scalars.type != pa_dtype:
-            scalars = scalars.cast(pa_dtype)
-        arr = cls(scalars)
-        if pa.types.is_duration(scalars.type) and scalars.null_count > 0:
-            # GH52843: upstream bug for duration types when originally
-            # constructed with data containing numpy NaT.
-            # https://github.com/apache/arrow/issues/35088
-            arr = arr.fillna(arr.dtype.na_value)
+        pa_type = to_pyarrow_type(dtype)
+        pa_array = cls._box_pa_array(scalars, pa_type=pa_type, copy=copy)
+        arr = cls(pa_array)
         return arr
 
     @classmethod
@@ -466,65 +439,50 @@ def __setstate__(self, state) -> None:
 
     def _cmp_method(self, other, op):
         pc_func = ARROW_CMP_FUNCS[op.__name__]
-        if isinstance(other, ArrowExtensionArray):
-            result = pc_func(self._pa_array, other._pa_array)
-        elif isinstance(other, (np.ndarray, list)):
-            result = pc_func(self._pa_array, other)
-        elif isinstance(other, BaseMaskedArray):
-            # GH 52625
-            result = pc_func(self._pa_array, other.__arrow_array__())
-        elif is_scalar(other):
-            try:
-                result = pc_func(self._pa_array, pa.scalar(other))
-            except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
+        try:
+            result = pc_func(self._pa_array, self._box_pa(other))
+        except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
+            if is_scalar(other):
                 mask = isna(self) | isna(other)
                 valid = ~mask
                 result = np.zeros(len(self), dtype="bool")
                 result[valid] = op(np.array(self)[valid], other)
                 result = pa.array(result, type=pa.bool_())
                 result = pc.if_else(valid, result, None)
-        else:
-            raise NotImplementedError(
-                f"{op.__name__} not implemented for {type(other)}"
-            )
-        return ArrowExtensionArray(result)
+            else:
+                raise NotImplementedError(
+                    f"{op.__name__} not implemented for {type(other)}"
+                )
+        return type(self)(result)
 
     def _evaluate_op_method(self, other, op, arrow_funcs):
         pa_type = self._pa_array.type
+        other = self._box_pa(other)
+
         if (pa.types.is_string(pa_type) or pa.types.is_binary(pa_type)) and op in [
             operator.add,
             roperator.radd,
         ]:
             sep = pa.scalar("", type=pa_type)
-            if isinstance(other, type(self)):
-                other = other._pa_array
             if op is operator.add:
                 result = pc.binary_join_element_wise(self._pa_array, other, sep)
             else:
                 result = pc.binary_join_element_wise(other, self._pa_array, sep)
             return type(self)(result)
 
+        if (
+            isinstance(other, pa.Scalar)
+            and pc.is_null(other).as_py()
+            and op.__name__ in ARROW_LOGICAL_FUNCS
+        ):
+            # pyarrow kleene ops require null to be typed
+            other = other.cast(pa_type)
+
         pc_func = arrow_funcs[op.__name__]
         if pc_func is NotImplemented:
             raise NotImplementedError(f"{op.__name__} not implemented.")
-        if isinstance(other, ArrowExtensionArray):
-            result = pc_func(self._pa_array, other._pa_array)
-        elif isinstance(other, (np.ndarray, list)):
-            result = pc_func(self._pa_array, pa.array(other, from_pandas=True))
-        elif isinstance(other, BaseMaskedArray):
-            # GH 52625
-            result = pc_func(self._pa_array, other.__arrow_array__())
-        elif is_scalar(other):
-            if isna(other) and op.__name__ in ARROW_LOGICAL_FUNCS:
-                # pyarrow kleene ops require null to be typed
-                pa_scalar = pa.scalar(None, type=self._pa_array.type)
-            else:
-                pa_scalar = pa.scalar(other)
-            result = pc_func(self._pa_array, pa_scalar)
-        else:
-            raise NotImplementedError(
-                f"{op.__name__} not implemented for {type(other)}"
-            )
+
+        result = pc_func(self._pa_array, other)
         return type(self)(result)
 
     def _logical_method(self, other, op):
@@ -1601,18 +1559,114 @@ def _mode(self, dropna: bool = True) -> Self:
 
         return type(self)(most_common)
 
-    def _maybe_convert_setitem_value(self, value):
-        """Maybe convert value to be pyarrow compatible."""
-        if value is None:
-            return value
-        if isinstance(value, (pa.Scalar, pa.Array, pa.ChunkedArray)):
-            return value
+    @classmethod
+    def _box_pa(cls, value, pa_type=None):
         if is_list_like(value):
-            pa_box = pa.array
+            return cls._box_pa_array(value, pa_type)
+        return cls._box_pa_scalar(value, pa_type)
+
+    @classmethod
+    def _box_pa_scalar(cls, value, pa_type=None):
+        if isinstance(value, pa.Scalar):
+            pa_scalar = value
+        elif isna(value):
+            pa_scalar = pa.scalar(None, type=pa_type)
         else:
-            pa_box = pa.scalar
+            # GH#####: pyarrow does not yet handle pandas non-nano correctly
+            # see https://github.com/apache/arrow/issues/33321
+            from pandas import (
+                Timedelta,
+                Timestamp,
+            )
+
+            if isinstance(value, Timedelta):
+                if pa_type is None:
+                    pa_type = pa.duration(value.unit)
+                elif value.unit != pa_type.unit:
+                    value = value.as_unit(pa_type.unit)
+                value = value._value
+            elif isinstance(value, Timestamp):
+                if pa_type is None:
+                    pa_type = pa.timestamp(value.unit, tz=value.tz)
+                elif value.unit != pa_type.unit:
+                    value = value.as_unit(pa_type.unit)
+                value = value._value
+
+            pa_scalar = pa.scalar(value, type=pa_type, from_pandas=True)
+
+        if pa_type is not None and pa_scalar.type != pa_type:
+            pa_scalar = pa_scalar.cast(pa_type)
+
+        return pa_scalar
+
+    @classmethod
+    def _box_pa_array(cls, value, pa_type=None, copy: bool = False):
+        if isinstance(value, cls):
+            pa_array = value._pa_array
+        elif isinstance(value, (pa.Array, pa.ChunkedArray)):
+            pa_array = value
+        elif isinstance(value, BaseMaskedArray):
+            # GH 52625
+            pa_array = value.__arrow_array__()
+        else:
+            if (
+                isinstance(value, np.ndarray)
+                and pa_type is not None
+                and (
+                    pa.types.is_large_binary(pa_type)
+                    or pa.types.is_large_string(pa_type)
+                )
+            ):
+                # See https://github.com/apache/arrow/issues/35289
+                value = value.tolist()
+            elif copy and is_array_like(value):
+                # pa array should not get updated when numpy array is updated
+                value = value.copy()
+
+            if (
+                pa_type is not None
+                and pa.types.is_duration(pa_type)
+                and (not isinstance(value, np.ndarray) or value.dtype.kind not in "mi")
+            ):
+                # GH#####: pyarrow does not yet handle pandas non-nano correctly
+                # see https://github.com/apache/arrow/issues/33321
+                from pandas import to_timedelta
+
+                value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
+                value = value.to_numpy()
+
+            try:
+                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+            except pa.ArrowInvalid:
+                # GH50430: let pyarrow infer type, then cast
+                pa_array = pa.array(value, from_pandas=True)
+
+            if pa_type is None and pa.types.is_duration(pa_array.type):
+                # GH#####: pyarrow does not yet handle pandas non-nano correctly
+                # see https://github.com/apache/arrow/issues/33321
+                from pandas import to_timedelta
+
+                value = to_timedelta(value)
+                value = value.to_numpy()
+                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+
+            if pa.types.is_duration(pa_array.type) and pa_array.null_count > 0:
+                # GH52843: upstream bug for duration types when originally
+                # constructed with data containing numpy NaT.
+                # https://github.com/apache/arrow/issues/35088
+                arr = cls(pa_array)
+                arr = arr.fillna(arr.dtype.na_value)
+                pa_array = arr._pa_array
+
+        if pa_type is not None and pa_array.type != pa_type:
+            pa_array = pa_array.cast(pa_type)
+
+        return pa_array
+
+    def _maybe_convert_setitem_value(self, value):
+        """Maybe convert value to be pyarrow compatible."""
         try:
-            value = pa_box(value, type=self._pa_array.type, from_pandas=True)
+            value = self._box_pa(value, self._pa_array.type)
         except pa.ArrowTypeError as err:
             msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
             raise TypeError(msg) from err
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 5078a4e8078f8..a5a2400d434f5 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1719,8 +1719,9 @@ def test_setitem_null_slice(data):
 
     result = orig.copy()
     result[:] = data[0]
-    expected = ArrowExtensionArray(
-        pa.array([data[0]] * len(data), type=data._pa_array.type)
+    expected = ArrowExtensionArray._from_sequence(
+        [data[0]] * len(data),
+        dtype=data._pa_array.type,
     )
     tm.assert_extension_array_equal(result, expected)
 
@@ -2919,3 +2920,70 @@ def test_infer_dtype_pyarrow_dtype(data, request):
         request.node.add_marker(mark)
 
     assert res == lib.infer_dtype(list(data), skipna=True)
+
+
+@pytest.mark.parametrize(
+    "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
+)
+def test_from_sequence_temporal(pa_type):
+    # GH#####
+    val = 3
+    unit = pa_type.unit
+    if pa.types.is_duration(pa_type):
+        seq = [pd.Timedelta(val, unit=unit).as_unit(unit)]
+    else:
+        seq = [pd.Timestamp(val, unit=unit, tz=pa_type.tz).as_unit(unit)]
+
+    result = ArrowExtensionArray._from_sequence(seq, dtype=pa_type)
+    expected = ArrowExtensionArray(pa.array([val], type=pa_type))
+    tm.assert_extension_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
+)
+def test_setitem_temporal(pa_type):
+    # GH#####
+    unit = pa_type.unit
+    if pa.types.is_duration(pa_type):
+        val = pd.Timedelta(1, unit=unit).as_unit(unit)
+    else:
+        val = pd.Timestamp(1, unit=unit, tz=pa_type.tz).as_unit(unit)
+
+    arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type))
+
+    result = arr.copy()
+    result[:] = val
+    expected = ArrowExtensionArray(pa.array([1, 1, 1], type=pa_type))
+    tm.assert_extension_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
+)
+def test_arithmetic_temporal(pa_type):
+    # GH#####
+    arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type))
+    unit = pa_type.unit
+
+    result = arr - pd.Timedelta(1, unit=unit).as_unit(unit)
+    expected = ArrowExtensionArray(pa.array([0, 1, 2], type=pa_type))
+    tm.assert_extension_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
+)
+def test_comparison_temporal(pa_type):
+    # GH#####
+    unit = pa_type.unit
+    if pa.types.is_duration(pa_type):
+        val = pd.Timedelta(1, unit=unit).as_unit(unit)
+    else:
+        val = pd.Timestamp(1, unit=unit, tz=pa_type.tz).as_unit(unit)
+
+    arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type))
+
+    result = arr > val
+    expected = ArrowExtensionArray(pa.array([False, True, True], type=pa.bool_()))
+    tm.assert_extension_array_equal(result, expected)

From e4919316e57997a6ec280a9e987a9251cc83a669 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 10 May 2023 17:59:23 -0400
Subject: [PATCH 02/10] mypy

---
 pandas/core/arrays/arrow/array.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 6cb839fe5ada4..39dc4d4b0e662 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -1560,13 +1560,15 @@ def _mode(self, dropna: bool = True) -> Self:
         return type(self)(most_common)
 
     @classmethod
-    def _box_pa(cls, value, pa_type=None):
+    def _box_pa(
+        cls, value, pa_type: pa.DataType | None = None
+    ) -> pa.Array | pa.ChunkedArray | pa.Scalar:
         if is_list_like(value):
             return cls._box_pa_array(value, pa_type)
         return cls._box_pa_scalar(value, pa_type)
 
     @classmethod
-    def _box_pa_scalar(cls, value, pa_type=None):
+    def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         if isinstance(value, pa.Scalar):
             pa_scalar = value
         elif isna(value):
@@ -1600,7 +1602,9 @@ def _box_pa_scalar(cls, value, pa_type=None):
         return pa_scalar
 
     @classmethod
-    def _box_pa_array(cls, value, pa_type=None, copy: bool = False):
+    def _box_pa_array(
+        cls, value, pa_type: pa.DataType | None = None, copy: bool = False
+    ) -> pa.Array | pa.ChunkedArray:
         if isinstance(value, cls):
             pa_array = value._pa_array
         elif isinstance(value, (pa.Array, pa.ChunkedArray)):

From 961b4b9d8cf021cad9fd69d3ee7e79888f5c7fc9 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 10 May 2023 18:04:55 -0400
Subject: [PATCH 03/10] gh refs

---
 doc/source/whatsnew/v2.1.0.rst       | 2 +-
 pandas/core/arrays/arrow/array.py    | 6 +++---
 pandas/tests/extension/test_arrow.py | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 779ca50e1ac41..e3ede4d0dffa6 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -431,10 +431,10 @@ Sparse
 
 ExtensionArray
 ^^^^^^^^^^^^^^
+- Bug in :class:`~arrays.ArrowExtensionArray` converting pandas non-nanosecond temporal objects from non-zero values to zero values (:issue:`53171`)
 - Bug in :meth:`Series.quantile` for pyarrow temporal types raising ArrowInvalid (:issue:`52678`)
 - Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`)
 - Bug where the ``__from_arrow__`` method of masked ExtensionDtypes(e.g. :class:`Float64Dtype`, :class:`BooleanDtype`) would not accept pyarrow arrays of type ``pyarrow.null()`` (:issue:`52223`)
-- Bug in :class:`~arrays.ArrowExtensionArray` converting pandas non-nanosecond temporal objects from non-zero values to zero values (:issue:`#####`)
 -
 
 Styler
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 39dc4d4b0e662..96417b8eb1898 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -1574,7 +1574,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         elif isna(value):
             pa_scalar = pa.scalar(None, type=pa_type)
         else:
-            # GH#####: pyarrow does not yet handle pandas non-nano correctly
+            # GH 53171: pyarrow does not yet handle pandas non-nano correctly
             # see https://github.com/apache/arrow/issues/33321
             from pandas import (
                 Timedelta,
@@ -1632,7 +1632,7 @@ def _box_pa_array(
                 and pa.types.is_duration(pa_type)
                 and (not isinstance(value, np.ndarray) or value.dtype.kind not in "mi")
             ):
-                # GH#####: pyarrow does not yet handle pandas non-nano correctly
+                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
                 # see https://github.com/apache/arrow/issues/33321
                 from pandas import to_timedelta
 
@@ -1646,7 +1646,7 @@ def _box_pa_array(
                 pa_array = pa.array(value, from_pandas=True)
 
             if pa_type is None and pa.types.is_duration(pa_array.type):
-                # GH#####: pyarrow does not yet handle pandas non-nano correctly
+                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
                 # see https://github.com/apache/arrow/issues/33321
                 from pandas import to_timedelta
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index a5a2400d434f5..8772cdfd1a256 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2926,7 +2926,7 @@ def test_infer_dtype_pyarrow_dtype(data, request):
     "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
 )
 def test_from_sequence_temporal(pa_type):
-    # GH#####
+    # GH 53171
     val = 3
     unit = pa_type.unit
     if pa.types.is_duration(pa_type):
@@ -2943,7 +2943,7 @@ def test_from_sequence_temporal(pa_type):
     "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
 )
 def test_setitem_temporal(pa_type):
-    # GH#####
+    # GH 53171
     unit = pa_type.unit
     if pa.types.is_duration(pa_type):
         val = pd.Timedelta(1, unit=unit).as_unit(unit)
@@ -2962,7 +2962,7 @@ def test_setitem_temporal(pa_type):
     "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
 )
 def test_arithmetic_temporal(pa_type):
-    # GH#####
+    # GH 53171
     arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type))
     unit = pa_type.unit
 
@@ -2975,7 +2975,7 @@ def test_arithmetic_temporal(pa_type):
     "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
 )
 def test_comparison_temporal(pa_type):
-    # GH#####
+    # GH 53171
     unit = pa_type.unit
     if pa.types.is_duration(pa_type):
         val = pd.Timedelta(1, unit=unit).as_unit(unit)

From 299b20d9300ad15c87d54434a601f17b646052b8 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 10 May 2023 19:55:12 -0400
Subject: [PATCH 04/10] fixes

---
 pandas/core/arrays/arrow/array.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 96417b8eb1898..8af8dd1ce6479 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -453,7 +453,7 @@ def _cmp_method(self, other, op):
                 raise NotImplementedError(
                     f"{op.__name__} not implemented for {type(other)}"
                 )
-        return type(self)(result)
+        return ArrowExtensionArray(result)
 
     def _evaluate_op_method(self, other, op, arrow_funcs):
         pa_type = self._pa_array.type
@@ -1611,6 +1611,8 @@ def _box_pa_array(
             pa_array = value
         elif isinstance(value, BaseMaskedArray):
             # GH 52625
+            if copy:
+                value = value.copy()
             pa_array = value.__arrow_array__()
         else:
             if (

From 47ec5ef3fab2eecdeb7be89d30de86fd7d4b0ab3 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 10 May 2023 22:18:57 -0400
Subject: [PATCH 05/10] xfail min versions

---
 pandas/tests/extension/test_arrow.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 8772cdfd1a256..183445c360c6a 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2958,6 +2958,11 @@ def test_setitem_temporal(pa_type):
     tm.assert_extension_array_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    pa_version_under8p0,
+    reason="Function 'add_checked' has no kernel matching input types",
+    raises=pa.ArrowNotImplementedError,
+)
 @pytest.mark.parametrize(
     "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
 )

From 2a031ae1492bdcc88c0a0377bab97a5d999bc865 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 10 May 2023 22:20:30 -0400
Subject: [PATCH 06/10] docstrings

---
 pandas/core/arrays/arrow/array.py | 256 +++++++++++++++++-------------
 1 file changed, 146 insertions(+), 110 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 8af8dd1ce6479..6ae9c23bbf920 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -321,6 +321,152 @@ def _from_sequence_of_strings(
             )
         return cls._from_sequence(scalars, dtype=pa_type, copy=copy)
 
+    @classmethod
+    def _box_pa(
+        cls, value, pa_type: pa.DataType | None = None
+    ) -> pa.Array | pa.ChunkedArray | pa.Scalar:
+        """
+        Box value into a pyarrow Array, ChunkedArray or Scalar.
+
+        Parameters
+        ----------
+        value : any
+        pa_type : pa.DataType | None
+
+        Returns
+        -------
+        pa.Array or pa.ChunkedArray or pa.Scalar
+        """
+        if is_list_like(value):
+            return cls._box_pa_array(value, pa_type)
+        return cls._box_pa_scalar(value, pa_type)
+
+    @classmethod
+    def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
+        """
+        Box value into a pyarrow Scalar.
+
+        Parameters
+        ----------
+        value : any
+        pa_type : pa.DataType | None
+
+        Returns
+        -------
+        pa.Scalar
+        """
+        if isinstance(value, pa.Scalar):
+            pa_scalar = value
+        elif isna(value):
+            pa_scalar = pa.scalar(None, type=pa_type)
+        else:
+            # GH 53171: pyarrow does not yet handle pandas non-nano correctly
+            # see https://github.com/apache/arrow/issues/33321
+            from pandas import (
+                Timedelta,
+                Timestamp,
+            )
+
+            if isinstance(value, Timedelta):
+                if pa_type is None:
+                    pa_type = pa.duration(value.unit)
+                elif value.unit != pa_type.unit:
+                    value = value.as_unit(pa_type.unit)
+                value = value._value
+            elif isinstance(value, Timestamp):
+                if pa_type is None:
+                    pa_type = pa.timestamp(value.unit, tz=value.tz)
+                elif value.unit != pa_type.unit:
+                    value = value.as_unit(pa_type.unit)
+                value = value._value
+
+            pa_scalar = pa.scalar(value, type=pa_type, from_pandas=True)
+
+        if pa_type is not None and pa_scalar.type != pa_type:
+            pa_scalar = pa_scalar.cast(pa_type)
+
+        return pa_scalar
+
+    @classmethod
+    def _box_pa_array(
+        cls, value, pa_type: pa.DataType | None = None, copy: bool = False
+    ) -> pa.Array | pa.ChunkedArray:
+        """
+        Box value into a pyarrow Array or ChunkedArray.
+
+        Parameters
+        ----------
+        value : Sequence
+        pa_type : pa.DataType | None
+
+        Returns
+        -------
+        pa.Array or pa.ChunkedArray
+        """
+        if isinstance(value, cls):
+            pa_array = value._pa_array
+        elif isinstance(value, (pa.Array, pa.ChunkedArray)):
+            pa_array = value
+        elif isinstance(value, BaseMaskedArray):
+            # GH 52625
+            if copy:
+                value = value.copy()
+            pa_array = value.__arrow_array__()
+        else:
+            if (
+                isinstance(value, np.ndarray)
+                and pa_type is not None
+                and (
+                    pa.types.is_large_binary(pa_type)
+                    or pa.types.is_large_string(pa_type)
+                )
+            ):
+                # See https://github.com/apache/arrow/issues/35289
+                value = value.tolist()
+            elif copy and is_array_like(value):
+                # pa array should not get updated when numpy array is updated
+                value = value.copy()
+
+            if (
+                pa_type is not None
+                and pa.types.is_duration(pa_type)
+                and (not isinstance(value, np.ndarray) or value.dtype.kind not in "mi")
+            ):
+                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
+                # see https://github.com/apache/arrow/issues/33321
+                from pandas import to_timedelta
+
+                value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
+                value = value.to_numpy()
+
+            try:
+                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+            except pa.ArrowInvalid:
+                # GH50430: let pyarrow infer type, then cast
+                pa_array = pa.array(value, from_pandas=True)
+
+            if pa_type is None and pa.types.is_duration(pa_array.type):
+                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
+                # see https://github.com/apache/arrow/issues/33321
+                from pandas import to_timedelta
+
+                value = to_timedelta(value)
+                value = value.to_numpy()
+                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+
+            if pa.types.is_duration(pa_array.type) and pa_array.null_count > 0:
+                # GH52843: upstream bug for duration types when originally
+                # constructed with data containing numpy NaT.
+                # https://github.com/apache/arrow/issues/35088
+                arr = cls(pa_array)
+                arr = arr.fillna(arr.dtype.na_value)
+                pa_array = arr._pa_array
+
+        if pa_type is not None and pa_array.type != pa_type:
+            pa_array = pa_array.cast(pa_type)
+
+        return pa_array
+
     def __getitem__(self, item: PositionalIndexer):
         """Select a subset of self.
 
@@ -1559,116 +1705,6 @@ def _mode(self, dropna: bool = True) -> Self:
 
         return type(self)(most_common)
 
-    @classmethod
-    def _box_pa(
-        cls, value, pa_type: pa.DataType | None = None
-    ) -> pa.Array | pa.ChunkedArray | pa.Scalar:
-        if is_list_like(value):
-            return cls._box_pa_array(value, pa_type)
-        return cls._box_pa_scalar(value, pa_type)
-
-    @classmethod
-    def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
-        if isinstance(value, pa.Scalar):
-            pa_scalar = value
-        elif isna(value):
-            pa_scalar = pa.scalar(None, type=pa_type)
-        else:
-            # GH 53171: pyarrow does not yet handle pandas non-nano correctly
-            # see https://github.com/apache/arrow/issues/33321
-            from pandas import (
-                Timedelta,
-                Timestamp,
-            )
-
-            if isinstance(value, Timedelta):
-                if pa_type is None:
-                    pa_type = pa.duration(value.unit)
-                elif value.unit != pa_type.unit:
-                    value = value.as_unit(pa_type.unit)
-                value = value._value
-            elif isinstance(value, Timestamp):
-                if pa_type is None:
-                    pa_type = pa.timestamp(value.unit, tz=value.tz)
-                elif value.unit != pa_type.unit:
-                    value = value.as_unit(pa_type.unit)
-                value = value._value
-
-            pa_scalar = pa.scalar(value, type=pa_type, from_pandas=True)
-
-        if pa_type is not None and pa_scalar.type != pa_type:
-            pa_scalar = pa_scalar.cast(pa_type)
-
-        return pa_scalar
-
-    @classmethod
-    def _box_pa_array(
-        cls, value, pa_type: pa.DataType | None = None, copy: bool = False
-    ) -> pa.Array | pa.ChunkedArray:
-        if isinstance(value, cls):
-            pa_array = value._pa_array
-        elif isinstance(value, (pa.Array, pa.ChunkedArray)):
-            pa_array = value
-        elif isinstance(value, BaseMaskedArray):
-            # GH 52625
-            if copy:
-                value = value.copy()
-            pa_array = value.__arrow_array__()
-        else:
-            if (
-                isinstance(value, np.ndarray)
-                and pa_type is not None
-                and (
-                    pa.types.is_large_binary(pa_type)
-                    or pa.types.is_large_string(pa_type)
-                )
-            ):
-                # See https://github.com/apache/arrow/issues/35289
-                value = value.tolist()
-            elif copy and is_array_like(value):
-                # pa array should not get updated when numpy array is updated
-                value = value.copy()
-
-            if (
-                pa_type is not None
-                and pa.types.is_duration(pa_type)
-                and (not isinstance(value, np.ndarray) or value.dtype.kind not in "mi")
-            ):
-                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
-                # see https://github.com/apache/arrow/issues/33321
-                from pandas import to_timedelta
-
-                value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
-                value = value.to_numpy()
-
-            try:
-                pa_array = pa.array(value, type=pa_type, from_pandas=True)
-            except pa.ArrowInvalid:
-                # GH50430: let pyarrow infer type, then cast
-                pa_array = pa.array(value, from_pandas=True)
-
-            if pa_type is None and pa.types.is_duration(pa_array.type):
-                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
-                # see https://github.com/apache/arrow/issues/33321
-                from pandas import to_timedelta
-
-                value = to_timedelta(value)
-                value = value.to_numpy()
-                pa_array = pa.array(value, type=pa_type, from_pandas=True)
-
-            if pa.types.is_duration(pa_array.type) and pa_array.null_count > 0:
-                # GH52843: upstream bug for duration types when originally
-                # constructed with data containing numpy NaT.
-                # https://github.com/apache/arrow/issues/35088
-                arr = cls(pa_array)
-                arr = arr.fillna(arr.dtype.na_value)
-                pa_array = arr._pa_array
-
-        if pa_type is not None and pa_array.type != pa_type:
-            pa_array = pa_array.cast(pa_type)
-
-        return pa_array
-
     def _maybe_convert_setitem_value(self, value):
         """Maybe convert value to be pyarrow compatible."""
         try:

From 9c48d6a91e5c14ce335632f4c85cb8c64ab20503 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Thu, 11 May 2023 03:19:12 -0400
Subject: [PATCH 07/10] fix test

---
 pandas/tests/extension/test_arrow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 183445c360c6a..4cdf26a25d6a3 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2960,7 +2960,7 @@ def test_setitem_temporal(pa_type):
 
 @pytest.mark.xfail(
     pa_version_under8p0,
-    reason="Function 'add_checked' has no kernel matching input types",
+    reason="Function 'subtract_checked' has no kernel matching input types",
     raises=pa.ArrowNotImplementedError,
 )
 @pytest.mark.parametrize(

From b460e6ecd6fb6d47ab93236f1d8e2e8d8b24ee5a Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Thu, 11 May 2023 06:15:41 -0400
Subject: [PATCH 08/10] TST: xfail test_arithmetic_temporal only for duration types on pyarrow < 8

---
 pandas/tests/extension/test_arrow.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 4cdf26a25d6a3..6718b2d288466 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2958,19 +2958,20 @@ def test_setitem_temporal(pa_type):
     tm.assert_extension_array_equal(result, expected)
 
 
-@pytest.mark.xfail(
-    pa_version_under8p0,
-    reason="Function 'subtract_checked' has no kernel matching input types",
-    raises=pa.ArrowNotImplementedError,
-)
 @pytest.mark.parametrize(
     "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
 )
-def test_arithmetic_temporal(pa_type):
+def test_arithmetic_temporal(pa_type, request):
     # GH 53171
+    if pa_version_under8p0 and pa.types.is_duration(pa_type):
+        mark = pytest.mark.xfail(
+            raises=pa.ArrowNotImplementedError,
+            reason="Function 'subtract_checked' has no kernel matching input types",
+        )
+        request.node.add_marker(mark)
+
     arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type))
     unit = pa_type.unit
-
     result = arr - pd.Timedelta(1, unit=unit).as_unit(unit)
     expected = ArrowExtensionArray(pa.array([0, 1, 2], type=pa_type))
     tm.assert_extension_array_equal(result, expected)

From 8e55b2645a5f339fbb6e8b6fd2d3bc621c3064e3 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Fri, 12 May 2023 05:44:27 -0400
Subject: [PATCH 09/10] update imports

---
 pandas/core/arrays/arrow/array.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 6ae9c23bbf920..7942d2cbc83d5 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -362,7 +362,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         else:
             # GH 53171: pyarrow does not yet handle pandas non-nano correctly
             # see https://github.com/apache/arrow/issues/33321
-            from pandas import (
+            from pandas._libs.tslibs import (
                 Timedelta,
                 Timestamp,
             )
@@ -434,7 +434,7 @@ def _box_pa_array(
             ):
                 # GH 53171: pyarrow does not yet handle pandas non-nano correctly
                 # see https://github.com/apache/arrow/issues/33321
-                from pandas import to_timedelta
+                from pandas.core.tools.timedeltas import to_timedelta
 
                 value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
                 value = value.to_numpy()
@@ -448,7 +448,7 @@ def _box_pa_array(
             if pa_type is None and pa.types.is_duration(pa_array.type):
                 # GH 53171: pyarrow does not yet handle pandas non-nano correctly
                 # see https://github.com/apache/arrow/issues/33321
-                from pandas import to_timedelta
+                from pandas.core.tools.timedeltas import to_timedelta
 
                 value = to_timedelta(value)
                 value = value.to_numpy()

From 04bce55373ae2c6251944fa2f33b9b7e4874325b Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Fri, 12 May 2023 22:25:45 -0400
Subject: [PATCH 10/10] move imports

---
 pandas/core/arrays/arrow/array.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 7942d2cbc83d5..d201a9ba3a1d9 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -18,6 +18,10 @@
 import numpy as np
 
 from pandas._libs import lib
+from pandas._libs.tslibs import (
+    Timedelta,
+    Timestamp,
+)
 from pandas.compat import (
     pa_version_under7p0,
     pa_version_under8p0,
@@ -362,11 +366,6 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         else:
             # GH 53171: pyarrow does not yet handle pandas non-nano correctly
             # see https://github.com/apache/arrow/issues/33321
-            from pandas._libs.tslibs import (
-                Timedelta,
-                Timestamp,
-            )
-
             if isinstance(value, Timedelta):
                 if pa_type is None:
                     pa_type = pa.duration(value.unit)