From b93bbb01846482d901cff69ebfa47604a89b8361 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 13 Jun 2023 16:07:42 -0700
Subject: [PATCH 01/10] CI: Build pandas even if doctests fail

---
 .github/workflows/code-checks.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index 4ad2fbc71c8c1..f6c35decfd30b 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -77,6 +77,7 @@ jobs:

       - name: Install pandas in editable mode
         id: build-editable
+        if: ${{ steps.build.outcome == 'success' && always() }}
         uses: ./.github/actions/build_pandas
         with:
           editable: true

From 52f85da249699b41ebde985a0764879f57a0aa71 Mon Sep 17 00:00:00 2001
From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com>
Date: Tue, 13 Jun 2023 22:53:13 -0400
Subject: [PATCH 02/10] BUG: groupby sum turning `inf+inf` and `(-inf)+(-inf)`
 into `nan` (#53623)

---
 doc/source/whatsnew/v2.1.0.rst          |  5 +++--
 pandas/_libs/groupby.pyx                |  7 +++++++
 pandas/tests/groupby/test_libgroupby.py | 27 +++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index bea2ad8c7450c..ceda799ebb959 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -462,8 +462,9 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)
 - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`)
 - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`)
-- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`)
-- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`)
+- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`)
+- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`)
+- Bug in :meth:`SeriesGroupBy.sum` and :meth:`DataFrameGroupBy.sum` summing ``np.inf + np.inf`` and ``(-np.inf) + (-np.inf)`` to ``np.nan`` (:issue:`53606`)

 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 61f448cbe0c3f..0baae23a4a71c 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -746,6 +746,13 @@ def group_sum(
                         y = val - compensation[lab, j]
                         t = sumx[lab, j] + y
                         compensation[lab, j] = t - sumx[lab, j] - y
+                        if compensation[lab, j] != compensation[lab, j]:
+                            # GH#53606
+                            # If val is +/- infinity compensation is NaN
+                            # which would lead to results being NaN instead
+                            # of +/- infinity. We cannot use util.is_nan
+                            # because of no gil
+                            compensation[lab, j] = 0
                         sumx[lab, j] = t

     _check_below_mincount(
diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py
index d10bcf9053d1a..92c3b68d87fad 100644
--- a/pandas/tests/groupby/test_libgroupby.py
+++ b/pandas/tests/groupby/test_libgroupby.py
@@ -6,6 +6,7 @@
     group_cumprod,
     group_cumsum,
     group_mean,
+    group_sum,
     group_var,
 )

@@ -302,3 +303,29 @@ def test_cython_group_mean_Inf_at_begining_and_end():
         actual,
         expected,
     )
+
+
+@pytest.mark.parametrize(
+    "values, out",
+    [
+        ([[np.inf], [np.inf], [np.inf]], [[np.inf], [np.inf]]),
+        ([[np.inf], [np.inf], [-np.inf]], [[np.inf], [np.nan]]),
+        ([[np.inf], [-np.inf], [np.inf]], [[np.inf], [np.nan]]),
+        ([[np.inf], [-np.inf], [-np.inf]], [[np.inf], [-np.inf]]),
+    ],
+)
+def test_cython_group_sum_Inf_at_begining_and_end(values, out):
+    # GH #53606
+    actual = np.array([[np.nan], [np.nan]], dtype="float64")
+    counts = np.array([0, 0], dtype="int64")
+    data = np.array(values, dtype="float64")
+    labels = np.array([0, 1, 1], dtype=np.intp)
+
+    group_sum(actual, counts, data, labels, None, is_datetimelike=False)
+
+    expected = np.array(out, dtype="float64")
+
+    tm.assert_numpy_array_equal(
+        actual,
+        expected,
+    )
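As a quick illustration of the behavior this patch corrects (a hypothetical session, assuming the fix is applied; before the change, the Kahan-summation compensation term went NaN on infinite values, so both group sums below came out as NaN):

>>> import numpy as np
>>> import pandas as pd
>>> ser = pd.Series([np.inf, np.inf, -np.inf, -np.inf], index=["a", "a", "b", "b"])
>>> ser.groupby(level=0).sum()
a    inf
b   -inf
dtype: float64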
From 257db3307903ffe45fd59a9535032c95b2290c55 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 14 Jun 2023 09:58:38 -0700
Subject: [PATCH 03/10] DEPR: method, limit in NDFrame.replace (#53492)

* DEPR: method, limit in NDFrame.replace

* update test, docs

* suppress doctest warning

* doctests

---
 doc/source/user_guide/missing_data.rst      |  7 ----
 doc/source/whatsnew/v2.1.0.rst              |  2 +
 pandas/conftest.py                          |  2 +
 pandas/core/generic.py                      | 33 +++++++++++++++
 pandas/core/shared_docs.py                  |  7 ++++
 pandas/tests/frame/methods/test_replace.py  | 11 ++++-
 pandas/tests/frame/test_subclass.py         |  4 +-
 pandas/tests/series/methods/test_replace.py | 45 ++++++++++++++++-----
 8 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
index ed58554896a4f..443fdd4f59e3f 100644
--- a/doc/source/user_guide/missing_data.rst
+++ b/doc/source/user_guide/missing_data.rst
@@ -551,13 +551,6 @@ For a DataFrame, you can specify individual values by column:

    df.replace({"a": 0, "b": 5}, 100)

-Instead of replacing with specified values, you can treat all given values as
-missing and interpolate over them:
-
-.. ipython:: python
-
-   ser.replace([1, 2, 3], method="pad")
-
 .. _missing_data.replace_expression:

 String/regular expression replacement
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index ceda799ebb959..806abf670f32f 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -286,9 +286,11 @@ Deprecations
 - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
 - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
 - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
+- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
 - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`)
 - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
 - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
+- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
 - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
 -
diff --git a/pandas/conftest.py b/pandas/conftest.py
index fbef2fb272ed6..ed05ddd1b2f31 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -134,6 +134,8 @@ def pytest_collection_modifyitems(items, config) -> None:
         ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
         ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
         ("is_sparse", "is_sparse is deprecated"),
+        ("NDFrame.replace", "The 'method' keyword"),
+        ("NDFrame.replace", "Series.replace without 'value'"),
         # Docstring divides by zero to show behavior difference
         ("missing.mask_zero_div_zero", "divide by zero encountered"),
         (
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 711e552f262ac..d112f5aa7d671 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7470,6 +7470,39 @@ def replace(
         regex: bool_t = False,
         method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
     ) -> Self | None:
+        if method is not lib.no_default:
+            warnings.warn(
+                # GH#33302
+                f"The 'method' keyword in {type(self).__name__}.replace is "
+                "deprecated and will be removed in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        elif limit is not None:
+            warnings.warn(
+                # GH#33302
+                f"The 'limit' keyword in {type(self).__name__}.replace is "
+                "deprecated and will be removed in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        if (
+            value is lib.no_default
+            and method is lib.no_default
+            and not is_dict_like(to_replace)
+            and regex is False
+        ):
+            # case that goes through _replace_single and defaults to method="pad"
+            warnings.warn(
+                # GH#33302
+                f"{type(self).__name__}.replace without 'value' and with "
+                "non-dict-like 'to_replace' is deprecated "
+                "and will raise in a future version. "
+                "Explicitly specify the new values instead.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
         if not (
             is_scalar(to_replace)
             or is_re_compilable(to_replace)
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 7bddaad780b8c..7579f816d0ace 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -562,6 +562,8 @@
 {inplace}
 limit : int, default None
     Maximum size gap to forward or backward fill.
+
+    .. deprecated:: 2.1.0
 regex : bool or same types as `to_replace`, default False
     Whether to interpret `to_replace` and/or `value` as regular
     expressions. If this is ``True`` then `to_replace` *must* be a
@@ -572,6 +574,8 @@
     The method to use when for replacement, when `to_replace` is a
     scalar, list or tuple and `value` is ``None``.

+    .. deprecated:: 2.1.0
+
 Returns
 -------
 {klass}
@@ -766,6 +770,9 @@
     4    b
     dtype: object

+    .. deprecated:: 2.1.0
+        The 'method' parameter and padding behavior are deprecated.
+
 On the other hand, if ``None`` is explicitly passed for ``value``, it will
 be respected:
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index d5668020bab5d..9256df72cdf7b 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1236,7 +1236,9 @@ def test_replace_method(self, to_replace, method, expected):
         # GH 19632
         df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]})

-        result = df.replace(to_replace=to_replace, value=None, method=method)
+        msg = "The 'method' keyword in DataFrame.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace(to_replace=to_replace, value=None, method=method)
         expected = DataFrame(expected)
         tm.assert_frame_equal(result, expected)

@@ -1327,8 +1329,13 @@ def test_replace_invalid_to_replace(self):
             r"Expecting 'to_replace' to be either a scalar, array-like, "
             r"dict or None, got invalid type.*"
         )
+        msg2 = (
+            "DataFrame.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
         with pytest.raises(TypeError, match=msg):
-            df.replace(lambda x: x.strip())
+            with tm.assert_produces_warning(FutureWarning, match=msg2):
+                df.replace(lambda x: x.strip())

     @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"])
     @pytest.mark.parametrize("value", [np.nan, pd.NA])
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py
index 5c44a957b9373..3d1e9d26c1ea6 100644
--- a/pandas/tests/frame/test_subclass.py
+++ b/pandas/tests/frame/test_subclass.py
@@ -732,7 +732,9 @@ def test_equals_subclass(self):
     def test_replace_list_method(self):
         # https://github.com/pandas-dev/pandas/pull/46018
         df = tm.SubclassedDataFrame({"A": [0, 1, 2]})
-        result = df.replace([1, 2], method="ffill")
+        msg = "The 'method' keyword in SubclassedDataFrame.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace([1, 2], method="ffill")
         expected = tm.SubclassedDataFrame({"A": [0, 0, 0]})
         assert isinstance(result, tm.SubclassedDataFrame)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 2880e3f3e85db..d3cdae63d26f3 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -131,12 +131,18 @@ def test_replace_gh5319(self):
         # GH 5319
         ser = pd.Series([0, np.nan, 2, 3, 4])
         expected = ser.ffill()
-        result = ser.replace([np.nan])
+        msg = (
+            "Series.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = ser.replace([np.nan])
         tm.assert_series_equal(result, expected)

         ser = pd.Series([0, np.nan, 2, 3, 4])
         expected = ser.ffill()
-        result = ser.replace(np.nan)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = ser.replace(np.nan)
         tm.assert_series_equal(result, expected)

     def test_replace_datetime64(self):
@@ -169,11 +175,17 @@ def test_replace_timedelta_td64(self):

     def test_replace_with_single_list(self):
         ser = pd.Series([0, 1, 2, 3, 4])
-        result = ser.replace([1, 2, 3])
+        msg2 = (
+            "Series.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            result = ser.replace([1, 2, 3])
         tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

         s = ser.copy()
-        return_value = s.replace([1, 2, 3], inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            return_value = s.replace([1, 2, 3], inplace=True)
         assert return_value is None
         tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

@@ -183,8 +195,10 @@ def test_replace_with_single_list(self):
             r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
             r"\(bfill\)\. Got crash_cymbal"
         )
+        msg3 = "The 'method' keyword in Series.replace is deprecated"
         with pytest.raises(ValueError, match=msg):
-            return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
+            with tm.assert_produces_warning(FutureWarning, match=msg3):
+                return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
         assert return_value is None
         tm.assert_series_equal(s, ser)

@@ -450,8 +464,13 @@ def test_replace_invalid_to_replace(self):
             r"Expecting 'to_replace' to be either a scalar, array-like, "
             r"dict or None, got invalid type.*"
         )
+        msg2 = (
+            "Series.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
         with pytest.raises(TypeError, match=msg):
-            series.replace(lambda x: x.strip())
+            with tm.assert_produces_warning(FutureWarning, match=msg2):
+                series.replace(lambda x: x.strip())

     @pytest.mark.parametrize("frame", [False, True])
     def test_replace_nonbool_regex(self, frame):
@@ -502,19 +521,25 @@ def test_replace_extension_other(self, frame_or_series):

     def _check_replace_with_method(self, ser: pd.Series):
         df = ser.to_frame()

-        res = ser.replace(ser[1], method="pad")
+        msg1 = "The 'method' keyword in Series.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg1):
+            res = ser.replace(ser[1], method="pad")
         expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
         tm.assert_series_equal(res, expected)

-        res_df = df.replace(ser[1], method="pad")
+        msg2 = "The 'method' keyword in DataFrame.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            res_df = df.replace(ser[1], method="pad")
         tm.assert_frame_equal(res_df, expected.to_frame())

         ser2 = ser.copy()
-        res2 = ser2.replace(ser[1], method="pad", inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg1):
+            res2 = ser2.replace(ser[1], method="pad", inplace=True)
         assert res2 is None
         tm.assert_series_equal(ser2, expected)

-        res_df2 = df.replace(ser[1], method="pad", inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            res_df2 = df.replace(ser[1], method="pad", inplace=True)
         assert res_df2 is None
         tm.assert_frame_equal(df, expected.to_frame())
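For reference, a minimal sketch of the pattern this patch deprecates (hypothetical session; under pandas >= 2.1 this call also emits a FutureWarning):

>>> import pandas as pd
>>> ser = pd.Series([0, 1, 2, 3, 4])
>>> ser.replace([1, 2, 3])  # no 'value' given; silently falls back to method="pad"
0    0
1    0
2    0
3    0
4    4
dtype: int64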
From 00aa70033627c47c640e9d374d30a22fcfeb5288 Mon Sep 17 00:00:00 2001
From: Luke Manley
Date: Wed, 14 Jun 2023 13:08:06 -0400
Subject: [PATCH 04/10] PERF: Series.str.get_dummies for
 ArrowDtype(pa.string()) (#53655)

* PERF: Series.str.get_dummies for ArrowDtype(pa.string())

* whatsnew

* typing

---
 doc/source/whatsnew/v2.1.0.rst    |  1 +
 pandas/core/arrays/arrow/array.py | 22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 806abf670f32f..42b1346696bb8 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -323,6 +323,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.loc` when selecting rows and columns (:issue:`53014`)
 - Performance improvement in :meth:`Series.add` for pyarrow string and binary dtypes (:issue:`53150`)
 - Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)
+- Performance improvement in :meth:`Series.str.get_dummies` for pyarrow-backed strings (:issue:`53655`)
 - Performance improvement in :meth:`Series.str.get` for pyarrow-backed strings (:issue:`53152`)
 - Performance improvement in :meth:`Series.str.split` with ``expand=True`` for pyarrow-backed strings (:issue:`53585`)
 - Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 0c1b86440b11d..0ca136914b614 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2239,17 +2239,19 @@ def _str_findall(self, pat: str, flags: int = 0):
         return type(self)(pa.chunked_array(result))

     def _str_get_dummies(self, sep: str = "|"):
-        split = pc.split_pattern(self._pa_array, sep).combine_chunks()
-        uniques = split.flatten().unique()
+        split = pc.split_pattern(self._pa_array, sep)
+        flattened_values = pc.list_flatten(split)
+        uniques = flattened_values.unique()
         uniques_sorted = uniques.take(pa.compute.array_sort_indices(uniques))
-        result_data = []
-        for lst in split.to_pylist():
-            if lst is None:
-                result_data.append([False] * len(uniques_sorted))
-            else:
-                res = pc.is_in(uniques_sorted, pa.array(set(lst)))
-                result_data.append(res.to_pylist())
-        result = type(self)(pa.array(result_data))
+        lengths = pc.list_value_length(split).fill_null(0).to_numpy()
+        n_rows = len(self)
+        n_cols = len(uniques)
+        indices = pc.index_in(flattened_values, uniques_sorted).to_numpy()
+        indices = indices + np.arange(n_rows).repeat(lengths) * n_cols
+        dummies = np.zeros(n_rows * n_cols, dtype=np.bool_)
+        dummies[indices] = True
+        dummies = dummies.reshape((n_rows, n_cols))
+        result = type(self)(pa.array(list(dummies)))
         return result, uniques_sorted.to_pylist()

     def _str_index(self, sub: str, start: int = 0, end: int | None = None):
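A usage sketch of the code path this speeds up (assuming pyarrow is installed; the dummies are now built with vectorized pyarrow compute calls instead of a Python-level loop over rows):

>>> import pyarrow as pa
>>> import pandas as pd
>>> ser = pd.Series(["a|b", "b|c", None], dtype=pd.ArrowDtype(pa.string()))
>>> dummies = ser.str.get_dummies(sep="|")  # columns a, b, c; the null row is all-falsey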
From 6458c1c64a9c254f68d03bdbfb9c0c643da3bff3 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 14 Jun 2023 10:11:11 -0700
Subject: [PATCH 05/10] TYP: core.missing (#53625)

---
 pandas/core/arrays/sparse/array.py |   7 +-
 pandas/core/missing.py             | 160 +++++++++++++++++------------
 2 files changed, 98 insertions(+), 69 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 16e7835a7183d..269b7a086de93 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -769,7 +769,12 @@ def fillna(
             )
             new_values = np.asarray(self)
             # interpolate_2d modifies new_values inplace
-            interpolate_2d(new_values, method=method, limit=limit)
+            # error: Argument "method" to "interpolate_2d" has incompatible type
+            # "Literal['backfill', 'bfill', 'ffill', 'pad']"; expected
+            # "Literal['pad', 'backfill']"
+            interpolate_2d(
+                new_values, method=method, limit=limit  # type: ignore[arg-type]
+            )
             return type(self)(new_values, fill_value=self.fill_value)

         else:
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 0766b9c5c7145..8b6b6a2c2a07b 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -10,6 +10,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Literal,
     cast,
 )

@@ -22,7 +23,6 @@
 )
 from pandas._typing import (
     ArrayLike,
-    Axis,
     AxisInt,
     F,
     ReindexMethod,
@@ -223,6 +223,35 @@ def find_valid_index(how: str, is_valid: npt.NDArray[np.bool_]) -> int | None:
     return idxpos  # type: ignore[return-value]


+def validate_limit_direction(
+    limit_direction: str,
+) -> Literal["forward", "backward", "both"]:
+    valid_limit_directions = ["forward", "backward", "both"]
+    limit_direction = limit_direction.lower()
+    if limit_direction not in valid_limit_directions:
+        raise ValueError(
+            "Invalid limit_direction: expecting one of "
+            f"{valid_limit_directions}, got '{limit_direction}'."
+        )
+    # error: Incompatible return value type (got "str", expected
+    # "Literal['forward', 'backward', 'both']")
+    return limit_direction  # type: ignore[return-value]
+
+
+def validate_limit_area(limit_area: str | None) -> Literal["inside", "outside"] | None:
+    if limit_area is not None:
+        valid_limit_areas = ["inside", "outside"]
+        limit_area = limit_area.lower()
+        if limit_area not in valid_limit_areas:
+            raise ValueError(
+                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
+                f"{limit_area}."
+            )
+    # error: Incompatible return value type (got "Optional[str]", expected
+    # "Optional[Literal['inside', 'outside']]")
+    return limit_area  # type: ignore[return-value]
+
+
 def infer_limit_direction(limit_direction, method):
     # Set `limit_direction` depending on `method`
     if limit_direction is None:
@@ -308,7 +337,9 @@ def interpolate_array_2d(
             method=m,
             axis=axis,
             limit=limit,
-            limit_area=limit_area,
+            # error: Argument "limit_area" to "interpolate_2d" has incompatible
+            # type "Optional[str]"; expected "Optional[Literal['inside', 'outside']]"
+            limit_area=limit_area,  # type: ignore[arg-type]
         )
     else:
         assert index is not None  # for mypy
@@ -362,22 +393,8 @@ def _interpolate_2d_with_fill(
         )
         method = "values"

-    valid_limit_directions = ["forward", "backward", "both"]
-    limit_direction = limit_direction.lower()
-    if limit_direction not in valid_limit_directions:
-        raise ValueError(
-            "Invalid limit_direction: expecting one of "
-            f"{valid_limit_directions}, got '{limit_direction}'."
-        )
-
-    if limit_area is not None:
-        valid_limit_areas = ["inside", "outside"]
-        limit_area = limit_area.lower()
-        if limit_area not in valid_limit_areas:
-            raise ValueError(
-                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
-                f"{limit_area}."
-            )
+    limit_direction = validate_limit_direction(limit_direction)
+    limit_area_validated = validate_limit_area(limit_area)

     # default limit is unlimited GH #16282
     limit = algos.validate_limit(nobs=None, limit=limit)
@@ -393,7 +410,7 @@ def func(yvalues: np.ndarray) -> None:
             method=method,
             limit=limit,
             limit_direction=limit_direction,
-            limit_area=limit_area,
+            limit_area=limit_area_validated,
             fill_value=fill_value,
             bounds_error=False,
             **kwargs,
@@ -433,10 +450,10 @@ def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
 def _interpolate_1d(
     indices: np.ndarray,
     yvalues: np.ndarray,
-    method: str | None = "linear",
+    method: str = "linear",
     limit: int | None = None,
     limit_direction: str = "forward",
-    limit_area: str | None = None,
+    limit_area: Literal["inside", "outside"] | None = None,
     fill_value: Any | None = None,
     bounds_error: bool = False,
     order: int | None = None,
@@ -539,10 +556,10 @@ def _interpolate_1d(


 def _interpolate_scipy_wrapper(
-    x,
-    y,
-    new_x,
-    method,
+    x: np.ndarray,
+    y: np.ndarray,
+    new_x: np.ndarray,
+    method: str,
     fill_value=None,
     bounds_error: bool = False,
     order=None,
@@ -565,19 +582,11 @@
         "krogh": interpolate.krogh_interpolate,
         "from_derivatives": _from_derivatives,
         "piecewise_polynomial": _from_derivatives,
+        "cubicspline": _cubicspline_interpolate,
+        "akima": _akima_interpolate,
+        "pchip": interpolate.pchip_interpolate,
     }

-    if getattr(x, "_is_all_dates", False):
-        # GH 5975, scipy.interp1d can't handle datetime64s
-        x, new_x = x._values.astype("i8"), new_x.astype("i8")
-
-    if method == "pchip":
-        alt_methods["pchip"] = interpolate.pchip_interpolate
-    elif method == "akima":
-        alt_methods["akima"] = _akima_interpolate
-    elif method == "cubicspline":
-        alt_methods["cubicspline"] = _cubicspline_interpolate
-
     interp1d_methods = [
         "nearest",
         "zero",
@@ -588,9 +597,11 @@
     ]
     if method in interp1d_methods:
         if method == "polynomial":
-            method = order
+            kind = order
+        else:
+            kind = method
         terp = interpolate.interp1d(
-            x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
+            x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
         )
         new_y = terp(new_x)
     elif method == "spline":
@@ -610,13 +621,18 @@
             y = y.copy()
         if not new_x.flags.writeable:
             new_x = new_x.copy()
-        method = alt_methods[method]
-        new_y = method(x, y, new_x, **kwargs)
+        terp = alt_methods[method]
+        new_y = terp(x, y, new_x, **kwargs)
     return new_y


 def _from_derivatives(
-    xi, yi, x, order=None, der: int | list[int] | None = 0, extrapolate: bool = False
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
+    order=None,
+    der: int | list[int] | None = 0,
+    extrapolate: bool = False,
 ):
     """
     Convenience function for interpolate.BPoly.from_derivatives.
@@ -660,7 +676,13 @@ def _from_derivatives(
     return m(x)


-def _akima_interpolate(xi, yi, x, der: int | list[int] | None = 0, axis: AxisInt = 0):
+def _akima_interpolate(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
+    der: int | list[int] | None = 0,
+    axis: AxisInt = 0,
+):
     """
     Convenience function for akima interpolation.
     xi and yi are arrays of values used to approximate some function f,
@@ -670,13 +692,13 @@

     Parameters
     ----------
-    xi : array-like
+    xi : np.ndarray
         A sorted list of x-coordinates, of length N.
-    yi : array-like
+    yi : np.ndarray
         A 1-D array of real values. `yi`'s length along the interpolation
         axis must be equal to the length of `xi`. If N-D array, use axis
         parameter to select correct axis.
-    x : scalar or array-like
+    x : np.ndarray
         Of length M.
     der : int, optional
         How many derivatives to extract; None for all potentially
@@ -704,9 +726,9 @@


 def _cubicspline_interpolate(
-    xi,
-    yi,
-    x,
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
     axis: AxisInt = 0,
     bc_type: str | tuple[Any, Any] = "not-a-knot",
     extrapolate=None,
@@ -718,14 +740,14 @@

     Parameters
     ----------
-    xi : array-like, shape (n,)
+    xi : np.ndarray, shape (n,)
         1-d array containing values of the independent variable.
         Values must be real, finite and in strictly increasing order.
-    yi : array-like
+    yi : np.ndarray
         Array containing values of the dependent variable. It can have
         arbitrary number of dimensions, but the length along ``axis``
         (see below) must match the length of ``x``. Values must be finite.
-    x : scalar or array-like, shape (m,)
+    x : np.ndarray, shape (m,)
     axis : int, optional
         Axis along which `y` is assumed to be varying. Meaning that for
         ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
@@ -790,7 +812,10 @@


 def _interpolate_with_limit_area(
-    values: np.ndarray, method: str, limit: int | None, limit_area: str | None
+    values: np.ndarray,
+    method: Literal["pad", "backfill"],
+    limit: int | None,
+    limit_area: Literal["inside", "outside"],
 ) -> None:
     """
     Apply interpolation and limit_area logic to values along a to-be-specified axis.
@@ -803,8 +828,8 @@
         Interpolation method. Could be "bfill" or "pad"
     limit: int, optional
         Index limit on interpolation.
-    limit_area: str
-        Limit area for interpolation. Can be "inside" or "outside"
+    limit_area: {'inside', 'outside'}
+        Limit area for interpolation.

     Notes
     -----
@@ -832,16 +857,18 @@
             invalid[first : last + 1] = False
         elif limit_area == "outside":
             invalid[:first] = invalid[last + 1 :] = False
+        else:
+            raise ValueError("limit_area should be 'inside' or 'outside'")

         values[invalid] = np.nan


 def interpolate_2d(
     values: np.ndarray,
-    method: str = "pad",
-    axis: Axis = 0,
+    method: Literal["pad", "backfill"] = "pad",
+    axis: AxisInt = 0,
     limit: int | None = None,
-    limit_area: str | None = None,
+    limit_area: Literal["inside", "outside"] | None = None,
 ) -> None:
     """
     Perform an actual interpolation of values, values will be make 2-d if
@@ -880,9 +907,7 @@
                 limit=limit,
                 limit_area=limit_area,
             ),
-            # error: Argument 2 to "apply_along_axis" has incompatible type
-            # "Union[str, int]"; expected "SupportsIndex"
-            axis,  # type: ignore[arg-type]
+            axis,
             values,
         )
         return
@@ -898,12 +923,9 @@
     method = clean_fill_method(method)

     tvalues = transf(values)
+
+    func = get_fill_func(method, ndim=2)
     # _pad_2d and _backfill_2d both modify tvalues inplace
-    if method == "pad":
-        _pad_2d(tvalues, limit=limit)
-    else:
-        _backfill_2d(tvalues, limit=limit)
-
+    func(tvalues, limit=limit)
     return
@@ -969,7 +991,7 @@ def _pad_2d(
 ):
     mask = _fillna_prep(values, mask)

-    if np.all(values.shape):
+    if values.size:
         algos.pad_2d_inplace(values, mask, limit=limit)
     else:
         # for test coverage
@@ -983,7 +1005,7 @@ def _backfill_2d(
 ):
     mask = _fillna_prep(values, mask)

-    if np.all(values.shape):
+    if values.size:
         algos.backfill_2d_inplace(values, mask, limit=limit)
     else:
         # for test coverage
@@ -1007,7 +1029,9 @@ def clean_reindex_fill_method(method) -> ReindexMethod | None:
     return clean_fill_method(method, allow_nearest=True)


-def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit):
+def _interp_limit(
+    invalid: npt.NDArray[np.bool_], fw_limit: int | None, bw_limit: int | None
+):
     """
     Get indexers of values that won't be filled because they exceed the limits.
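The validators factored out above back the public interpolate API; a minimal sketch of the semantics they enforce (hypothetical session):

>>> import numpy as np
>>> import pandas as pd
>>> ser = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan])
>>> ser.interpolate(limit_direction="both", limit_area="inside")
0    NaN
1    1.0
2    2.0
3    3.0
4    NaN
dtype: float64
>>> ser.interpolate(limit_direction="sideways")  # doctest: +SKIP
ValueError: Invalid limit_direction: expecting one of ['forward', 'backward', 'both'], got 'sideways'.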
From b3556141ac009118d74f808933dd99ea09e8139d Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Wed, 14 Jun 2023 12:09:45 -0700
Subject: [PATCH 06/10] CI: Attempt to fix wheel builds (#53670)

---
 .github/workflows/wheels.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index ea5ab81e74030..eae2949594bcc 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -111,8 +111,11 @@ jobs:

       - name: Build wheels
         uses: pypa/cibuildwheel@v2.13.1
-        with:
-          package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
+        # TODO: Build wheels from sdist again
+        # There's some sort of weird race condition?
+        # within Github that makes the sdist be missing files
+        #with:
+        #  package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
         env:
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
From 0489c93f04380c6e61d57b97dd0c9b3fc59ba889 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?=
Date: Wed, 14 Jun 2023 22:16:16 +0200
Subject: [PATCH 07/10] DOC: Fixing EX01 - Added examples (#53647)

* SeriesGroupBy.fillna example added

* Added examples

* Corrected failing test for timedelta.total_seconds

* Corrected fillna example

---
 ci/code_checks.sh                  | 11 -----
 pandas/_libs/tslibs/nattype.pyx    | 16 ++++++-
 pandas/_libs/tslibs/timedeltas.pyx | 76 +++++++++++++++++++++++++++++-
 pandas/core/arrays/datetimes.py    |  8 ++++
 pandas/core/groupby/generic.py     | 21 +++++++++
 5 files changed, 118 insertions(+), 14 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index adda422296396..f63cc1fcc5767 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -119,16 +119,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.Timestamp.utcoffset \
         pandas.Timestamp.utctimetuple \
         pandas.Timestamp.weekday \
-        pandas.arrays.DatetimeArray \
-        pandas.Timedelta.view \
-        pandas.Timedelta.as_unit \
-        pandas.Timedelta.ceil \
-        pandas.Timedelta.floor \
-        pandas.Timedelta.round \
-        pandas.Timedelta.to_pytimedelta \
-        pandas.Timedelta.to_timedelta64 \
-        pandas.Timedelta.to_numpy \
-        pandas.Timedelta.total_seconds \
         pandas.arrays.TimedeltaArray \
         pandas.Period.asfreq \
         pandas.Period.now \
@@ -261,7 +251,6 @@
         pandas.core.window.ewm.ExponentialMovingWindow.cov \
         pandas.api.indexers.BaseIndexer \
         pandas.api.indexers.VariableOffsetWindowIndexer \
-        pandas.core.groupby.SeriesGroupBy.fillna \
         pandas.io.formats.style.Styler \
         pandas.io.formats.style.Styler.from_custom_template \
         pandas.io.formats.style.Styler.set_caption \
diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
index ea859a5f7d53d..75205a359db68 100644
--- a/pandas/_libs/tslibs/nattype.pyx
+++ b/pandas/_libs/tslibs/nattype.pyx
@@ -4,7 +4,6 @@ from cpython.datetime cimport (
     PyDelta_Check,
     datetime,
     import_datetime,
-    timedelta,
 )

 import_datetime()
@@ -440,7 +439,20 @@ class NaTType(_NaT):
         Monday == 1 ... Sunday == 7.
         """,
     )
-    total_seconds = _make_nan_func("total_seconds", timedelta.total_seconds.__doc__)
+    total_seconds = _make_nan_func(
+        "total_seconds",
+        """
+        Total seconds in the duration.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1min')
+        >>> td
+        Timedelta('0 days 00:01:00')
+        >>> td.total_seconds()
+        60.0
+        """,
+    )
     month_name = _make_nan_func(
         "month_name",
         """
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 047b5e861da2c..e68b8b210437a 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1112,7 +1112,17 @@ cdef class _Timedelta(timedelta):
         return self._ms * 1000 + self._us

     def total_seconds(self) -> float:
-        """Total seconds in the duration."""
+        """
+        Total seconds in the duration.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1min')
+        >>> td
+        Timedelta('0 days 00:01:00')
+        >>> td.total_seconds()
+        60.0
+        """
         # We need to override bc we overrode days/seconds/microseconds
         # TODO: add nanos/1e9?
         return self.days * 24 * 3600 + self.seconds + self.microseconds / 1_000_000
@@ -1274,6 +1284,14 @@ cdef class _Timedelta(timedelta):
         Notes
         -----
         Any nanosecond resolution will be lost.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.to_pytimedelta()
+        datetime.timedelta(days=3)
         """
         if self._creso == NPY_FR_ns:
             return timedelta(microseconds=int(self._value) / 1000)
@@ -1287,6 +1305,14 @@ cdef class _Timedelta(timedelta):
     def to_timedelta64(self) -> np.timedelta64:
         """
         Return a numpy.timedelta64 object with 'ns' precision.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.to_timedelta64()
+        numpy.timedelta64(259200000000000,'ns')
         """
         cdef:
             str abbrev = npy_unit_to_abbrev(self._creso)
@@ -1309,6 +1335,14 @@ cdef class _Timedelta(timedelta):
         See Also
         --------
         Series.to_numpy : Similar method for Series.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.to_numpy()
+        numpy.timedelta64(259200000000000,'ns')
         """
         if dtype is not None or copy is not False:
             raise ValueError(
@@ -1324,6 +1358,14 @@ cdef class _Timedelta(timedelta):
         ----------
         dtype : str or dtype
             The dtype to view the underlying data as.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.view(int)
+        259200000000000
         """
         return np.timedelta64(self._value).view(dtype)
@@ -1603,6 +1645,14 @@ cdef class _Timedelta(timedelta):
         Returns
         -------
         Timedelta
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.as_unit('s')
+        Timedelta('0 days 00:00:01')
         """
         dtype = np.dtype(f"m8[{unit}]")
         reso = get_unit_from_dtype(dtype)
@@ -1875,6 +1925,14 @@ class Timedelta(_Timedelta):
         Raises
         ------
         ValueError if the freq cannot be converted
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.round('s')
+        Timedelta('0 days 00:00:01')
         """
         return self._round(freq, RoundTo.NEAREST_HALF_EVEN)
@@ -1886,6 +1944,14 @@ class Timedelta(_Timedelta):
         ----------
         freq : str
             Frequency string indicating the flooring resolution.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.floor('s')
+        Timedelta('0 days 00:00:01')
         """
         return self._round(freq, RoundTo.MINUS_INFTY)
@@ -1897,6 +1963,14 @@ class Timedelta(_Timedelta):
         ----------
         freq : str
             Frequency string indicating the ceiling resolution.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.ceil('s')
+        Timedelta('0 days 00:00:02')
         """
         return self._round(freq, RoundTo.PLUS_INFTY)
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 3d083e55b12ab..d6afba8c34904 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -183,6 +183,14 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):  # type: ignore[misc]
     Methods
     -------
     None
+
+    Examples
+    --------
+    >>> pd.arrays.DatetimeArray(pd.DatetimeIndex(['2023-01-01', '2023-01-02']),
+    ...                         freq='D')
+    <DatetimeArray>
+    ['2023-01-01 00:00:00', '2023-01-02 00:00:00']
+    Length: 2, dtype: datetime64[ns]
     """

     _typ = "datetimearray"
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2b1ff05f18d5e..cecb9a84c62dd 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -914,6 +914,27 @@ def fillna(
         --------
         ffill : Forward fill values within a group.
         bfill : Backward fill values within a group.
+
+        Examples
+        --------
+        For SeriesGroupBy:
+
+        >>> lst = ['cat', 'cat', 'cat', 'mouse', 'mouse']
+        >>> ser = pd.Series([1, None, None, 2, None], index=lst)
+        >>> ser
+        cat      1.0
+        cat      NaN
+        cat      NaN
+        mouse    2.0
+        mouse    NaN
+        dtype: float64
+        >>> ser.groupby(level=0).fillna(0, limit=1)
+        cat      1.0
+        cat      0.0
+        cat      NaN
+        mouse    2.0
+        mouse    0.0
+        dtype: float64
         """
         result = self._op_via_apply(
             "fillna",

From 38198faa9091e25f83822b326b2dc5e10b80b955 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 14 Jun 2023 17:05:41 -0700
Subject: [PATCH 08/10] CI/TST: Mark test_to_read_gcs as single_cpu (#53677)

---
 pandas/tests/io/test_gcs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index d82cfd5bd169d..bdea24f7bb5aa 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -44,6 +44,8 @@ def ls(self, path, **kwargs):

 @td.skip_if_no("gcsfs")
+# Patches pyarrow; other processes should not pick up change
+@pytest.mark.single_cpu
 @pytest.mark.parametrize("format", ["csv", "json", "parquet", "excel", "markdown"])
 def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys):
     """
From a7fd75746cc69e318742fbcddb36195eab260525 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Wed, 14 Jun 2023 17:06:17 -0700
Subject: [PATCH 09/10] BUG/CoW: is_range_indexer can't handle very large
 arrays (#53672)

* BUG: is_range_indexer can't handle very large arrays

* fix test on 32-bit

---
 doc/source/whatsnew/v2.1.0.rst |  2 +-
 pandas/_libs/lib.pyx           |  2 +-
 pandas/tests/libs/test_lib.py  | 13 +++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 42b1346696bb8..19e314cbf5ed8 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -406,7 +406,7 @@ Indexing
 ^^^^^^^^
 - Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`)
 - Bug in :meth:`DataFrame.__setitem__` with a boolean mask and :meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN`` incorrectly raising ``TypeError`` (:issue:`53291`)
--
+- Bug in indexing methods (e.g. :meth:`DataFrame.__getitem__`) where taking the entire :class:`DataFrame`/:class:`Series` would raise an ``OverflowError`` when Copy on Write was enabled and the length of the array was over the maximum size a 32-bit integer can hold (:issue:`53616`)

 Missing
 ^^^^^^^
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index e68dbfa26a104..f7934865fbb43 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -668,7 +668,7 @@ ctypedef fused int6432_t:

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_range_indexer(ndarray[int6432_t, ndim=1] left, int n) -> bool:
+def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
     """
     Perform an element by element comparison on 1-d integer arrays, meant for indexer
     comparisons
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index 383e1b81e17a7..6ad8d748d6997 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -6,6 +6,7 @@
     lib,
     writers as libwriters,
 )
+from pandas.compat import IS64

 from pandas import Index
 import pandas._testing as tm
@@ -248,6 +249,18 @@ def test_is_range_indexer(self, dtype):
         left = np.arange(0, 100, dtype=dtype)
         assert lib.is_range_indexer(left, 100)

+    @pytest.mark.skipif(
+        not IS64,
+        reason="2**31 is too big for Py_ssize_t on 32-bit. "
+        "It doesn't matter though since you cannot create an array that long on 32-bit",
+    )
+    @pytest.mark.parametrize("dtype", ["int64", "int32"])
+    def test_is_range_indexer_big_n(self, dtype):
+        # GH53616
+        left = np.arange(0, 100, dtype=dtype)
+
+        assert not lib.is_range_indexer(left, 2**31)
+
     @pytest.mark.parametrize("dtype", ["int64", "int32"])
     def test_is_range_indexer_not_equal(self, dtype):
         # GH#50592
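A sketch of the internal predicate this patch widens (a private helper, so subject to change; the signature is as in the diff above):

>>> import numpy as np
>>> from pandas._libs import lib
>>> indexer = np.arange(3, dtype=np.intp)
>>> lib.is_range_indexer(indexer, 3)   # indexer is exactly range(3)
True
>>> lib.is_range_indexer(indexer, 2**31)  # n no longer overflows a C int
False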
DataFrame({"A": _test_series, "B": _test_series, "C": np.arange(len(dti))}) -def test_str(): - r = test_series.resample("H") +def test_str(_test_series): + r = _test_series.resample("H") assert ( "DatetimeIndexResampler [freq=, axis=0, closed=left, " "label=left, convention=start, origin=start_day]" in str(r) ) - r = test_series.resample("H", origin="2000-01-01") + r = _test_series.resample("H", origin="2000-01-01") assert ( "DatetimeIndexResampler [freq=, axis=0, closed=left, " "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r) ) -def test_api(): - r = test_series.resample("H") +def test_api(_test_series): + r = _test_series.resample("H") result = r.mean() assert isinstance(result, Series) assert len(result) == 217 - r = test_series.to_frame().resample("H") + r = _test_series.to_frame().resample("H") result = r.mean() assert isinstance(result, DataFrame) assert len(result) == 217 @@ -115,11 +120,11 @@ def test_resample_group_keys(): tm.assert_frame_equal(result, expected) -def test_pipe(test_frame): +def test_pipe(test_frame, _test_series): # GH17905 # series - r = test_series.resample("H") + r = _test_series.resample("H") expected = r.max() - r.mean() result = r.pipe(lambda x: x.max() - x.mean()) tm.assert_series_equal(result, expected) @@ -259,9 +264,9 @@ def test_combined_up_downsampling_of_irregular(): tm.assert_series_equal(result, expected) -def test_transform_series(): - r = test_series.resample("20min") - expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean") +def test_transform_series(_test_series): + r = _test_series.resample("20min") + expected = _test_series.groupby(pd.Grouper(freq="20min")).transform("mean") result = r.transform("mean") tm.assert_series_equal(result, expected) @@ -317,17 +322,17 @@ def test_fillna(): ], ids=["resample", "groupby"], ) -def test_apply_without_aggregation(func): +def test_apply_without_aggregation(func, _test_series): # both resample and groupby should work w/o aggregation - t = func(test_series) + t = func(_test_series) result = t.apply(lambda x: x) - tm.assert_series_equal(result, test_series) + tm.assert_series_equal(result, _test_series) -def test_apply_without_aggregation2(): - grouped = test_series.to_frame(name="foo").resample("20min", group_keys=False) +def test_apply_without_aggregation2(_test_series): + grouped = _test_series.to_frame(name="foo").resample("20min", group_keys=False) result = grouped["foo"].apply(lambda x: x) - tm.assert_series_equal(result, test_series.rename("foo")) + tm.assert_series_equal(result, _test_series.rename("foo")) def test_agg_consistency(): @@ -1002,13 +1007,13 @@ def test_df_axis_param_depr(): df.resample("M", axis=0) -def test_series_axis_param_depr(): +def test_series_axis_param_depr(_test_series): warning_msg = ( "The 'axis' keyword in Series.resample is " "deprecated and will be removed in a future version." 
) with tm.assert_produces_warning(FutureWarning, match=warning_msg): - test_series.resample("H", axis=0) + _test_series.resample("H", axis=0) def test_resample_empty(): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 1682edb42915d..df14a5bc374c6 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -17,10 +17,13 @@ import pandas._testing as tm from pandas.core.indexes.datetimes import date_range -test_frame = DataFrame( - {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}, - index=date_range("1/1/2000", freq="s", periods=40), -) + +@pytest.fixture +def test_frame(): + return DataFrame( + {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}, + index=date_range("1/1/2000", freq="s", periods=40), + ) @async_mark() @@ -85,7 +88,7 @@ def f_1(x): tm.assert_frame_equal(result, expected) -def test_getitem(): +def test_getitem(test_frame): g = test_frame.groupby("A") expected = g.B.apply(lambda x: x.resample("2s").mean()) @@ -217,7 +220,7 @@ def test_nearest(): "ohlc", ], ) -def test_methods(f): +def test_methods(f, test_frame): g = test_frame.groupby("A") r = g.resample("2s") @@ -226,7 +229,7 @@ def test_methods(f): tm.assert_equal(result, expected) -def test_methods_nunique(): +def test_methods_nunique(test_frame): # series only g = test_frame.groupby("A") r = g.resample("2s") @@ -236,7 +239,7 @@ def test_methods_nunique(): @pytest.mark.parametrize("f", ["std", "var"]) -def test_methods_std_var(f): +def test_methods_std_var(f, test_frame): g = test_frame.groupby("A") r = g.resample("2s") result = getattr(r, f)(ddof=1) @@ -244,7 +247,7 @@ def test_methods_std_var(f): tm.assert_frame_equal(result, expected) -def test_apply(): +def test_apply(test_frame): g = test_frame.groupby("A") r = g.resample("2s") @@ -342,7 +345,7 @@ def test_resample_groupby_with_label(): tm.assert_frame_equal(result, expected) -def test_consistency_with_window(): +def test_consistency_with_window(test_frame): # consistent return values with window df = test_frame expected = Index([1, 2, 3], name="A") diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index debfb48c2b39c..a5fb48f801522 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -14,10 +14,13 @@ from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range -test_series = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000)) +@pytest.fixture +def test_series(): + return Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000)) -def test_apply(): + +def test_apply(test_series): grouper = Grouper(freq="A", label="right", closed="right") grouped = test_series.groupby(grouper) @@ -33,7 +36,7 @@ def f(x): tm.assert_series_equal(applied, expected) -def test_count(): +def test_count(test_series): test_series[::3] = np.nan expected = test_series.groupby(lambda x: x.year).count() @@ -48,7 +51,7 @@ def test_count(): tm.assert_series_equal(result, expected) -def test_numpy_reduction(): +def test_numpy_reduction(test_series): result = test_series.resample("A", closed="right").prod() expected = test_series.groupby(lambda x: x.year).agg(np.prod) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 2d84de8145111..948545320a31a 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ 
b/pandas/tests/reshape/concat/test_append_common.py @@ -10,37 +10,46 @@ ) import pandas._testing as tm -dt_data = [ - pd.Timestamp("2011-01-01"), - pd.Timestamp("2011-01-02"), - pd.Timestamp("2011-01-03"), -] -tz_data = [ - pd.Timestamp("2011-01-01", tz="US/Eastern"), - pd.Timestamp("2011-01-02", tz="US/Eastern"), - pd.Timestamp("2011-01-03", tz="US/Eastern"), -] -td_data = [ - pd.Timedelta("1 days"), - pd.Timedelta("2 days"), - pd.Timedelta("3 days"), -] -period_data = [ - pd.Period("2011-01", freq="M"), - pd.Period("2011-02", freq="M"), - pd.Period("2011-03", freq="M"), -] -data_dict = { - "bool": [True, False, True], - "int64": [1, 2, 3], - "float64": [1.1, np.nan, 3.3], - "category": Categorical(["X", "Y", "Z"]), - "object": ["a", "b", "c"], - "datetime64[ns]": dt_data, - "datetime64[ns, US/Eastern]": tz_data, - "timedelta64[ns]": td_data, - "period[M]": period_data, -} + +@pytest.fixture( + params=list( + { + "bool": [True, False, True], + "int64": [1, 2, 3], + "float64": [1.1, np.nan, 3.3], + "category": Categorical(["X", "Y", "Z"]), + "object": ["a", "b", "c"], + "datetime64[ns]": [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + ], + "datetime64[ns, US/Eastern]": [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), + ], + "timedelta64[ns]": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + "period[M]": [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + ], + }.items() + ) +) +def item(request): + key, data = request.param + return key, data + + +@pytest.fixture +def item2(item): + return item class TestConcatAppendCommon: @@ -48,13 +57,6 @@ class TestConcatAppendCommon: Test common dtype coercion rules between concat and append. """ - @pytest.fixture(params=sorted(data_dict.keys())) - def item(self, request): - key = request.param - return key, data_dict[key] - - item2 = item - def test_dtypes(self, item, index_or_series): # to confirm test case covers intended dtypes typ, vals = item diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 1fbc9ed787e11..e1d64795e235d 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -10,10 +10,11 @@ class TestSeriesArgsort: - def _check_accum_op(self, name, ser, check_dtype=True): - func = getattr(np, name) + def test_argsort_numpy(self, datetime_series): + ser = datetime_series + func = np.argsort tm.assert_numpy_array_equal( - func(ser).values, func(np.array(ser)), check_dtype=check_dtype + func(ser).values, func(np.array(ser)), check_dtype=False ) # with missing values @@ -26,7 +27,6 @@ def _check_accum_op(self, name, ser, check_dtype=True): tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) def test_argsort(self, datetime_series): - self._check_accum_op("argsort", datetime_series, check_dtype=False) argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index d91cd6a43daea..ea1cb1047bde8 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -12,149 +12,162 @@ # this default. Those overrides are defined as a dict with (keyword, val) as # dictionary key. 
diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py
index 1fbc9ed787e11..e1d64795e235d 100644
--- a/pandas/tests/series/methods/test_argsort.py
+++ b/pandas/tests/series/methods/test_argsort.py
@@ -10,10 +10,11 @@


 class TestSeriesArgsort:
-    def _check_accum_op(self, name, ser, check_dtype=True):
-        func = getattr(np, name)
+    def test_argsort_numpy(self, datetime_series):
+        ser = datetime_series
+        func = np.argsort
         tm.assert_numpy_array_equal(
-            func(ser).values, func(np.array(ser)), check_dtype=check_dtype
+            func(ser).values, func(np.array(ser)), check_dtype=False
         )

         # with missing values
@@ -26,7 +27,6 @@ def _check_accum_op(self, name, ser, check_dtype=True):
         tm.assert_numpy_array_equal(result.values, expected, check_dtype=False)

     def test_argsort(self, datetime_series):
-        self._check_accum_op("argsort", datetime_series, check_dtype=False)
         argsorted = datetime_series.argsort()
         assert issubclass(argsorted.dtype.type, np.integer)
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index d91cd6a43daea..ea1cb1047bde8 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -12,149 +12,162 @@
 # this default. Those overrides are defined as a dict with (keyword, val) as
 # dictionary key. In case of multiple items, the last override takes precedence.

-test_cases = [
-    (
-        # data
-        [1, 2, 3],
-        # original dtype
-        np.dtype("int32"),
-        # default expected dtype
-        "Int32",
-        # exceptions on expected dtype
-        {("convert_integer", False): np.dtype("int32")},
-    ),
-    (
-        [1, 2, 3],
-        np.dtype("int64"),
-        "Int64",
-        {("convert_integer", False): np.dtype("int64")},
-    ),
-    (
-        ["x", "y", "z"],
-        np.dtype("O"),
-        pd.StringDtype(),
-        {("convert_string", False): np.dtype("O")},
-    ),
-    (
-        [True, False, np.nan],
-        np.dtype("O"),
-        pd.BooleanDtype(),
-        {("convert_boolean", False): np.dtype("O")},
-    ),
-    (
-        ["h", "i", np.nan],
-        np.dtype("O"),
-        pd.StringDtype(),
-        {("convert_string", False): np.dtype("O")},
-    ),
-    (  # GH32117
-        ["h", "i", 1],
-        np.dtype("O"),
-        np.dtype("O"),
-        {},
-    ),
-    (
-        [10, np.nan, 20],
-        np.dtype("float"),
-        "Int64",
-        {
-            ("convert_integer", False, "convert_floating", True): "Float64",
-            ("convert_integer", False, "convert_floating", False): np.dtype("float"),
-        },
-    ),
-    (
-        [np.nan, 100.5, 200],
-        np.dtype("float"),
-        "Float64",
-        {("convert_floating", False): np.dtype("float")},
-    ),
-    (
-        [3, 4, 5],
-        "Int8",
-        "Int8",
-        {},
-    ),
-    (
-        [[1, 2], [3, 4], [5]],
-        None,
-        np.dtype("O"),
-        {},
-    ),
-    (
-        [4, 5, 6],
-        np.dtype("uint32"),
-        "UInt32",
-        {("convert_integer", False): np.dtype("uint32")},
-    ),
-    (
-        [-10, 12, 13],
-        np.dtype("i1"),
-        "Int8",
-        {("convert_integer", False): np.dtype("i1")},
-    ),
-    (
-        [1.2, 1.3],
-        np.dtype("float32"),
-        "Float32",
-        {("convert_floating", False): np.dtype("float32")},
-    ),
-    (
-        [1, 2.0],
-        object,
-        "Int64",
-        {
-            ("convert_integer", False): "Float64",
-            ("convert_integer", False, "convert_floating", False): np.dtype("float"),
-            ("infer_objects", False): np.dtype("object"),
-        },
-    ),
-    (
-        [1, 2.5],
-        object,
-        "Float64",
-        {
-            ("convert_floating", False): np.dtype("float"),
-            ("infer_objects", False): np.dtype("object"),
-        },
-    ),
-    (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
-    (
-        pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
-        pd.DatetimeTZDtype(tz="UTC"),
-        pd.DatetimeTZDtype(tz="UTC"),
-        {},
-    ),
-    (
-        pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
-        "datetime64[ns]",
-        np.dtype("datetime64[ns]"),
-        {},
-    ),
-    (
-        pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
-        object,
-        np.dtype("datetime64[ns]"),
-        {("infer_objects", False): np.dtype("object")},
-    ),
-    (pd.period_range("1/1/2011", freq="M", periods=3), None, pd.PeriodDtype("M"), {}),
-    (
-        pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
-        None,
-        pd.IntervalDtype("int64", "right"),
-        {},
-    ),
-]
+
+@pytest.fixture(
+    params=[
+        (
+            # data
+            [1, 2, 3],
+            # original dtype
+            np.dtype("int32"),
+            # default expected dtype
+            "Int32",
+            # exceptions on expected dtype
+            {("convert_integer", False): np.dtype("int32")},
+        ),
+        (
+            [1, 2, 3],
+            np.dtype("int64"),
+            "Int64",
+            {("convert_integer", False): np.dtype("int64")},
+        ),
+        (
+            ["x", "y", "z"],
+            np.dtype("O"),
+            pd.StringDtype(),
+            {("convert_string", False): np.dtype("O")},
+        ),
+        (
+            [True, False, np.nan],
+            np.dtype("O"),
+            pd.BooleanDtype(),
+            {("convert_boolean", False): np.dtype("O")},
+        ),
+        (
+            ["h", "i", np.nan],
+            np.dtype("O"),
+            pd.StringDtype(),
+            {("convert_string", False): np.dtype("O")},
+        ),
+        (  # GH32117
+            ["h", "i", 1],
+            np.dtype("O"),
+            np.dtype("O"),
+            {},
+        ),
+        (
+            [10, np.nan, 20],
+            np.dtype("float"),
+            "Int64",
+            {
+                ("convert_integer", False, "convert_floating", True): "Float64",
+                ("convert_integer", False, "convert_floating", False): np.dtype(
+                    "float"
+                ),
+            },
+        ),
+        (
+            [np.nan, 100.5, 200],
+            np.dtype("float"),
+            "Float64",
+            {("convert_floating", False): np.dtype("float")},
+        ),
+        (
+            [3, 4, 5],
+            "Int8",
+            "Int8",
+            {},
+        ),
+        (
+            [[1, 2], [3, 4], [5]],
+            None,
+            np.dtype("O"),
+            {},
+        ),
+        (
+            [4, 5, 6],
+            np.dtype("uint32"),
+            "UInt32",
+            {("convert_integer", False): np.dtype("uint32")},
+        ),
+        (
+            [-10, 12, 13],
+            np.dtype("i1"),
+            "Int8",
+            {("convert_integer", False): np.dtype("i1")},
+        ),
+        (
+            [1.2, 1.3],
+            np.dtype("float32"),
+            "Float32",
+            {("convert_floating", False): np.dtype("float32")},
+        ),
+        (
+            [1, 2.0],
+            object,
+            "Int64",
+            {
+                ("convert_integer", False): "Float64",
+                ("convert_integer", False, "convert_floating", False): np.dtype(
+                    "float"
+                ),
+                ("infer_objects", False): np.dtype("object"),
+            },
+        ),
+        (
+            [1, 2.5],
+            object,
+            "Float64",
+            {
+                ("convert_floating", False): np.dtype("float"),
+                ("infer_objects", False): np.dtype("object"),
+            },
+        ),
+        (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
+        (
+            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
+            pd.DatetimeTZDtype(tz="UTC"),
+            pd.DatetimeTZDtype(tz="UTC"),
+            {},
+        ),
+        (
+            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
+            "datetime64[ns]",
+            np.dtype("datetime64[ns]"),
+            {},
+        ),
+        (
+            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
+            object,
+            np.dtype("datetime64[ns]"),
+            {("infer_objects", False): np.dtype("object")},
+        ),
+        (
+            pd.period_range("1/1/2011", freq="M", periods=3),
+            None,
+            pd.PeriodDtype("M"),
+            {},
+        ),
+        (
+            pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
+            None,
+            pd.IntervalDtype("int64", "right"),
+            {},
+        ),
+    ]
+)
+def test_cases(request):
+    return request.param


 class TestSeriesConvertDtypes:
-    @pytest.mark.parametrize(
-        "data, maindtype, expected_default, expected_other",
-        test_cases,
-    )
     @pytest.mark.parametrize("params", product(*[(True, False)] * 5))
     def test_convert_dtypes(
-        self, data, maindtype, params, expected_default, expected_other
+        self,
+        test_cases,
+        params,
     ):
+        data, maindtype, expected_default, expected_other = test_cases
         if (
             hasattr(data, "dtype")
             and data.dtype == "M8[ns]"
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
index ac36103edcdcc..38dea7dc5f8bf 100644
--- a/pandas/tests/series/test_ufunc.py
+++ b/pandas/tests/series/test_ufunc.py
@@ -11,9 +11,16 @@
 import pandas._testing as tm
 from pandas.arrays import SparseArray

-BINARY_UFUNCS = [np.add, np.logaddexp]  # dunder op
-SPARSE = [True, False]
-SPARSE_IDS = ["sparse", "dense"]
+
+@pytest.fixture(params=[np.add, np.logaddexp])
+def ufunc(request):
+    # dunder op
+    return request.param
+
+
+@pytest.fixture(params=[True, False], ids=["sparse", "dense"])
+def sparse(request):
+    return request.param


 @pytest.fixture
@@ -29,7 +36,6 @@ def arrays_for_binary_ufunc():


 @pytest.mark.parametrize("ufunc", [np.positive, np.floor, np.exp])
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 def test_unary_ufunc(ufunc, sparse):
     # Test that ufunc(pd.Series) == pd.Series(ufunc)
     arr = np.random.randint(0, 10, 10, dtype="int64")
@@ -46,8 +52,6 @@ def test_unary_ufunc(ufunc, sparse):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
 def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
     # Test that ufunc(pd.Series(a), array) == pd.Series(ufunc(a, b))
@@ -72,8 +76,6 @@ def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
 def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
     # Test that
@@ -101,8 +103,6 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"])
 @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
 def test_binary_ufunc_with_series(
@@ -143,8 +143,6 @@ def test_binary_ufunc_with_series(
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("flip", [True, False])
 def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
     # Test that
@@ -170,7 +168,6 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):


 @pytest.mark.parametrize("ufunc", [np.divmod])  # TODO: np.modf, np.frexp
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("shuffle", [True, False])
 @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
 def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc):
@@ -203,7 +200,6 @@ def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary
     tm.assert_series_equal(result[1], pd.Series(expected[1]))


-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
     # Test that the same conditions from unary input apply to multi-output
     # ufuncs
@@ -223,8 +219,6 @@ def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
     tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))


-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
 def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc):
     # Drop the names when they differ.
     a1, a2 = arrays_for_binary_ufunc
diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py
index cefcf09613de1..47615be32e5b0 100644
--- a/pandas/tests/test_take.py
+++ b/pandas/tests/test_take.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-import re

 import numpy as np
 import pytest
@@ -41,9 +40,6 @@ def dtype_fill_out_dtype(request):


 class TestTake:
-    # Standard incompatible fill error.
-    fill_error = re.compile("Incompatible type for fill_value")
-
     def test_1d_fill_nonna(self, dtype_fill_out_dtype):
         dtype, fill_value, out_dtype = dtype_fill_out_dtype
         data = np.random.randint(0, 2, 4).astype(dtype)
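The test_take.py context lines show the same idiom applied to multi-field cases: a fixture yields a (dtype, fill_value, out_dtype) triple that each test unpacks on its first line. A minimal sketch with made-up values (the real dtype_fill_out_dtype fixture carries pandas-specific promotion cases):

import numpy as np
import pytest


@pytest.fixture(
    params=[
        # (dtype, fill value, expected output dtype) -- illustrative only
        (np.int64, np.nan, np.float64),
        (np.float64, np.nan, np.float64),
        (np.object_, None, np.object_),
    ]
)
def dtype_fill_out_dtype(request):
    return request.param


def test_unpacks_triple(dtype_fill_out_dtype):
    dtype, fill_value, out_dtype = dtype_fill_out_dtype
    data = np.zeros(4, dtype=dtype)
    # The fill value must be representable once data is cast to out_dtype.
    filled = np.append(data.astype(out_dtype), np.array(fill_value, dtype=out_dtype))
    assert filled.dtype == out_dtype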
-    fill_error = re.compile("Incompatible type for fill_value")
-
     def test_1d_fill_nonna(self, dtype_fill_out_dtype):
         dtype, fill_value, out_dtype = dtype_fill_out_dtype
         data = np.random.randint(0, 2, 4).astype(dtype)
diff --git a/pandas/tests/tseries/offsets/test_custom_business_month.py b/pandas/tests/tseries/offsets/test_custom_business_month.py
index faf0f9810200b..0fff99ff8c025 100644
--- a/pandas/tests/tseries/offsets/test_custom_business_month.py
+++ b/pandas/tests/tseries/offsets/test_custom_business_month.py
@@ -11,7 +11,6 @@
     datetime,
     timedelta,
 )
-from typing import TYPE_CHECKING

 import numpy as np
 import pytest
@@ -34,9 +33,6 @@
 from pandas.tseries import offsets
 from pandas.tseries.holiday import USFederalHolidayCalendar

-if TYPE_CHECKING:
-    from pandas.tests.tseries.offsets.test_offsets import _ApplyCases
-

 @pytest.fixture
 def dt():
@@ -132,7 +128,7 @@ def test_is_on_offset(self, case):
         offset, dt, expected = case
         assert_is_on_offset(offset, dt, expected)

-    apply_cases: _ApplyCases = [
+    apply_cases = [
         (
             CBMonthBegin(),
             {
@@ -330,7 +326,7 @@ def test_is_on_offset(self, case):
         offset, dt, expected = case
         assert_is_on_offset(offset, dt, expected)

-    apply_cases: _ApplyCases = [
+    apply_cases = [
         (
             CBMonthEnd(),
             {
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index bfc5139c78b91..6df47968bd3bb 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -7,11 +7,6 @@
     datetime,
     timedelta,
 )
-from typing import (
-    Dict,
-    List,
-    Tuple,
-)

 import numpy as np
 import pytest
@@ -42,7 +37,6 @@
 from pandas.tseries import offsets
 from pandas.tseries.offsets import (
     FY5253,
-    BaseOffset,
     BDay,
     BMonthEnd,
     BusinessHour,
@@ -61,8 +55,6 @@
     WeekOfMonth,
 )

-_ApplyCases = List[Tuple[BaseOffset, Dict[datetime, datetime]]]
-
 _ARITHMETIC_DATE_OFFSET = [
     "years",
     "months",
diff --git a/pandas/tests/util/test_validate_args.py b/pandas/tests/util/test_validate_args.py
index 77e6b01ba1180..eef0931ec28ef 100644
--- a/pandas/tests/util/test_validate_args.py
+++ b/pandas/tests/util/test_validate_args.py
@@ -2,17 +2,20 @@

 from pandas.util._validators import validate_args

-_fname = "func"
+@pytest.fixture
+def _fname():
+    return "func"

-def test_bad_min_fname_arg_count():
+
+def test_bad_min_fname_arg_count(_fname):
     msg = "'max_fname_arg_count' must be non-negative"

     with pytest.raises(ValueError, match=msg):
         validate_args(_fname, (None,), -1, "foo")


-def test_bad_arg_length_max_value_single():
+def test_bad_arg_length_max_value_single(_fname):
     args = (None, None)
     compat_args = ("foo",)

@@ -28,7 +31,7 @@ def test_bad_arg_length_max_value_single():
     validate_args(_fname, args, min_fname_arg_count, compat_args)


-def test_bad_arg_length_max_value_multiple():
+def test_bad_arg_length_max_value_multiple(_fname):
     args = (None, None)
     compat_args = {"foo": None}

@@ -45,7 +48,7 @@ def test_bad_arg_length_max_value_multiple():


 @pytest.mark.parametrize("i", range(1, 3))
-def test_not_all_defaults(i):
+def test_not_all_defaults(i, _fname):
     bad_arg = "foo"
     msg = (
         f"the '{bad_arg}' parameter is not supported "
@@ -59,7 +62,7 @@ def test_not_all_defaults(i):
         validate_args(_fname, arg_vals[:i], 2, compat_args)


-def test_validation():
+def test_validation(_fname):
     # No exceptions should be raised.
     validate_args(_fname, (None,), 2, {"out": None})
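The three util test modules get identical treatment: the module-level _fname = "func" constant becomes a fixture, and every test that read the global now requests it by name; pytest matches fixture parameter names literally, so the underscore-prefixed name keeps working. A minimal sketch of the resulting shape, with a toy validator standing in for pandas.util._validators (assumed behavior only, not the real implementation):

import pytest


def validate_args(fname, args, max_fname_arg_count, compat_args):
    # Toy stand-in for pandas.util._validators.validate_args: reject any
    # positional arguments beyond those named in compat_args.
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")
    if len(args) > len(compat_args):
        raise TypeError(f"{fname}() takes at most {len(compat_args)} arguments")


@pytest.fixture
def _fname():
    # Shared constant, now injected per test instead of read from a global.
    return "func"


def test_too_many_args(_fname):
    with pytest.raises(TypeError, match="takes at most"):
        validate_args(_fname, (1, 2), 5, ("out",))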
diff --git a/pandas/tests/util/test_validate_args_and_kwargs.py b/pandas/tests/util/test_validate_args_and_kwargs.py
index 54d94d2194909..215026d648471 100644
--- a/pandas/tests/util/test_validate_args_and_kwargs.py
+++ b/pandas/tests/util/test_validate_args_and_kwargs.py
@@ -2,10 +2,13 @@

 from pandas.util._validators import validate_args_and_kwargs

-_fname = "func"
+@pytest.fixture
+def _fname():
+    return "func"

-def test_invalid_total_length_max_length_one():
+
+def test_invalid_total_length_max_length_one(_fname):
     compat_args = ("foo",)
     kwargs = {"foo": "FOO"}
     args = ("FoO", "BaZ")
@@ -23,7 +26,7 @@ def test_invalid_total_length_max_length_one():
     validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args)


-def test_invalid_total_length_max_length_multiple():
+def test_invalid_total_length_max_length_multiple(_fname):
     compat_args = ("foo", "bar", "baz")
     kwargs = {"foo": "FOO", "bar": "BAR"}
     args = ("FoO", "BaZ")
@@ -42,7 +45,7 @@ def test_invalid_total_length_max_length_multiple():


 @pytest.mark.parametrize("args,kwargs", [((), {"foo": -5, "bar": 2}), ((-5, 2), {})])
-def test_missing_args_or_kwargs(args, kwargs):
+def test_missing_args_or_kwargs(args, kwargs, _fname):
     bad_arg = "bar"
     min_fname_arg_count = 2

@@ -57,7 +60,7 @@ def test_missing_args_or_kwargs(args, kwargs):
     validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args)


-def test_duplicate_argument():
+def test_duplicate_argument(_fname):
     min_fname_arg_count = 2
     compat_args = {"foo": None, "bar": None, "baz": None}

@@ -70,7 +73,7 @@ def test_duplicate_argument():
     validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args)


-def test_validation():
+def test_validation(_fname):
     # No exceptions should be raised.
     compat_args = {"foo": 1, "bar": None, "baz": -2}
     kwargs = {"baz": -2}
diff --git a/pandas/tests/util/test_validate_kwargs.py b/pandas/tests/util/test_validate_kwargs.py
index de49cdd5e247d..dba447e30cf57 100644
--- a/pandas/tests/util/test_validate_kwargs.py
+++ b/pandas/tests/util/test_validate_kwargs.py
@@ -5,10 +5,13 @@
     validate_kwargs,
 )

-_fname = "func"
+@pytest.fixture
+def _fname():
+    return "func"

-def test_bad_kwarg():
+
+def test_bad_kwarg(_fname):
     good_arg = "f"
     bad_arg = good_arg + "o"

@@ -22,7 +25,7 @@ def test_bad_kwarg():


 @pytest.mark.parametrize("i", range(1, 3))
-def test_not_all_none(i):
+def test_not_all_none(i, _fname):
     bad_arg = "foo"
     msg = (
         rf"the '{bad_arg}' parameter is not supported "
@@ -40,7 +43,7 @@ def test_not_all_none(i):
         validate_kwargs(_fname, kwargs, compat_args)


-def test_validation():
+def test_validation(_fname):
     # No exceptions should be raised.
     compat_args = {"f": None, "b": 1, "ba": "s"}