From b93bbb01846482d901cff69ebfa47604a89b8361 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 13 Jun 2023 16:07:42 -0700
Subject: [PATCH 01/10] CI: Build pandas even if doctests fail

---
 .github/workflows/code-checks.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index 4ad2fbc71c8c1..f6c35decfd30b 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -77,6 +77,7 @@ jobs:

       - name: Install pandas in editable mode
         id: build-editable
+        if: ${{ steps.build.outcome == 'success' && always() }}
         uses: ./.github/actions/build_pandas
         with:
           editable: true

From 52f85da249699b41ebde985a0764879f57a0aa71 Mon Sep 17 00:00:00 2001
From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com>
Date: Tue, 13 Jun 2023 22:53:13 -0400
Subject: [PATCH 02/10] BUG: groupby sum turning `inf+inf` and `(-inf)+(-inf)`
 into `nan` (#53623)

---
 doc/source/whatsnew/v2.1.0.rst          |  5 +++--
 pandas/_libs/groupby.pyx                |  7 +++++++
 pandas/tests/groupby/test_libgroupby.py | 27 +++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index bea2ad8c7450c..ceda799ebb959 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -462,8 +462,9 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)
 - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`)
 - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`)
-- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`)
-- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`)
+- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`)
+- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`)
+- Bug in :meth:`SeriesGroupBy.sum` and :meth:`DataFrameGroupBy.sum` summing ``np.inf + np.inf`` and ``(-np.inf) + (-np.inf)`` to ``np.nan`` (:issue:`53606`)

 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 61f448cbe0c3f..0baae23a4a71c 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -746,6 +746,13 @@ def group_sum(
                         y = val - compensation[lab, j]
                         t = sumx[lab, j] + y
                         compensation[lab, j] = t - sumx[lab, j] - y
+                        if compensation[lab, j] != compensation[lab, j]:
+                            # GH#53606
+                            # If val is +/- infinity compensation is NaN
+                            # which would lead to results being NaN instead
+                            # of +/- infinity. We cannot use util.is_nan
+                            # because of no gil
+                            compensation[lab, j] = 0
                         sumx[lab, j] = t

     _check_below_mincount(
diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py
index d10bcf9053d1a..92c3b68d87fad 100644
--- a/pandas/tests/groupby/test_libgroupby.py
+++ b/pandas/tests/groupby/test_libgroupby.py
@@ -6,6 +6,7 @@
     group_cumprod,
     group_cumsum,
     group_mean,
+    group_sum,
     group_var,
 )

@@ -302,3 +303,29 @@ def test_cython_group_mean_Inf_at_begining_and_end():
         actual,
         expected,
     )
+
+
+@pytest.mark.parametrize(
+    "values, out",
+    [
+        ([[np.inf], [np.inf], [np.inf]], [[np.inf], [np.inf]]),
+        ([[np.inf], [np.inf], [-np.inf]], [[np.inf], [np.nan]]),
+        ([[np.inf], [-np.inf], [np.inf]], [[np.inf], [np.nan]]),
+        ([[np.inf], [-np.inf], [-np.inf]], [[np.inf], [-np.inf]]),
+    ],
+)
+def test_cython_group_sum_Inf_at_begining_and_end(values, out):
+    # GH #53606
+    actual = np.array([[np.nan], [np.nan]], dtype="float64")
+    counts = np.array([0, 0], dtype="int64")
+    data = np.array(values, dtype="float64")
+    labels = np.array([0, 1, 1], dtype=np.intp)
+
+    group_sum(actual, counts, data, labels, None, is_datetimelike=False)
+
+    expected = np.array(out, dtype="float64")
+
+    tm.assert_numpy_array_equal(
+        actual,
+        expected,
+    )
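As a quick illustration of the behavior this patch corrects (a hypothetical session, assuming the fix is applied; before the change, the Kahan-summation compensation term went NaN on infinite values, so both group sums below came out as NaN):

>>> import numpy as np
>>> import pandas as pd
>>> ser = pd.Series([np.inf, np.inf, -np.inf, -np.inf], index=["a", "a", "b", "b"])
>>> ser.groupby(level=0).sum()
a    inf
b   -inf
dtype: float64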
From 257db3307903ffe45fd59a9535032c95b2290c55 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 14 Jun 2023 09:58:38 -0700
Subject: [PATCH 03/10] DEPR: method, limit in NDFrame.replace (#53492)

* DEPR: method, limit in NDFrame.replace

* update test, docs

* suppress doctest warning

* doctests

---
 doc/source/user_guide/missing_data.rst      |  7 ----
 doc/source/whatsnew/v2.1.0.rst              |  2 +
 pandas/conftest.py                          |  2 +
 pandas/core/generic.py                      | 33 +++++++++++++++
 pandas/core/shared_docs.py                  |  7 ++++
 pandas/tests/frame/methods/test_replace.py  | 11 ++++-
 pandas/tests/frame/test_subclass.py         |  4 +-
 pandas/tests/series/methods/test_replace.py | 45 ++++++++++++++++-----
 8 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
index ed58554896a4f..443fdd4f59e3f 100644
--- a/doc/source/user_guide/missing_data.rst
+++ b/doc/source/user_guide/missing_data.rst
@@ -551,13 +551,6 @@ For a DataFrame, you can specify individual values by column:

    df.replace({"a": 0, "b": 5}, 100)

-Instead of replacing with specified values, you can treat all given values as
-missing and interpolate over them:
-
-.. ipython:: python
-
-   ser.replace([1, 2, 3], method="pad")
-
 .. _missing_data.replace_expression:

 String/regular expression replacement
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index ceda799ebb959..806abf670f32f 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -286,9 +286,11 @@ Deprecations
 - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
 - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
 - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
+- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
 - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`)
 - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
 - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
+- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
 - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
 -
diff --git a/pandas/conftest.py b/pandas/conftest.py
index fbef2fb272ed6..ed05ddd1b2f31 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -134,6 +134,8 @@ def pytest_collection_modifyitems(items, config) -> None:
         ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
         ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
         ("is_sparse", "is_sparse is deprecated"),
+        ("NDFrame.replace", "The 'method' keyword"),
+        ("NDFrame.replace", "Series.replace without 'value'"),
         # Docstring divides by zero to show behavior difference
         ("missing.mask_zero_div_zero", "divide by zero encountered"),
         (
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 711e552f262ac..d112f5aa7d671 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7470,6 +7470,39 @@ def replace(
         regex: bool_t = False,
         method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
     ) -> Self | None:
+        if method is not lib.no_default:
+            warnings.warn(
+                # GH#33302
+                f"The 'method' keyword in {type(self).__name__}.replace is "
+                "deprecated and will be removed in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        elif limit is not None:
+            warnings.warn(
+                # GH#33302
+                f"The 'limit' keyword in {type(self).__name__}.replace is "
+                "deprecated and will be removed in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        if (
+            value is lib.no_default
+            and method is lib.no_default
+            and not is_dict_like(to_replace)
+            and regex is False
+        ):
+            # case that goes through _replace_single and defaults to method="pad"
+            warnings.warn(
+                # GH#33302
+                f"{type(self).__name__}.replace without 'value' and with "
+                "non-dict-like 'to_replace' is deprecated "
+                "and will raise in a future version. "
+                "Explicitly specify the new values instead.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
         if not (
             is_scalar(to_replace)
             or is_re_compilable(to_replace)
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 7bddaad780b8c..7579f816d0ace 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -562,6 +562,8 @@
 {inplace}
 limit : int, default None
     Maximum size gap to forward or backward fill.
+
+    .. deprecated:: 2.1.0
 regex : bool or same types as `to_replace`, default False
     Whether to interpret `to_replace` and/or `value` as regular
     expressions. If this is ``True`` then `to_replace` *must* be a
@@ -572,6 +574,8 @@
     The method to use when for replacement, when `to_replace` is a
     scalar, list or tuple and `value` is ``None``.

+    .. deprecated:: 2.1.0
+
 Returns
 -------
 {klass}
@@ -766,6 +770,9 @@
     4    b
     dtype: object

+    .. deprecated:: 2.1.0
+        The 'method' parameter and padding behavior are deprecated.
+
 On the other hand, if ``None`` is explicitly passed for ``value``, it will
 be respected:
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index d5668020bab5d..9256df72cdf7b 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1236,7 +1236,9 @@ def test_replace_method(self, to_replace, method, expected):
         # GH 19632
         df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]})

-        result = df.replace(to_replace=to_replace, value=None, method=method)
+        msg = "The 'method' keyword in DataFrame.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace(to_replace=to_replace, value=None, method=method)
         expected = DataFrame(expected)
         tm.assert_frame_equal(result, expected)

@@ -1327,8 +1329,13 @@ def test_replace_invalid_to_replace(self):
             r"Expecting 'to_replace' to be either a scalar, array-like, "
             r"dict or None, got invalid type.*"
         )
+        msg2 = (
+            "DataFrame.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
         with pytest.raises(TypeError, match=msg):
-            df.replace(lambda x: x.strip())
+            with tm.assert_produces_warning(FutureWarning, match=msg2):
+                df.replace(lambda x: x.strip())

     @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"])
     @pytest.mark.parametrize("value", [np.nan, pd.NA])
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py
index 5c44a957b9373..3d1e9d26c1ea6 100644
--- a/pandas/tests/frame/test_subclass.py
+++ b/pandas/tests/frame/test_subclass.py
@@ -732,7 +732,9 @@ def test_equals_subclass(self):
     def test_replace_list_method(self):
         # https://github.com/pandas-dev/pandas/pull/46018
         df = tm.SubclassedDataFrame({"A": [0, 1, 2]})
-        result = df.replace([1, 2], method="ffill")
+        msg = "The 'method' keyword in SubclassedDataFrame.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace([1, 2], method="ffill")
         expected = tm.SubclassedDataFrame({"A": [0, 0, 0]})
         assert isinstance(result, tm.SubclassedDataFrame)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 2880e3f3e85db..d3cdae63d26f3 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -131,12 +131,18 @@ def test_replace_gh5319(self):
         # GH 5319
         ser = pd.Series([0, np.nan, 2, 3, 4])
         expected = ser.ffill()
-        result = ser.replace([np.nan])
+        msg = (
+            "Series.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = ser.replace([np.nan])
         tm.assert_series_equal(result, expected)

         ser = pd.Series([0, np.nan, 2, 3, 4])
         expected = ser.ffill()
-        result = ser.replace(np.nan)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = ser.replace(np.nan)
         tm.assert_series_equal(result, expected)

     def test_replace_datetime64(self):
@@ -169,11 +175,17 @@ def test_replace_timedelta_td64(self):

     def test_replace_with_single_list(self):
         ser = pd.Series([0, 1, 2, 3, 4])
-        result = ser.replace([1, 2, 3])
+        msg2 = (
+            "Series.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            result = ser.replace([1, 2, 3])
         tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

         s = ser.copy()
-        return_value = s.replace([1, 2, 3], inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            return_value = s.replace([1, 2, 3], inplace=True)
         assert return_value is None
         tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

@@ -183,8 +195,10 @@ def test_replace_with_single_list(self):
             r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
             r"\(bfill\)\. Got crash_cymbal"
         )
+        msg3 = "The 'method' keyword in Series.replace is deprecated"
         with pytest.raises(ValueError, match=msg):
-            return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
+            with tm.assert_produces_warning(FutureWarning, match=msg3):
+                return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
         assert return_value is None
         tm.assert_series_equal(s, ser)

@@ -450,8 +464,13 @@ def test_replace_invalid_to_replace(self):
             r"Expecting 'to_replace' to be either a scalar, array-like, "
             r"dict or None, got invalid type.*"
         )
+        msg2 = (
+            "Series.replace without 'value' and with non-dict-like "
+            "'to_replace' is deprecated"
+        )
         with pytest.raises(TypeError, match=msg):
-            series.replace(lambda x: x.strip())
+            with tm.assert_produces_warning(FutureWarning, match=msg2):
+                series.replace(lambda x: x.strip())

     @pytest.mark.parametrize("frame", [False, True])
     def test_replace_nonbool_regex(self, frame):
@@ -502,19 +521,25 @@ def test_replace_extension_other(self, frame_or_series):

     def _check_replace_with_method(self, ser: pd.Series):
         df = ser.to_frame()

-        res = ser.replace(ser[1], method="pad")
+        msg1 = "The 'method' keyword in Series.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg1):
+            res = ser.replace(ser[1], method="pad")
         expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
         tm.assert_series_equal(res, expected)

-        res_df = df.replace(ser[1], method="pad")
+        msg2 = "The 'method' keyword in DataFrame.replace is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            res_df = df.replace(ser[1], method="pad")
         tm.assert_frame_equal(res_df, expected.to_frame())

         ser2 = ser.copy()
-        res2 = ser2.replace(ser[1], method="pad", inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg1):
+            res2 = ser2.replace(ser[1], method="pad", inplace=True)
         assert res2 is None
         tm.assert_series_equal(ser2, expected)

-        res_df2 = df.replace(ser[1], method="pad", inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            res_df2 = df.replace(ser[1], method="pad", inplace=True)
         assert res_df2 is None
         tm.assert_frame_equal(df, expected.to_frame())
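For reference, a minimal sketch of the pattern this patch deprecates (hypothetical session; under pandas >= 2.1 this call also emits a FutureWarning):

>>> import pandas as pd
>>> ser = pd.Series([0, 1, 2, 3, 4])
>>> ser.replace([1, 2, 3])  # no 'value' given; silently falls back to method="pad"
0    0
1    0
2    0
3    0
4    4
dtype: int64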
From 00aa70033627c47c640e9d374d30a22fcfeb5288 Mon Sep 17 00:00:00 2001
From: Luke Manley
Date: Wed, 14 Jun 2023 13:08:06 -0400
Subject: [PATCH 04/10] PERF: Series.str.get_dummies for
 ArrowDtype(pa.string()) (#53655)

* PERF: Series.str.get_dummies for ArrowDtype(pa.string())

* whatsnew

* typing

---
 doc/source/whatsnew/v2.1.0.rst    |  1 +
 pandas/core/arrays/arrow/array.py | 22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 806abf670f32f..42b1346696bb8 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -323,6 +323,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.loc` when selecting rows and columns (:issue:`53014`)
 - Performance improvement in :meth:`Series.add` for pyarrow string and binary dtypes (:issue:`53150`)
 - Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)
+- Performance improvement in :meth:`Series.str.get_dummies` for pyarrow-backed strings (:issue:`53655`)
 - Performance improvement in :meth:`Series.str.get` for pyarrow-backed strings (:issue:`53152`)
 - Performance improvement in :meth:`Series.str.split` with ``expand=True`` for pyarrow-backed strings (:issue:`53585`)
 - Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 0c1b86440b11d..0ca136914b614 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2239,17 +2239,19 @@ def _str_findall(self, pat: str, flags: int = 0):
         return type(self)(pa.chunked_array(result))

     def _str_get_dummies(self, sep: str = "|"):
-        split = pc.split_pattern(self._pa_array, sep).combine_chunks()
-        uniques = split.flatten().unique()
+        split = pc.split_pattern(self._pa_array, sep)
+        flattened_values = pc.list_flatten(split)
+        uniques = flattened_values.unique()
         uniques_sorted = uniques.take(pa.compute.array_sort_indices(uniques))
-        result_data = []
-        for lst in split.to_pylist():
-            if lst is None:
-                result_data.append([False] * len(uniques_sorted))
-            else:
-                res = pc.is_in(uniques_sorted, pa.array(set(lst)))
-                result_data.append(res.to_pylist())
-        result = type(self)(pa.array(result_data))
+        lengths = pc.list_value_length(split).fill_null(0).to_numpy()
+        n_rows = len(self)
+        n_cols = len(uniques)
+        indices = pc.index_in(flattened_values, uniques_sorted).to_numpy()
+        indices = indices + np.arange(n_rows).repeat(lengths) * n_cols
+        dummies = np.zeros(n_rows * n_cols, dtype=np.bool_)
+        dummies[indices] = True
+        dummies = dummies.reshape((n_rows, n_cols))
+        result = type(self)(pa.array(list(dummies)))
         return result, uniques_sorted.to_pylist()

     def _str_index(self, sub: str, start: int = 0, end: int | None = None):
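A usage sketch of the code path this speeds up (assuming pyarrow is installed; the dummies are now built with vectorized pyarrow compute calls instead of a Python-level loop over rows):

>>> import pyarrow as pa
>>> import pandas as pd
>>> ser = pd.Series(["a|b", "b|c", None], dtype=pd.ArrowDtype(pa.string()))
>>> dummies = ser.str.get_dummies(sep="|")  # columns a, b, c; the null row is all-falsey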
From 6458c1c64a9c254f68d03bdbfb9c0c643da3bff3 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 14 Jun 2023 10:11:11 -0700
Subject: [PATCH 05/10] TYP: core.missing (#53625)

---
 pandas/core/arrays/sparse/array.py |   7 +-
 pandas/core/missing.py             | 160 +++++++++++++++++------------
 2 files changed, 98 insertions(+), 69 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 16e7835a7183d..269b7a086de93 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -769,7 +769,12 @@ def fillna(
             )
             new_values = np.asarray(self)
             # interpolate_2d modifies new_values inplace
-            interpolate_2d(new_values, method=method, limit=limit)
+            # error: Argument "method" to "interpolate_2d" has incompatible type
+            # "Literal['backfill', 'bfill', 'ffill', 'pad']"; expected
+            # "Literal['pad', 'backfill']"
+            interpolate_2d(
+                new_values, method=method, limit=limit  # type: ignore[arg-type]
+            )
             return type(self)(new_values, fill_value=self.fill_value)

         else:
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 0766b9c5c7145..8b6b6a2c2a07b 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -10,6 +10,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Literal,
     cast,
 )

@@ -22,7 +23,6 @@
 )
 from pandas._typing import (
     ArrayLike,
-    Axis,
     AxisInt,
     F,
     ReindexMethod,
@@ -223,6 +223,35 @@ def find_valid_index(how: str, is_valid: npt.NDArray[np.bool_]) -> int | None:
     return idxpos  # type: ignore[return-value]


+def validate_limit_direction(
+    limit_direction: str,
+) -> Literal["forward", "backward", "both"]:
+    valid_limit_directions = ["forward", "backward", "both"]
+    limit_direction = limit_direction.lower()
+    if limit_direction not in valid_limit_directions:
+        raise ValueError(
+            "Invalid limit_direction: expecting one of "
+            f"{valid_limit_directions}, got '{limit_direction}'."
+        )
+    # error: Incompatible return value type (got "str", expected
+    # "Literal['forward', 'backward', 'both']")
+    return limit_direction  # type: ignore[return-value]
+
+
+def validate_limit_area(limit_area: str | None) -> Literal["inside", "outside"] | None:
+    if limit_area is not None:
+        valid_limit_areas = ["inside", "outside"]
+        limit_area = limit_area.lower()
+        if limit_area not in valid_limit_areas:
+            raise ValueError(
+                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
+                f"{limit_area}."
+            )
+    # error: Incompatible return value type (got "Optional[str]", expected
+    # "Optional[Literal['inside', 'outside']]")
+    return limit_area  # type: ignore[return-value]
+
+
 def infer_limit_direction(limit_direction, method):
     # Set `limit_direction` depending on `method`
     if limit_direction is None:
@@ -308,7 +337,9 @@ def interpolate_array_2d(
             method=m,
             axis=axis,
             limit=limit,
-            limit_area=limit_area,
+            # error: Argument "limit_area" to "interpolate_2d" has incompatible
+            # type "Optional[str]"; expected "Optional[Literal['inside', 'outside']]"
+            limit_area=limit_area,  # type: ignore[arg-type]
         )
     else:
         assert index is not None  # for mypy
@@ -362,22 +393,8 @@ def _interpolate_2d_with_fill(
         )
         method = "values"

-    valid_limit_directions = ["forward", "backward", "both"]
-    limit_direction = limit_direction.lower()
-    if limit_direction not in valid_limit_directions:
-        raise ValueError(
-            "Invalid limit_direction: expecting one of "
-            f"{valid_limit_directions}, got '{limit_direction}'."
-        )
-
-    if limit_area is not None:
-        valid_limit_areas = ["inside", "outside"]
-        limit_area = limit_area.lower()
-        if limit_area not in valid_limit_areas:
-            raise ValueError(
-                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
-                f"{limit_area}."
-            )
+    limit_direction = validate_limit_direction(limit_direction)
+    limit_area_validated = validate_limit_area(limit_area)

     # default limit is unlimited GH #16282
     limit = algos.validate_limit(nobs=None, limit=limit)
@@ -393,7 +410,7 @@ def func(yvalues: np.ndarray) -> None:
             method=method,
             limit=limit,
             limit_direction=limit_direction,
-            limit_area=limit_area,
+            limit_area=limit_area_validated,
             fill_value=fill_value,
             bounds_error=False,
             **kwargs,
@@ -433,10 +450,10 @@ def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
 def _interpolate_1d(
     indices: np.ndarray,
     yvalues: np.ndarray,
-    method: str | None = "linear",
+    method: str = "linear",
     limit: int | None = None,
     limit_direction: str = "forward",
-    limit_area: str | None = None,
+    limit_area: Literal["inside", "outside"] | None = None,
     fill_value: Any | None = None,
     bounds_error: bool = False,
     order: int | None = None,
@@ -539,10 +556,10 @@ def _interpolate_1d(


 def _interpolate_scipy_wrapper(
-    x,
-    y,
-    new_x,
-    method,
+    x: np.ndarray,
+    y: np.ndarray,
+    new_x: np.ndarray,
+    method: str,
     fill_value=None,
     bounds_error: bool = False,
     order=None,
@@ -565,19 +582,11 @@
         "krogh": interpolate.krogh_interpolate,
         "from_derivatives": _from_derivatives,
         "piecewise_polynomial": _from_derivatives,
+        "cubicspline": _cubicspline_interpolate,
+        "akima": _akima_interpolate,
+        "pchip": interpolate.pchip_interpolate,
     }

-    if getattr(x, "_is_all_dates", False):
-        # GH 5975, scipy.interp1d can't handle datetime64s
-        x, new_x = x._values.astype("i8"), new_x.astype("i8")
-
-    if method == "pchip":
-        alt_methods["pchip"] = interpolate.pchip_interpolate
-    elif method == "akima":
-        alt_methods["akima"] = _akima_interpolate
-    elif method == "cubicspline":
-        alt_methods["cubicspline"] = _cubicspline_interpolate
-
     interp1d_methods = [
         "nearest",
         "zero",
@@ -588,9 +597,11 @@
     ]
     if method in interp1d_methods:
         if method == "polynomial":
-            method = order
+            kind = order
+        else:
+            kind = method
         terp = interpolate.interp1d(
-            x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
+            x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
         )
         new_y = terp(new_x)
     elif method == "spline":
@@ -610,13 +621,18 @@
             y = y.copy()
         if not new_x.flags.writeable:
             new_x = new_x.copy()
-        method = alt_methods[method]
-        new_y = method(x, y, new_x, **kwargs)
+        terp = alt_methods[method]
+        new_y = terp(x, y, new_x, **kwargs)
     return new_y


 def _from_derivatives(
-    xi, yi, x, order=None, der: int | list[int] | None = 0, extrapolate: bool = False
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
+    order=None,
+    der: int | list[int] | None = 0,
+    extrapolate: bool = False,
 ):
     """
     Convenience function for interpolate.BPoly.from_derivatives.
@@ -660,7 +676,13 @@ def _from_derivatives(
     return m(x)


-def _akima_interpolate(xi, yi, x, der: int | list[int] | None = 0, axis: AxisInt = 0):
+def _akima_interpolate(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
+    der: int | list[int] | None = 0,
+    axis: AxisInt = 0,
+):
     """
     Convenience function for akima interpolation.
     xi and yi are arrays of values used to approximate some function f,
@@ -670,13 +692,13 @@

     Parameters
     ----------
-    xi : array-like
+    xi : np.ndarray
         A sorted list of x-coordinates, of length N.
-    yi : array-like
+    yi : np.ndarray
         A 1-D array of real values. `yi`'s length along the interpolation
         axis must be equal to the length of `xi`. If N-D array, use axis
         parameter to select correct axis.
-    x : scalar or array-like
+    x : np.ndarray
         Of length M.
     der : int, optional
         How many derivatives to extract; None for all potentially
@@ -704,9 +726,9 @@


 def _cubicspline_interpolate(
-    xi,
-    yi,
-    x,
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
     axis: AxisInt = 0,
     bc_type: str | tuple[Any, Any] = "not-a-knot",
     extrapolate=None,
@@ -718,14 +740,14 @@

     Parameters
     ----------
-    xi : array-like, shape (n,)
+    xi : np.ndarray, shape (n,)
         1-d array containing values of the independent variable.
         Values must be real, finite and in strictly increasing order.
-    yi : array-like
+    yi : np.ndarray
         Array containing values of the dependent variable. It can have
         arbitrary number of dimensions, but the length along ``axis``
         (see below) must match the length of ``x``. Values must be finite.
-    x : scalar or array-like, shape (m,)
+    x : np.ndarray, shape (m,)
     axis : int, optional
         Axis along which `y` is assumed to be varying. Meaning that for
         ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
@@ -790,7 +812,10 @@


 def _interpolate_with_limit_area(
-    values: np.ndarray, method: str, limit: int | None, limit_area: str | None
+    values: np.ndarray,
+    method: Literal["pad", "backfill"],
+    limit: int | None,
+    limit_area: Literal["inside", "outside"],
 ) -> None:
     """
     Apply interpolation and limit_area logic to values along a to-be-specified axis.
@@ -803,8 +828,8 @@
         Interpolation method. Could be "bfill" or "pad"
     limit: int, optional
         Index limit on interpolation.
-    limit_area: str
-        Limit area for interpolation. Can be "inside" or "outside"
+    limit_area: {'inside', 'outside'}
+        Limit area for interpolation.

     Notes
     -----
@@ -832,16 +857,18 @@
             invalid[first : last + 1] = False
         elif limit_area == "outside":
             invalid[:first] = invalid[last + 1 :] = False
+        else:
+            raise ValueError("limit_area should be 'inside' or 'outside'")

         values[invalid] = np.nan


 def interpolate_2d(
     values: np.ndarray,
-    method: str = "pad",
-    axis: Axis = 0,
+    method: Literal["pad", "backfill"] = "pad",
+    axis: AxisInt = 0,
     limit: int | None = None,
-    limit_area: str | None = None,
+    limit_area: Literal["inside", "outside"] | None = None,
 ) -> None:
     """
     Perform an actual interpolation of values, values will be make 2-d if
@@ -880,9 +907,7 @@
                 limit=limit,
                 limit_area=limit_area,
             ),
-            # error: Argument 2 to "apply_along_axis" has incompatible type
-            # "Union[str, int]"; expected "SupportsIndex"
-            axis,  # type: ignore[arg-type]
+            axis,
             values,
         )
         return
@@ -898,12 +923,9 @@
     method = clean_fill_method(method)

     tvalues = transf(values)
+
+    func = get_fill_func(method, ndim=2)
     # _pad_2d and _backfill_2d both modify tvalues inplace
-    if method == "pad":
-        _pad_2d(tvalues, limit=limit)
-    else:
-        _backfill_2d(tvalues, limit=limit)
-
+    func(tvalues, limit=limit)
     return
@@ -969,7 +991,7 @@ def _pad_2d(
 ):
     mask = _fillna_prep(values, mask)

-    if np.all(values.shape):
+    if values.size:
         algos.pad_2d_inplace(values, mask, limit=limit)
     else:
         # for test coverage
@@ -983,7 +1005,7 @@ def _backfill_2d(
 ):
     mask = _fillna_prep(values, mask)

-    if np.all(values.shape):
+    if values.size:
         algos.backfill_2d_inplace(values, mask, limit=limit)
     else:
         # for test coverage
@@ -1007,7 +1029,9 @@ def clean_reindex_fill_method(method) -> ReindexMethod | None:
     return clean_fill_method(method, allow_nearest=True)


-def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit):
+def _interp_limit(
+    invalid: npt.NDArray[np.bool_], fw_limit: int | None, bw_limit: int | None
+):
     """
     Get indexers of values that won't be filled because they exceed the limits.
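The validators factored out above back the public interpolate API; a minimal sketch of the semantics they enforce (hypothetical session):

>>> import numpy as np
>>> import pandas as pd
>>> ser = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan])
>>> ser.interpolate(limit_direction="both", limit_area="inside")
0    NaN
1    1.0
2    2.0
3    3.0
4    NaN
dtype: float64
>>> ser.interpolate(limit_direction="sideways")  # doctest: +SKIP
ValueError: Invalid limit_direction: expecting one of ['forward', 'backward', 'both'], got 'sideways'.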
From b3556141ac009118d74f808933dd99ea09e8139d Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Wed, 14 Jun 2023 12:09:45 -0700
Subject: [PATCH 06/10] CI: Attempt to fix wheel builds (#53670)

---
 .github/workflows/wheels.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index ea5ab81e74030..eae2949594bcc 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -111,8 +111,11 @@ jobs:

       - name: Build wheels
         uses: pypa/cibuildwheel@v2.13.1
-        with:
-          package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
+        # TODO: Build wheels from sdist again
+        # There's some sort of weird race condition?
+        # within Github that makes the sdist be missing files
+        #with:
+        #  package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
         env:
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
From 0489c93f04380c6e61d57b97dd0c9b3fc59ba889 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?=
Date: Wed, 14 Jun 2023 22:16:16 +0200
Subject: [PATCH 07/10] DOC: Fixing EX01 - Added examples (#53647)

* SeriesGroupBy.fillna example added

* Added examples

* Corrected failing test for timedelta.total_seconds

* Corrected fillna example

---
 ci/code_checks.sh                  | 11 -----
 pandas/_libs/tslibs/nattype.pyx    | 16 ++++++-
 pandas/_libs/tslibs/timedeltas.pyx | 76 +++++++++++++++++++++++++++++-
 pandas/core/arrays/datetimes.py    |  8 ++++
 pandas/core/groupby/generic.py     | 21 +++++++++
 5 files changed, 118 insertions(+), 14 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index adda422296396..f63cc1fcc5767 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -119,16 +119,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.Timestamp.utcoffset \
         pandas.Timestamp.utctimetuple \
         pandas.Timestamp.weekday \
-        pandas.arrays.DatetimeArray \
-        pandas.Timedelta.view \
-        pandas.Timedelta.as_unit \
-        pandas.Timedelta.ceil \
-        pandas.Timedelta.floor \
-        pandas.Timedelta.round \
-        pandas.Timedelta.to_pytimedelta \
-        pandas.Timedelta.to_timedelta64 \
-        pandas.Timedelta.to_numpy \
-        pandas.Timedelta.total_seconds \
         pandas.arrays.TimedeltaArray \
         pandas.Period.asfreq \
         pandas.Period.now \
@@ -261,7 +251,6 @@
         pandas.core.window.ewm.ExponentialMovingWindow.cov \
         pandas.api.indexers.BaseIndexer \
         pandas.api.indexers.VariableOffsetWindowIndexer \
-        pandas.core.groupby.SeriesGroupBy.fillna \
         pandas.io.formats.style.Styler \
         pandas.io.formats.style.Styler.from_custom_template \
         pandas.io.formats.style.Styler.set_caption \
diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
index ea859a5f7d53d..75205a359db68 100644
--- a/pandas/_libs/tslibs/nattype.pyx
+++ b/pandas/_libs/tslibs/nattype.pyx
@@ -4,7 +4,6 @@ from cpython.datetime cimport (
     PyDelta_Check,
     datetime,
     import_datetime,
-    timedelta,
 )

 import_datetime()
@@ -440,7 +439,20 @@ class NaTType(_NaT):
         Monday == 1 ... Sunday == 7.
         """,
     )
-    total_seconds = _make_nan_func("total_seconds", timedelta.total_seconds.__doc__)
+    total_seconds = _make_nan_func(
+        "total_seconds",
+        """
+        Total seconds in the duration.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1min')
+        >>> td
+        Timedelta('0 days 00:01:00')
+        >>> td.total_seconds()
+        60.0
+        """,
+    )
     month_name = _make_nan_func(
         "month_name",
         """
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 047b5e861da2c..e68b8b210437a 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1112,7 +1112,17 @@ cdef class _Timedelta(timedelta):
         return self._ms * 1000 + self._us

     def total_seconds(self) -> float:
-        """Total seconds in the duration."""
+        """
+        Total seconds in the duration.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1min')
+        >>> td
+        Timedelta('0 days 00:01:00')
+        >>> td.total_seconds()
+        60.0
+        """
         # We need to override bc we overrode days/seconds/microseconds
         # TODO: add nanos/1e9?
         return self.days * 24 * 3600 + self.seconds + self.microseconds / 1_000_000
@@ -1274,6 +1284,14 @@ cdef class _Timedelta(timedelta):
         Notes
         -----
         Any nanosecond resolution will be lost.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.to_pytimedelta()
+        datetime.timedelta(days=3)
         """
         if self._creso == NPY_FR_ns:
             return timedelta(microseconds=int(self._value) / 1000)
@@ -1287,6 +1305,14 @@ cdef class _Timedelta(timedelta):
     def to_timedelta64(self) -> np.timedelta64:
         """
         Return a numpy.timedelta64 object with 'ns' precision.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.to_timedelta64()
+        numpy.timedelta64(259200000000000,'ns')
         """
         cdef:
             str abbrev = npy_unit_to_abbrev(self._creso)
@@ -1309,6 +1335,14 @@ cdef class _Timedelta(timedelta):
         See Also
         --------
         Series.to_numpy : Similar method for Series.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.to_numpy()
+        numpy.timedelta64(259200000000000,'ns')
         """
         if dtype is not None or copy is not False:
             raise ValueError(
@@ -1324,6 +1358,14 @@ cdef class _Timedelta(timedelta):
         ----------
         dtype : str or dtype
             The dtype to view the underlying data as.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('3D')
+        >>> td
+        Timedelta('3 days 00:00:00')
+        >>> td.view(int)
+        259200000000000
         """
         return np.timedelta64(self._value).view(dtype)
@@ -1603,6 +1645,14 @@ cdef class _Timedelta(timedelta):
         Returns
         -------
         Timedelta
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.as_unit('s')
+        Timedelta('0 days 00:00:01')
         """
         dtype = np.dtype(f"m8[{unit}]")
         reso = get_unit_from_dtype(dtype)
@@ -1875,6 +1925,14 @@ class Timedelta(_Timedelta):
         Raises
         ------
         ValueError if the freq cannot be converted
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.round('s')
+        Timedelta('0 days 00:00:01')
         """
         return self._round(freq, RoundTo.NEAREST_HALF_EVEN)
@@ -1886,6 +1944,14 @@ class Timedelta(_Timedelta):
         ----------
         freq : str
             Frequency string indicating the flooring resolution.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.floor('s')
+        Timedelta('0 days 00:00:01')
         """
         return self._round(freq, RoundTo.MINUS_INFTY)
@@ -1897,6 +1963,14 @@ class Timedelta(_Timedelta):
         ----------
         freq : str
             Frequency string indicating the ceiling resolution.
+
+        Examples
+        --------
+        >>> td = pd.Timedelta('1001ms')
+        >>> td
+        Timedelta('0 days 00:00:01.001000')
+        >>> td.ceil('s')
+        Timedelta('0 days 00:00:02')
         """
         return self._round(freq, RoundTo.PLUS_INFTY)
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 3d083e55b12ab..d6afba8c34904 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -183,6 +183,14 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):  # type: ignore[misc]
     Methods
     -------
     None
+
+    Examples
+    --------
+    >>> pd.arrays.DatetimeArray(pd.DatetimeIndex(['2023-01-01', '2023-01-02']),
+    ...                         freq='D')
+    <DatetimeArray>
+    ['2023-01-01 00:00:00', '2023-01-02 00:00:00']
+    Length: 2, dtype: datetime64[ns]
     """

     _typ = "datetimearray"
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2b1ff05f18d5e..cecb9a84c62dd 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -914,6 +914,27 @@ def fillna(
         --------
         ffill : Forward fill values within a group.
         bfill : Backward fill values within a group.
+
+        Examples
+        --------
+        For SeriesGroupBy:
+
+        >>> lst = ['cat', 'cat', 'cat', 'mouse', 'mouse']
+        >>> ser = pd.Series([1, None, None, 2, None], index=lst)
+        >>> ser
+        cat      1.0
+        cat      NaN
+        cat      NaN
+        mouse    2.0
+        mouse    NaN
+        dtype: float64
+        >>> ser.groupby(level=0).fillna(0, limit=1)
+        cat      1.0
+        cat      0.0
+        cat      NaN
+        mouse    2.0
+        mouse    0.0
+        dtype: float64
         """
         result = self._op_via_apply(
             "fillna",

From 38198faa9091e25f83822b326b2dc5e10b80b955 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 14 Jun 2023 17:05:41 -0700
Subject: [PATCH 08/10] CI/TST: Mark test_to_read_gcs as single_cpu (#53677)

---
 pandas/tests/io/test_gcs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index d82cfd5bd169d..bdea24f7bb5aa 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -44,6 +44,8 @@ def ls(self, path, **kwargs):

 @td.skip_if_no("gcsfs")
+# Patches pyarrow; other processes should not pick up change
+@pytest.mark.single_cpu
 @pytest.mark.parametrize("format", ["csv", "json", "parquet", "excel", "markdown"])
 def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys):
     """
From a7fd75746cc69e318742fbcddb36195eab260525 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Wed, 14 Jun 2023 17:06:17 -0700
Subject: [PATCH 09/10] BUG/CoW: is_range_indexer can't handle very large
 arrays (#53672)

* BUG: is_range_indexer can't handle very large arrays

* fix test on 32-bit

---
 doc/source/whatsnew/v2.1.0.rst |  2 +-
 pandas/_libs/lib.pyx           |  2 +-
 pandas/tests/libs/test_lib.py  | 13 +++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 42b1346696bb8..19e314cbf5ed8 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -406,7 +406,7 @@ Indexing
 ^^^^^^^^
 - Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`)
 - Bug in :meth:`DataFrame.__setitem__` with a boolean mask and :meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN`` incorrectly raising ``TypeError`` (:issue:`53291`)
--
+- Bug in indexing methods (e.g. :meth:`DataFrame.__getitem__`) where taking the entire :class:`DataFrame`/:class:`Series` would raise an ``OverflowError`` when Copy on Write was enabled and the length of the array was over the maximum size a 32-bit integer can hold (:issue:`53616`)

 Missing
 ^^^^^^^
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index e68dbfa26a104..f7934865fbb43 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -668,7 +668,7 @@ ctypedef fused int6432_t:

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_range_indexer(ndarray[int6432_t, ndim=1] left, int n) -> bool:
+def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
     """
     Perform an element by element comparison on 1-d integer arrays, meant for indexer
     comparisons
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index 383e1b81e17a7..6ad8d748d6997 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -6,6 +6,7 @@
     lib,
     writers as libwriters,
 )
+from pandas.compat import IS64

 from pandas import Index
 import pandas._testing as tm
@@ -248,6 +249,18 @@ def test_is_range_indexer(self, dtype):
         left = np.arange(0, 100, dtype=dtype)
         assert lib.is_range_indexer(left, 100)

+    @pytest.mark.skipif(
+        not IS64,
+        reason="2**31 is too big for Py_ssize_t on 32-bit. "
+        "It doesn't matter though since you cannot create an array that long on 32-bit",
+    )
+    @pytest.mark.parametrize("dtype", ["int64", "int32"])
+    def test_is_range_indexer_big_n(self, dtype):
+        # GH53616
+        left = np.arange(0, 100, dtype=dtype)
+
+        assert not lib.is_range_indexer(left, 2**31)
+
     @pytest.mark.parametrize("dtype", ["int64", "int32"])
     def test_is_range_indexer_not_equal(self, dtype):
         # GH#50592
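A sketch of the internal predicate this patch widens (a private helper, so subject to change; the signature is as in the diff above):

>>> import numpy as np
>>> from pandas._libs import lib
>>> indexer = np.arange(3, dtype=np.intp)
>>> lib.is_range_indexer(indexer, 3)   # indexer is exactly range(3)
True
>>> lib.is_range_indexer(indexer, 2**31)  # n no longer overflows a C int
False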
DataFrame({"A": _test_series, "B": _test_series, "C": np.arange(len(dti))}) -def test_str(): - r = test_series.resample("H") +def test_str(_test_series): + r = _test_series.resample("H") assert ( "DatetimeIndexResampler [freq=, axis=0, closed=left, " "label=left, convention=start, origin=start_day]" in str(r) ) - r = test_series.resample("H", origin="2000-01-01") + r = _test_series.resample("H", origin="2000-01-01") assert ( "DatetimeIndexResampler [freq=, axis=0, closed=left, " "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r) ) -def test_api(): - r = test_series.resample("H") +def test_api(_test_series): + r = _test_series.resample("H") result = r.mean() assert isinstance(result, Series) assert len(result) == 217 - r = test_series.to_frame().resample("H") + r = _test_series.to_frame().resample("H") result = r.mean() assert isinstance(result, DataFrame) assert len(result) == 217 @@ -115,11 +120,11 @@ def test_resample_group_keys(): tm.assert_frame_equal(result, expected) -def test_pipe(test_frame): +def test_pipe(test_frame, _test_series): # GH17905 # series - r = test_series.resample("H") + r = _test_series.resample("H") expected = r.max() - r.mean() result = r.pipe(lambda x: x.max() - x.mean()) tm.assert_series_equal(result, expected) @@ -259,9 +264,9 @@ def test_combined_up_downsampling_of_irregular(): tm.assert_series_equal(result, expected) -def test_transform_series(): - r = test_series.resample("20min") - expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean") +def test_transform_series(_test_series): + r = _test_series.resample("20min") + expected = _test_series.groupby(pd.Grouper(freq="20min")).transform("mean") result = r.transform("mean") tm.assert_series_equal(result, expected) @@ -317,17 +322,17 @@ def test_fillna(): ], ids=["resample", "groupby"], ) -def test_apply_without_aggregation(func): +def test_apply_without_aggregation(func, _test_series): # both resample and groupby should work w/o aggregation - t = func(test_series) + t = func(_test_series) result = t.apply(lambda x: x) - tm.assert_series_equal(result, test_series) + tm.assert_series_equal(result, _test_series) -def test_apply_without_aggregation2(): - grouped = test_series.to_frame(name="foo").resample("20min", group_keys=False) +def test_apply_without_aggregation2(_test_series): + grouped = _test_series.to_frame(name="foo").resample("20min", group_keys=False) result = grouped["foo"].apply(lambda x: x) - tm.assert_series_equal(result, test_series.rename("foo")) + tm.assert_series_equal(result, _test_series.rename("foo")) def test_agg_consistency(): @@ -1002,13 +1007,13 @@ def test_df_axis_param_depr(): df.resample("M", axis=0) -def test_series_axis_param_depr(): +def test_series_axis_param_depr(_test_series): warning_msg = ( "The 'axis' keyword in Series.resample is " "deprecated and will be removed in a future version." 
) with tm.assert_produces_warning(FutureWarning, match=warning_msg): - test_series.resample("H", axis=0) + _test_series.resample("H", axis=0) def test_resample_empty(): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 1682edb42915d..df14a5bc374c6 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -17,10 +17,13 @@ import pandas._testing as tm from pandas.core.indexes.datetimes import date_range -test_frame = DataFrame( - {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}, - index=date_range("1/1/2000", freq="s", periods=40), -) + +@pytest.fixture +def test_frame(): + return DataFrame( + {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}, + index=date_range("1/1/2000", freq="s", periods=40), + ) @async_mark() @@ -85,7 +88,7 @@ def f_1(x): tm.assert_frame_equal(result, expected) -def test_getitem(): +def test_getitem(test_frame): g = test_frame.groupby("A") expected = g.B.apply(lambda x: x.resample("2s").mean()) @@ -217,7 +220,7 @@ def test_nearest(): "ohlc", ], ) -def test_methods(f): +def test_methods(f, test_frame): g = test_frame.groupby("A") r = g.resample("2s") @@ -226,7 +229,7 @@ def test_methods(f): tm.assert_equal(result, expected) -def test_methods_nunique(): +def test_methods_nunique(test_frame): # series only g = test_frame.groupby("A") r = g.resample("2s") @@ -236,7 +239,7 @@ def test_methods_nunique(): @pytest.mark.parametrize("f", ["std", "var"]) -def test_methods_std_var(f): +def test_methods_std_var(f, test_frame): g = test_frame.groupby("A") r = g.resample("2s") result = getattr(r, f)(ddof=1) @@ -244,7 +247,7 @@ def test_methods_std_var(f): tm.assert_frame_equal(result, expected) -def test_apply(): +def test_apply(test_frame): g = test_frame.groupby("A") r = g.resample("2s") @@ -342,7 +345,7 @@ def test_resample_groupby_with_label(): tm.assert_frame_equal(result, expected) -def test_consistency_with_window(): +def test_consistency_with_window(test_frame): # consistent return values with window df = test_frame expected = Index([1, 2, 3], name="A") diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index debfb48c2b39c..a5fb48f801522 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -14,10 +14,13 @@ from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range -test_series = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000)) +@pytest.fixture +def test_series(): + return Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000)) -def test_apply(): + +def test_apply(test_series): grouper = Grouper(freq="A", label="right", closed="right") grouped = test_series.groupby(grouper) @@ -33,7 +36,7 @@ def f(x): tm.assert_series_equal(applied, expected) -def test_count(): +def test_count(test_series): test_series[::3] = np.nan expected = test_series.groupby(lambda x: x.year).count() @@ -48,7 +51,7 @@ def test_count(): tm.assert_series_equal(result, expected) -def test_numpy_reduction(): +def test_numpy_reduction(test_series): result = test_series.resample("A", closed="right").prod() expected = test_series.groupby(lambda x: x.year).agg(np.prod) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 2d84de8145111..948545320a31a 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ 
b/pandas/tests/reshape/concat/test_append_common.py @@ -10,37 +10,46 @@ ) import pandas._testing as tm -dt_data = [ - pd.Timestamp("2011-01-01"), - pd.Timestamp("2011-01-02"), - pd.Timestamp("2011-01-03"), -] -tz_data = [ - pd.Timestamp("2011-01-01", tz="US/Eastern"), - pd.Timestamp("2011-01-02", tz="US/Eastern"), - pd.Timestamp("2011-01-03", tz="US/Eastern"), -] -td_data = [ - pd.Timedelta("1 days"), - pd.Timedelta("2 days"), - pd.Timedelta("3 days"), -] -period_data = [ - pd.Period("2011-01", freq="M"), - pd.Period("2011-02", freq="M"), - pd.Period("2011-03", freq="M"), -] -data_dict = { - "bool": [True, False, True], - "int64": [1, 2, 3], - "float64": [1.1, np.nan, 3.3], - "category": Categorical(["X", "Y", "Z"]), - "object": ["a", "b", "c"], - "datetime64[ns]": dt_data, - "datetime64[ns, US/Eastern]": tz_data, - "timedelta64[ns]": td_data, - "period[M]": period_data, -} + +@pytest.fixture( + params=list( + { + "bool": [True, False, True], + "int64": [1, 2, 3], + "float64": [1.1, np.nan, 3.3], + "category": Categorical(["X", "Y", "Z"]), + "object": ["a", "b", "c"], + "datetime64[ns]": [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + ], + "datetime64[ns, US/Eastern]": [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), + ], + "timedelta64[ns]": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + "period[M]": [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + ], + }.items() + ) +) +def item(request): + key, data = request.param + return key, data + + +@pytest.fixture +def item2(item): + return item class TestConcatAppendCommon: @@ -48,13 +57,6 @@ class TestConcatAppendCommon: Test common dtype coercion rules between concat and append. """ - @pytest.fixture(params=sorted(data_dict.keys())) - def item(self, request): - key = request.param - return key, data_dict[key] - - item2 = item - def test_dtypes(self, item, index_or_series): # to confirm test case covers intended dtypes typ, vals = item diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 1fbc9ed787e11..e1d64795e235d 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -10,10 +10,11 @@ class TestSeriesArgsort: - def _check_accum_op(self, name, ser, check_dtype=True): - func = getattr(np, name) + def test_argsort_numpy(self, datetime_series): + ser = datetime_series + func = np.argsort tm.assert_numpy_array_equal( - func(ser).values, func(np.array(ser)), check_dtype=check_dtype + func(ser).values, func(np.array(ser)), check_dtype=False ) # with missing values @@ -26,7 +27,6 @@ def _check_accum_op(self, name, ser, check_dtype=True): tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) def test_argsort(self, datetime_series): - self._check_accum_op("argsort", datetime_series, check_dtype=False) argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index d91cd6a43daea..ea1cb1047bde8 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -12,149 +12,162 @@ # this default. Those overrides are defined as a dict with (keyword, val) as # dictionary key. 
diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py
index 1fbc9ed787e11..e1d64795e235d 100644
--- a/pandas/tests/series/methods/test_argsort.py
+++ b/pandas/tests/series/methods/test_argsort.py
@@ -10,10 +10,11 @@


 class TestSeriesArgsort:
-    def _check_accum_op(self, name, ser, check_dtype=True):
-        func = getattr(np, name)
+    def test_argsort_numpy(self, datetime_series):
+        ser = datetime_series
+        func = np.argsort
         tm.assert_numpy_array_equal(
-            func(ser).values, func(np.array(ser)), check_dtype=check_dtype
+            func(ser).values, func(np.array(ser)), check_dtype=False
         )

         # with missing values
@@ -26,7 +27,6 @@ def _check_accum_op(self, name, ser, check_dtype=True):
         tm.assert_numpy_array_equal(result.values, expected, check_dtype=False)

     def test_argsort(self, datetime_series):
-        self._check_accum_op("argsort", datetime_series, check_dtype=False)
         argsorted = datetime_series.argsort()
         assert issubclass(argsorted.dtype.type, np.integer)
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index d91cd6a43daea..ea1cb1047bde8 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -12,149 +12,162 @@
 # this default. Those overrides are defined as a dict with (keyword, val) as
 # dictionary key. In case of multiple items, the last override takes precedence.

-test_cases = [
-    (
-        # data
-        [1, 2, 3],
-        # original dtype
-        np.dtype("int32"),
-        # default expected dtype
-        "Int32",
-        # exceptions on expected dtype
-        {("convert_integer", False): np.dtype("int32")},
-    ),
-    (
-        [1, 2, 3],
-        np.dtype("int64"),
-        "Int64",
-        {("convert_integer", False): np.dtype("int64")},
-    ),
-    (
-        ["x", "y", "z"],
-        np.dtype("O"),
-        pd.StringDtype(),
-        {("convert_string", False): np.dtype("O")},
-    ),
-    (
-        [True, False, np.nan],
-        np.dtype("O"),
-        pd.BooleanDtype(),
-        {("convert_boolean", False): np.dtype("O")},
-    ),
-    (
-        ["h", "i", np.nan],
-        np.dtype("O"),
-        pd.StringDtype(),
-        {("convert_string", False): np.dtype("O")},
-    ),
-    (  # GH32117
-        ["h", "i", 1],
-        np.dtype("O"),
-        np.dtype("O"),
-        {},
-    ),
-    (
-        [10, np.nan, 20],
-        np.dtype("float"),
-        "Int64",
-        {
-            ("convert_integer", False, "convert_floating", True): "Float64",
-            ("convert_integer", False, "convert_floating", False): np.dtype("float"),
-        },
-    ),
-    (
-        [np.nan, 100.5, 200],
-        np.dtype("float"),
-        "Float64",
-        {("convert_floating", False): np.dtype("float")},
-    ),
-    (
-        [3, 4, 5],
-        "Int8",
-        "Int8",
-        {},
-    ),
-    (
-        [[1, 2], [3, 4], [5]],
-        None,
-        np.dtype("O"),
-        {},
-    ),
-    (
-        [4, 5, 6],
-        np.dtype("uint32"),
-        "UInt32",
-        {("convert_integer", False): np.dtype("uint32")},
-    ),
-    (
-        [-10, 12, 13],
-        np.dtype("i1"),
-        "Int8",
-        {("convert_integer", False): np.dtype("i1")},
-    ),
-    (
-        [1.2, 1.3],
-        np.dtype("float32"),
-        "Float32",
-        {("convert_floating", False): np.dtype("float32")},
-    ),
-    (
-        [1, 2.0],
-        object,
-        "Int64",
-        {
-            ("convert_integer", False): "Float64",
-            ("convert_integer", False, "convert_floating", False): np.dtype("float"),
-            ("infer_objects", False): np.dtype("object"),
-        },
-    ),
-    (
-        [1, 2.5],
-        object,
-        "Float64",
-        {
-            ("convert_floating", False): np.dtype("float"),
-            ("infer_objects", False): np.dtype("object"),
-        },
-    ),
-    (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
-    (
-        pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
-        pd.DatetimeTZDtype(tz="UTC"),
-        pd.DatetimeTZDtype(tz="UTC"),
-        {},
-    ),
-    (
-        pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
-        "datetime64[ns]",
-        np.dtype("datetime64[ns]"),
-        {},
-    ),
-    (
-        pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
-        object,
-        np.dtype("datetime64[ns]"),
-        {("infer_objects", False): np.dtype("object")},
-    ),
-    (pd.period_range("1/1/2011", freq="M", periods=3), None, pd.PeriodDtype("M"), {}),
-    (
-        pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
-        None,
-        pd.IntervalDtype("int64", "right"),
-        {},
-    ),
-]
+
+@pytest.fixture(
+    params=[
+        (
+            # data
+            [1, 2, 3],
+            # original dtype
+            np.dtype("int32"),
+            # default expected dtype
+            "Int32",
+            # exceptions on expected dtype
+            {("convert_integer", False): np.dtype("int32")},
+        ),
+        (
+            [1, 2, 3],
+            np.dtype("int64"),
+            "Int64",
+            {("convert_integer", False): np.dtype("int64")},
+        ),
+        (
+            ["x", "y", "z"],
+            np.dtype("O"),
+            pd.StringDtype(),
+            {("convert_string", False): np.dtype("O")},
+        ),
+        (
+            [True, False, np.nan],
+            np.dtype("O"),
+            pd.BooleanDtype(),
+            {("convert_boolean", False): np.dtype("O")},
+        ),
+        (
+            ["h", "i", np.nan],
+            np.dtype("O"),
+            pd.StringDtype(),
+            {("convert_string", False): np.dtype("O")},
+        ),
+        (  # GH32117
+            ["h", "i", 1],
+            np.dtype("O"),
+            np.dtype("O"),
+            {},
+        ),
+        (
+            [10, np.nan, 20],
+            np.dtype("float"),
+            "Int64",
+            {
+                ("convert_integer", False, "convert_floating", True): "Float64",
+                ("convert_integer", False, "convert_floating", False): np.dtype(
+                    "float"
+                ),
+            },
+        ),
+        (
+            [np.nan, 100.5, 200],
+            np.dtype("float"),
+            "Float64",
+            {("convert_floating", False): np.dtype("float")},
+        ),
+        (
+            [3, 4, 5],
+            "Int8",
+            "Int8",
+            {},
+        ),
+        (
+            [[1, 2], [3, 4], [5]],
+            None,
+            np.dtype("O"),
+            {},
+        ),
+        (
+            [4, 5, 6],
+            np.dtype("uint32"),
+            "UInt32",
+            {("convert_integer", False): np.dtype("uint32")},
+        ),
+        (
+            [-10, 12, 13],
+            np.dtype("i1"),
+            "Int8",
+            {("convert_integer", False): np.dtype("i1")},
+        ),
+        (
+            [1.2, 1.3],
+            np.dtype("float32"),
+            "Float32",
+            {("convert_floating", False): np.dtype("float32")},
+        ),
+        (
+            [1, 2.0],
+            object,
+            "Int64",
+            {
+                ("convert_integer", False): "Float64",
+                ("convert_integer", False, "convert_floating", False): np.dtype(
+                    "float"
+                ),
+                ("infer_objects", False): np.dtype("object"),
+            },
+        ),
+        (
+            [1, 2.5],
+            object,
+            "Float64",
+            {
+                ("convert_floating", False): np.dtype("float"),
+                ("infer_objects", False): np.dtype("object"),
+            },
+        ),
+        (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
+        (
+            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
+            pd.DatetimeTZDtype(tz="UTC"),
+            pd.DatetimeTZDtype(tz="UTC"),
+            {},
+        ),
+        (
+            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
+            "datetime64[ns]",
+            np.dtype("datetime64[ns]"),
+            {},
+        ),
+        (
+            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
+            object,
+            np.dtype("datetime64[ns]"),
+            {("infer_objects", False): np.dtype("object")},
+        ),
+        (
+            pd.period_range("1/1/2011", freq="M", periods=3),
+            None,
+            pd.PeriodDtype("M"),
+            {},
+        ),
+        (
+            pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
+            None,
+            pd.IntervalDtype("int64", "right"),
+            {},
+        ),
+    ]
+)
+def test_cases(request):
+    return request.param


 class TestSeriesConvertDtypes:
-    @pytest.mark.parametrize(
-        "data, maindtype, expected_default, expected_other",
-        test_cases,
-    )
     @pytest.mark.parametrize("params", product(*[(True, False)] * 5))
     def test_convert_dtypes(
-        self, data, maindtype, params, expected_default, expected_other
+        self,
+        test_cases,
+        params,
     ):
+        data, maindtype, expected_default, expected_other = test_cases
         if (
             hasattr(data, "dtype")
             and data.dtype == "M8[ns]"
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
index ac36103edcdcc..38dea7dc5f8bf 100644
--- a/pandas/tests/series/test_ufunc.py
+++ b/pandas/tests/series/test_ufunc.py
@@ -11,9 +11,16 @@
 import pandas._testing as tm
 from pandas.arrays import SparseArray

-BINARY_UFUNCS = [np.add, np.logaddexp]  # dunder op
-SPARSE = [True, False]
-SPARSE_IDS = ["sparse", "dense"]
+
+@pytest.fixture(params=[np.add, np.logaddexp])
+def ufunc(request):
+    # dunder op
+    return request.param
+
+
+@pytest.fixture(params=[True, False], ids=["sparse", "dense"])
+def sparse(request):
+    return request.param


 @pytest.fixture
@@ -29,7 +36,6 @@ def arrays_for_binary_ufunc():


 @pytest.mark.parametrize("ufunc", [np.positive, np.floor, np.exp])
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 def test_unary_ufunc(ufunc, sparse):
     # Test that ufunc(pd.Series) == pd.Series(ufunc)
     arr = np.random.randint(0, 10, 10, dtype="int64")
@@ -46,8 +52,6 @@ def test_unary_ufunc(ufunc, sparse):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
 def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
     # Test that ufunc(pd.Series(a), array) == pd.Series(ufunc(a, b))
@@ -72,8 +76,6 @@ def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
 def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
     # Test that
@@ -101,8 +103,6 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"])
 @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
 def test_binary_ufunc_with_series(
@@ -143,8 +143,6 @@ def test_binary_ufunc_with_series(
     tm.assert_series_equal(result, expected)


-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("flip", [True, False])
 def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
     # Test that
@@ -170,7 +168,6 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):


 @pytest.mark.parametrize("ufunc", [np.divmod])  # TODO: np.modf, np.frexp
-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("shuffle", [True, False])
 @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
 def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc):
@@ -203,7 +200,6 @@ def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary
     tm.assert_series_equal(result[1], pd.Series(expected[1]))


-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
     # Test that the same conditions from unary input apply to multi-output
     # ufuncs
@@ -223,8 +219,6 @@ def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
     tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))


-@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
-@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
 def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc):
     # Drop the names when they differ.
     a1, a2 = arrays_for_binary_ufunc
diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py
index cefcf09613de1..47615be32e5b0 100644
--- a/pandas/tests/test_take.py
+++ b/pandas/tests/test_take.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-import re

 import numpy as np
 import pytest
@@ -41,9 +40,6 @@ def dtype_fill_out_dtype(request):


 class TestTake:
-    # Standard incompatible fill error.
-    fill_error = re.compile("Incompatible type for fill_value")
-
     def test_1d_fill_nonna(self, dtype_fill_out_dtype):
         dtype, fill_value, out_dtype = dtype_fill_out_dtype
         data = np.random.randint(0, 2, 4).astype(dtype)
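The test_take.py context lines show the same idiom applied to multi-field cases: a fixture yields a (dtype, fill_value, out_dtype) triple that each test unpacks on its first line. A minimal sketch with made-up values (the real dtype_fill_out_dtype fixture carries pandas-specific promotion cases):

import numpy as np
import pytest


@pytest.fixture(
    params=[
        # (dtype, fill value, expected output dtype) -- illustrative only
        (np.int64, np.nan, np.float64),
        (np.float64, np.nan, np.float64),
        (np.object_, None, np.object_),
    ]
)
def dtype_fill_out_dtype(request):
    return request.param


def test_unpacks_triple(dtype_fill_out_dtype):
    dtype, fill_value, out_dtype = dtype_fill_out_dtype
    data = np.zeros(4, dtype=dtype)
    # The fill value must be representable once data is cast to out_dtype.
    filled = np.append(data.astype(out_dtype), np.array(fill_value, dtype=out_dtype))
    assert filled.dtype == out_dtype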
-    fill_error = re.compile("Incompatible type for fill_value")
-
     def test_1d_fill_nonna(self, dtype_fill_out_dtype):
         dtype, fill_value, out_dtype = dtype_fill_out_dtype
         data = np.random.randint(0, 2, 4).astype(dtype)
diff --git a/pandas/tests/tseries/offsets/test_custom_business_month.py b/pandas/tests/tseries/offsets/test_custom_business_month.py
index faf0f9810200b..0fff99ff8c025 100644
--- a/pandas/tests/tseries/offsets/test_custom_business_month.py
+++ b/pandas/tests/tseries/offsets/test_custom_business_month.py
@@ -11,7 +11,6 @@
     datetime,
     timedelta,
 )
-from typing import TYPE_CHECKING

 import numpy as np
 import pytest
@@ -34,9 +33,6 @@
 from pandas.tseries import offsets
 from pandas.tseries.holiday import USFederalHolidayCalendar

-if TYPE_CHECKING:
-    from pandas.tests.tseries.offsets.test_offsets import _ApplyCases
-

 @pytest.fixture
 def dt():
@@ -132,7 +128,7 @@ def test_is_on_offset(self, case):
         offset, dt, expected = case
         assert_is_on_offset(offset, dt, expected)

-    apply_cases: _ApplyCases = [
+    apply_cases = [
         (
             CBMonthBegin(),
             {
@@ -330,7 +326,7 @@ def test_is_on_offset(self, case):
         offset, dt, expected = case
         assert_is_on_offset(offset, dt, expected)

-    apply_cases: _ApplyCases = [
+    apply_cases = [
         (
             CBMonthEnd(),
             {
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index bfc5139c78b91..6df47968bd3bb 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -7,11 +7,6 @@
     datetime,
     timedelta,
 )
-from typing import (
-    Dict,
-    List,
-    Tuple,
-)

 import numpy as np
 import pytest
@@ -42,7 +37,6 @@
 from pandas.tseries import offsets
 from pandas.tseries.offsets import (
     FY5253,
-    BaseOffset,
     BDay,
     BMonthEnd,
     BusinessHour,
@@ -61,8 +55,6 @@
     WeekOfMonth,
 )

-_ApplyCases = List[Tuple[BaseOffset, Dict[datetime, datetime]]]
-
 _ARITHMETIC_DATE_OFFSET = [
     "years",
     "months",
diff --git a/pandas/tests/util/test_validate_args.py b/pandas/tests/util/test_validate_args.py
index 77e6b01ba1180..eef0931ec28ef 100644
--- a/pandas/tests/util/test_validate_args.py
+++ b/pandas/tests/util/test_validate_args.py
@@ -2,17 +2,20 @@

 from pandas.util._validators import validate_args

-_fname = "func"
+@pytest.fixture
+def _fname():
+    return "func"

-def test_bad_min_fname_arg_count():
+
+def test_bad_min_fname_arg_count(_fname):
     msg = "'max_fname_arg_count' must be non-negative"

     with pytest.raises(ValueError, match=msg):
         validate_args(_fname, (None,), -1, "foo")


-def test_bad_arg_length_max_value_single():
+def test_bad_arg_length_max_value_single(_fname):
     args = (None, None)
     compat_args = ("foo",)

@@ -28,7 +31,7 @@ def test_bad_arg_length_max_value_single():
     validate_args(_fname, args, min_fname_arg_count, compat_args)


-def test_bad_arg_length_max_value_multiple():
+def test_bad_arg_length_max_value_multiple(_fname):
     args = (None, None)
     compat_args = {"foo": None}

@@ -45,7 +48,7 @@ def test_bad_arg_length_max_value_multiple():


 @pytest.mark.parametrize("i", range(1, 3))
-def test_not_all_defaults(i):
+def test_not_all_defaults(i, _fname):
     bad_arg = "foo"
     msg = (
         f"the '{bad_arg}' parameter is not supported "
@@ -59,7 +62,7 @@ def test_not_all_defaults(i):
         validate_args(_fname, arg_vals[:i], 2, compat_args)


-def test_validation():
+def test_validation(_fname):
     # No exceptions should be raised.
     validate_args(_fname, (None,), 2, {"out": None})
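The three util test modules get identical treatment: the module-level _fname = "func" constant becomes a fixture, and every test that read the global now requests it by name; pytest matches fixture parameter names literally, so the underscore-prefixed name keeps working. A minimal sketch of the resulting shape, with a toy validator standing in for pandas.util._validators (assumed behavior only, not the real implementation):

import pytest


def validate_args(fname, args, max_fname_arg_count, compat_args):
    # Toy stand-in for pandas.util._validators.validate_args: reject any
    # positional arguments beyond those named in compat_args.
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")
    if len(args) > len(compat_args):
        raise TypeError(f"{fname}() takes at most {len(compat_args)} arguments")


@pytest.fixture
def _fname():
    # Shared constant, now injected per test instead of read from a global.
    return "func"


def test_too_many_args(_fname):
    with pytest.raises(TypeError, match="takes at most"):
        validate_args(_fname, (1, 2), 5, ("out",))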
diff --git a/pandas/tests/util/test_validate_args_and_kwargs.py b/pandas/tests/util/test_validate_args_and_kwargs.py
index 54d94d2194909..215026d648471 100644
--- a/pandas/tests/util/test_validate_args_and_kwargs.py
+++ b/pandas/tests/util/test_validate_args_and_kwargs.py
@@ -2,10 +2,13 @@

 from pandas.util._validators import validate_args_and_kwargs

-_fname = "func"
+@pytest.fixture
+def _fname():
+    return "func"

-def test_invalid_total_length_max_length_one():
+
+def test_invalid_total_length_max_length_one(_fname):
     compat_args = ("foo",)
     kwargs = {"foo": "FOO"}
     args = ("FoO", "BaZ")
@@ -23,7 +26,7 @@ def test_invalid_total_length_max_length_one():
     validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args)


-def test_invalid_total_length_max_length_multiple():
+def test_invalid_total_length_max_length_multiple(_fname):
     compat_args = ("foo", "bar", "baz")
     kwargs = {"foo": "FOO", "bar": "BAR"}
     args = ("FoO", "BaZ")
@@ -42,7 +45,7 @@ def test_invalid_total_length_max_length_multiple():


 @pytest.mark.parametrize("args,kwargs", [((), {"foo": -5, "bar": 2}), ((-5, 2), {})])
-def test_missing_args_or_kwargs(args, kwargs):
+def test_missing_args_or_kwargs(args, kwargs, _fname):
     bad_arg = "bar"
     min_fname_arg_count = 2

@@ -57,7 +60,7 @@ def test_missing_args_or_kwargs(args, kwargs):
     validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args)


-def test_duplicate_argument():
+def test_duplicate_argument(_fname):
     min_fname_arg_count = 2
     compat_args = {"foo": None, "bar": None, "baz": None}

@@ -70,7 +73,7 @@ def test_duplicate_argument():
     validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args)


-def test_validation():
+def test_validation(_fname):
     # No exceptions should be raised.
     compat_args = {"foo": 1, "bar": None, "baz": -2}
     kwargs = {"baz": -2}
diff --git a/pandas/tests/util/test_validate_kwargs.py b/pandas/tests/util/test_validate_kwargs.py
index de49cdd5e247d..dba447e30cf57 100644
--- a/pandas/tests/util/test_validate_kwargs.py
+++ b/pandas/tests/util/test_validate_kwargs.py
@@ -5,10 +5,13 @@
     validate_kwargs,
 )

-_fname = "func"
+@pytest.fixture
+def _fname():
+    return "func"

-def test_bad_kwarg():
+
+def test_bad_kwarg(_fname):
     good_arg = "f"
     bad_arg = good_arg + "o"

@@ -22,7 +25,7 @@ def test_bad_kwarg():


 @pytest.mark.parametrize("i", range(1, 3))
-def test_not_all_none(i):
+def test_not_all_none(i, _fname):
     bad_arg = "foo"
     msg = (
         rf"the '{bad_arg}' parameter is not supported "
@@ -40,7 +43,7 @@ def test_not_all_none(i):
         validate_kwargs(_fname, kwargs, compat_args)


-def test_validation():
+def test_validation(_fname):
     # No exceptions should be raised.
     compat_args = {"f": None, "b": 1, "ba": "s"}