From 30ebd3ea10939dfde5cfd11d8030b930a42bb5fd Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 25 Mar 2024 19:03:27 -0700 Subject: [PATCH] DEPR: value_counts doing dtype inference on result.index --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/algorithms.py | 35 ++++---------------------- pandas/core/arrays/interval.py | 12 ++------- pandas/tests/base/test_value_counts.py | 5 ++-- 4 files changed, 10 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a398b93b60018..4fd2f46fc71fd 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -199,6 +199,7 @@ Removal of prior version deprecations/changes - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) +- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) - Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 344314d829c19..8620aafd97528 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -892,26 +892,9 @@ def value_counts_internal( if keys.dtype == np.float16: keys = keys.astype(np.float32) - # For backwards compatibility, we let Index do its normal type - # inference, _except_ for if if infers from object to bool. - idx = Index(keys) - if idx.dtype == bool and keys.dtype == object: - idx = idx.astype(object) - elif ( - idx.dtype != keys.dtype # noqa: PLR1714 # # pylint: disable=R1714 - and idx.dtype != "string[pyarrow_numpy]" - ): - warnings.warn( - # GH#56161 - "The behavior of value_counts with object-dtype is deprecated. " - "In a future version, this will *not* perform dtype inference " - "on the resulting index. To retain the old behavior, use " - "`result.index = result.index.infer_objects()`", - FutureWarning, - stacklevel=find_stack_level(), - ) - idx.name = index_name - + # Starting in 3.0, we no longer perform dtype inference on the + # Index object we construct here, xref GH#56161 + idx = Index(keys, dtype=keys.dtype, name=index_name) result = Series(counts, index=idx, name=name, copy=False) if sort: @@ -1606,16 +1589,8 @@ def union_with_duplicates( """ from pandas import Series - with warnings.catch_warnings(): - # filter warning from object dtype inference; we will end up discarding - # the index here, so the deprecation does not affect the end result here. - warnings.filterwarnings( - "ignore", - "The behavior of value_counts with object-dtype is deprecated", - category=FutureWarning, - ) - l_count = value_counts_internal(lvals, dropna=False) - r_count = value_counts_internal(rvals, dropna=False) + l_count = value_counts_internal(lvals, dropna=False) + r_count = value_counts_internal(rvals, dropna=False) l_count, r_count = l_count.align(r_count, fill_value=0) final_count = np.maximum(l_count.values, r_count.values) final_count = Series(final_count, index=l_count.index, dtype="int", copy=False) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 56ea28c0b50f8..af666a591b1bc 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -13,7 +13,6 @@ Union, overload, ) -import warnings import numpy as np @@ -1217,15 +1216,8 @@ def value_counts(self, dropna: bool = True) -> Series: Series.value_counts """ # TODO: implement this is a non-naive way! - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - "The behavior of value_counts with object-dtype is deprecated", - category=FutureWarning, - ) - result = value_counts(np.asarray(self), dropna=dropna) - # Once the deprecation is enforced, we will need to do - # `result.index = result.index.astype(self.dtype)` + result = value_counts(np.asarray(self), dropna=dropna) + result.index = result.index.astype(self.dtype) return result # --------------------------------------------------------------------- diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index a0b0bdfdb46d8..ac40e48f3d523 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -347,9 +347,8 @@ def test_value_counts_object_inference_deprecated(): dti = pd.date_range("2016-01-01", periods=3, tz="UTC") idx = dti.astype(object) - msg = "The behavior of value_counts with object-dtype is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = idx.value_counts() + res = idx.value_counts() exp = dti.value_counts() + exp.index = exp.index.astype(object) tm.assert_series_equal(res, exp)