Skip to content

DEPR: value_counts doing dtype inference on result.index #58009

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ Removal of prior version deprecations/changes
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`)
- Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`)
- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`)
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``. Passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`)
Expand Down
35 changes: 5 additions & 30 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,26 +892,9 @@ def value_counts_internal(
if keys.dtype == np.float16:
keys = keys.astype(np.float32)

# For backwards compatibility, we let Index do its normal type
# inference, _except_ for if it infers from object to bool.
idx = Index(keys)
if idx.dtype == bool and keys.dtype == object:
idx = idx.astype(object)
elif (
idx.dtype != keys.dtype # noqa: PLR1714 # # pylint: disable=R1714
and idx.dtype != "string[pyarrow_numpy]"
):
warnings.warn(
# GH#56161
"The behavior of value_counts with object-dtype is deprecated. "
"In a future version, this will *not* perform dtype inference "
"on the resulting index. To retain the old behavior, use "
"`result.index = result.index.infer_objects()`",
FutureWarning,
stacklevel=find_stack_level(),
)
idx.name = index_name

# Starting in 3.0, we no longer perform dtype inference on the
# Index object we construct here, xref GH#56161
idx = Index(keys, dtype=keys.dtype, name=index_name)
result = Series(counts, index=idx, name=name, copy=False)

if sort:
Expand Down Expand Up @@ -1606,16 +1589,8 @@ def union_with_duplicates(
"""
from pandas import Series

with warnings.catch_warnings():
# filter warning from object dtype inference; we will end up discarding
# the index here, so the deprecation does not affect the end result here.
warnings.filterwarnings(
"ignore",
"The behavior of value_counts with object-dtype is deprecated",
category=FutureWarning,
)
l_count = value_counts_internal(lvals, dropna=False)
r_count = value_counts_internal(rvals, dropna=False)
l_count = value_counts_internal(lvals, dropna=False)
r_count = value_counts_internal(rvals, dropna=False)
l_count, r_count = l_count.align(r_count, fill_value=0)
final_count = np.maximum(l_count.values, r_count.values)
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)
Expand Down
12 changes: 2 additions & 10 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
Union,
overload,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -1217,15 +1216,8 @@ def value_counts(self, dropna: bool = True) -> Series:
Series.value_counts
"""
# TODO: implement this in a non-naive way!
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"The behavior of value_counts with object-dtype is deprecated",
category=FutureWarning,
)
result = value_counts(np.asarray(self), dropna=dropna)
# Once the deprecation is enforced, we will need to do
# `result.index = result.index.astype(self.dtype)`
result = value_counts(np.asarray(self), dropna=dropna)
result.index = result.index.astype(self.dtype)
return result

# ---------------------------------------------------------------------
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,8 @@ def test_value_counts_object_inference_deprecated():
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")

idx = dti.astype(object)
msg = "The behavior of value_counts with object-dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = idx.value_counts()
res = idx.value_counts()

exp = dti.value_counts()
exp.index = exp.index.astype(object)
tm.assert_series_equal(res, exp)