Skip to content

DEPR: special-casing dt64/td64 in Series.unique #42741

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Pull request to merge 2 commits
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
cast,
final,
)
import warnings

import numpy as np

Expand All @@ -34,6 +35,7 @@
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_categorical_dtype,
Expand Down Expand Up @@ -977,6 +979,14 @@ def unique(self):
if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries):
# GH#31182 Series._values returns EA, unpack for backward-compat
if getattr(self.dtype, "tz", None) is None:

warnings.warn(
f"Series.unique behavior with {self.dtype} dtype is "
"deprecated. In a future version this will return a "
f"{type(self._values)} instead of a np.ndarray",
FutureWarning,
stacklevel=find_stack_level(),
)
result = np.asarray(result)
else:
result = unique1d(values)
Expand Down
23 changes: 16 additions & 7 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def test_value_counts_bins(index_or_series):

def test_value_counts_datetime64(index_or_series):
klass = index_or_series
warn = None if klass is Index else FutureWarning

# GH 3002, datetime64[ns]
# don't test names though
Expand Down Expand Up @@ -223,12 +224,15 @@ def test_value_counts_datetime64(index_or_series):
["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
dtype="datetime64[ns]",
)
with tm.assert_produces_warning(warn, match="DatetimeArray"):
unq = s.unique()
if isinstance(s, Index):
tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
tm.assert_index_equal(unq, DatetimeIndex(expected))
else:
tm.assert_numpy_array_equal(s.unique(), expected)
tm.assert_numpy_array_equal(unq, expected)

assert s.nunique() == 3
with tm.assert_produces_warning(warn, match="DatetimeArray"):
assert s.nunique() == 3

# with NaT
s = df["dt"].copy()
Expand All @@ -243,7 +247,9 @@ def test_value_counts_datetime64(index_or_series):
tm.assert_series_equal(result, expected_s)

assert s.dtype == "datetime64[ns]"
unique = s.unique()
warn = None if isinstance(s, DatetimeIndex) else FutureWarning
with tm.assert_produces_warning(warn, match="DatetimeArray"):
unique = s.unique()
assert unique.dtype == "datetime64[ns]"

# numpy_array_equal cannot compare pd.NaT
Expand All @@ -254,8 +260,9 @@ def test_value_counts_datetime64(index_or_series):
tm.assert_numpy_array_equal(unique[:3], expected)
assert pd.isna(unique[3])

assert s.nunique() == 3
assert s.nunique(dropna=False) == 4
with tm.assert_produces_warning(warn, match="DatetimeArray"):
assert s.nunique() == 3
assert s.nunique(dropna=False) == 4

# timedelta64[ns]
td = df.dt - df.dt + timedelta(1)
Expand All @@ -269,7 +276,9 @@ def test_value_counts_datetime64(index_or_series):
if isinstance(td, Index):
tm.assert_index_equal(td.unique(), expected)
else:
tm.assert_numpy_array_equal(td.unique(), expected.values)
with tm.assert_produces_warning(FutureWarning, match="TimedeltaArray"):
res = td.unique()
tm.assert_numpy_array_equal(res, expected.values)

td2 = timedelta(1) + (df.dt - df.dt)
td2 = klass(td2, name="dt")
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def assert_stat_op_calc(
f = getattr(frame, opname)

if check_dates:
expected_warning = FutureWarning if opname in ["mean", "median"] else None
expected_warning = (
FutureWarning if opname in ["mean", "median", "nunique"] else None
)
df = DataFrame({"b": date_range("1/1/2001", periods=2)})
with tm.assert_produces_warning(expected_warning):
result = getattr(df, opname)()
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,15 +625,19 @@ def test_merge_nosort(self):
],
}
df = DataFrame.from_dict(d)
var3 = df.var3.unique()
with tm.assert_produces_warning(FutureWarning, match="DatetimeArray"):
var3 = df.var3.unique()
var3.sort()
new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)})

result = df.merge(new, on="var3", sort=False)
exp = merge(df, new, on="var3", sort=False)
tm.assert_frame_equal(result, exp)

assert (df.var3.unique() == result.var3.unique()).all()
with tm.assert_produces_warning(FutureWarning, match="DatetimeArray"):
res = df.var3.unique()
expected = result.var3.unique()
assert (res == expected).all()

@pytest.mark.parametrize(
("sort", "values"), [(False, [1, 1, 0, 1, 1]), (True, [0, 1, 1, 1, 1])]
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,13 @@ def test_dtype_preservation(self, any_numpy_dtype):
data = [1, 2, 2]
uniques = [1, 2]

result = Series(data, dtype=any_numpy_dtype).unique()
warn = None
if np.dtype(any_numpy_dtype).kind in ["m", "M"]:
warn = FutureWarning

ser = Series(data, dtype=any_numpy_dtype)
with tm.assert_produces_warning(warn, match="DatetimeArray|TimedeltaArray"):
result = ser.unique()
expected = np.array(uniques, dtype=any_numpy_dtype)

tm.assert_numpy_array_equal(result, expected)
Expand Down