diff --git a/pandas/core/base.py b/pandas/core/base.py index 7d51b50f783a5..99a14166055f3 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -15,6 +15,7 @@ cast, final, ) +import warnings import numpy as np @@ -34,6 +35,7 @@ cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -977,6 +979,14 @@ def unique(self): if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries): # GH#31182 Series._values returns EA, unpack for backward-compat if getattr(self.dtype, "tz", None) is None: + + warnings.warn( + f"Series.unique behavior with {self.dtype} dtype is " + "deprecated. In a future version this will return a " + f"{type(self._values)} instead of a np.ndarray", + FutureWarning, + stacklevel=find_stack_level(), + ) result = np.asarray(result) else: result = unique1d(values) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 10f391a49d98f..6053dc1928fc9 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -193,6 +193,7 @@ def test_value_counts_bins(index_or_series): def test_value_counts_datetime64(index_or_series): klass = index_or_series + warn = None if klass is Index else FutureWarning # GH 3002, datetime64[ns] # don't test names though @@ -223,12 +224,15 @@ def test_value_counts_datetime64(index_or_series): ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"], dtype="datetime64[ns]", ) + with tm.assert_produces_warning(warn, match="DatetimeArray"): + unq = s.unique() if isinstance(s, Index): - tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) + tm.assert_index_equal(unq, DatetimeIndex(expected)) else: - tm.assert_numpy_array_equal(s.unique(), expected) + tm.assert_numpy_array_equal(unq, expected) - assert s.nunique() == 3 + with tm.assert_produces_warning(warn, match="DatetimeArray"): + assert s.nunique() == 3 # with NaT s = df["dt"].copy() @@ -243,7 +247,9 @@ def test_value_counts_datetime64(index_or_series): tm.assert_series_equal(result, expected_s) assert s.dtype == "datetime64[ns]" - unique = s.unique() + warn = None if isinstance(s, DatetimeIndex) else FutureWarning + with tm.assert_produces_warning(warn, match="DatetimeArray"): + unique = s.unique() assert unique.dtype == "datetime64[ns]" # numpy_array_equal cannot compare pd.NaT @@ -254,8 +260,9 @@ def test_value_counts_datetime64(index_or_series): tm.assert_numpy_array_equal(unique[:3], expected) assert pd.isna(unique[3]) - assert s.nunique() == 3 - assert s.nunique(dropna=False) == 4 + with tm.assert_produces_warning(warn, match="DatetimeArray"): + assert s.nunique() == 3 + assert s.nunique(dropna=False) == 4 # timedelta64[ns] td = df.dt - df.dt + timedelta(1) @@ -269,7 +276,9 @@ def test_value_counts_datetime64(index_or_series): if isinstance(td, Index): tm.assert_index_equal(td.unique(), expected) else: - tm.assert_numpy_array_equal(td.unique(), expected.values) + with tm.assert_produces_warning(FutureWarning, match="TimedeltaArray"): + res = td.unique() + tm.assert_numpy_array_equal(res, expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name="dt") diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 9d778cdee6a5b..8e5b1f5cffe87 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -70,7 +70,9 @@ def assert_stat_op_calc( f = getattr(frame, opname) if check_dates: - expected_warning = FutureWarning if opname in ["mean", "median"] else None + expected_warning = ( + FutureWarning if opname in ["mean", "median", "nunique"] else None + ) df = DataFrame({"b": date_range("1/1/2001", periods=2)}) with tm.assert_produces_warning(expected_warning): result = getattr(df, opname)() diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index cd07b3814d023..c07081889e557 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -625,7 +625,8 @@ def test_merge_nosort(self): ], } df = DataFrame.from_dict(d) - var3 = df.var3.unique() + with tm.assert_produces_warning(FutureWarning, match="DatetimeArray"): + var3 = df.var3.unique() var3.sort() new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)}) @@ -633,7 +634,10 @@ def test_merge_nosort(self): exp = merge(df, new, on="var3", sort=False) tm.assert_frame_equal(result, exp) - assert (df.var3.unique() == result.var3.unique()).all() + with tm.assert_produces_warning(FutureWarning, match="DatetimeArray"): + res = df.var3.unique() + expected = result.var3.unique() + assert (res == expected).all() @pytest.mark.parametrize( ("sort", "values"), [(False, [1, 1, 0, 1, 1]), (True, [0, 1, 1, 1, 1])] diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b4836dffffa06..99e652d091068 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -534,7 +534,13 @@ def test_dtype_preservation(self, any_numpy_dtype): data = [1, 2, 2] uniques = [1, 2] - result = Series(data, dtype=any_numpy_dtype).unique() + warn = None + if np.dtype(any_numpy_dtype).kind in ["m", "M"]: + warn = FutureWarning + + ser = Series(data, dtype=any_numpy_dtype) + with tm.assert_produces_warning(warn, match="DatetimeArray|TimedeltaArray"): + result = ser.unique() expected = np.array(uniques, dtype=any_numpy_dtype) tm.assert_numpy_array_equal(result, expected)