From 313f4a2d00771fd8e02b231af73873aa2c0abec3 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 28 Mar 2024 18:48:14 -0700 Subject: [PATCH] DEPR: allowing non-standard types in unique, factorize, isin --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/algorithms.py | 9 +-- pandas/tests/libs/test_hashtable.py | 7 +- pandas/tests/test_algos.py | 99 +++++++++++++---------------- 4 files changed, 50 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 98b497bd6988b..bd1610cab1ecd 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -206,6 +206,7 @@ Removal of prior version deprecations/changes - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) +- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`) - Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`) - Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6a6096567c65d..33beef23197bd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -226,12 +226,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike: # GH#52986 if func_name != "isin-targets": # Make an exception for the comps argument in isin. - warnings.warn( - f"{func_name} with argument that is not not a Series, Index, " - "ExtensionArray, or np.ndarray is deprecated and will raise in a " - "future version.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + f"{func_name} requires a Series, Index, " + f"ExtensionArray, or np.ndarray, got {type(values).__name__}." ) inferred = lib.infer_dtype(values, skipna=False) diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index e54764f9ac4a6..b70386191d9d9 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -730,12 +730,11 @@ def test_mode(self, dtype): def test_ismember_tuple_with_nans(): # GH-41836 - values = [("a", float("nan")), ("b", 1)] + values = np.empty(2, dtype=object) + values[:] = [("a", float("nan")), ("b", 1)] comps = [("a", float("nan"))] - msg = "isin with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = isin(values, comps) + result = isin(values, comps) expected = np.array([True, False], dtype=np.bool_) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 365ec452a7f25..1b5d33fc10595 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -54,16 +54,13 @@ class TestFactorize: def test_factorize_complex(self): # GH#17927 - array = [1, 2, 2 + 1j] - msg = "factorize with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - labels, uniques = algos.factorize(array) + array = np.array([1, 2, 2 + 1j], dtype=complex) + labels, uniques = algos.factorize(array) expected_labels = np.array([0, 1, 2], dtype=np.intp) tm.assert_numpy_array_equal(labels, expected_labels) - # Should return a complex dtype in the future - expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object) + expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=complex) tm.assert_numpy_array_equal(uniques, expected_uniques) def test_factorize(self, index_or_series_obj, sort): @@ -265,9 +262,8 @@ def test_factorizer_object_with_nan(self): ) def test_factorize_tuple_list(self, data, expected_codes, expected_uniques): # GH9454 - msg = "factorize with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - codes, uniques = pd.factorize(data) + data = com.asarray_tuplesafe(data, dtype=object) + codes, uniques = pd.factorize(data) tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp)) @@ -488,12 +484,12 @@ def test_object_factorize_use_na_sentinel_false( "data, expected_codes, expected_uniques", [ ( - [1, None, 1, 2], + np.array([1, None, 1, 2], dtype=object), np.array([0, 1, 0, 2], dtype=np.dtype("intp")), np.array([1, np.nan, 2], dtype="O"), ), ( - [1, np.nan, 1, 2], + np.array([1, np.nan, 1, 2], dtype=np.float64), np.array([0, 1, 0, 2], dtype=np.dtype("intp")), np.array([1, np.nan, 2], dtype=np.float64), ), @@ -502,9 +498,7 @@ def test_object_factorize_use_na_sentinel_false( def test_int_factorize_use_na_sentinel_false( self, data, expected_codes, expected_uniques ): - msg = "factorize with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - codes, uniques = algos.factorize(data, use_na_sentinel=False) + codes, uniques = algos.factorize(data, use_na_sentinel=False) tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True) tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True) @@ -777,9 +771,8 @@ def test_order_of_appearance(self): result = pd.unique(Series([2] + [1] * 5)) tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64")) - msg = "unique with argument that is not not a Series, Index," - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pd.unique(list("aabc")) + data = np.array(["a", "a", "b", "c"], dtype=object) + result = pd.unique(data) expected = np.array(["a", "b", "c"], dtype=object) tm.assert_numpy_array_equal(result, expected) @@ -815,9 +808,8 @@ def test_order_of_appearance_dt64tz(self, unit): ) def test_tuple_with_strings(self, arg, expected): # see GH 17108 - msg = "unique with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pd.unique(arg) + arg = com.asarray_tuplesafe(arg, dtype=object) + result = pd.unique(arg) tm.assert_numpy_array_equal(result, expected) def test_obj_none_preservation(self): @@ -904,12 +896,6 @@ def test_invalid(self): algos.isin([1], 1) def test_basic(self): - msg = "isin with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = algos.isin([1, 2], [1]) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - result = algos.isin(np.array([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) @@ -926,21 +912,20 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = algos.isin(["a", "b"], ["a"]) + arg = np.array(["a", "b"], dtype=object) + result = algos.isin(arg, ["a"]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(Series(["a", "b"]), Series(["a"])) + result = algos.isin(Series(arg), Series(["a"])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(Series(["a", "b"]), {"a"}) + result = algos.isin(Series(arg), {"a"}) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = algos.isin(["a", "b"], [1]) + result = algos.isin(arg, [1]) expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) @@ -1058,12 +1043,10 @@ def test_same_nan_is_in(self): # at least, isin() should follow python's "np.nan in [nan] == True" # casting to -> np.float64 -> another float-object somewhere on # the way could lead jeopardize this behavior - comps = [np.nan] # could be casted to float64 + comps = np.array([np.nan], dtype=object) # could be casted to float64 values = [np.nan] expected = np.array([True]) - msg = "isin with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = algos.isin(comps, values) + result = algos.isin(comps, values) tm.assert_numpy_array_equal(expected, result) def test_same_nan_is_in_large(self): @@ -1098,12 +1081,12 @@ def __hash__(self): a, b = LikeNan(), LikeNan() - msg = "isin with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - # same object -> True - tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True])) - # different objects -> False - tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False])) + arg = np.array([a], dtype=object) + + # same object -> True + tm.assert_numpy_array_equal(algos.isin(arg, [a]), np.array([True])) + # different objects -> False + tm.assert_numpy_array_equal(algos.isin(arg, [b]), np.array([False])) def test_different_nans(self): # GH 22160 @@ -1132,12 +1115,11 @@ def test_different_nans(self): def test_no_cast(self): # GH 22160 # ensure 42 is not casted to a string - comps = ["ss", 42] + comps = np.array(["ss", 42], dtype=object) values = ["42"] expected = np.array([False, False]) - msg = "isin with argument that is not not a Series, Index" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = algos.isin(comps, values) + + result = algos.isin(comps, values) tm.assert_numpy_array_equal(expected, result) @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) @@ -1658,27 +1640,32 @@ def test_unique_tuples(self, arr, uniques): expected = np.empty(len(uniques), dtype=object) expected[:] = uniques - msg = "unique with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pd.unique(arr) - tm.assert_numpy_array_equal(result, expected) + msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list" + with pytest.raises(TypeError, match=msg): + # GH#52986 + pd.unique(arr) + + res = pd.unique(com.asarray_tuplesafe(arr, dtype=object)) + tm.assert_numpy_array_equal(res, expected) @pytest.mark.parametrize( "array,expected", [ ( [1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j], - # Should return a complex dtype in the future - np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object), + np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=complex), ) ], ) def test_unique_complex_numbers(self, array, expected): # GH 17927 - msg = "unique with argument that is not not a Series" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pd.unique(array) - tm.assert_numpy_array_equal(result, expected) + msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list" + with pytest.raises(TypeError, match=msg): + # GH#52986 + pd.unique(array) + + res = pd.unique(np.array(array)) + tm.assert_numpy_array_equal(res, expected) class TestHashTable: