From 313f4a2d00771fd8e02b231af73873aa2c0abec3 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 28 Mar 2024 18:48:14 -0700
Subject: [PATCH] DEPR: enforce deprecation of non-standard types in unique, factorize, isin

---
 doc/source/whatsnew/v3.0.0.rst      |  1 +
 pandas/core/algorithms.py           |  9 +--
 pandas/tests/libs/test_hashtable.py |  7 +-
 pandas/tests/test_algos.py          | 99 +++++++++++++----------------
 4 files changed, 50 insertions(+), 66 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 98b497bd6988b..bd1610cab1ecd 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -206,6 +206,7 @@ Removal of prior version deprecations/changes
 - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
 - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
 - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
+- Disallow passing non-standard types (anything other than ``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
 - Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`)
 - Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`)
 - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 6a6096567c65d..33beef23197bd 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -226,12 +226,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike:
         # GH#52986
         if func_name != "isin-targets":
             # Make an exception for the comps argument in isin.
-            warnings.warn(
-                f"{func_name} with argument that is not not a Series, Index, "
-                "ExtensionArray, or np.ndarray is deprecated and will raise in a "
-                "future version.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
+            raise TypeError(
+                f"{func_name} requires a Series, Index, "
+                f"ExtensionArray, or np.ndarray, got {type(values).__name__}."
             )
 
         inferred = lib.infer_dtype(values, skipna=False)
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index e54764f9ac4a6..b70386191d9d9 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -730,12 +730,11 @@ def test_mode(self, dtype):
 
 def test_ismember_tuple_with_nans():
     # GH-41836
-    values = [("a", float("nan")), ("b", 1)]
+    values = np.empty(2, dtype=object)
+    values[:] = [("a", float("nan")), ("b", 1)]
     comps = [("a", float("nan"))]
 
-    msg = "isin with argument that is not not a Series"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = isin(values, comps)
+    result = isin(values, comps)
     expected = np.array([True, False], dtype=np.bool_)
     tm.assert_numpy_array_equal(result, expected)
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 365ec452a7f25..1b5d33fc10595 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -54,16 +54,13 @@
 class TestFactorize:
     def test_factorize_complex(self):
         # GH#17927
-        array = [1, 2, 2 + 1j]
-        msg = "factorize with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            labels, uniques = algos.factorize(array)
+        array = np.array([1, 2, 2 + 1j], dtype=complex)
+        labels, uniques = algos.factorize(array)
 
         expected_labels = np.array([0, 1, 2], dtype=np.intp)
         tm.assert_numpy_array_equal(labels, expected_labels)
 
-        # Should return a complex dtype in the future
-        expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object)
+        expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=complex)
         tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     def test_factorize(self, index_or_series_obj, sort):
@@ -265,9 +262,8 @@ def test_factorizer_object_with_nan(self):
     )
     def test_factorize_tuple_list(self, data, expected_codes, expected_uniques):
         # GH9454
-        msg = "factorize with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            codes, uniques = pd.factorize(data)
+        data = com.asarray_tuplesafe(data, dtype=object)
+        codes, uniques = pd.factorize(data)
 
         tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp))
 
@@ -488,12 +484,12 @@ def test_object_factorize_use_na_sentinel_false(
         "data, expected_codes, expected_uniques",
         [
             (
-                [1, None, 1, 2],
+                np.array([1, None, 1, 2], dtype=object),
                 np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
                 np.array([1, np.nan, 2], dtype="O"),
             ),
             (
-                [1, np.nan, 1, 2],
+                np.array([1, np.nan, 1, 2], dtype=np.float64),
                 np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
                 np.array([1, np.nan, 2], dtype=np.float64),
             ),
@@ -502,9 +498,7 @@ def test_object_factorize_use_na_sentinel_false(
     def test_int_factorize_use_na_sentinel_false(
         self, data, expected_codes, expected_uniques
     ):
-        msg = "factorize with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            codes, uniques = algos.factorize(data, use_na_sentinel=False)
+        codes, uniques = algos.factorize(data, use_na_sentinel=False)
 
         tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True)
         tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True)
@@ -777,9 +771,8 @@ def test_order_of_appearance(self):
         result = pd.unique(Series([2] + [1] * 5))
         tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64"))
 
-        msg = "unique with argument that is not not a Series, Index,"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = pd.unique(list("aabc"))
+        data = np.array(["a", "a", "b", "c"], dtype=object)
+        result = pd.unique(data)
         expected = np.array(["a", "b", "c"], dtype=object)
         tm.assert_numpy_array_equal(result, expected)
 
@@ -815,9 +808,8 @@ def test_order_of_appearance_dt64tz(self, unit):
     )
     def test_tuple_with_strings(self, arg, expected):
         # see GH 17108
-        msg = "unique with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = pd.unique(arg)
+        arg = com.asarray_tuplesafe(arg, dtype=object)
+        result = pd.unique(arg)
         tm.assert_numpy_array_equal(result, expected)
 
     def test_obj_none_preservation(self):
@@ -904,12 +896,6 @@ def test_invalid(self):
             algos.isin([1], 1)
 
     def test_basic(self):
-        msg = "isin with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = algos.isin([1, 2], [1])
-        expected = np.array([True, False])
-        tm.assert_numpy_array_equal(result, expected)
-
         result = algos.isin(np.array([1, 2]), [1])
         expected = np.array([True, False])
         tm.assert_numpy_array_equal(result, expected)
@@ -926,21 +912,20 @@ def test_basic(self):
         expected = np.array([True, False])
         tm.assert_numpy_array_equal(result, expected)
 
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = algos.isin(["a", "b"], ["a"])
+        arg = np.array(["a", "b"], dtype=object)
+        result = algos.isin(arg, ["a"])
         expected = np.array([True, False])
         tm.assert_numpy_array_equal(result, expected)
 
-        result = algos.isin(Series(["a", "b"]), Series(["a"]))
+        result = algos.isin(Series(arg), Series(["a"]))
         expected = np.array([True, False])
         tm.assert_numpy_array_equal(result, expected)
 
-        result = algos.isin(Series(["a", "b"]), {"a"})
+        result = algos.isin(Series(arg), {"a"})
         expected = np.array([True, False])
         tm.assert_numpy_array_equal(result, expected)
 
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = algos.isin(["a", "b"], [1])
+        result = algos.isin(arg, [1])
         expected = np.array([False, False])
         tm.assert_numpy_array_equal(result, expected)
 
@@ -1058,12 +1043,10 @@ def test_same_nan_is_in(self):
         # at least, isin() should follow python's "np.nan in [nan] == True"
         # casting to -> np.float64 -> another float-object somewhere on
         # the way could lead jeopardize this behavior
-        comps = [np.nan]  # could be casted to float64
+        comps = np.array([np.nan], dtype=object)  # could be casted to float64
         values = [np.nan]
         expected = np.array([True])
-        msg = "isin with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = algos.isin(comps, values)
+        result = algos.isin(comps, values)
         tm.assert_numpy_array_equal(expected, result)
 
     def test_same_nan_is_in_large(self):
@@ -1098,12 +1081,12 @@ def __hash__(self):
 
         a, b = LikeNan(), LikeNan()
 
-        msg = "isin with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            # same object -> True
-            tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True]))
-            # different objects -> False
-            tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False]))
+        arg = np.array([a], dtype=object)
+
+        # same object -> True
+        tm.assert_numpy_array_equal(algos.isin(arg, [a]), np.array([True]))
+        # different objects -> False
+        tm.assert_numpy_array_equal(algos.isin(arg, [b]), np.array([False]))
 
     def test_different_nans(self):
         # GH 22160
@@ -1132,12 +1115,11 @@ def test_different_nans(self):
     def test_no_cast(self):
         # GH 22160
         # ensure 42 is not casted to a string
-        comps = ["ss", 42]
+        comps = np.array(["ss", 42], dtype=object)
         values = ["42"]
         expected = np.array([False, False])
-        msg = "isin with argument that is not not a Series, Index"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = algos.isin(comps, values)
+
+        result = algos.isin(comps, values)
         tm.assert_numpy_array_equal(expected, result)
 
     @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
@@ -1658,27 +1640,32 @@ def test_unique_tuples(self, arr, uniques):
         expected = np.empty(len(uniques), dtype=object)
         expected[:] = uniques
 
-        msg = "unique with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = pd.unique(arr)
-        tm.assert_numpy_array_equal(result, expected)
+        msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list"
+        with pytest.raises(TypeError, match=msg):
+            # GH#52986
+            pd.unique(arr)
+
+        res = pd.unique(com.asarray_tuplesafe(arr, dtype=object))
+        tm.assert_numpy_array_equal(res, expected)
 
     @pytest.mark.parametrize(
         "array,expected",
         [
             (
                 [1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
-                # Should return a complex dtype in the future
-                np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object),
+                np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=complex),
             )
         ],
     )
     def test_unique_complex_numbers(self, array, expected):
         # GH 17927
-        msg = "unique with argument that is not not a Series"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = pd.unique(array)
-        tm.assert_numpy_array_equal(result, expected)
+        msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list"
+        with pytest.raises(TypeError, match=msg):
+            # GH#52986
+            pd.unique(array)
+
+        res = pd.unique(np.array(array))
+        tm.assert_numpy_array_equal(res, expected)
 
 
 class TestHashTable: