Skip to content

DEPR: allowing non-standard types in unique, factorize, isin #58058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ Removal of prior version deprecations/changes
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
- Disallow passing anything other than a ``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series` to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
- Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`)
- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`)
- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
Expand Down
9 changes: 3 additions & 6 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,12 +226,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike:
# GH#52986
if func_name != "isin-targets":
# Make an exception for the comps argument in isin.
warnings.warn(
f"{func_name} with argument that is not not a Series, Index, "
"ExtensionArray, or np.ndarray is deprecated and will raise in a "
"future version.",
FutureWarning,
stacklevel=find_stack_level(),
raise TypeError(
f"{func_name} requires a Series, Index, "
f"ExtensionArray, or np.ndarray, got {type(values).__name__}."
)

inferred = lib.infer_dtype(values, skipna=False)
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/libs/test_hashtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,12 +730,11 @@ def test_mode(self, dtype):

def test_ismember_tuple_with_nans():
# GH-41836
values = [("a", float("nan")), ("b", 1)]
values = np.empty(2, dtype=object)
values[:] = [("a", float("nan")), ("b", 1)]
comps = [("a", float("nan"))]

msg = "isin with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = isin(values, comps)
result = isin(values, comps)
expected = np.array([True, False], dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)

Expand Down
99 changes: 43 additions & 56 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,13 @@
class TestFactorize:
def test_factorize_complex(self):
# GH#17927
array = [1, 2, 2 + 1j]
msg = "factorize with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
labels, uniques = algos.factorize(array)
array = np.array([1, 2, 2 + 1j], dtype=complex)
labels, uniques = algos.factorize(array)

expected_labels = np.array([0, 1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(labels, expected_labels)

# Should return a complex dtype in the future
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object)
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=complex)
tm.assert_numpy_array_equal(uniques, expected_uniques)

def test_factorize(self, index_or_series_obj, sort):
Expand Down Expand Up @@ -265,9 +262,8 @@ def test_factorizer_object_with_nan(self):
)
def test_factorize_tuple_list(self, data, expected_codes, expected_uniques):
# GH9454
msg = "factorize with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
codes, uniques = pd.factorize(data)
data = com.asarray_tuplesafe(data, dtype=object)
codes, uniques = pd.factorize(data)

tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp))

Expand Down Expand Up @@ -488,12 +484,12 @@ def test_object_factorize_use_na_sentinel_false(
"data, expected_codes, expected_uniques",
[
(
[1, None, 1, 2],
np.array([1, None, 1, 2], dtype=object),
np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
np.array([1, np.nan, 2], dtype="O"),
),
(
[1, np.nan, 1, 2],
np.array([1, np.nan, 1, 2], dtype=np.float64),
np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
np.array([1, np.nan, 2], dtype=np.float64),
),
Expand All @@ -502,9 +498,7 @@ def test_object_factorize_use_na_sentinel_false(
def test_int_factorize_use_na_sentinel_false(
self, data, expected_codes, expected_uniques
):
msg = "factorize with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
codes, uniques = algos.factorize(data, use_na_sentinel=False)
codes, uniques = algos.factorize(data, use_na_sentinel=False)

tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True)
tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True)
Expand Down Expand Up @@ -777,9 +771,8 @@ def test_order_of_appearance(self):
result = pd.unique(Series([2] + [1] * 5))
tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64"))

msg = "unique with argument that is not not a Series, Index,"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.unique(list("aabc"))
data = np.array(["a", "a", "b", "c"], dtype=object)
result = pd.unique(data)
expected = np.array(["a", "b", "c"], dtype=object)
tm.assert_numpy_array_equal(result, expected)

Expand Down Expand Up @@ -815,9 +808,8 @@ def test_order_of_appearance_dt64tz(self, unit):
)
def test_tuple_with_strings(self, arg, expected):
# see GH 17108
msg = "unique with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.unique(arg)
arg = com.asarray_tuplesafe(arg, dtype=object)
result = pd.unique(arg)
tm.assert_numpy_array_equal(result, expected)

def test_obj_none_preservation(self):
Expand Down Expand Up @@ -904,12 +896,6 @@ def test_invalid(self):
algos.isin([1], 1)

def test_basic(self):
msg = "isin with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.isin([1, 2], [1])
expected = np.array([True, False])
tm.assert_numpy_array_equal(result, expected)

result = algos.isin(np.array([1, 2]), [1])
expected = np.array([True, False])
tm.assert_numpy_array_equal(result, expected)
Expand All @@ -926,21 +912,20 @@ def test_basic(self):
expected = np.array([True, False])
tm.assert_numpy_array_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.isin(["a", "b"], ["a"])
arg = np.array(["a", "b"], dtype=object)
result = algos.isin(arg, ["a"])
expected = np.array([True, False])
tm.assert_numpy_array_equal(result, expected)

result = algos.isin(Series(["a", "b"]), Series(["a"]))
result = algos.isin(Series(arg), Series(["a"]))
expected = np.array([True, False])
tm.assert_numpy_array_equal(result, expected)

result = algos.isin(Series(["a", "b"]), {"a"})
result = algos.isin(Series(arg), {"a"})
expected = np.array([True, False])
tm.assert_numpy_array_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.isin(["a", "b"], [1])
result = algos.isin(arg, [1])
expected = np.array([False, False])
tm.assert_numpy_array_equal(result, expected)

Expand Down Expand Up @@ -1058,12 +1043,10 @@ def test_same_nan_is_in(self):
# at least, isin() should follow python's "np.nan in [nan] == True"
# casting to -> np.float64 -> another float-object somewhere on
# the way could jeopardize this behavior
comps = [np.nan] # could be casted to float64
comps = np.array([np.nan], dtype=object) # could be casted to float64
values = [np.nan]
expected = np.array([True])
msg = "isin with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.isin(comps, values)
result = algos.isin(comps, values)
tm.assert_numpy_array_equal(expected, result)

def test_same_nan_is_in_large(self):
Expand Down Expand Up @@ -1098,12 +1081,12 @@ def __hash__(self):

a, b = LikeNan(), LikeNan()

msg = "isin with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
# same object -> True
tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True]))
# different objects -> False
tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False]))
arg = np.array([a], dtype=object)

# same object -> True
tm.assert_numpy_array_equal(algos.isin(arg, [a]), np.array([True]))
# different objects -> False
tm.assert_numpy_array_equal(algos.isin(arg, [b]), np.array([False]))

def test_different_nans(self):
# GH 22160
Expand Down Expand Up @@ -1132,12 +1115,11 @@ def test_different_nans(self):
def test_no_cast(self):
# GH 22160
# ensure 42 is not casted to a string
comps = ["ss", 42]
comps = np.array(["ss", 42], dtype=object)
values = ["42"]
expected = np.array([False, False])
msg = "isin with argument that is not not a Series, Index"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.isin(comps, values)

result = algos.isin(comps, values)
tm.assert_numpy_array_equal(expected, result)

@pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
Expand Down Expand Up @@ -1658,27 +1640,32 @@ def test_unique_tuples(self, arr, uniques):
expected = np.empty(len(uniques), dtype=object)
expected[:] = uniques

msg = "unique with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.unique(arr)
tm.assert_numpy_array_equal(result, expected)
msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list"
with pytest.raises(TypeError, match=msg):
# GH#52986
pd.unique(arr)

res = pd.unique(com.asarray_tuplesafe(arr, dtype=object))
tm.assert_numpy_array_equal(res, expected)

@pytest.mark.parametrize(
"array,expected",
[
(
[1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
# Should return a complex dtype in the future
np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object),
np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=complex),
)
],
)
def test_unique_complex_numbers(self, array, expected):
# GH 17927
msg = "unique with argument that is not not a Series"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.unique(array)
tm.assert_numpy_array_equal(result, expected)
msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list"
with pytest.raises(TypeError, match=msg):
# GH#52986
pd.unique(array)

res = pd.unique(np.array(array))
tm.assert_numpy_array_equal(res, expected)


class TestHashTable:
Expand Down
Loading