Skip to content

API/DEPR: reintroduce errors kwd in where, mask, deprecate inconsistent behaviors #45193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,6 @@ Other Deprecations
- Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
- Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`)
- Deprecated casting behavior when passing an item with a mismatched timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where`, :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`, :issue:`44940`)
- Deprecated the ``errors`` keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and :meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`)
- Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`; in a future version the argument will be removed (:issue:`43396`)
- Deprecated passing a non-boolean argument to ``sort`` in :func:`concat` (:issue:`41518`)
- Deprecated passing arguments as positional for :func:`read_fwf` other than ``filepath_or_buffer`` (:issue:`41485`):
Expand Down
9 changes: 1 addition & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9010,14 +9010,6 @@ def _where(
"""
inplace = validate_bool_kwarg(inplace, "inplace")

if errors is not lib.no_default:
warnings.warn(
f"The 'errors' keyword in {type(self).__name__}.where and mask is "
"deprecated and will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)

if axis is not None:
axis = self._get_axis_number(axis)

Expand Down Expand Up @@ -9135,6 +9127,7 @@ def _where(
other=other,
cond=cond,
align=align,
errors=errors,
)
result = self._constructor(new_data)
return result.__finalize__(self)
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T

return type(self)(result_arrays, self._axes)

def where(self: T, other, cond, align: bool) -> T:
def where(self: T, other, cond, align: bool, errors) -> T:
if align:
align_keys = ["other", "cond"]
else:
Expand All @@ -339,13 +339,14 @@ def where(self: T, other, cond, align: bool) -> T:
align_keys=align_keys,
other=other,
cond=cond,
errors=errors,
)

# TODO what is this used for?
# def setitem(self, indexer, value) -> ArrayManager:
# return self.apply_with_block("setitem", indexer=indexer, value=value)

def putmask(self, mask, new, align: bool = True):
def putmask(self, mask, new, align: bool = True, errors=lib.no_default):
if align:
align_keys = ["new", "mask"]
else:
Expand All @@ -357,6 +358,7 @@ def putmask(self, mask, new, align: bool = True):
align_keys=align_keys,
mask=mask,
new=new,
errors=errors,
)

def diff(self: T, n: int, axis: int) -> T:
Expand Down
128 changes: 109 additions & 19 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ def setitem(self, indexer, value):

return self

def putmask(self, mask, new) -> list[Block]:
def putmask(self, mask, new, errors) -> list[Block]:
"""
putmask the data to the block; it is possible that we may create a
new dtype of block
Expand Down Expand Up @@ -960,15 +960,15 @@ def putmask(self, mask, new) -> list[Block]:
if not self.is_object and is_valid_na_for_dtype(new, self.dtype):
new = self.fill_value

if self._can_hold_element(new):
if self._can_hold_element(new) or errors == "raise":
putmask_without_repeat(values.T, mask, new)
return [self]

elif np_version_under1p20 and infer_dtype_from(new)[0].kind in ["m", "M"]:
# using putmask with object dtype will incorrectly cast to object
# Having excluded self._can_hold_element, we know we cannot operate
# in-place, so we are safe using `where`
return self.where(new, ~mask)
return self.where(new, ~mask, errors=errors)

elif noop:
return [self]
Expand All @@ -978,7 +978,8 @@ def putmask(self, mask, new) -> list[Block]:

if not is_list_like(new):
# putmask_smart can't save us the need to cast
return self.coerce_to_target_dtype(new).putmask(mask, new)
blk = self.coerce_to_target_dtype(new)
return blk.putmask(mask, new, errors=errors)

# This differs from
# `self.coerce_to_target_dtype(new).putmask(mask, new)`
Expand All @@ -999,7 +1000,7 @@ def putmask(self, mask, new) -> list[Block]:
n = new[:, i : i + 1]

submask = orig_mask[:, i : i + 1]
rbs = nb.putmask(submask, n)
rbs = nb.putmask(submask, n, errors=errors)
res_blocks.extend(rbs)
return res_blocks

Expand Down Expand Up @@ -1141,7 +1142,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo

return [self.make_block(new_values)]

def where(self, other, cond) -> list[Block]:
def where(self, other, cond, errors) -> list[Block]:
"""
evaluate the block; return result block(s) from the result

Expand Down Expand Up @@ -1178,10 +1179,10 @@ def where(self, other, cond) -> list[Block]:
# GH-39595: Always return a copy
result = values.copy()

elif not self._can_hold_element(other):
elif not self._can_hold_element(other) and errors != "raise":
# we cannot coerce, return a compat dtype
block = self.coerce_to_target_dtype(other)
blocks = block.where(orig_other, cond)
blocks = block.where(orig_other, cond, errors=errors)
return self._maybe_downcast(blocks, "infer")

else:
Expand Down Expand Up @@ -1330,7 +1331,7 @@ class EABackedBlock(Block):

values: ExtensionArray

def where(self, other, cond) -> list[Block]:
def where(self, other, cond, errors) -> list[Block]:
arr = self.values.T

cond = extract_bool_array(cond)
Expand All @@ -1350,6 +1351,9 @@ def where(self, other, cond) -> list[Block]:
try:
res_values = arr._where(cond, other).T
except (ValueError, TypeError) as err:
if errors == "raise":
raise

if isinstance(err, ValueError):
# TODO(2.0): once DTA._validate_setitem_value deprecation
# is enforced, stop catching ValueError here altogether
Expand All @@ -1362,8 +1366,24 @@ def where(self, other, cond) -> list[Block]:
if blk.dtype == _dtype_obj:
# For now at least only support casting e.g.
# Interval[int64]->Interval[float64]
raise
return blk.where(other, cond)
if errors is lib.no_default:
# TODO: no tests get here 2022-01-04
warnings.warn(
"The default behavior of 'where' when setting an "
f"incompatible value into an array with dtype={self.dtype} "
"is deprecated. In a future version, the array and "
"value will be coerced to a common dtype (matching "
"the behavior of other dtypes). To retain the old "
"behavior, pass `errors='raise'`. To get the future "
"behavior, pass `errors='coerce'`.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise
else:
# "raise" case handled above
assert errors == "coerce"
return blk.where(other, cond, errors=errors)

elif isinstance(self, NDArrayBackedExtensionBlock):
# NB: not (yet) the same as
Expand All @@ -1372,18 +1392,51 @@ def where(self, other, cond) -> list[Block]:
# TODO: don't special-case
# Note: this is the main place where the fallback logic
# is different from EABackedBlock.putmask.
raise
if errors is lib.no_default:
warnings.warn(
"The default behavior of 'where' when setting an "
f"incompatible value into an array with dtype={self.dtype} "
"is deprecated. In a future version, the array and "
"value will be coerced to a common dtype (matching "
"the behavior of other dtypes). To retain the old "
"behavior, pass `errors='raise'`. To get the future "
"behavior, pass `errors='coerce'`.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise
else:
# "raise" case handled above
assert errors == "coerce"
blk = self.coerce_to_target_dtype(other)
nbs = blk.where(other, cond)
nbs = blk.where(other, cond, errors=errors)
return self._maybe_downcast(nbs, "infer")

else:
raise
if errors is lib.no_default:
warnings.warn(
"The default behavior of 'where' when setting an "
f"incompatible value into an array with dtype={self.dtype} "
"is deprecated. In a future version, the array and value "
"will be coerced to a common dtype (matching the behavior "
"of other dtypes). To retain the old behavior, pass "
"`errors='raise'`. To get the future behavior, pass "
"`errors='coerce'`.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise
else:
# "raise" case handled above
assert errors == "coerce"
blk = self.coerce_to_target_dtype(other)
nbs = blk.where(other, cond, errors=errors)
return self._maybe_downcast(nbs, "infer")

nb = self.make_block_same_class(res_values)
return [nb]

def putmask(self, mask, new) -> list[Block]:
def putmask(self, mask, new, errors) -> list[Block]:
"""
See Block.putmask.__doc__
"""
Expand All @@ -1397,6 +1450,9 @@ def putmask(self, mask, new) -> list[Block]:
# Caller is responsible for ensuring matching lengths
values._putmask(mask, new)
except (TypeError, ValueError) as err:
if errors == "raise":
raise

if isinstance(err, ValueError) and "Timezones don't match" not in str(err):
# TODO(2.0): remove catching ValueError at all since
# DTA raising here is deprecated
Expand All @@ -1409,17 +1465,51 @@ def putmask(self, mask, new) -> list[Block]:
if blk.dtype == _dtype_obj:
# For now at least, only support casting e.g.
# Interval[int64]->Interval[float64],
raise
return blk.putmask(mask, new)
if errors is lib.no_default:
# TODO: no tests get here 2022-01-04
warnings.warn(
"The default behavior of 'putmask' when setting an "
f"incompatible value into an array with dtype={self.dtype} "
"is deprecated. In a future version, the array and "
"value will be coerced to a common dtype (matching "
"the behavior of other dtypes). To retain the old "
"behavior, pass `errors='raise'`. To get the future "
"behavior, pass `errors='coerce'`.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise
else:
# "raise" case handled above
assert errors == "coerce"
return blk.putmask(mask, new, errors=errors)

elif isinstance(self, NDArrayBackedExtensionBlock):
# NB: not (yet) the same as
# isinstance(values, NDArrayBackedExtensionArray)
blk = self.coerce_to_target_dtype(new)
return blk.putmask(mask, new)
return blk.putmask(mask, new, errors=errors)

else:
raise
if errors is lib.no_default:
warnings.warn(
"The default behavior of 'putmask' when setting an "
f"incompatible value into an array with dtype={self.dtype} "
"is deprecated. In a future version, the array and value "
"will be coerced to a common dtype (matching the behavior "
"of other dtypes). To retain the old behavior, pass "
"`errors='raise'`. To get the future behavior, pass "
"`errors='coerce'`.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise
else:
# "raise" case handled above
assert errors == "coerce"
# TODO: no tests get here 2022-01-04
blk = self.coerce_to_target_dtype(new)
return blk.putmask(mask, new, errors=errors)

return [self]

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def apply(
out = type(self).from_blocks(result_blocks, self.axes)
return out

def where(self: T, other, cond, align: bool) -> T:
def where(self: T, other, cond, align: bool, errors) -> T:
if align:
align_keys = ["other", "cond"]
else:
Expand All @@ -326,6 +326,7 @@ def where(self: T, other, cond, align: bool) -> T:
align_keys=align_keys,
other=other,
cond=cond,
errors=errors,
)

def setitem(self: T, indexer, value) -> T:
Expand All @@ -336,7 +337,7 @@ def setitem(self: T, indexer, value) -> T:
"""
return self.apply("setitem", indexer=indexer, value=value)

def putmask(self, mask, new, align: bool = True):
def putmask(self, mask, new, align: bool = True, errors=lib.no_default):

if align:
align_keys = ["new", "mask"]
Expand All @@ -349,6 +350,7 @@ def putmask(self, mask, new, align: bool = True):
align_keys=align_keys,
mask=mask,
new=new,
errors=errors,
)

def diff(self: T, n: int, axis: int) -> T:
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/arrays/categorical/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,12 @@ def test_where_other_categorical(self):
def test_where_new_category_raises(self):
ser = Series(Categorical(["a", "b", "c"]))
msg = "Cannot setitem on a Categorical with a new category"
warn_msg = "The default behavior of 'where' when"
with pytest.raises(TypeError, match=msg):
ser.where([True, False, True], "d")
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
ser.where([True, False, True], "d")
with pytest.raises(TypeError, match=msg):
ser.where([True, False, True], "d", errors="raise")

def test_where_ordered_differs_rasies(self):
ser = Series(
Expand All @@ -270,8 +274,12 @@ def test_where_ordered_differs_rasies(self):
other = Categorical(
["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
)
warn_msg = "The default behavior of 'where' when"
with pytest.raises(TypeError, match="without identical categories"):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
ser.where([True, False, True], other)
with pytest.raises(TypeError, match="without identical categories"):
ser.where([True, False, True], other)
ser.where([True, False, True], other, errors="raise")


class TestContains:
Expand Down
Loading