-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG: Don't raise for NDFrame.mask with nullable boolean #36201
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
4197885
0ff4f41
d79fcc7
6be29fb
17bdca0
aa3c357
566d0d6
d6998dd
8d35401
3169909
db45e2e
a1fcf51
bd584da
f2e8b7d
ea457ea
346606c
92ae6bf
d4704a0
f7a1f64
0ac0930
6dd6c1e
b40243e
363560c
2020fd6
1cf5a0c
d6fb23a
e292606
2f024fe
0b90786
054a8cd
cd601f6
424b6bd
8e8cb9d
2151cc0
59cf6a1
dfb7b59
a313268
ee2849c
5a4bacf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8816,8 +8816,7 @@ def _where( | |
cond = self._constructor(cond, **self._construct_axes_dict()) | ||
|
||
# make sure we are boolean | ||
fill_value = bool(inplace) | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
cond = cond.fillna(fill_value) | ||
cond = cond.fillna(False) | ||
|
||
msg = "Boolean array expected for the condition, not {dtype}" | ||
|
||
|
@@ -8834,7 +8833,7 @@ def _where( | |
# GH#21947 we have an empty DataFrame/Series, could be object-dtype | ||
cond = cond.astype(bool) | ||
|
||
cond = -cond if inplace else cond | ||
cond = ~cond if inplace else cond | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you are going to change this, then let's clean up starting on L834. |
||
|
||
# try to align with other | ||
try_quick = True | ||
|
@@ -9094,9 +9093,12 @@ def mask( | |
cond = com.apply_if_callable(cond, self) | ||
|
||
# see gh-21891 | ||
if not hasattr(cond, "__invert__"): | ||
if not hasattr(cond, "__array__"): | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
cond = np.array(cond) | ||
|
||
cond[isna(cond)] = False | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
cond = cond.astype(bool, copy=False) | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return self.where( | ||
~cond, | ||
other=other, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,7 +159,7 @@ def test_where_set(self, where_frame, float_string_frame): | |
|
||
def _check_set(df, cond, check_dtypes=True): | ||
dfi = df.copy() | ||
econd = cond.reindex_like(df).fillna(True) | ||
econd = cond.reindex_like(df).fillna(False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test was broken by no longer setting fill_value = bool(inplace), so it had to be updated. |
||
expected = dfi.mask(~econd) | ||
|
||
return_value = dfi.where(cond, np.nan, inplace=True) | ||
|
@@ -169,7 +169,7 @@ def _check_set(df, cond, check_dtypes=True): | |
# dtypes (and confirm upcasts)x | ||
if check_dtypes: | ||
for k, v in df.dtypes.items(): | ||
if issubclass(v.type, np.integer) and not cond[k].all(): | ||
if issubclass(v.type, np.integer) and not econd[k].all(): | ||
v = np.dtype("float64") | ||
assert dfi[k].dtype == v | ||
|
||
|
@@ -642,3 +642,18 @@ def test_df_where_with_category(self, kwargs): | |
expected = Series(A, name="A") | ||
|
||
tm.assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("inplace", [True, False]) | ||
def test_where_nullable_boolean_mask(self, inplace): | ||
# https://github.com/pandas-dev/pandas/issues/35429 | ||
df = DataFrame([1, 2, 3]) | ||
mask = Series([True, False, None], dtype="boolean") | ||
expected = DataFrame([1, 999, 999]) | ||
|
||
if inplace: | ||
result = df.copy() | ||
result.where(mask, 999, inplace=True) | ||
else: | ||
result = df.where(mask, 999, inplace=False) | ||
|
||
tm.assert_frame_equal(result, expected) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -452,3 +452,19 @@ def test_where_empty_series_and_empty_cond_having_non_bool_dtypes(): | |
ser = Series([], dtype=float) | ||
result = ser.where([]) | ||
tm.assert_series_equal(result, ser) | ||
|
||
|
||
@pytest.mark.parametrize("inplace", [True, False]) | ||
def test_where_nullable_boolean_mask(inplace): | ||
# https://github.com/pandas-dev/pandas/issues/35429 | ||
ser = Series([1, 2, 3]) | ||
mask = Series([True, False, None], dtype="boolean") | ||
expected = Series([1, 999, 999]) | ||
|
||
if inplace: | ||
result = ser.copy() | ||
result.where(mask, 999, inplace=True) | ||
else: | ||
result = ser.where(mask, 999, inplace=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Following on from the previous comment:
does using a nullable mask with a nullable type give the wrong output? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This output is correct IMO, but the output changes if the operation is inplace: [ins] In [4]: s.where(mask, 999)
Out[4]:
0 1
1 999
2 999
dtype: Int64
[ins] In [5]: s.where(mask, 999, inplace=True)
[ins] In [6]: s
Out[6]:
0 1
1 999
2 3
dtype: Int64 |
||
|
||
tm.assert_series_equal(result, expected) |
Uh oh!
There was an error while loading. Please reload this page.