Skip to content

PERF: replace_list #38097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,15 @@ def _replace_list(
"""
See BlockManager._replace_list docstring.
"""
src_len = len(src_list) - 1
# Exclude anything that we know we won't contain
pairs = [
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
]
if not len(pairs):
# shortcut, nothing to replace
return [self] if inplace else [self.copy()]

src_len = len(pairs) - 1

def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
"""
Expand All @@ -874,15 +882,19 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
s = maybe_box_datetimelike(s)
return compare_or_regex_search(self.values, s, regex, mask)

# Calculate the mask once, prior to the call of comp
# in order to avoid repeating the same computations
mask = ~isna(self.values)
if self.is_object:
# Calculate the mask once, prior to the call of comp
# in order to avoid repeating the same computations
mask = ~isna(self.values)
masks = [comp(s[0], mask, regex) for s in pairs]
else:
# GH#38086 faster if we know we don't need to check for regex
masks = [missing.mask_missing(self.values, s[0]) for s in pairs]

masks = [comp(s, mask, regex) for s in src_list]
masks = [_extract_bool_array(x) for x in masks]

rb = [self if inplace else self.copy()]
for i, (src, dest) in enumerate(zip(src_list, dest_list)):
for i, (src, dest) in enumerate(pairs):
new_rb: List["Block"] = []
for blk in rb:
m = masks[i]
Expand Down Expand Up @@ -1037,7 +1049,7 @@ def _putmask_simple(self, mask: np.ndarray, value: Any):
if lib.is_scalar(value) and isinstance(values, np.ndarray):
value = convert_scalar_for_putitemlike(value, values.dtype)

if self.is_extension or self.is_object:
if self.is_extension or (self.is_object and not lib.is_scalar(value)):
# GH#19266 using np.putmask gives unexpected results with listlike value
if is_list_like(value) and len(value) == len(values):
values[mask] = value[mask]
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,9 @@ def test_replace(self, datetime_series):
with pytest.raises(ValueError, match=msg):
ser.replace([1, 2, 3], [np.nan, 0])

# make sure that we aren't just masking a TypeError because bools don't
# implement indexing
with pytest.raises(TypeError, match="Cannot compare types .+"):
ser.replace([1, 2], [np.nan, 0])
# ser is dt64 so can't hold 1 or 2, so this replace is a no-op
result = ser.replace([1, 2], [np.nan, 0])
tm.assert_series_equal(result, ser)

ser = pd.Series([0, 1, 2, 3, 4])
result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
Expand Down