diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9fd9faf057a8a..ad4dc9edffefd 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -247,6 +247,9 @@ Copy-on-Write improvements can never update the original Series or DataFrame. Therefore, an informative error is raised to the user instead of silently doing nothing (:issue:`49467`) +- :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism + when ``inplace=True``. + Copy-on-Write can be enabled through one of .. code-block:: python diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e66011acb978b..ce48e6b85c430 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -665,6 +665,7 @@ def replace_list( dest_list: Sequence[Any], inplace: bool = False, regex: bool = False, + using_cow: bool = False, ) -> list[Block]: """ See BlockManager.replace_list docstring. @@ -674,7 +675,11 @@ def replace_list( if isinstance(values, Categorical): # TODO: avoid special-casing # GH49404 - blk = self if inplace else self.copy() + if using_cow and inplace: + # TODO(CoW): Optimize + blk = self.copy() + else: + blk = self if inplace else self.copy() values = cast(Categorical, blk.values) values._replace(to_replace=src_list, value=dest_list, inplace=True) return [blk] @@ -703,7 +708,11 @@ def replace_list( masks = [extract_bool_array(x) for x in masks] - rb = [self if inplace else self.copy()] + if using_cow and inplace: + # TODO(CoW): Optimize + rb = [self.copy()] + else: + rb = [self if inplace else self.copy()] for i, (src, dest) in enumerate(pairs): convert = i == src_len # only convert once at the end new_rb: list[Block] = [] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 517e6d7e48275..4973c0827245f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -473,6 +473,7 @@ def replace_list( dest_list=dest_list, 
inplace=inplace, regex=regex, + using_cow=using_copy_on_write(), ) bm._consolidate_inplace() return bm
diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py
new file mode 100644
index 0000000000000..a1347d8e12950
--- /dev/null
+++ b/pandas/tests/copy_view/test_replace.py
@@ -0,0 +1,44 @@
+import numpy as np
+
+from pandas import (
+    Categorical,
+    DataFrame,
+)
+import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
+
+
+def test_replace_categorical_inplace_reference(using_copy_on_write):
+    # In-place replace on a Categorical column while a view of the frame exists.
+    df = DataFrame({"a": Categorical([1, 2, 3])})
+    df_orig = df.copy()
+    arr_a = get_array(df, "a")
+    view = df[:]  # noqa
+    df.replace(to_replace=[1], value=2, inplace=True)
+
+    if using_copy_on_write:
+        # The replaced block must be a fresh copy, leaving the view untouched.
+        assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes)
+        assert df._mgr._has_no_reference(0)
+        assert view._mgr._has_no_reference(0)
+        tm.assert_frame_equal(view, df_orig)
+    else:
+        assert np.shares_memory(get_array(df, "a").codes, arr_a.codes)
+
+
+def test_replace_inplace_reference(using_copy_on_write):
+    # In-place replace on a float column while a view of the frame exists.
+    df = DataFrame({"a": [1.5, 2, 3]})
+    df_orig = df.copy()
+    arr_a = get_array(df, "a")
+    view = df[:]  # noqa
+    df.replace(to_replace=[1.5], value=15.5, inplace=True)
+
+    if using_copy_on_write:
+        # The replaced block must be a fresh copy, leaving the view untouched.
+        assert not np.shares_memory(get_array(df, "a"), arr_a)
+        assert df._mgr._has_no_reference(0)
+        assert view._mgr._has_no_reference(0)
+        tm.assert_frame_equal(view, df_orig)
+    else:
+        assert np.shares_memory(get_array(df, "a"), arr_a)