From 899437217d1205b850891a5eaf2bb58e085946b2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 8 Jan 2023 16:07:24 +0100 Subject: [PATCH 1/2] BUG: Fix bug in putmask for CoW --- pandas/core/internals/managers.py | 6 ++---- pandas/tests/copy_view/test_methods.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c4e869a3f6a45..b67419e79224c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -384,10 +384,8 @@ def setitem(self: T, indexer, value) -> T: return self.apply("setitem", indexer=indexer, value=value) def putmask(self, mask, new, align: bool = True): - if ( - using_copy_on_write() - and self.refs is not None - and not all(ref is None for ref in self.refs) + if using_copy_on_write() and any( + not self._has_no_reference_block(i) for i in range(len(self.blocks)) ): # some reference -> copy full dataframe # TODO(CoW) this could be optimized to only copy the blocks that would diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 7deb5e50464d5..dfc004a3cc353 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -700,3 +700,18 @@ def test_squeeze(using_copy_on_write): # Without CoW the original will be modified assert np.shares_memory(series.values, get_array(df, "a")) assert df.loc[0, "a"] == 0 + + +def test_putmask(using_copy_on_write): + df = DataFrame({"a": [1, 2], "b": 1, "c": 2}) + view = df[:] + df_orig = df.copy() + df[df == df] = 5 + + if using_copy_on_write: + assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) + tm.assert_frame_equal(view, df_orig) + else: + # Without CoW the view still shares memory with df and reflects the update + assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) + assert view.iloc[0, 0] == 5 From a40cec37c457812e4a5e3a5e44667faff1407d17 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler 
Date: Sun, 8 Jan 2023 16:14:32 +0100 Subject: [PATCH 2/2] Add whatsnew and gh ref --- doc/source/whatsnew/v1.5.3.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index 0cb8796e3fb5d..677fc904e6bf0 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -27,6 +27,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in the Copy-on-Write implementation losing track of views when indexing a :class:`DataFrame` with another :class:`DataFrame` (:issue:`50630`) - Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`) - Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) - Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`)