From 5c9e917db99af854f24473f752f49abea853d4d7 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Sun, 6 Nov 2022 14:51:57 +0100 Subject: [PATCH 1/3] use lazy copy on set_index method --- pandas/core/frame.py | 3 ++- pandas/tests/copy_view/test_methods.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a8631f42fb2d6..00a03f6e4619f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5855,7 +5855,8 @@ def set_index( if inplace: frame = self else: - frame = self.copy() + # GH 49473 Use "lazy copy" with Copy-on-Write + frame = self.copy(deep=None) arrays = [] names: list[Hashable] = [] diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 956e2cf98c9b6..af72c6e0bc5fa 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -214,3 +214,20 @@ def test_chained_methods(request, method, idx, using_copy_on_write): df.iloc[0, 0] = 0 if not df2_is_view: tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) + + +def test_set_index(using_copy_on_write): + # GH 49473 + df = DataFrame( + { + "month": [1, 4, 7, 10], + "year": [2012, 2014, 2013, 2014], + "sale": [55, 40, 84, 31], + } + ) + df2 = df.set_index("month") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "year"), get_array(df, "year")) + else: + assert not np.shares_memory(get_array(df2, "year"), get_array(df, "year")) From 345db896f8043b39ca1ea21c2bc29df6ee20a7f5 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 9 Nov 2022 08:58:48 +0100 Subject: [PATCH 2/3] improve test --- pandas/tests/copy_view/test_methods.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index af72c6e0bc5fa..78fc0439c73cf 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -218,16 +218,16 
@@ def test_chained_methods(request, method, idx, using_copy_on_write): def test_set_index(using_copy_on_write): # GH 49473 - df = DataFrame( - { - "month": [1, 4, 7, 10], - "year": [2012, 2014, 2013, 2014], - "sale": [55, 40, 84, 31], - } - ) - df2 = df.set_index("month") + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.set_index("a") if using_copy_on_write: - assert np.shares_memory(get_array(df2, "year"), get_array(df, "year")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) else: - assert not np.shares_memory(get_array(df2, "year"), get_array(df, "year")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + + # mutating df2 triggers a copy-on-write for that column / block + df2.iloc[0, 1] = 0 + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) \ No newline at end of file From 6c906d8b4494c8ecbd2fb0652b69f705b9ea7915 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 9 Nov 2022 10:05:11 +0100 Subject: [PATCH 3/3] pre commit test file (my bad) --- pandas/tests/copy_view/test_methods.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 78fc0439c73cf..9488211e2a0e6 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -227,7 +227,7 @@ def test_set_index(using_copy_on_write): else: assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - # mutating df2 triggers a copy-on-write for that column / block - df2.iloc[0, 1] = 0 + # mutating df2 triggers a copy-on-write for that column / block + df2.iloc[0, 1] = 0 assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - tm.assert_frame_equal(df, df_orig) \ No newline at end of file + tm.assert_frame_equal(df, df_orig)