From fc69f3b3f44e4a937b8333309191c047f12d6f8b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 15 Jan 2023 01:52:14 +0100 Subject: [PATCH] ENH: Add lazy copy to shift --- pandas/core/generic.py | 2 +- pandas/tests/copy_view/test_methods.py | 71 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4ef91f6de5e27..9bcf7793201cd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9908,7 +9908,7 @@ def shift( 2020-01-08 45 48 52 """ if periods == 0: - return self.copy() + return self.copy(deep=None) if freq is None: # when freq is None, data is shifted, index is not diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 5c24a180c4d6d..87903f20f5232 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -244,6 +244,77 @@ def test_filter(using_copy_on_write, filter_kwargs): tm.assert_frame_equal(df, df_orig) +def test_shift_no_op(using_copy_on_write): + df = DataFrame( + [[1, 2], [3, 4], [5, 6]], + index=date_range("2020-01-01", "2020-01-03"), + columns=["a", "b"], + ) + df_orig = df.copy() + df2 = df.shift(periods=0) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + tm.assert_frame_equal(df2, df_orig) + + +def test_shift_index(using_copy_on_write): + df = DataFrame( + [[1, 2], [3, 4], [5, 6]], + index=date_range("2020-01-01", "2020-01-03"), + columns=["a", "b"], + ) + df2 = df.shift(periods=1, axis=0) + + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + +def test_shift_rows_freq(using_copy_on_write): + df = DataFrame( + [[1, 2], [3, 4], [5, 6]], + index=date_range("2020-01-01", "2020-01-03"), + columns=["a", "b"], + ) + df_orig = df.copy() + df_orig.index = date_range("2020-01-02", "2020-01-04") + df2 = df.shift(periods=1, freq="1D") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + tm.assert_frame_equal(df2, df_orig) + + +def test_shift_columns(using_copy_on_write): + df = DataFrame( + [[1, 2], [3, 4], [5, 6]], columns=date_range("2020-01-01", "2020-01-02") + ) + df2 = df.shift(periods=1, axis=1) + + assert np.shares_memory(get_array(df2, "2020-01-02"), get_array(df, "2020-01-01")) + df.iloc[0, 1] = 0 + if using_copy_on_write: + assert not np.shares_memory( + get_array(df2, "2020-01-02"), get_array(df, "2020-01-01") + ) + expected = DataFrame( + [[np.nan, 1], [np.nan, 3], [np.nan, 5]], + columns=date_range("2020-01-01", "2020-01-02"), + ) + tm.assert_frame_equal(df2, expected) + + def test_pop(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy()