Skip to content

CoW: Remove copy-on-write conditions from frame tests #57255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions pandas/tests/frame/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,18 +391,13 @@ def test_getitem_empty_frame_with_boolean(self):
df2 = df[df > 0]
tm.assert_frame_equal(df, df2)

def test_getitem_returns_view_when_column_is_unique_in_df(
self, using_copy_on_write
):
def test_getitem_returns_view_when_column_is_unique_in_df(self):
# GH#45316
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
df_orig = df.copy()
view = df["b"]
view.loc[:] = 100
if using_copy_on_write:
expected = df_orig
else:
expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
expected = df_orig
tm.assert_frame_equal(df, expected)

def test_getitem_frozenset_unique_in_column(self):
Expand Down
47 changes: 11 additions & 36 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self):
with pytest.raises(KeyError, match=r"^3$"):
df2.loc[3:11] = 0

def test_fancy_getitem_slice_mixed(
self, float_frame, float_string_frame, using_copy_on_write
):
def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):
sliced = float_string_frame.iloc[:, -3:]
assert sliced["D"].dtype == np.float64

Expand All @@ -579,13 +577,7 @@ def test_fancy_getitem_slice_mixed(
assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values)

sliced.loc[:, "C"] = 4.0
if not using_copy_on_write:
assert (float_frame["C"] == 4).all()

# with the enforcement of GH#45333 in 2.0, this remains a view
np.shares_memory(sliced["C"]._values, float_frame["C"]._values)
else:
tm.assert_frame_equal(float_frame, original)
tm.assert_frame_equal(float_frame, original)

def test_getitem_setitem_non_ix_labels(self):
df = DataFrame(range(20), index=date_range("2020-01-01", periods=20))
Expand Down Expand Up @@ -1053,7 +1045,7 @@ def test_iloc_row(self):
expected = df.reindex(df.index[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_row_slice_view(self, using_copy_on_write):
def test_iloc_row_slice_view(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2)
)
Expand All @@ -1067,11 +1059,6 @@ def test_iloc_row_slice_view(self, using_copy_on_write):

exp_col = original[2].copy()
subset.loc[:, 2] = 0.0
if not using_copy_on_write:
exp_col._values[4:8] = 0.0

# With the enforcement of GH#45333 in 2.0, this remains a view
assert np.shares_memory(df[2], subset[2])
tm.assert_series_equal(df[2], exp_col)

def test_iloc_col(self):
Expand All @@ -1097,32 +1084,20 @@ def test_iloc_col(self):
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_col_slice_view(self, using_copy_on_write):
def test_iloc_col_slice_view(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2)
)
original = df.copy()
subset = df.iloc[:, slice(4, 8)]

if not using_copy_on_write:
# verify slice is view
assert np.shares_memory(df[8]._values, subset[8]._values)

subset.loc[:, 8] = 0.0

assert (df[8] == 0).all()

# with the enforcement of GH#45333 in 2.0, this remains a view
assert np.shares_memory(df[8]._values, subset[8]._values)
else:
if using_copy_on_write:
# verify slice is view
assert np.shares_memory(df[8]._values, subset[8]._values)
subset[8] = 0.0
# subset changed
assert (subset[8] == 0).all()
# but df itself did not change (setitem replaces full column)
tm.assert_frame_equal(df, original)
# verify slice is view
assert np.shares_memory(df[8]._values, subset[8]._values)
subset[8] = 0.0
# subset changed
assert (subset[8] == 0).all()
# but df itself did not change (setitem replaces full column)
tm.assert_frame_equal(df, original)

def test_loc_duplicates(self):
# gh-17105
Expand Down
13 changes: 4 additions & 9 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_insert_with_columns_dups(self):
)
tm.assert_frame_equal(df, exp)

def test_insert_item_cache(self, using_copy_on_write):
def test_insert_item_cache(self):
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
ser = df[0]
expected_warning = PerformanceWarning
Expand All @@ -80,14 +80,9 @@ def test_insert_item_cache(self, using_copy_on_write):
for n in range(100):
df[n + 3] = df[1] * n

if using_copy_on_write:
ser.iloc[0] = 99
assert df.iloc[0, 0] == df[0][0]
assert df.iloc[0, 0] != 99
else:
ser.values[0] = 99
assert df.iloc[0, 0] == df[0][0]
assert df.iloc[0, 0] == 99
ser.iloc[0] = 99
assert df.iloc[0, 0] == df[0][0]
assert df.iloc[0, 0] != 99

def test_insert_EA_no_warning(self):
# PerformanceWarning about fragmented frame should not be raised when
Expand Down
35 changes: 9 additions & 26 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
df["dates"] = vals
assert (df["dates"].values == ex_vals).all()

def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write):
def test_setitem_dt64tz(self, timezone_frame):
df = timezone_frame
idx = df["B"].rename("foo")

Expand All @@ -345,10 +345,7 @@ def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write):
tm.assert_extension_array_equal(v1, v2)
v1base = v1._ndarray.base
v2base = v2._ndarray.base
if not using_copy_on_write:
assert v1base is None or (id(v1base) != id(v2base))
else:
assert id(v1base) == id(v2base)
assert id(v1base) == id(v2base)

# with nan
df2 = df.copy()
Expand Down Expand Up @@ -844,7 +841,7 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):


class TestDataFrameSetItemWithExpansion:
def test_setitem_listlike_views(self, using_copy_on_write):
def test_setitem_listlike_views(self):
# GH#38148
df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]})

Expand All @@ -857,10 +854,7 @@ def test_setitem_listlike_views(self, using_copy_on_write):
# edit in place the first column to check view semantics
df.iloc[0, 0] = 100

if using_copy_on_write:
expected = Series([1, 2, 3], name="a")
else:
expected = Series([100, 2, 3], name="a")
expected = Series([1, 2, 3], name="a")
tm.assert_series_equal(ser, expected)

def test_setitem_string_column_numpy_dtype_raising(self):
Expand All @@ -870,7 +864,7 @@ def test_setitem_string_column_numpy_dtype_raising(self):
expected = DataFrame([[1, 2, 5], [3, 4, 6]], columns=[0, 1, "0 - Name"])
tm.assert_frame_equal(df, expected)

def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write):
def test_setitem_empty_df_duplicate_columns(self):
# GH#38521
df = DataFrame(columns=["a", "b", "b"], dtype="float64")
df.loc[:, "a"] = list(range(2))
Expand Down Expand Up @@ -1199,7 +1193,7 @@ def test_setitem_always_copy(self, float_frame):
assert notna(s[5:10]).all()

@pytest.mark.parametrize("consolidate", [True, False])
def test_setitem_partial_column_inplace(self, consolidate, using_copy_on_write):
def test_setitem_partial_column_inplace(self, consolidate):
# This setting should be in-place, regardless of whether frame is
# single-block or multi-block
# GH#304 this used to be incorrectly not-inplace, in which case
Expand All @@ -1215,18 +1209,11 @@ def test_setitem_partial_column_inplace(self, consolidate, using_copy_on_write):
else:
assert len(df._mgr.blocks) == 2

zvals = df["z"]._values

df.loc[2:, "z"] = 42

expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z")
tm.assert_series_equal(df["z"], expected)

# check setting occurred in-place
if not using_copy_on_write:
tm.assert_numpy_array_equal(zvals, expected.values)
assert np.shares_memory(zvals, df["z"]._values)

def test_setitem_duplicate_columns_not_inplace(self):
# GH#39510
cols = ["A", "B"] * 2
Expand Down Expand Up @@ -1298,7 +1285,7 @@ def test_setitem_not_operating_inplace(self, value, set_value, indexer):
df[indexer] = set_value
tm.assert_frame_equal(view, expected)

def test_setitem_column_update_inplace(self, using_copy_on_write):
def test_setitem_column_update_inplace(self):
# https://github.com/pandas-dev/pandas/issues/47172

labels = [f"c{i}" for i in range(10)]
Expand All @@ -1308,12 +1295,8 @@ def test_setitem_column_update_inplace(self, using_copy_on_write):
with tm.raises_chained_assignment_error():
for label in df.columns:
df[label][label] = 1
if not using_copy_on_write:
# diagonal values all updated
assert np.all(values[np.arange(10), np.arange(10)] == 1)
else:
# original dataframe not updated
assert np.all(values[np.arange(10), np.arange(10)] == 0)
# original dataframe not updated
assert np.all(values[np.arange(10), np.arange(10)] == 0)

def test_setitem_column_frame_as_category(self):
# GH31581
Expand Down
32 changes: 9 additions & 23 deletions pandas/tests/frame/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_xs_dt_error(self, datetime_frame):
):
datetime_frame.xs(datetime_frame.index[0] - BDay())

def test_xs_other(self, float_frame, using_copy_on_write):
def test_xs_other(self, float_frame):
float_frame_orig = float_frame.copy()
# xs get column
series = float_frame.xs("A", axis=1)
Expand All @@ -68,12 +68,9 @@ def test_xs_other(self, float_frame, using_copy_on_write):
# view is returned if possible
series = float_frame.xs("A", axis=1)
series[:] = 5
if using_copy_on_write:
# but with CoW the view shouldn't propagate mutations
tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
assert not (expected == 5).all()
else:
assert (expected == 5).all()
# The view shouldn't propagate mutations
tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
assert not (expected == 5).all()

def test_xs_corner(self):
# pathological mixed-type reordering case
Expand Down Expand Up @@ -363,34 +360,23 @@ def test_xs_droplevel_false(self):
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)

def test_xs_droplevel_false_view(self, using_copy_on_write):
def test_xs_droplevel_false_view(self):
# GH#37832
df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
# check that result still views the same data as df
assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)

df.iloc[0, 0] = 2
if using_copy_on_write:
# with copy on write the subset is never modified
expected = DataFrame({"a": [1]})
else:
# modifying original df also modifies result when having a single block
expected = DataFrame({"a": [2]})
# The subset is never modified
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)

# with mixed dataframe, modifying the parent doesn't modify result
# TODO: the "split" path behaves differently here than with a single block
df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
df.iloc[0, 0] = 2
if using_copy_on_write:
# with copy on write the subset is never modified
expected = DataFrame({"a": [1]})
else:
# FIXME: iloc does not update the array inplace using
# "split" path
expected = DataFrame({"a": [1]})
# The subset is never modified
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)

def test_xs_list_indexer_droplevel_false(self):
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/frame/methods/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,12 @@ def test_frame_align_aware(self):
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc

def test_align_float(self, float_frame, using_copy_on_write):
def test_align_float(self, float_frame):
af, bf = float_frame.align(float_frame)
assert af._mgr is not float_frame._mgr

af, bf = float_frame.align(float_frame, copy=False)
if not using_copy_on_write:
assert af._mgr is float_frame._mgr
else:
assert af._mgr is not float_frame._mgr
assert af._mgr is not float_frame._mgr

# axis = 0
other = float_frame.iloc[:-5, :3]
Expand Down
13 changes: 3 additions & 10 deletions pandas/tests/frame/methods/test_cov_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
tm.assert_frame_equal(result, expected)

def test_corr_item_cache(self, using_copy_on_write):
def test_corr_item_cache(self):
# Check that corr does not lead to incorrect entries in item_cache

df = DataFrame({"A": range(10)})
Expand All @@ -218,15 +218,8 @@ def test_corr_item_cache(self, using_copy_on_write):

_ = df.corr(numeric_only=True)

if using_copy_on_write:
ser.iloc[0] = 99
assert df.loc[0, "A"] == 0
else:
# Check that the corr didn't break link between ser and df
ser.values[0] = 99
assert df.loc[0, "A"] == 99
assert df["A"] is ser
assert df.values[0, 0] == 99
ser.iloc[0] = 99
assert df.loc[0, "A"] == 0

@pytest.mark.parametrize("length", [2, 20, 200, 2000])
def test_corr_for_constant_columns(self, length):
Expand Down
Loading