-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
API / CoW: constructing DataFrame from DataFrame creates lazy copy #50499
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1ba93e4
24912bd
11a1db8
bd304fc
c51a204
2984d21
f221e0c
a434340
57919a5
094c0e9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -205,6 +205,7 @@ | |
to_arrays, | ||
treat_as_nested, | ||
) | ||
from pandas.core.internals.managers import using_copy_on_write | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This import can now be removed (after a merge with main). |
||
from pandas.core.reshape.melt import melt | ||
from pandas.core.series import Series | ||
from pandas.core.shared_docs import _shared_docs | ||
|
@@ -637,6 +638,8 @@ def __init__( | |
|
||
if isinstance(data, DataFrame): | ||
data = data._mgr | ||
if not copy and using_copy_on_write(): | ||
data = data.copy(deep=False) | ||
|
||
if isinstance(data, (BlockManager, ArrayManager)): | ||
# first check if a Manager is passed without any other arguments | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5257,7 +5257,12 @@ def _reindex_with_indexers( | |
# If we've made a copy once, no need to make another one | ||
copy = False | ||
|
||
if (copy or copy is None) and new_data is self._mgr: | ||
if ( | ||
(copy or copy is None) | ||
and new_data is self._mgr | ||
or not copy | ||
and using_copy_on_write() | ||
): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you explain a bit how this change is related? We are passing here a Manager object to the constructor, I suppose, and you only changed the constructor when it receives a DataFrame? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah sorry, this is unrelated to the constructor change; I'll make a separate PR. Currently, when reindexing with copy=False, we don't track references, which causes problems. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I also have some skips in #50536 related to |
||
new_data = new_data.copy(deep=copy) | ||
|
||
return self._constructor(new_data).__finalize__(self) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
from pandas import DataFrame | ||
import pandas._testing as tm | ||
from pandas.tests.copy_view.util import get_array | ||
|
||
|
||
@pytest.mark.parametrize("columns", [None, ["a"]]) | ||
def test_dataframe_constructor_mgr(using_copy_on_write, columns): | ||
    # Build a DataFrame from another DataFrame and check how the two objects | ||
    # share the underlying array for column "a" before and after a write. | ||
    # NOTE(review): `columns` is parametrized but never used in the body, so | ||
    # both parametrizations run the same code -- confirm whether it was meant | ||
    # to be passed to the constructor. | ||
    # NOTE(review): the name says "mgr" but a DataFrame (not a manager) is | ||
    # passed to the constructor below -- confirm the intended naming. | ||
    df = DataFrame({"a": [1, 2, 3]}) | ||
    # Snapshot taken before any write, used to check that `df` is unchanged. | ||
    df_orig = df.copy() | ||
|
||
    new_df = DataFrame(df) | ||
|
||
    # Right after construction both frames are expected to share memory. | ||
    assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) | ||
    # Write into the new frame only. | ||
    new_df.iloc[0] = 100 | ||
|
||
    if using_copy_on_write: | ||
        # The write must not reach `df`: no shared memory any more and `df` | ||
        # still equals its snapshot. | ||
        assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) | ||
        tm.assert_frame_equal(df, df_orig) | ||
    else: | ||
        # Memory is still shared, so the write is visible through `df`. | ||
        assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) | ||
        tm.assert_frame_equal(df, new_df) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the equivalent Series change I listed this in the "Copy-on-Write improvements" section instead of seeing it as a bug fix (it was a known gap left out of the initial implementation, and it also changes behaviour, so at this stage I wouldn't yet label it as a bug fix)