Skip to content

Commit 032d112

Browse files
authored
ENH: Add lazy copy to align (#50432)
* ENH: Add lazy copy to align
* ENH: Add lazy copy to align
1 parent fa613b3 commit 032d112

File tree

5 files changed

+72
-13
lines changed

5 files changed

+72
-13
lines changed

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4879,7 +4879,7 @@ def align(
48794879
join: AlignJoin = "outer",
48804880
axis: Axis | None = None,
48814881
level: Level = None,
4882-
copy: bool = True,
4882+
copy: bool | None = None,
48834883
fill_value=None,
48844884
method: FillnaOptions | None = None,
48854885
limit: int | None = None,

pandas/core/generic.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -5256,7 +5256,7 @@ def _reindex_with_indexers(
52565256
self: NDFrameT,
52575257
reindexers,
52585258
fill_value=None,
5259-
copy: bool_t = False,
5259+
copy: bool_t | None = False,
52605260
allow_dups: bool_t = False,
52615261
) -> NDFrameT:
52625262
"""allow_dups indicates an internal call here"""
@@ -5285,8 +5285,8 @@ def _reindex_with_indexers(
52855285
# If we've made a copy once, no need to make another one
52865286
copy = False
52875287

5288-
if copy and new_data is self._mgr:
5289-
new_data = new_data.copy()
5288+
if (copy or copy is None) and new_data is self._mgr:
5289+
new_data = new_data.copy(deep=copy)
52905290

52915291
return self._constructor(new_data).__finalize__(self)
52925292

@@ -9060,7 +9060,7 @@ def align(
90609060
join: AlignJoin = "outer",
90619061
axis: Axis | None = None,
90629062
level: Level = None,
9063-
copy: bool_t = True,
9063+
copy: bool_t | None = None,
90649064
fill_value: Hashable = None,
90659065
method: FillnaOptions | None = None,
90669066
limit: int | None = None,
@@ -9253,7 +9253,7 @@ def _align_frame(
92539253
join: AlignJoin = "outer",
92549254
axis: Axis | None = None,
92559255
level=None,
9256-
copy: bool_t = True,
9256+
copy: bool_t | None = None,
92579257
fill_value=None,
92589258
method=None,
92599259
limit=None,
@@ -9317,7 +9317,7 @@ def _align_series(
93179317
join: AlignJoin = "outer",
93189318
axis: Axis | None = None,
93199319
level=None,
9320-
copy: bool_t = True,
9320+
copy: bool_t | None = None,
93219321
fill_value=None,
93229322
method=None,
93239323
limit=None,
@@ -9346,7 +9346,7 @@ def _align_series(
93469346
if is_series:
93479347
left = self._reindex_indexer(join_index, lidx, copy)
93489348
elif lidx is None or join_index is None:
9349-
left = self.copy() if copy else self
9349+
left = self.copy(deep=copy) if copy or copy is None else self
93509350
else:
93519351
left = self._constructor(
93529352
self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy)
@@ -9375,7 +9375,7 @@ def _align_series(
93759375
left = self._constructor(fdata)
93769376

93779377
if ridx is None:
9378-
right = other
9378+
right = other.copy(deep=copy) if copy or copy is None else other
93799379
else:
93809380
right = other.reindex(join_index, level=level)
93819381

pandas/core/series.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -4591,15 +4591,18 @@ def _reduce(
45914591
return op(delegate, skipna=skipna, **kwds)
45924592

45934593
def _reindex_indexer(
4594-
self, new_index: Index | None, indexer: npt.NDArray[np.intp] | None, copy: bool
4594+
self,
4595+
new_index: Index | None,
4596+
indexer: npt.NDArray[np.intp] | None,
4597+
copy: bool | None,
45954598
) -> Series:
45964599
# Note: new_index is None iff indexer is None
45974600
# if not None, indexer is np.intp
45984601
if indexer is None and (
45994602
new_index is None or new_index.names == self.index.names
46004603
):
4601-
if copy:
4602-
return self.copy()
4604+
if copy or copy is None:
4605+
return self.copy(deep=copy)
46034606
return self
46044607

46054608
new_values = algorithms.take_nd(
@@ -4626,7 +4629,7 @@ def align(
46264629
join: AlignJoin = "outer",
46274630
axis: Axis | None = None,
46284631
level: Level = None,
4629-
copy: bool = True,
4632+
copy: bool | None = None,
46304633
fill_value: Hashable = None,
46314634
method: FillnaOptions | None = None,
46324635
limit: int | None = None,

pandas/tests/copy_view/test_methods.py

+47
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,53 @@ def test_select_dtypes(using_copy_on_write):
172172
tm.assert_frame_equal(df, df_orig)
173173

174174

175+
@pytest.mark.parametrize(
176+
"func",
177+
[
178+
lambda x, y: x.align(y),
179+
lambda x, y: x.align(y.a, axis=0),
180+
lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1),
181+
],
182+
)
183+
def test_align_frame(using_copy_on_write, func):
184+
df = DataFrame({"a": [1, 2, 3], "b": "a"})
185+
df_orig = df.copy()
186+
df_changed = df[["b", "a"]].copy()
187+
df2, _ = func(df, df_changed)
188+
189+
if using_copy_on_write:
190+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
191+
else:
192+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
193+
194+
df2.iloc[0, 0] = 0
195+
if using_copy_on_write:
196+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
197+
tm.assert_frame_equal(df, df_orig)
198+
199+
200+
def test_align_series(using_copy_on_write):
201+
ser = Series([1, 2])
202+
ser_orig = ser.copy()
203+
ser_other = ser.copy()
204+
ser2, ser_other_result = ser.align(ser_other)
205+
206+
if using_copy_on_write:
207+
assert np.shares_memory(ser2.values, ser.values)
208+
assert np.shares_memory(ser_other_result.values, ser_other.values)
209+
else:
210+
assert not np.shares_memory(ser2.values, ser.values)
211+
assert not np.shares_memory(ser_other_result.values, ser_other.values)
212+
213+
ser2.iloc[0] = 0
214+
ser_other_result.iloc[0] = 0
215+
if using_copy_on_write:
216+
assert not np.shares_memory(ser2.values, ser.values)
217+
assert not np.shares_memory(ser_other_result.values, ser_other.values)
218+
tm.assert_series_equal(ser, ser_orig)
219+
tm.assert_series_equal(ser_other, ser_orig)
220+
221+
175222
def test_to_frame(using_copy_on_write):
176223
# Case: converting a Series to a DataFrame with to_frame
177224
ser = Series([1, 2, 3])

pandas/tests/frame/methods/test_align.py

+9
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,12 @@ def _check_align_fill(self, frame, kind, meth, ax, fax):
402402
self._check_align(
403403
empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
404404
)
405+
406+
def test_align_series_check_copy(self):
407+
# GH#50432
408+
df = DataFrame({0: [1, 2]})
409+
ser = Series([1], name=0)
410+
expected = ser.copy()
411+
result, other = df.align(ser, axis=1)
412+
ser.iloc[0] = 100
413+
tm.assert_series_equal(other, expected)

0 commit comments

Comments
 (0)