Skip to content

Commit c184fde

Browse files
authored
Retain views with listlike indexers setitem (#38204)
1 parent 7757844 commit c184fde

File tree

5 files changed

+48
-13
lines changed

5 files changed

+48
-13
lines changed

asv_bench/benchmarks/indexing.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,14 @@ def time_assign_with_setitem(self):
358358
for i in range(100):
359359
self.df[i] = np.random.randn(self.N)
360360

361+
def time_assign_list_like_with_setitem(self):
362+
np.random.seed(1234)
363+
self.df[list(range(100))] = np.random.randn(self.N, 100)
364+
365+
def time_assign_list_of_columns_concat(self):
366+
df = DataFrame(np.random.randn(self.N, 100))
367+
concat([self.df, df], axis=1)
368+
361369

362370
class ChainIndexing:
363371

doc/source/whatsnew/v1.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ Performance improvements
548548
- Performance improvement in :meth:`Series.astype` and :meth:`DataFrame.astype` for :class:`Categorical` (:issue:`8628`)
549549
- Performance improvement in :meth:`DataFrame.groupby` for ``float`` ``dtype`` (:issue:`28303`), changes of the underlying hash-function can lead to changes in float based indexes sort ordering for ties (e.g. :meth:`Index.value_counts`)
550550
- Performance improvement in :meth:`pd.isin` for inputs with more than 1e6 elements (:issue:`36611`)
551+
- Performance improvement for :meth:`DataFrame.__setitem__` with list-like indexers (:issue:`37954`)
551552

552553
.. ---------------------------------------------------------------------------
553554

pandas/core/indexing.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -672,17 +672,12 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):
672672
and not com.is_bool_indexer(key)
673673
and all(is_hashable(k) for k in key)
674674
):
675-
for i, k in enumerate(key):
676-
if k not in self.obj:
677-
if value is None:
678-
self.obj[k] = np.nan
679-
elif is_array_like(value) and value.ndim == 2:
680-
# GH#37964 have to select columnwise in case of array
681-
self.obj[k] = value[:, i]
682-
elif is_list_like(value):
683-
self.obj[k] = value[i]
684-
else:
685-
self.obj[k] = value
675+
# GH#38148 add all missing keys in one no-copy reindex so existing columns stay views
676+
keys = self.obj.columns.union(key, sort=False)
677+
678+
self.obj._mgr = self.obj._mgr.reindex_axis(
679+
keys, axis=0, copy=False, consolidate=False, only_slice=True
680+
)
686681

687682
def __setitem__(self, key, value):
688683
if isinstance(key, tuple):

pandas/core/internals/managers.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,8 @@ def reindex_axis(
12361236
limit=None,
12371237
fill_value=None,
12381238
copy: bool = True,
1239+
consolidate: bool = True,
1240+
only_slice: bool = False,
12391241
):
12401242
"""
12411243
Conform block manager to new index.
@@ -1246,7 +1248,13 @@ def reindex_axis(
12461248
)
12471249

12481250
return self.reindex_indexer(
1249-
new_index, indexer, axis=axis, fill_value=fill_value, copy=copy
1251+
new_index,
1252+
indexer,
1253+
axis=axis,
1254+
fill_value=fill_value,
1255+
copy=copy,
1256+
consolidate=consolidate,
1257+
only_slice=only_slice,
12501258
)
12511259

12521260
def reindex_indexer(
@@ -1258,6 +1266,7 @@ def reindex_indexer(
12581266
allow_dups: bool = False,
12591267
copy: bool = True,
12601268
consolidate: bool = True,
1269+
only_slice: bool = False,
12611270
) -> T:
12621271
"""
12631272
Parameters
@@ -1270,6 +1279,8 @@ def reindex_indexer(
12701279
copy : bool, default True
12711280
consolidate : bool, default True
12721281
Whether to consolidate inplace before reindexing.
1282+
only_slice : bool, default False
1283+
Whether to take views, not copies, along columns.
12731284
12741285
pandas-indexer with -1's only.
12751286
"""
@@ -1293,7 +1304,9 @@ def reindex_indexer(
12931304
raise IndexError("Requested axis not found in manager")
12941305

12951306
if axis == 0:
1296-
new_blocks = self._slice_take_blocks_ax0(indexer, fill_value=fill_value)
1307+
new_blocks = self._slice_take_blocks_ax0(
1308+
indexer, fill_value=fill_value, only_slice=only_slice
1309+
)
12971310
else:
12981311
new_blocks = [
12991312
blk.take_nd(

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,24 @@ def test_setitem_bool_with_numeric_index(self, dtype):
319319
tm.assert_index_equal(df.columns, expected_cols)
320320

321321

322+
class TestDataFrameSetItemWithExpansion:
323+
def test_setitem_listlike_views(self):
324+
# GH#38148
325+
df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]})
326+
327+
# get one column as a view of df
328+
ser = df["a"]
329+
330+
# add columns with list-like indexer
331+
df[["c", "d"]] = np.array([[0.1, 0.2], [0.3, 0.4], [0.4, 0.5]])
332+
333+
# edit the first column in place to check that ``ser`` is still a view of df
334+
df.iloc[0, 0] = 100
335+
336+
expected = Series([100, 2, 3], name="a")
337+
tm.assert_series_equal(ser, expected)
338+
339+
322340
class TestDataFrameSetItemSlicing:
323341
def test_setitem_slice_position(self):
324342
# GH#31469

0 commit comments

Comments
 (0)