From fb8d7ce0638f54dcdbb520d3cc022e68a5bc2899 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Mar 2020 15:29:28 -0700 Subject: [PATCH 1/7] BUG: df.iloc[:, :1] with EA column --- pandas/core/internals/blocks.py | 5 +++++ pandas/tests/extension/base/getitem.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 935ff09585b17..1978a1185a550 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1809,6 +1809,11 @@ def _slice(self, slicer): if not com.is_null_slice(slicer[0]): raise AssertionError("invalid slicing for a 1-ndim categorical") slicer = slicer[1] + elif not isinstance(slicer, tuple) and self.ndim == 2: + # reached via getitem_block via _slice_take_blocks_ax0 + # TODO(EA2D): wont be necessary with 2D EAs + # treat this like (slicer, slice(None) + slicer = slice(None) return self.values[slicer] diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index b08a64cc076b6..57929b9e77e37 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -40,6 +40,10 @@ def test_iloc_frame(self, data): result = df.iloc[:4, 0] self.assert_series_equal(result, expected) + # GH#32957 null slice along index, slice along rows + result = df.iloc[:, :1] + self.assert_frame_equal(result, df[["A"]]) + def test_loc_series(self, data): ser = pd.Series(data) result = ser.loc[:3] From 628e2425d23a64b0492973b2fb0fbfe00d671dc8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Mar 2020 15:29:30 -0700 Subject: [PATCH 2/7] Fix test, remove DatetimeTZBlock._slice --- pandas/core/internals/blocks.py | 10 +--------- pandas/tests/extension/base/getitem.py | 4 ++-- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1978a1185a550..0f925cc7e261e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2240,6 +2240,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): to_native_types = DatetimeBlock.to_native_types fill_value = np.datetime64("NaT", "ns") should_store = DatetimeBlock.should_store + _slice = ExtensionBlock._slice @property def _holder(self): @@ -2307,15 +2308,6 @@ def get_values(self, dtype=None): values = values.reshape(1, -1) return values - def _slice(self, slicer): - """ return a slice of my values """ - if isinstance(slicer, tuple): - col, loc = slicer - if not com.is_null_slice(col) and col != 0: - raise IndexError(f"{self} only contains one item") - return self.values[loc] - return self.values[slicer] - def diff(self, n: int, axis: int = 0) -> List["Block"]: """ 1st discrete difference. diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 57929b9e77e37..13c9dd35fd128 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -40,8 +40,8 @@ def test_iloc_frame(self, data): result = df.iloc[:4, 0] self.assert_series_equal(result, expected) - # GH#32957 null slice along index, slice along rows - result = df.iloc[:, :1] + # GH#32957 null slice along index, slice along columns with single-block + result = df[["A"]].iloc[:, :1] self.assert_frame_equal(result, df[["A"]]) def test_loc_series(self, data): From 338c8abe0097447ee41f5db0a392758a5d552633 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 26 Mar 2020 21:05:57 -0700 Subject: [PATCH 3/7] improved check, docstring --- pandas/core/internals/blocks.py | 37 ++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 84c57f0b0963b..cd8e25cbe149e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -296,6 +296,7 @@ def __setstate__(self, state): def _slice(self, slicer): """ return a slice of my values """ + return self.values[slicer] def getitem_block(self, slicer, new_mgr_locs=None): @@ -1796,19 +1797,36 @@ def _can_hold_element(self, element: Any) -> bool: return True def _slice(self, slicer): - """ return a slice of my values """ - # slice the category + """ + Return a slice of my values. + + Parameters + ---------- + slicer : slice, ndarray[int], or a tuple of these + Valid (non-reducing) indexer for self.values. + + Returns + ------- + np.ndarray or ExtensionArray + """ # return same dims as we currently have + if not isinstance(slicer, tuple) and self.ndim == 2: + # reached via getitem_block via _slice_take_blocks_ax0 + # TODO(EA2D): wont be necessary with 2D EAs + slicer = (slicer, slice(None)) if isinstance(slicer, tuple) and len(slicer) == 2: - if not com.is_null_slice(slicer[0]): - raise AssertionError("invalid slicing for a 1-ndim categorical") + first = slicer[0] + if not isinstance(first, slice): + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", first + ) + elif not (first == slice(None) or first == slice(1)): + # TODO(EA2D): wont be necessary with 2D EAs + # Since self.shape[0] is always 1, the slicer[0] must be + # either slice(None) or slice(1) + raise AssertionError("invalid slicing for a 1-ndim categorical", first) slicer = slicer[1] - elif not isinstance(slicer, tuple) and self.ndim == 2: - # reached via getitem_block via _slice_take_blocks_ax0 - # TODO(EA2D): wont be necessary with 2D EAs - # treat this like (slicer, slice(None) - slicer = slice(None) return self.values[slicer] @@ -2235,7 +2253,6 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): to_native_types = DatetimeBlock.to_native_types fill_value = np.datetime64("NaT", "ns") should_store = DatetimeBlock.should_store - _slice = ExtensionBlock._slice @property def _holder(self): From 81b95559490db71f8175d765c3b8d8db2ee0ae49 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 30 Mar 2020 21:00:55 -0700 Subject: [PATCH 4/7] check earlier, flesh out tests --- pandas/core/internals/blocks.py | 16 ++++++++++------ pandas/core/internals/managers.py | 4 ++++ pandas/tests/extension/base/getitem.py | 25 +++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c5524d9137cd3..5e73e4e24b3af 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1796,12 +1796,16 @@ def _slice(self, slicer): raise AssertionError( "invalid slicing for a 1-ndim ExtensionArray", first ) - elif not (first == slice(None) or first == slice(1)): - # TODO(EA2D): wont be necessary with 2D EAs - # Since self.shape[0] is always 1, the slicer[0] must be - # either slice(None) or slice(1) - raise AssertionError("invalid slicing for a 1-ndim categorical", first) - slicer = slicer[1] + # GH#32959 only full-slicers along fake-dim0 are valid + # TODO(EA2D): wont be necessary with 2D EAs + new_locs = self.mgr_locs[first] + if len(new_locs): + # effectively slice(None) + slicer = slicer[1] + else: + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", slicer + ) return self.values[slicer] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 182a5b14a1242..1577c897fea72 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1331,6 +1331,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): blk = self.blocks[0] if sl_type in ("slice", "mask"): + if blk.is_extension: + # GH#32959 EABlock would fail since we cant make 0-width + if sllen == 0: + return [] return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_tuple[0] is None: diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 13c9dd35fd128..626a6e4381087 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -40,9 +40,30 @@ def test_iloc_frame(self, data): result = df.iloc[:4, 0] self.assert_series_equal(result, expected) - # GH#32957 null slice along index, slice along columns with single-block - result = df[["A"]].iloc[:, :1] + # GH#32959 slice columns with step + result = df.iloc[:, ::2] self.assert_frame_equal(result, df[["A"]]) + result = df[["B", "A"]].iloc[:, ::2] + self.assert_frame_equal(result, df[["B"]]) + + def test_iloc_frame_single_block(self, data): + # GH#32959 null slice along index, slice along columns with single-block + df = pd.DataFrame({"A": data}) + + result = df.iloc[:, :] + self.assert_frame_equal(result, df) + + result = df.iloc[:, :1] + self.assert_frame_equal(result, df) + + result = df.iloc[:, :2] + self.assert_frame_equal(result, df) + + result = df.iloc[:, ::2] + self.assert_frame_equal(result, df) + + result = df.iloc[:, 1:2] + self.assert_frame_equal(result, df.iloc[:, :0]) def test_loc_series(self, data): ser = pd.Series(data) From e465651f3314c743bea92c94c868a3635fb30b53 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 31 Mar 2020 14:46:45 -0700 Subject: [PATCH 5/7] remove is_extension check --- pandas/core/internals/managers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1577c897fea72..f7b9cb19be2b4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1331,10 +1331,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): blk = self.blocks[0] if sl_type in ("slice", "mask"): - if blk.is_extension: - # GH#32959 EABlock would fail since we cant make 0-width - if sllen == 0: - return [] + # GH#32959 EABlock would fail since we cant make 0-width + # TODO(EA2D): special casing unnecessary with 2D EAs + if sllen == 0: + return [] return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_tuple[0] is None: From 86fffb7c716dbeeeed96e46293ce0ad380074850 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 1 Apr 2020 08:26:22 -0700 Subject: [PATCH 6/7] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 25f847c698278..68de42451ac94 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -360,6 +360,8 @@ Indexing - Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`) - Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) - Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`) +- Bug in :meth:`DataFrame.iloc` when slicing a single column-:class:`DataFrame`` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`) + Missing ^^^^^^^ From 02f0be8019368cfe2576a0a71fcb46cbee0be529 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 10:43:45 -0700 Subject: [PATCH 7/7] extra test with [:, -1:] --- pandas/tests/extension/base/getitem.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 5d5d7a2d53c9d..dc94bffd320b1 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -65,6 +65,9 @@ def test_iloc_frame_single_block(self, data): result = df.iloc[:, 1:2] self.assert_frame_equal(result, df.iloc[:, :0]) + result = df.iloc[:, -1:] + self.assert_frame_equal(result, df) + def test_loc_series(self, data): ser = pd.Series(data) result = ser.loc[:3]