From fb8b349909bed39ef0db63e02ea5e61deac3c0fb Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 21:44:40 +0100 Subject: [PATCH 1/5] BUG: __getitem__ raise blank KeyError for IntervalIndex and missing keys --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/interval.py | 2 +- pandas/tests/indexing/interval/test_interval.py | 8 ++++---- pandas/tests/indexing/interval/test_interval_new.py | 12 ++++++++++-- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 28f7df98cb86b..5352c42cc73e0 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -584,6 +584,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) +- Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` returns blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2aec86c9cdfae..6da2dd2cf8572 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -845,7 +845,7 @@ def _convert_list_indexer(self, keyarr): # we have missing values if (locs == -1).any(): - raise KeyError + raise KeyError(keyarr[locs == -1].tolist()) return locs diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index df59d09edd3ef..3d981d4da3fe2 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -65,10 +65,10 @@ def test_non_matching(self): # this is a departure from our current # indexing scheme, but simpler - with pytest.raises(KeyError, match="^$"): + with pytest.raises(KeyError, match="^\[-1\]$"): s.loc[[-1, 3, 4, 5]] - with pytest.raises(KeyError, match="^$"): + with pytest.raises(KeyError, match="^\[-1\]$"): s.loc[[-1, 3]] @pytest.mark.arm_slow @@ -107,11 +107,11 @@ def test_loc_getitem_frame(self): expected = df.take([4, 5, 4, 5]) tm.assert_frame_equal(result, expected) - with pytest.raises(KeyError, match="^$"): + with pytest.raises(KeyError, match=r"^\[10\]$"): df.loc[[10]] # partial missing - with pytest.raises(KeyError, match="^$"): + with pytest.raises(KeyError, match=r"^\[10\]$"): df.loc[[10, 4]] diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py index 03c3034772bc6..a9512bc97d9de 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -204,13 +204,13 @@ def test_loc_with_overlap(self): with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s.loc[Interval(3, 5)] - with pytest.raises(KeyError, match="^$"): + with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): s.loc[[Interval(3, 5)]] with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s[Interval(3, 5)] - with pytest.raises(KeyError, match="^$"): + with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): s[[Interval(3, 5)]] # slices with interval (only exact matches) @@ -266,3 +266,11 @@ def test_non_unique_moar(self): expected = s.iloc[[0, 1]] result = s[[Interval(1, 3)]] tm.assert_series_equal(expected, result) + + def test_missing_key_error_message(self, frame_or_series): + # GH#27365 + obj = frame_or_series( + np.arange(5), index=IntervalIndex.from_breaks(np.arange(6)) + ) + with pytest.raises(KeyError, match=r"\[6\]"): + obj.loc[[4, 5, 6]] From 3708f381355b2bf591ae17de0ea61b1e428e21b6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 21:45:06 +0100 Subject: [PATCH 2/5] Escape sequence --- pandas/tests/indexing/interval/test_interval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 3d981d4da3fe2..65a3632a0b0a9 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -65,10 +65,10 @@ def test_non_matching(self): # this is a departure from our current # indexing scheme, but simpler - with pytest.raises(KeyError, match="^\[-1\]$"): + with pytest.raises(KeyError, match=r"^\[-1\]$"): s.loc[[-1, 3, 4, 5]] - with pytest.raises(KeyError, match="^\[-1\]$"): + with pytest.raises(KeyError, match=r"^\[-1\]$"): s.loc[[-1, 3]] @pytest.mark.arm_slow From 7127fb140e96ae99999edc01294c240e7cd7a90d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 18 Nov 2020 22:43:26 +0100 Subject: [PATCH 3/5] Remove code --- pandas/core/indexes/category.py | 4 ---- pandas/core/indexes/interval.py | 8 +------- pandas/tests/indexing/interval/test_interval.py | 7 +++++-- pandas/tests/indexing/interval/test_interval_new.py | 7 ++++--- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 06df8f85cded7..51708fd0553d3 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -582,10 +582,6 @@ def _convert_list_indexer(self, keyarr): # Return our indexer or raise if all of the values are not included in # the categories - if self.categories._defer_to_indexing: - indexer = self.categories._convert_list_indexer(keyarr) - return Index(self.codes).get_indexer_for(indexer) - msg = "a list-indexer must only include values that are in the categories" if self.hasnans: msg += " or NA" diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6da2dd2cf8572..4630d9e564320 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -841,13 +841,7 @@ def _convert_list_indexer(self, keyarr): we are passed a list-like indexer. Return the indexer for matching intervals. """ - locs = self.get_indexer_for(keyarr) - - # we have missing values - if (locs == -1).any(): - raise KeyError(keyarr[locs == -1].tolist()) - - return locs + return self.get_indexer_for(keyarr) # -------------------------------------------------------------------- diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 65a3632a0b0a9..e6534c01eb84f 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -65,10 +65,13 @@ def test_non_matching(self): # this is a departure from our current # indexing scheme, but simpler - with pytest.raises(KeyError, match=r"^\[-1\]$"): + msg = r"Passing list-likes to \.loc or \[\] with any missing labels is no " \ + r"longer supported. The following labels were missing: " \ + r"Int64Index\(\[-1\], dtype='int64'\)" + with pytest.raises(KeyError, match=msg): s.loc[[-1, 3, 4, 5]] - with pytest.raises(KeyError, match=r"^\[-1\]$"): + with pytest.raises(KeyError, match=msg): s.loc[[-1, 3]] @pytest.mark.arm_slow diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py index a9512bc97d9de..681ec6e97f208 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -203,14 +203,15 @@ def test_loc_with_overlap(self): with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s.loc[Interval(3, 5)] - - with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): + msg = r"None of \[IntervalIndex\(\[\(3, 5\]\],\\n \s* closed='right',\\n \s* " \ + r"dtype='interval\[int64\]'\)\] are in the \[index\]" + with pytest.raises(KeyError, match=msg): s.loc[[Interval(3, 5)]] with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s[Interval(3, 5)] - with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): + with pytest.raises(KeyError, match=msg): s[[Interval(3, 5)]] # slices with interval (only exact matches) From 6099f95b3e00391ffff7eec9ef21abe5e5670d1e Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 19 Nov 2020 20:42:15 +0100 Subject: [PATCH 4/5] Revert "Remove code" This reverts commit 7127fb14 --- pandas/core/indexes/category.py | 4 ++++ pandas/core/indexes/interval.py | 8 +++++++- pandas/tests/indexing/interval/test_interval.py | 7 ++----- pandas/tests/indexing/interval/test_interval_new.py | 7 +++---- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 51708fd0553d3..06df8f85cded7 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -582,6 +582,10 @@ def _convert_list_indexer(self, keyarr): # Return our indexer or raise if all of the values are not included in # the categories + if self.categories._defer_to_indexing: + indexer = self.categories._convert_list_indexer(keyarr) + return Index(self.codes).get_indexer_for(indexer) + msg = "a list-indexer must only include values that are in the categories" if self.hasnans: msg += " or NA" diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 4630d9e564320..6da2dd2cf8572 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -841,7 +841,13 @@ def _convert_list_indexer(self, keyarr): we are passed a list-like indexer. Return the indexer for matching intervals. """ - return self.get_indexer_for(keyarr) + locs = self.get_indexer_for(keyarr) + + # we have missing values + if (locs == -1).any(): + raise KeyError(keyarr[locs == -1].tolist()) + + return locs # -------------------------------------------------------------------- diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index e6534c01eb84f..65a3632a0b0a9 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -65,13 +65,10 @@ def test_non_matching(self): # this is a departure from our current # indexing scheme, but simpler - msg = r"Passing list-likes to \.loc or \[\] with any missing labels is no " \ - r"longer supported. The following labels were missing: " \ - r"Int64Index\(\[-1\], dtype='int64'\)" - with pytest.raises(KeyError, match=msg): + with pytest.raises(KeyError, match=r"^\[-1\]$"): s.loc[[-1, 3, 4, 5]] - with pytest.raises(KeyError, match=msg): + with pytest.raises(KeyError, match=r"^\[-1\]$"): s.loc[[-1, 3]] @pytest.mark.arm_slow diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py index 681ec6e97f208..a9512bc97d9de 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -203,15 +203,14 @@ def test_loc_with_overlap(self): with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s.loc[Interval(3, 5)] - msg = r"None of \[IntervalIndex\(\[\(3, 5\]\],\\n \s* closed='right',\\n \s* " \ - r"dtype='interval\[int64\]'\)\] are in the \[index\]" - with pytest.raises(KeyError, match=msg): + + with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): s.loc[[Interval(3, 5)]] with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s[Interval(3, 5)] - with pytest.raises(KeyError, match=msg): + with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): s[[Interval(3, 5)]] # slices with interval (only exact matches) From 4e95c212eec06c3e4aed79235b0c0fb09a190d66 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 19 Nov 2020 20:42:47 +0100 Subject: [PATCH 5/5] Improve whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5352c42cc73e0..74660441e27df 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -584,7 +584,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) -- Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` returns blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) +- Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) Missing ^^^^^^^