diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 5152e6d8eaf63..e6a09a37e929f 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -532,6 +532,7 @@ Bug Fixes - Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raise ``TypeError`` (:issue:`7741`) - Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`) - Bug in multi-index slicing with missing indexers (:issue:`7866`) +- Bug in multi-index slicing with various edge cases (:issue:`8132`) - Regression in multi-index indexing with a non-scalar type object (:issue:`7914`) - Bug in Timestamp comparisons with ``==`` and dtype of int64 (:issue:`8058`) - Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is reffered internally (:issue:`7748`) diff --git a/pandas/core/index.py b/pandas/core/index.py index 505b557fc0d85..065dcd90b8d76 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -3927,9 +3927,21 @@ def _get_level_indexer(self, key, level=0): # handle a slice, returnig a slice if we can # otherwise a boolean indexer - start = level_index.get_loc(key.start or 0) - stop = level_index.get_loc(key.stop or len(level_index)-1) - step = key.step + try: + if key.start is not None: + start = level_index.get_loc(key.start) + else: + start = 0 + if key.stop is not None: + stop = level_index.get_loc(key.stop) + else: + stop = len(level_index)-1 + step = key.step + except (KeyError): + + # we have a partial slice (like looking up a partial date string) + start = stop = level_index.slice_indexer(key.start, key.stop, key.step) + step = start.step if isinstance(start,slice) or isinstance(stop,slice): # we have a slice for start and/or stop diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index daeef9b78b037..7c55ea860f8d1 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1664,6 +1664,76 @@ def test_multiindex_slicers_datetimelike(self): result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'],1), idx['A','B']] assert_frame_equal(result,expected) + + def test_multiindex_slicers_edges(self): + + # GH 8132 + # various edge cases + df = DataFrame({'A': ['A0'] * 5 + ['A1']*5 + ['A2']*5, + 'B': ['B0','B0','B1','B1','B2'] * 3, + 'DATE': ["2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-09-03", + "2013-10-01", + "2013-07-09", + "2013-08-06", + "2013-09-03"], + 'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3,4, 2]}) + + df['DATE'] = pd.to_datetime(df['DATE']) + df1 = df.set_index(['A', 'B', 'DATE']) + df1 = df1.sortlevel() + df2 = df.set_index('DATE') + + # A1 - Get all values under "A0" and "A1" + result = df1.loc[(slice('A1')),:] + expected = df1.iloc[0:10] + assert_frame_equal(result, expected) + + # A2 - Get all values from the start to "A2" + result = df1.loc[(slice('A2')),:] + expected = df1 + assert_frame_equal(result, expected) + + # A3 - Get all values under "B1" or "B2" + result = df1.loc[(slice(None),slice('B1','B2')),:] + expected = df1.iloc[[2,3,4,7,8,9,12,13,14]] + assert_frame_equal(result, expected) + + # A4 - Get all values between 2013-07-02 and 2013-07-09 + result = df1.loc[(slice(None),slice(None),slice('20130702','20130709')),:] + expected = df1.iloc[[1,2,6,7,12]] + assert_frame_equal(result, expected) + + # B1 - Get all values in B0 that are also under A0, A1 and A2 + result = df1.loc[(slice('A2'),slice('B0')),:] + expected = df1.iloc[[0,1,5,6,10,11]] + assert_frame_equal(result, expected) + + # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for the As) + result = df1.loc[(slice(None),slice('B2')),:] + expected = df1 + assert_frame_equal(result, expected) + + # B3 - Get all values from B1 to B2 and up to 2013-08-06 + result = df1.loc[(slice(None),slice('B1','B2'),slice('2013-08-06')),:] + expected = df1.iloc[[2,3,4,7,8,9,12,13]] + assert_frame_equal(result, expected) + + # B4 - Same as A4 but the start of the date slice is not a key. + # shows indexing on a partial selection slice + result = df1.loc[(slice(None),slice(None),slice('20130701','20130709')),:] + expected = df1.iloc[[1,2,6,7,12]] + assert_frame_equal(result, expected) + def test_per_axis_per_level_doc_examples(self): # test index maker @@ -3831,11 +3901,11 @@ class TestSeriesNoneCoercion(tm.TestCase): # For numeric series, we should coerce to NaN. ([1, 2, 3], [np.nan, 2, 3]), ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), - + # For datetime series, we should coerce to NaT. ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]), - + # For objects, we should preserve the None value. (["foo", "bar", "baz"], [None, "bar", "baz"]), ] @@ -3851,7 +3921,7 @@ def test_coercion_with_setitem(self): self.assert_numpy_array_equivalent( start_series.values, expected_series.values, strict_nan=True) - + def test_coercion_with_loc_setitem(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) @@ -3863,7 +3933,7 @@ def test_coercion_with_loc_setitem(self): self.assert_numpy_array_equivalent( start_series.values, expected_series.values, strict_nan=True) - + def test_coercion_with_setitem_and_series(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) @@ -3875,7 +3945,7 @@ def test_coercion_with_setitem_and_series(self): self.assert_numpy_array_equivalent( start_series.values, expected_series.values, strict_nan=True) - + def test_coercion_with_loc_and_series(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) @@ -3887,18 +3957,18 @@ def test_coercion_with_loc_and_series(self): self.assert_numpy_array_equivalent( start_series.values, expected_series.values, strict_nan=True) - + class TestDataframeNoneCoercion(tm.TestCase): EXPECTED_SINGLE_ROW_RESULTS = [ # For numeric series, we should coerce to NaN. ([1, 2, 3], [np.nan, 2, 3]), ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), - + # For datetime series, we should coerce to NaT. ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]), - + # For objects, we should preserve the None value. (["foo", "bar", "baz"], [None, "bar", "baz"]), ]