Skip to content

Commit 855f9aa

Browse files
committed
Merge pull request #8134 from jreback/mi_slicers
BUG: Bug in multi-index slicing with various edge cases (GH8132)
2 parents 1e3da90 + 21ccaf2 commit 855f9aa

File tree

3 files changed

+94
-11
lines changed

3 files changed

+94
-11
lines changed

doc/source/v0.15.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -532,6 +532,7 @@ Bug Fixes
532532
- Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raises ``TypeError`` (:issue:`7741`)
533533
- Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`)
534534
- Bug in multi-index slicing with missing indexers (:issue:`7866`)
535+
- Bug in multi-index slicing with various edge cases (:issue:`8132`)
535536
- Regression in multi-index indexing with a non-scalar type object (:issue:`7914`)
536537
- Bug in Timestamp comparisons with ``==`` and dtype of int64 (:issue:`8058`)
537538
- Bug where pickles containing ``DateOffset`` may raise ``AttributeError`` when the ``normalize`` attribute is referred to internally (:issue:`7748`)

pandas/core/index.py

+15-3
Original file line number | Diff line number | Diff line change
@@ -3927,9 +3927,21 @@ def _get_level_indexer(self, key, level=0):
39273927
# handle a slice, returning a slice if we can
39283928
# otherwise a boolean indexer
39293929

3930-
start = level_index.get_loc(key.start or 0)
3931-
stop = level_index.get_loc(key.stop or len(level_index)-1)
3932-
step = key.step
3930+
try:
3931+
if key.start is not None:
3932+
start = level_index.get_loc(key.start)
3933+
else:
3934+
start = 0
3935+
if key.stop is not None:
3936+
stop = level_index.get_loc(key.stop)
3937+
else:
3938+
stop = len(level_index)-1
3939+
step = key.step
3940+
except (KeyError):
3941+
3942+
# we have a partial slice (like looking up a partial date string)
3943+
start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
3944+
step = start.step
39333945

39343946
if isinstance(start,slice) or isinstance(stop,slice):
39353947
# we have a slice for start and/or stop

pandas/tests/test_indexing.py

+78-8
Original file line number | Diff line number | Diff line change
@@ -1664,6 +1664,76 @@ def test_multiindex_slicers_datetimelike(self):
16641664
result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'],1), idx['A','B']]
16651665
assert_frame_equal(result,expected)
16661666

1667+
1668+
def test_multiindex_slicers_edges(self):
1669+
1670+
# GH 8132
1671+
# various edge cases
1672+
df = DataFrame({'A': ['A0'] * 5 + ['A1']*5 + ['A2']*5,
1673+
'B': ['B0','B0','B1','B1','B2'] * 3,
1674+
'DATE': ["2013-06-11",
1675+
"2013-07-02",
1676+
"2013-07-09",
1677+
"2013-07-30",
1678+
"2013-08-06",
1679+
"2013-06-11",
1680+
"2013-07-02",
1681+
"2013-07-09",
1682+
"2013-07-30",
1683+
"2013-08-06",
1684+
"2013-09-03",
1685+
"2013-10-01",
1686+
"2013-07-09",
1687+
"2013-08-06",
1688+
"2013-09-03"],
1689+
'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3,4, 2]})
1690+
1691+
df['DATE'] = pd.to_datetime(df['DATE'])
1692+
df1 = df.set_index(['A', 'B', 'DATE'])
1693+
df1 = df1.sortlevel()
1694+
df2 = df.set_index('DATE')
1695+
1696+
# A1 - Get all values under "A0" and "A1"
1697+
result = df1.loc[(slice('A1')),:]
1698+
expected = df1.iloc[0:10]
1699+
assert_frame_equal(result, expected)
1700+
1701+
# A2 - Get all values from the start to "A2"
1702+
result = df1.loc[(slice('A2')),:]
1703+
expected = df1
1704+
assert_frame_equal(result, expected)
1705+
1706+
# A3 - Get all values under "B1" or "B2"
1707+
result = df1.loc[(slice(None),slice('B1','B2')),:]
1708+
expected = df1.iloc[[2,3,4,7,8,9,12,13,14]]
1709+
assert_frame_equal(result, expected)
1710+
1711+
# A4 - Get all values between 2013-07-02 and 2013-07-09
1712+
result = df1.loc[(slice(None),slice(None),slice('20130702','20130709')),:]
1713+
expected = df1.iloc[[1,2,6,7,12]]
1714+
assert_frame_equal(result, expected)
1715+
1716+
# B1 - Get all values in B0 that are also under A0, A1 and A2
1717+
result = df1.loc[(slice('A2'),slice('B0')),:]
1718+
expected = df1.iloc[[0,1,5,6,10,11]]
1719+
assert_frame_equal(result, expected)
1720+
1721+
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for the As)
1722+
result = df1.loc[(slice(None),slice('B2')),:]
1723+
expected = df1
1724+
assert_frame_equal(result, expected)
1725+
1726+
# B3 - Get all values from B1 to B2 and up to 2013-08-06
1727+
result = df1.loc[(slice(None),slice('B1','B2'),slice('2013-08-06')),:]
1728+
expected = df1.iloc[[2,3,4,7,8,9,12,13]]
1729+
assert_frame_equal(result, expected)
1730+
1731+
# B4 - Same as A4 but the start of the date slice is not a key.
1732+
# shows indexing on a partial selection slice
1733+
result = df1.loc[(slice(None),slice(None),slice('20130701','20130709')),:]
1734+
expected = df1.iloc[[1,2,6,7,12]]
1735+
assert_frame_equal(result, expected)
1736+
16671737
def test_per_axis_per_level_doc_examples(self):
16681738

16691739
# test index maker
@@ -3831,11 +3901,11 @@ class TestSeriesNoneCoercion(tm.TestCase):
38313901
# For numeric series, we should coerce to NaN.
38323902
([1, 2, 3], [np.nan, 2, 3]),
38333903
([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),
3834-
3904+
38353905
# For datetime series, we should coerce to NaT.
38363906
([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
38373907
[NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),
3838-
3908+
38393909
# For objects, we should preserve the None value.
38403910
(["foo", "bar", "baz"], [None, "bar", "baz"]),
38413911
]
@@ -3851,7 +3921,7 @@ def test_coercion_with_setitem(self):
38513921
self.assert_numpy_array_equivalent(
38523922
start_series.values,
38533923
expected_series.values, strict_nan=True)
3854-
3924+
38553925
def test_coercion_with_loc_setitem(self):
38563926
for start_data, expected_result in self.EXPECTED_RESULTS:
38573927
start_series = Series(start_data)
@@ -3863,7 +3933,7 @@ def test_coercion_with_loc_setitem(self):
38633933
self.assert_numpy_array_equivalent(
38643934
start_series.values,
38653935
expected_series.values, strict_nan=True)
3866-
3936+
38673937
def test_coercion_with_setitem_and_series(self):
38683938
for start_data, expected_result in self.EXPECTED_RESULTS:
38693939
start_series = Series(start_data)
@@ -3875,7 +3945,7 @@ def test_coercion_with_setitem_and_series(self):
38753945
self.assert_numpy_array_equivalent(
38763946
start_series.values,
38773947
expected_series.values, strict_nan=True)
3878-
3948+
38793949
def test_coercion_with_loc_and_series(self):
38803950
for start_data, expected_result in self.EXPECTED_RESULTS:
38813951
start_series = Series(start_data)
@@ -3887,18 +3957,18 @@ def test_coercion_with_loc_and_series(self):
38873957
self.assert_numpy_array_equivalent(
38883958
start_series.values,
38893959
expected_series.values, strict_nan=True)
3890-
3960+
38913961

38923962
class TestDataframeNoneCoercion(tm.TestCase):
38933963
EXPECTED_SINGLE_ROW_RESULTS = [
38943964
# For numeric series, we should coerce to NaN.
38953965
([1, 2, 3], [np.nan, 2, 3]),
38963966
([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),
3897-
3967+
38983968
# For datetime series, we should coerce to NaT.
38993969
([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
39003970
[NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),
3901-
3971+
39023972
# For objects, we should preserve the None value.
39033973
(["foo", "bar", "baz"], [None, "bar", "baz"]),
39043974
]

0 commit comments

Comments
 (0)