Skip to content

BUG: Bug in multi-index slicing with various edge cases (GH8132) #8134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 28, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ Bug Fixes
- Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raises ``TypeError`` (:issue:`7741`)
- Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`)
- Bug in multi-index slicing with missing indexers (:issue:`7866`)
- Bug in multi-index slicing with various edge cases (:issue:`8132`)
- Regression in multi-index indexing with a non-scalar type object (:issue:`7914`)
- Bug in Timestamp comparisons with ``==`` and dtype of int64 (:issue:`8058`)
- Bug where pickles containing ``DateOffset`` may raise ``AttributeError`` when the ``normalize`` attribute is referred to internally (:issue:`7748`)
Expand Down
18 changes: 15 additions & 3 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3927,9 +3927,21 @@ def _get_level_indexer(self, key, level=0):
# handle a slice, returning a slice if we can
# otherwise a boolean indexer

start = level_index.get_loc(key.start or 0)
stop = level_index.get_loc(key.stop or len(level_index)-1)
step = key.step
try:
if key.start is not None:
start = level_index.get_loc(key.start)
else:
start = 0
if key.stop is not None:
stop = level_index.get_loc(key.stop)
else:
stop = len(level_index)-1
step = key.step
except (KeyError):

# we have a partial slice (like looking up a partial date string)
start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
step = start.step

if isinstance(start,slice) or isinstance(stop,slice):
# we have a slice for start and/or stop
Expand Down
86 changes: 78 additions & 8 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,76 @@ def test_multiindex_slicers_datetimelike(self):
result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'],1), idx['A','B']]
assert_frame_equal(result,expected)


def test_multiindex_slicers_edges(self):
    """Check ``.loc`` slicing on a three-level MultiIndex at its edges.

    Covers GH 8132: slices with an open start, slices whose stop is the
    last label of a level, slices combined across several levels, and a
    date slice whose start is not an existing key.
    """

    # GH 8132
    # various edge cases
    df = DataFrame({'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5,
                    'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3,
                    'DATE': ["2013-06-11",
                             "2013-07-02",
                             "2013-07-09",
                             "2013-07-30",
                             "2013-08-06",
                             "2013-06-11",
                             "2013-07-02",
                             "2013-07-09",
                             "2013-07-30",
                             "2013-08-06",
                             "2013-09-03",
                             "2013-10-01",
                             "2013-07-09",
                             "2013-08-06",
                             "2013-09-03"],
                    'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5,
                               1, 2, 3, 4, 2]})

    df['DATE'] = pd.to_datetime(df['DATE'])

    # The rows above are already in (A, B, DATE) lexicographic order, so
    # the positional ``iloc`` selections used for ``expected`` below refer
    # to the rows in the order they are written in the literal.
    df1 = df.set_index(['A', 'B', 'DATE'])
    df1 = df1.sortlevel()

    # A1 - Get all values under "A0" and "A1"
    result = df1.loc[slice('A1'), :]
    expected = df1.iloc[0:10]
    assert_frame_equal(result, expected)

    # A2 - Get all values from the start to "A2"
    result = df1.loc[slice('A2'), :]
    expected = df1
    assert_frame_equal(result, expected)

    # A3 - Get all values under "B1" or "B2"
    result = df1.loc[(slice(None), slice('B1', 'B2')), :]
    expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
    assert_frame_equal(result, expected)

    # A4 - Get all values between 2013-07-02 and 2013-07-09
    result = df1.loc[(slice(None), slice(None),
                      slice('20130702', '20130709')), :]
    expected = df1.iloc[[1, 2, 6, 7, 12]]
    assert_frame_equal(result, expected)

    # B1 - Get all values in B0 that are also under A0, A1 and A2
    result = df1.loc[(slice('A2'), slice('B0')), :]
    expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
    assert_frame_equal(result, expected)

    # B2 - Get all values in B0, B1 and B2 (similar to what case A2 is
    # doing for the As)
    result = df1.loc[(slice(None), slice('B2')), :]
    expected = df1
    assert_frame_equal(result, expected)

    # B3 - Get all values from B1 to B2 and up to 2013-08-06
    result = df1.loc[(slice(None), slice('B1', 'B2'),
                      slice('2013-08-06')), :]
    expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
    assert_frame_equal(result, expected)

    # B4 - Same as A4 but the start of the date slice is not a key.
    # shows indexing on a partial selection slice
    result = df1.loc[(slice(None), slice(None),
                      slice('20130701', '20130709')), :]
    expected = df1.iloc[[1, 2, 6, 7, 12]]
    assert_frame_equal(result, expected)

def test_per_axis_per_level_doc_examples(self):

# test index maker
Expand Down Expand Up @@ -3831,11 +3901,11 @@ class TestSeriesNoneCoercion(tm.TestCase):
# For numeric series, we should coerce to NaN.
([1, 2, 3], [np.nan, 2, 3]),
([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

# For datetime series, we should coerce to NaT.
([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
[NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

# For objects, we should preserve the None value.
(["foo", "bar", "baz"], [None, "bar", "baz"]),
]
Expand All @@ -3851,7 +3921,7 @@ def test_coercion_with_setitem(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)

def test_coercion_with_loc_setitem(self):
for start_data, expected_result in self.EXPECTED_RESULTS:
start_series = Series(start_data)
Expand All @@ -3863,7 +3933,7 @@ def test_coercion_with_loc_setitem(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)

def test_coercion_with_setitem_and_series(self):
for start_data, expected_result in self.EXPECTED_RESULTS:
start_series = Series(start_data)
Expand All @@ -3875,7 +3945,7 @@ def test_coercion_with_setitem_and_series(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)

def test_coercion_with_loc_and_series(self):
for start_data, expected_result in self.EXPECTED_RESULTS:
start_series = Series(start_data)
Expand All @@ -3887,18 +3957,18 @@ def test_coercion_with_loc_and_series(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)


class TestDataframeNoneCoercion(tm.TestCase):
EXPECTED_SINGLE_ROW_RESULTS = [
# For numeric series, we should coerce to NaN.
([1, 2, 3], [np.nan, 2, 3]),
([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

# For datetime series, we should coerce to NaT.
([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
[NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

# For objects, we should preserve the None value.
(["foo", "bar", "baz"], [None, "bar", "baz"]),
]
Expand Down