Skip to content

Commit a96b53d

Browse files
committed
Merge pull request #6443 from jreback/iloc_bounds2
BUG/TST: iloc will now raise IndexError on out-of-bounds list indexers (GH6296 / GH6299)
2 parents 9564ead + 76b5816 commit a96b53d

File tree

6 files changed

+62
-53
lines changed

6 files changed

+62
-53
lines changed

doc/source/indexing.rst

+19-10
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ of multi-axis indexing.
7777
See more at :ref:`Selection by Label <indexing.label>`
7878

7979
- ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of
80-
the axis), will raise ``IndexError`` if a single index is requested and it
81-
is out-of-bounds, otherwise it will conform the bounds to size of the object.
82-
Allowed inputs are:
80+
the axis), will raise ``IndexError`` if an indexer is requested and it
81+
is out-of-bounds, except *slice* indexers which allow out-of-bounds indexing.
82+
(this conforms with python/numpy *slice* semantics). Allowed inputs are:
8383

8484
- An integer e.g. ``5``
8585
- A list or array of integers ``[4, 3, 0]``
@@ -421,19 +421,28 @@ python/numpy allow slicing past the end of an array without an associated error.
421421
x[4:10]
422422
x[8:10]
423423
424-
- as of v0.14.0, ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
424+
- as of v0.14.0, ``iloc`` will now accept out-of-bounds indexers for slices, e.g. a value that exceeds the length of the object being
425425
indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
426-
values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
427-
``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned)
426+
values. A single indexer / list of indexers that is out-of-bounds will still raise
427+
``IndexError`` (:issue:`6296`, :issue:`6299`). Out-of-bounds slices could result in an empty axis (e.g. an empty DataFrame being returned)
428428

429429
.. ipython:: python
430430
431431
dfl = DataFrame(np.random.randn(5,2),columns=list('AB'))
432432
dfl
433-
dfl.iloc[[4,5,6]]
434-
dfl.iloc[4:6]
435433
dfl.iloc[:,2:3]
436434
dfl.iloc[:,1:3]
435+
dfl.iloc[4:6]
436+
437+
These are out-of-bounds selections
438+
439+
.. code-block:: python
440+
441+
dfl.iloc[[4,5,6]]
442+
IndexError: positional indexers are out-of-bounds
443+
444+
dfl.iloc[:,4]
445+
IndexError: single positional indexer is out-of-bounds
437446
438447
.. _indexing.basics.partial_setting:
439448

@@ -911,9 +920,9 @@ You can combine this with other expressions for very succinct queries:
911920
**expression itself** is evaluated in vanilla Python. For example, in the
912921
expression
913922

914-
.. code-block:: python
923+
.. code-block:: python
915924
916-
df.query('a in b + c + d')
925+
df.query('a in b + c + d')
917926
918927
``(b + c + d)`` is evaluated by ``numexpr`` and *then* the ``in``
919928
operation is evaluated in plain Python. In general, any operations that can

doc/source/v0.14.0.txt

+20-10
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,29 @@ Highlights include:
1515
API changes
1616
~~~~~~~~~~~
1717

18-
- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
18+
- ``iloc`` will now accept out-of-bounds indexers for slices, e.g. a value that exceeds the length of the object being
1919
indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
20-
values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
21-
``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned)
20+
values. A single indexer / list of indexers that is out-of-bounds will still raise
21+
``IndexError`` (:issue:`6296`, :issue:`6299`). Out-of-bounds slices could result in an empty axis (e.g. an empty DataFrame being returned)
2222

23-
.. ipython:: python
23+
.. ipython:: python
24+
25+
dfl = DataFrame(np.random.randn(5,2),columns=list('AB'))
26+
dfl
27+
dfl.iloc[:,2:3]
28+
dfl.iloc[:,1:3]
29+
dfl.iloc[4:6]
30+
31+
These are out-of-bounds selections
32+
33+
.. code-block:: python
34+
35+
dfl.iloc[[4,5,6]]
36+
IndexError: positional indexers are out-of-bounds
37+
38+
dfl.iloc[:,4]
39+
IndexError: single positional indexer is out-of-bounds
2440

25-
df = DataFrame(np.random.randn(5,2),columns=list('AB'))
26-
df
27-
df.iloc[[4,5,6]]
28-
df.iloc[4:6]
29-
df.iloc[:,2:3]
30-
df.iloc[:,1:3]
3141

3242
- The ``DataFrame.interpolate()`` ``downcast`` keyword default has been changed from ``infer`` to
3343
``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`).

pandas/core/indexing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1376,7 +1376,7 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False):
13761376
arr = np.array(key)
13771377
l = len(ax)
13781378
if len(arr) and (arr.max() >= l or arr.min() <= -l):
1379-
key = arr[(arr>-l) & (arr<l)]
1379+
raise IndexError("positional indexers are out-of-bounds")
13801380

13811381
# force an actual list
13821382
key = list(key)
@@ -1389,7 +1389,7 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False):
13891389
"non-integer key")
13901390

13911391
if key > len(ax):
1392-
raise IndexError("single indexer is out-of-bounds")
1392+
raise IndexError("single positional indexer is out-of-bounds")
13931393

13941394
return self._get_loc(key, axis=axis)
13951395

pandas/tests/test_groupby.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,7 @@ def test_frame_groupby(self):
961961
assert_frame_equal(stragged, aggregated, check_names=False)
962962

963963
# transform
964+
grouped = self.tsframe.head(30).groupby(lambda x: x.weekday())
964965
transformed = grouped.transform(lambda x: x - x.mean())
965966
self.assertEqual(len(transformed), 30)
966967
self.assertEqual(len(transformed.columns), 4)
@@ -2203,7 +2204,7 @@ def test_panel_groupby(self):
22032204
grouped = self.panel.groupby(lambda x: x.month, axis='major')
22042205
agged = grouped.mean()
22052206

2206-
self.assert_numpy_array_equal(agged.major_axis, [1, 2])
2207+
self.assert_numpy_array_equal(agged.major_axis, sorted(list(set(self.panel.major_axis.month))))
22072208

22082209
grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
22092210
axis='minor')

pandas/tests/test_indexing.py

+18-30
Original file line numberDiff line numberDiff line change
@@ -348,17 +348,24 @@ def test_iloc_exceeds_bounds(self):
348348
# iloc should allow indexers that exceed the bounds
349349
df = DataFrame(np.random.random_sample((20,5)), columns=list('ABCDE'))
350350
expected = df
351-
result = df.iloc[:,[0,1,2,3,4,5]]
352-
assert_frame_equal(result,expected)
353351

354-
result = df.iloc[[1,30]]
355-
expected = df.iloc[[1]]
356-
assert_frame_equal(result,expected)
352+
# lists of positions should raise IndexError!
353+
with tm.assertRaisesRegexp(IndexError, 'positional indexers are out-of-bounds'):
354+
df.iloc[:,[0,1,2,3,4,5]]
355+
self.assertRaises(IndexError, lambda : df.iloc[[1,30]])
356+
self.assertRaises(IndexError, lambda : df.iloc[[1,-30]])
357+
self.assertRaises(IndexError, lambda : df.iloc[[100]])
357358

358-
result = df.iloc[[1,-30]]
359-
expected = df.iloc[[1]]
360-
assert_frame_equal(result,expected)
359+
s = df['A']
360+
self.assertRaises(IndexError, lambda : s.iloc[[100]])
361+
self.assertRaises(IndexError, lambda : s.iloc[[-100]])
361362

363+
# still raise on a single indexer
364+
with tm.assertRaisesRegexp(IndexError, 'single positional indexer is out-of-bounds'):
365+
df.iloc[30]
366+
self.assertRaises(IndexError, lambda : df.iloc[-30])
367+
368+
# slices are ok
362369
result = df.iloc[:,4:10]
363370
expected = df.iloc[:,4:]
364371
assert_frame_equal(result,expected)
@@ -367,34 +374,15 @@ def test_iloc_exceeds_bounds(self):
367374
expected = df.iloc[:,-4:]
368375
assert_frame_equal(result,expected)
369376

370-
result = df.iloc[[100]]
371-
expected = DataFrame(columns=df.columns)
372-
assert_frame_equal(result,expected)
373-
374-
# still raise on a single indexer
375-
def f():
376-
df.iloc[30]
377-
self.assertRaises(IndexError, f)
378-
379-
s = df['A']
380-
result = s.iloc[[100]]
381-
expected = Series()
382-
assert_series_equal(result,expected)
383-
384-
result = s.iloc[[-100]]
385-
expected = Series()
386-
assert_series_equal(result,expected)
387-
388-
# slice
377+
# slice bounds exceeding is ok
389378
result = s.iloc[18:30]
390379
expected = s.iloc[18:]
391380
assert_series_equal(result,expected)
392381

393382
# doc example
394383
df = DataFrame(np.random.randn(5,2),columns=list('AB'))
395-
result = df.iloc[[4,5,6]]
396-
expected = df.iloc[[4]]
397-
assert_frame_equal(result,expected)
384+
self.assertRaises(IndexError, lambda : df.iloc[[4,5,6]])
385+
self.assertRaises(IndexError, lambda : df.iloc[:,4])
398386

399387
result = df.iloc[4:6]
400388
expected = df.iloc[[4]]

pandas/tseries/index.py

+1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ class DatetimeIndex(Int64Index):
144144

145145
_engine_type = _index.DatetimeEngine
146146

147+
tz = None
147148
offset = None
148149
_comparables = ['name','freqstr','tz']
149150
_allow_datetime_index_ops = True

0 commit comments

Comments
 (0)