-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Axis slicer #4994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Axis slicer #4994
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -861,9 +861,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): | |
raise | ||
|
||
def _tuplify(self, loc): | ||
tup = [slice(None, None) for _ in range(self.ndim)] | ||
tup[0] = loc | ||
return tuple(tup) | ||
return _axis_slicer(loc, axis=0, ndim=self.ndim) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not just replace all calls to [`_tuplify` with `_axis_slicer`]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Specifically, you just need to change pandas/core/indexing line 135 and line 239. |
||
|
||
def _get_slice_axis(self, slice_obj, axis=0): | ||
obj = self.obj | ||
|
@@ -1372,3 +1370,91 @@ def _maybe_droplevels(index, key): | |
pass | ||
|
||
return index | ||
|
||
_missing = object() | ||
def _axis_slicer(indexer, stop=_missing, axis=None, ndim=None): | ||
""" | ||
Return a slicer (tuple of slices) that selects data along | ||
the proper axis. Useful for programatically selecting data | ||
via .iloc/.ix for NDFrame. | ||
|
||
Parameters | ||
---------- | ||
indexer : None, int, ndarray, slice | ||
Can either be valid indexer for `.iloc`, or valid `start` param for `slice` | ||
stop : None, int (optional) | ||
If passed in, `slice(indexer, stop)` will be used as indexer | ||
axis : int | ||
ndim : int (optional) | ||
If pass in, slicer will always have `ndim` elements. | ||
|
||
Notes | ||
----- | ||
Without ndim, the slicer will only large enough to target the required axis. | ||
Since fancy indexing normally assumes that missing indices are select all, | ||
this is not required unless your function assumes otherwise | ||
|
||
Returns | ||
------- | ||
slices : slicer (tuple of slices) | ||
Indices that will select data along proper axis | ||
|
||
Examples | ||
-------- | ||
>>> _axis_slicer(10, axis=1) | ||
(slice(None, None, None), 10) | ||
|
||
>>> _axis_slicer(5, 10, axis=1) | ||
(slice(None, None, None), slice(5, 10, None)) | ||
|
||
>>> df = pd.DataFrame(np.arange(30).reshape(3,10)) | ||
>>> df.iloc[_axis_slicer(3, 5, axis=1)] | ||
3 4 | ||
0 3 4 | ||
1 13 14 | ||
2 23 24 | ||
|
||
>>> df.iloc[_axis_slicer(None, 2, axis=0)] | ||
0 1 2 3 4 5 6 7 8 9 | ||
0 0 1 2 3 4 5 6 7 8 9 | ||
1 10 11 12 13 14 15 16 17 18 19 | ||
|
||
>>> df.iloc[_axis_slicer(np.array([1,3,5]), axis=1)] | ||
1 3 5 | ||
0 1 3 5 | ||
1 11 13 15 | ||
2 21 23 25 | ||
""" | ||
if not isinstance(axis, int): | ||
raise TypeError("axis paramter must be an int and not {axis}".format(axis=axis)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please change this to |
||
|
||
if indexer is None and stop is _missing: | ||
raise Exception("indexer can only be None when stop is missing") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First, I think your message is wrong. Don't you mean that your indexer can only be None if stop is passed? ditto, make this an assert statement. (and then you should add a note to the docstring if it's not already there). Also, probably clearer to say:
|
||
|
||
if np.ndim(indexer) > 1: | ||
raise Exception("indexer.ndim cannot be >= 2") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please change to |
||
|
||
size = axis + 1 | ||
if ndim: | ||
if axis >= ndim: | ||
raise Exception("axis cannot be greater than ndim." | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. assert axis < ndim, ("Axis must be less than ndim."
" axis: %r, ndim: %r" % (axis, ndim)) |
||
" axis: {axis}, ndim: {ndim}".format(axis=axis,ndim=ndim)) | ||
size = ndim | ||
|
||
slices = [slice(None) for x in range(size)] | ||
|
||
axis_slicer = None | ||
# indexers | ||
if stop is _missing: | ||
axis_slicer = indexer | ||
else: | ||
# for now, pass thru, quasi supports non-int slices | ||
axis_slicer = slice(indexer, stop) | ||
|
||
# catch all, above statements used to be more restrictive. | ||
if axis_slicer is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again, just make this an assert. Fine to use format string if you want. |
||
raise Exception("indexer:{indexer}, stop:{stop} did not create a valid " | ||
"slicer".format(indexer=indexer, stop=stop)) | ||
slices[axis] = axis_slicer | ||
|
||
return tuple(slices) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
from pandas.core.categorical import Categorical | ||
from pandas.core.index import (Index, MultiIndex, _ensure_index, | ||
_get_combined_index) | ||
from pandas.core.indexing import _maybe_droplevels, _is_list_like | ||
from pandas.core.indexing import _maybe_droplevels, _is_list_like, _axis_slicer | ||
from pandas.core.internals import (BlockManager, | ||
create_block_manager_from_arrays, | ||
create_block_manager_from_blocks) | ||
|
@@ -319,8 +319,7 @@ def _getitem_multilevel(self, key): | |
if isinstance(loc, (slice, np.ndarray)): | ||
new_index = info[loc] | ||
result_index = _maybe_droplevels(new_index, key) | ||
slices = [loc] + [slice(None) for x in range( | ||
self._AXIS_LEN - 1)] | ||
slices = _axis_slicer(loc, axis=0, ndim=self._AXIS_LEN) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. are you sure that […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I realize I misread your function - it's that the indexer can't have a dimension of 2 - sorry about that! |
||
new_values = self.values[slices] | ||
|
||
d = self._construct_axes_dict(self._AXIS_ORDERS[1:]) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1821,6 +1821,96 @@ def check_slicing_positional(index): | |
#self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) | ||
#self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) | ||
|
||
def test_axis_slicer(self): | ||
from pandas.core.indexing import _axis_slicer | ||
|
||
# axis check | ||
self.assertRaises(TypeError, lambda : _axis_slicer(0, axis='items')) | ||
# ndim check | ||
self.assertRaises(Exception, lambda : _axis_slicer(np.arange(100).reshape(10,10), axis=1)) | ||
|
||
self.assertRaises(Exception, lambda : _axis_slicer(None, axis=1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and, clearly, change all of these assertRaises to |
||
|
||
# certain core parts expect a slice(None, None) for every axis | ||
slicer = _axis_slicer(0, axis=0, ndim=3) | ||
assert len(slicer) == 3 | ||
|
||
slicer = _axis_slicer(0, axis=2) | ||
assert len(slicer) == 3 | ||
|
||
slicer = _axis_slicer(0, axis=1) | ||
assert len(slicer) == 2 | ||
|
||
slicer = _axis_slicer(0, axis=1, ndim=2) | ||
assert len(slicer) == 2 | ||
|
||
# axis >= ndim | ||
self.assertRaises(Exception, lambda : _axis_slicer(0, axis=1, ndim=1)) | ||
|
||
# indexers | ||
indexer = np.array([0, 4, 10]) | ||
slicer = _axis_slicer(indexer, axis=0, ndim=3) | ||
assert_array_equal(indexer, slicer[0]) | ||
|
||
indexer = np.array([0, 4, 10]) | ||
slicer = _axis_slicer(indexer, axis=1) | ||
assert_array_equal(indexer, slicer[1]) | ||
|
||
# slice | ||
indexer = slice(10, 20) | ||
slicer = _axis_slicer(indexer, axis=1) | ||
assert_array_equal(indexer, slicer[1]) | ||
|
||
# single | ||
slicer = _axis_slicer(3, axis=1) | ||
assert slicer[1] == 3 | ||
|
||
# start/stop | ||
# [:10] | ||
slicer = _axis_slicer(None, 10, axis=1) | ||
assert slicer[1] == slice(None, 10) | ||
|
||
# [5:10] | ||
slicer = _axis_slicer(5, 10, axis=1) | ||
assert slicer[1] == slice(5, 10) | ||
|
||
# [5:-10] | ||
slicer = _axis_slicer(5, -10, axis=1) | ||
assert slicer[1] == slice(5, -10) | ||
|
||
df = pd.DataFrame(np.arange(100).reshape(10,10)) | ||
|
||
indexer = np.array([0, 4, 3]) | ||
correct = df.iloc[:, indexer] | ||
test = df.iloc[_axis_slicer(indexer, axis=1)] | ||
assert_frame_equal(test, correct) | ||
|
||
indexer = 0 | ||
correct = df.iloc[:, indexer] | ||
test = df.iloc[_axis_slicer(indexer, axis=1)] | ||
assert_series_equal(test, df.iloc[:, indexer]) | ||
|
||
#[:-3] | ||
indexer = slice(-3, None) | ||
correct = df.iloc[:, indexer] | ||
test = df.iloc[_axis_slicer(indexer, axis=1)] | ||
assert_frame_equal(test, correct) | ||
|
||
#[:3] | ||
indexer = slice(3) | ||
correct = df.iloc[:, indexer] | ||
test = df.iloc[_axis_slicer(indexer, axis=1)] | ||
assert_frame_equal(test, correct) | ||
|
||
#[-9,-5] | ||
correct = df.iloc[:, slice(-9, -5)] | ||
test = df.iloc[_axis_slicer(-9, -5, axis=1)] | ||
assert_frame_equal(test, correct) | ||
|
||
#[:5] | ||
correct = df.iloc[slice(None, 5)] | ||
test = df.iloc[_axis_slicer(None, 5, axis=0)] | ||
assert_frame_equal(test, correct) | ||
|
||
if __name__ == '__main__': | ||
import nose | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you make sure that indexer and ndim will always be correct for passing here? I know `_get_axis_number` checks and raises an appropriate error.