Skip to content

Axis slicer #4994

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas.core.base import PandasObject
from pandas.core.index import Index, MultiIndex, _ensure_index, InvalidIndexError
import pandas.core.indexing as indexing
from pandas.core.indexing import _maybe_convert_indices
from pandas.core.indexing import _maybe_convert_indices, _axis_slicer
from pandas.tseries.index import DatetimeIndex
from pandas.tseries.period import PeriodIndex
from pandas.core.internals import BlockManager
Expand Down Expand Up @@ -997,10 +997,9 @@ def drop(self, labels, axis=0, level=None):
else:
indexer = -axis.isin(labels)

slicer = [slice(None)] * self.ndim
slicer[self._get_axis_number(axis_name)] = indexer
slicer = _axis_slicer(indexer, axis=self._get_axis_number(axis_name), ndim=self.ndim)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make sure that indexer and ndim will always be correct when passed here? I know _get_axis_number checks and raises an appropriate error.


return self.ix[tuple(slicer)]
return self.ix[slicer]

def add_prefix(self, prefix):
"""
Expand Down
92 changes: 89 additions & 3 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,9 +861,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
raise

def _tuplify(self, loc):
tup = [slice(None, None) for _ in range(self.ndim)]
tup[0] = loc
return tuple(tup)
return _axis_slicer(loc, axis=0, ndim=self.ndim)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just replace all calls to _tuplify with _axis_slicer(loc, axis=0, ndim=self.ndim)? There is little point in having a one-liner function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically, you just need to change lines 135 and 239 of pandas/core/indexing.py.


def _get_slice_axis(self, slice_obj, axis=0):
obj = self.obj
Expand Down Expand Up @@ -1372,3 +1370,91 @@ def _maybe_droplevels(index, key):
pass

return index

_missing = object()
def _axis_slicer(indexer, stop=_missing, axis=None, ndim=None):
"""
Return a slicer (tuple of slices) that selects data along
the proper axis. Useful for programmatically selecting data
via .iloc/.ix for NDFrame.

Parameters
----------
indexer : None, int, ndarray, slice
Can either be valid indexer for `.iloc`, or valid `start` param for `slice`
stop : None, int (optional)
If passed in, `slice(indexer, stop)` will be used as indexer
axis : int
ndim : int (optional)
If passed in, slicer will always have `ndim` elements.

Notes
-----
Without ndim, the slicer will only be large enough to target the required axis.
Since fancy indexing normally assumes that missing indices select everything,
ndim is not required unless your function assumes otherwise.

Returns
-------
slices : slicer (tuple of slices)
Indices that will select data along proper axis

Examples
--------
>>> _axis_slicer(10, axis=1)
(slice(None, None, None), 10)

>>> _axis_slicer(5, 10, axis=1)
(slice(None, None, None), slice(5, 10, None))

>>> df = pd.DataFrame(np.arange(30).reshape(3,10))
>>> df.iloc[_axis_slicer(3, 5, axis=1)]
3 4
0 3 4
1 13 14
2 23 24

>>> df.iloc[_axis_slicer(None, 2, axis=0)]
0 1 2 3 4 5 6 7 8 9
0 0 1 2 3 4 5 6 7 8 9
1 10 11 12 13 14 15 16 17 18 19

>>> df.iloc[_axis_slicer(np.array([1,3,5]), axis=1)]
1 3 5
0 1 3 5
1 11 13 15
2 21 23 25
"""
if not isinstance(axis, int):
raise TypeError("axis paramter must be an int and not {axis}".format(axis=axis))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please change this to assert isinstance(axis, int), "Expected 'axis' to be an int. Got: %r" % axis (since we're assuming that this check is already made by callers, a failure here is a completely internal error).


if indexer is None and stop is _missing:
raise Exception("indexer can only be None when stop is missing")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First, I think your message is wrong. Don't you mean that your indexer can only be None if stop is passed? Ditto: make this an assert statement (and then add a note to the docstring if it's not already there). Also, it is probably clearer to say:

assert not (indexer is None and stop is _missing), "Must pass stop if indexer is None."


if np.ndim(indexer) > 1:
raise Exception("indexer.ndim cannot be >= 2")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please change to assert np.ndim(indexer) < 2, "indexer.ndim must be < 2. Was %d" % np.ndim(indexer) (it won't evaluate the message part of the statement if the assertion passes).


size = axis + 1
if ndim:
if axis >= ndim:
raise Exception("axis cannot be greater than ndim."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert axis < ndim, ("Axis must be less than ndim."
                     " axis: %r, ndim: %r" % (axis, ndim))

" axis: {axis}, ndim: {ndim}".format(axis=axis,ndim=ndim))
size = ndim

slices = [slice(None) for x in range(size)]

axis_slicer = None
# indexers
if stop is _missing:
axis_slicer = indexer
else:
# for now, pass thru, quasi supports non-int slices
axis_slicer = slice(indexer, stop)

# catch all, above statements used to be more restrictive.
if axis_slicer is None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, just make this an assert. It's fine to use a format string if you want.

raise Exception("indexer:{indexer}, stop:{stop} did not create a valid "
"slicer".format(indexer=indexer, stop=stop))
slices[axis] = axis_slicer

return tuple(slices)
14 changes: 5 additions & 9 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_handle_legacy_indexes)
from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices,
_length_of_indexer)
_length_of_indexer, _axis_slicer)
import pandas.core.common as com
from pandas.sparse.array import _maybe_to_sparse, SparseArray
import pandas.lib as lib
Expand Down Expand Up @@ -975,8 +975,8 @@ def func(c, v, o):
for m in [mask, ~mask]:
if m.any():
items = self.items[m]
slices = [slice(None)] * cond.ndim
slices[axis] = self.items.get_indexer(items)
slices = _axis_slicer(self.items.get_indexer(items), axis=axis, ndim=cond.ndim)

r = self._try_cast_result(result[slices])
result_blocks.append(make_block(r.T, items, self.ref_items))

Expand Down Expand Up @@ -2295,9 +2295,7 @@ def get_slice(self, slobj, axis=0, raise_on_error=False):
def _slice_blocks(self, slobj, axis):
new_blocks = []

slicer = [slice(None, None) for _ in range(self.ndim)]
slicer[axis] = slobj
slicer = tuple(slicer)
slicer = _axis_slicer(slobj, axis=axis, ndim=self.ndim)

for block in self.blocks:
newb = make_block(block._slice(slicer),
Expand Down Expand Up @@ -2400,9 +2398,7 @@ def xs(self, key, axis=1, copy=True):
% axis)

loc = self.axes[axis].get_loc(key)
slicer = [slice(None, None) for _ in range(self.ndim)]
slicer[axis] = loc
slicer = tuple(slicer)
slicer = _axis_slicer(loc, axis=axis, ndim=self.ndim)

new_axes = list(self.axes)

Expand Down
5 changes: 2 additions & 3 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pandas.core.categorical import Categorical
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_get_combined_index)
from pandas.core.indexing import _maybe_droplevels, _is_list_like
from pandas.core.indexing import _maybe_droplevels, _is_list_like, _axis_slicer
from pandas.core.internals import (BlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks)
Expand Down Expand Up @@ -319,8 +319,7 @@ def _getitem_multilevel(self, key):
if isinstance(loc, (slice, np.ndarray)):
new_index = info[loc]
result_index = _maybe_droplevels(new_index, key)
slices = [loc] + [slice(None) for x in range(
self._AXIS_LEN - 1)]
slices = _axis_slicer(loc, axis=0, ndim=self._AXIS_LEN)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure that _AXIS_LEN is always going to be < 2 here? Can't you have things like 4D+ panels? @jreback?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I realize I misread your function: it's the indexer that can't have 2 or more dimensions. Sorry about that!

new_values = self.values[slices]

d = self._construct_axes_dict(self._AXIS_ORDERS[1:])
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pandas.core.categorical import Categorical
from pandas.core.common import _asarray_tuplesafe
from pandas.core.internals import BlockManager, make_block
from pandas.core.indexing import _axis_slicer
from pandas.core.reshape import block2d_to_blocknd, factor_indexer
from pandas.core.index import _ensure_index
from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
Expand Down Expand Up @@ -3790,9 +3791,8 @@ def _reindex_axis(obj, axis, labels, other=None):
if other is not None:
labels = labels & _ensure_index(other.unique())
if not labels.equals(ax):
slicer = [ slice(None, None) ] * obj.ndim
slicer[axis] = labels
obj = obj.loc[tuple(slicer)]
slicer = _axis_slicer(labels, axis=axis, ndim=obj.ndim)
obj = obj.loc[slicer]
return obj

def _get_info(info, name):
Expand Down
14 changes: 6 additions & 8 deletions pandas/stats/moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pandas.algos as algos
import pandas.core.common as com
from pandas.core.common import _values_from_object
from pandas.core.indexing import _axis_slicer

from pandas.util.decorators import Substitution, Appender

Expand Down Expand Up @@ -299,17 +300,14 @@ def _center_window(rs, window, axis):
if isinstance(rs, (Series, DataFrame, Panel)):
rs = rs.shift(-offset, axis=axis)
else:
rs_indexer = [slice(None)] * rs.ndim
rs_indexer[axis] = slice(None, -offset)
rs_indexer = _axis_slicer(slice(None, -offset), axis=axis, ndim=rs.ndim)

lead_indexer = [slice(None)] * rs.ndim
lead_indexer[axis] = slice(offset, None)
lead_indexer = _axis_slicer(slice(offset, None), axis=axis, ndim=rs.ndim)

na_indexer = [slice(None)] * rs.ndim
na_indexer[axis] = slice(-offset, None)
na_indexer = _axis_slicer(slice(-offset, None), axis=axis, ndim=rs.ndim)

rs[tuple(rs_indexer)] = np.copy(rs[tuple(lead_indexer)])
rs[tuple(na_indexer)] = np.nan
rs[rs_indexer] = np.copy(rs[tuple(lead_indexer)])
rs[na_indexer] = np.nan
return rs


Expand Down
90 changes: 90 additions & 0 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1821,6 +1821,96 @@ def check_slicing_positional(index):
#self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0])
#self.assertRaises(TypeError, lambda : s.iloc[2:5.0])

def test_axis_slicer(self):
from pandas.core.indexing import _axis_slicer

# axis check
self.assertRaises(TypeError, lambda : _axis_slicer(0, axis='items'))
# ndim check
self.assertRaises(Exception, lambda : _axis_slicer(np.arange(100).reshape(10,10), axis=1))

self.assertRaises(Exception, lambda : _axis_slicer(None, axis=1))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And, clearly, change all of these assertRaises calls to expect AssertionError, not Exception.


# certain core parts expect a slice(None, None) for every axis
slicer = _axis_slicer(0, axis=0, ndim=3)
assert len(slicer) == 3

slicer = _axis_slicer(0, axis=2)
assert len(slicer) == 3

slicer = _axis_slicer(0, axis=1)
assert len(slicer) == 2

slicer = _axis_slicer(0, axis=1, ndim=2)
assert len(slicer) == 2

# axis >= ndim
self.assertRaises(Exception, lambda : _axis_slicer(0, axis=1, ndim=1))

# indexers
indexer = np.array([0, 4, 10])
slicer = _axis_slicer(indexer, axis=0, ndim=3)
assert_array_equal(indexer, slicer[0])

indexer = np.array([0, 4, 10])
slicer = _axis_slicer(indexer, axis=1)
assert_array_equal(indexer, slicer[1])

# slice
indexer = slice(10, 20)
slicer = _axis_slicer(indexer, axis=1)
assert_array_equal(indexer, slicer[1])

# single
slicer = _axis_slicer(3, axis=1)
assert slicer[1] == 3

# start/stop
# [:10]
slicer = _axis_slicer(None, 10, axis=1)
assert slicer[1] == slice(None, 10)

# [5:10]
slicer = _axis_slicer(5, 10, axis=1)
assert slicer[1] == slice(5, 10)

# [5:-10]
slicer = _axis_slicer(5, -10, axis=1)
assert slicer[1] == slice(5, -10)

df = pd.DataFrame(np.arange(100).reshape(10,10))

indexer = np.array([0, 4, 3])
correct = df.iloc[:, indexer]
test = df.iloc[_axis_slicer(indexer, axis=1)]
assert_frame_equal(test, correct)

indexer = 0
correct = df.iloc[:, indexer]
test = df.iloc[_axis_slicer(indexer, axis=1)]
assert_series_equal(test, df.iloc[:, indexer])

#[-3:]
indexer = slice(-3, None)
correct = df.iloc[:, indexer]
test = df.iloc[_axis_slicer(indexer, axis=1)]
assert_frame_equal(test, correct)

#[:3]
indexer = slice(3)
correct = df.iloc[:, indexer]
test = df.iloc[_axis_slicer(indexer, axis=1)]
assert_frame_equal(test, correct)

#[-9:-5]
correct = df.iloc[:, slice(-9, -5)]
test = df.iloc[_axis_slicer(-9, -5, axis=1)]
assert_frame_equal(test, correct)

#[:5]
correct = df.iloc[slice(None, 5)]
test = df.iloc[_axis_slicer(None, 5, axis=0)]
assert_frame_equal(test, correct)

if __name__ == '__main__':
import nose
Expand Down