diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4553e4804e98b..e94cb2827d0cf 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -9,7 +9,7 @@
 from pandas.core.base import PandasObject
 from pandas.core.index import Index, MultiIndex, _ensure_index, InvalidIndexError
 import pandas.core.indexing as indexing
-from pandas.core.indexing import _maybe_convert_indices
+from pandas.core.indexing import _maybe_convert_indices, _axis_slicer
 from pandas.tseries.index import DatetimeIndex
 from pandas.tseries.period import PeriodIndex
 from pandas.core.internals import BlockManager
@@ -997,10 +997,9 @@ def drop(self, labels, axis=0, level=None):
             else:
                 indexer = -axis.isin(labels)
 
-            slicer = [slice(None)] * self.ndim
-            slicer[self._get_axis_number(axis_name)] = indexer
+            slicer = _axis_slicer(indexer, axis=self._get_axis_number(axis_name), ndim=self.ndim)
 
-            return self.ix[tuple(slicer)]
+            return self.ix[slicer]
 
     def add_prefix(self, prefix):
         """
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index afbeb53d857e2..9d0bf8e9b4988 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -861,9 +861,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
                 raise
 
     def _tuplify(self, loc):
-        tup = [slice(None, None) for _ in range(self.ndim)]
-        tup[0] = loc
-        return tuple(tup)
+        return _axis_slicer(loc, axis=0, ndim=self.ndim)
 
     def _get_slice_axis(self, slice_obj, axis=0):
         obj = self.obj
@@ -1372,3 +1370,91 @@ def _maybe_droplevels(index, key):
             pass
 
     return index
+
+_missing = object()
+def _axis_slicer(indexer, stop=_missing, axis=None, ndim=None):
+    """
+    Return a slicer (tuple of slices) that selects data along
+    the proper axis. Useful for programmatically selecting data
+    via .iloc/.ix for NDFrame.
+
+    Parameters
+    ----------
+    indexer : None, int, ndarray, slice
+        Can either be valid indexer for `.iloc`, or valid `start` param for `slice`
+    stop : None, int (optional)
+        If passed in, `slice(indexer, stop)` will be used as indexer
+    axis : int
+    ndim : int (optional)
+        If passed in, slicer will always have `ndim` elements.
+
+    Notes
+    -----
+    Without ndim, the slicer will only be large enough to target the required axis.
+    Since fancy indexing normally assumes that missing indices are select all,
+    this is not required unless your function assumes otherwise
+
+    Returns
+    -------
+    slices : slicer (tuple of slices)
+        Indices that will select data along proper axis
+
+    Examples
+    --------
+    >>> _axis_slicer(10, axis=1)
+    (slice(None, None, None), 10)
+
+    >>> _axis_slicer(5, 10, axis=1)
+    (slice(None, None, None), slice(5, 10, None))
+
+    >>> df = pd.DataFrame(np.arange(30).reshape(3,10))
+    >>> df.iloc[_axis_slicer(3, 5, axis=1)]
+        3   4
+    0   3   4
+    1  13  14
+    2  23  24
+
+    >>> df.iloc[_axis_slicer(None, 2, axis=0)]
+        0   1   2   3   4   5   6   7   8   9
+    0   0   1   2   3   4   5   6   7   8   9
+    1  10  11  12  13  14  15  16  17  18  19
+
+    >>> df.iloc[_axis_slicer(np.array([1,3,5]), axis=1)]
+        1   3   5
+    0   1   3   5
+    1  11  13  15
+    2  21  23  25
+    """
+    if not isinstance(axis, int):
+        raise TypeError("axis parameter must be an int and not {axis}".format(axis=axis))
+
+    if indexer is None and stop is _missing:
+        raise ValueError("indexer can only be None when stop is provided")
+
+    if np.ndim(indexer) > 1:
+        raise ValueError("indexer.ndim cannot be >= 2")
+
+    size = axis + 1
+    if ndim is not None:
+        if axis >= ndim:
+            raise ValueError("axis must be less than ndim."
+                             " axis: {axis}, ndim: {ndim}".format(axis=axis, ndim=ndim))
+        size = ndim
+
+    slices = [slice(None) for x in range(size)]
+
+    axis_slicer = None
+    # indexers
+    if stop is _missing:
+        axis_slicer = indexer
+    else:
+        # for now, pass thru, quasi supports non-int slices
+        axis_slicer = slice(indexer, stop)
+
+    # catch all, above statements used to be more restrictive.
+    if axis_slicer is None:
+        raise ValueError("indexer:{indexer}, stop:{stop} did not create a valid "
+                         "slicer".format(indexer=indexer, stop=stop))
+    slices[axis] = axis_slicer
+
+    return tuple(slices)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 8fcb64e6d0eda..dd92cd11a9589 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -13,7 +13,7 @@
 from pandas.core.index import (Index, MultiIndex, _ensure_index,
                                _handle_legacy_indexes)
 from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices,
-                                  _length_of_indexer)
+                                  _length_of_indexer, _axis_slicer)
 import pandas.core.common as com
 from pandas.sparse.array import _maybe_to_sparse, SparseArray
 import pandas.lib as lib
@@ -975,8 +975,8 @@ def func(c, v, o):
         for m in [mask, ~mask]:
             if m.any():
                 items = self.items[m]
-                slices = [slice(None)] * cond.ndim
-                slices[axis] = self.items.get_indexer(items)
+                slices = _axis_slicer(self.items.get_indexer(items), axis=axis, ndim=cond.ndim)
+
                 r = self._try_cast_result(result[slices])
                 result_blocks.append(make_block(r.T, items, self.ref_items))
 
@@ -2295,9 +2295,7 @@ def get_slice(self, slobj, axis=0, raise_on_error=False):
     def _slice_blocks(self, slobj, axis):
         new_blocks = []
 
-        slicer = [slice(None, None) for _ in range(self.ndim)]
-        slicer[axis] = slobj
-        slicer = tuple(slicer)
+        slicer = _axis_slicer(slobj, axis=axis, ndim=self.ndim)
 
         for block in self.blocks:
             newb = make_block(block._slice(slicer),
@@ -2400,9 +2398,7 @@ def xs(self, key, axis=1, copy=True):
                                  % axis)
 
         loc = self.axes[axis].get_loc(key)
-        slicer = [slice(None, None) for _ in range(self.ndim)]
-        slicer[axis] = loc
-        slicer = tuple(slicer)
+        slicer = _axis_slicer(loc, axis=axis, ndim=self.ndim)
 
         new_axes = list(self.axes)
 
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 34b65f169b904..eb8bb6d44c857 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -14,7 +14,7 @@
 from pandas.core.categorical import Categorical
 from pandas.core.index import (Index, MultiIndex, _ensure_index,
                                _get_combined_index)
-from pandas.core.indexing import _maybe_droplevels, _is_list_like
+from pandas.core.indexing import _maybe_droplevels, _is_list_like, _axis_slicer
 from pandas.core.internals import (BlockManager,
                                    create_block_manager_from_arrays,
                                    create_block_manager_from_blocks)
@@ -319,8 +319,7 @@ def _getitem_multilevel(self, key):
         if isinstance(loc, (slice, np.ndarray)):
             new_index = info[loc]
             result_index = _maybe_droplevels(new_index, key)
-            slices = [loc] + [slice(None) for x in range(
-                self._AXIS_LEN - 1)]
+            slices = _axis_slicer(loc, axis=0, ndim=self._AXIS_LEN)
             new_values = self.values[slices]
 
             d = self._construct_axes_dict(self._AXIS_ORDERS[1:])
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 42a434c005a4c..6cf7061a1cca6 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -23,6 +23,7 @@
 from pandas.core.categorical import Categorical
 from pandas.core.common import _asarray_tuplesafe
 from pandas.core.internals import BlockManager, make_block
+from pandas.core.indexing import _axis_slicer
 from pandas.core.reshape import block2d_to_blocknd, factor_indexer
 from pandas.core.index import _ensure_index
 from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
@@ -3790,9 +3791,8 @@ def _reindex_axis(obj, axis, labels, other=None):
     if other is not None:
         labels = labels & _ensure_index(other.unique())
     if not labels.equals(ax):
-        slicer = [ slice(None, None) ] * obj.ndim
-        slicer[axis] = labels
-        obj = obj.loc[tuple(slicer)]
+        slicer = _axis_slicer(labels, axis=axis, ndim=obj.ndim)
+        obj = obj.loc[slicer]
     return obj
 
 def _get_info(info, name):
diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index fd81bd119fe09..de1ffdc63e6d7 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -13,6 +13,7 @@
 import pandas.algos as algos
 import pandas.core.common as com
 from pandas.core.common import _values_from_object
+from pandas.core.indexing import _axis_slicer
 
 from pandas.util.decorators import Substitution, Appender
 
@@ -299,17 +300,14 @@ def _center_window(rs, window, axis):
     if isinstance(rs, (Series, DataFrame, Panel)):
         rs = rs.shift(-offset, axis=axis)
     else:
-        rs_indexer = [slice(None)] * rs.ndim
-        rs_indexer[axis] = slice(None, -offset)
+        rs_indexer = _axis_slicer(slice(None, -offset), axis=axis, ndim=rs.ndim)
 
-        lead_indexer = [slice(None)] * rs.ndim
-        lead_indexer[axis] = slice(offset, None)
+        lead_indexer = _axis_slicer(slice(offset, None), axis=axis, ndim=rs.ndim)
 
-        na_indexer = [slice(None)] * rs.ndim
-        na_indexer[axis] = slice(-offset, None)
+        na_indexer = _axis_slicer(slice(-offset, None), axis=axis, ndim=rs.ndim)
 
-        rs[tuple(rs_indexer)] = np.copy(rs[tuple(lead_indexer)])
-        rs[tuple(na_indexer)] = np.nan
+        rs[rs_indexer] = np.copy(rs[lead_indexer])
+        rs[na_indexer] = np.nan
     return rs
 
 
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
index 837acb90407ea..51a81b06531ca 100644
--- a/pandas/tests/test_indexing.py
+++ b/pandas/tests/test_indexing.py
@@ -1821,6 +1821,96 @@ def check_slicing_positional(index):
         #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0])
         #self.assertRaises(TypeError, lambda : s.iloc[2:5.0])
 
+    def test_axis_slicer(self):
+        from pandas.core.indexing import _axis_slicer
+
+        # axis check
+        self.assertRaises(TypeError, lambda : _axis_slicer(0, axis='items'))
+        # ndim check
+        self.assertRaises(Exception, lambda : _axis_slicer(np.arange(100).reshape(10,10), axis=1))
+
+        self.assertRaises(Exception, lambda : _axis_slicer(None, axis=1))
+
+        # certain core parts expect a slice(None, None) for every axis
+        slicer = _axis_slicer(0, axis=0, ndim=3)
+        assert len(slicer) == 3
+
+        slicer = _axis_slicer(0, axis=2)
+        assert len(slicer) == 3
+
+        slicer = _axis_slicer(0, axis=1)
+        assert len(slicer) == 2
+
+        slicer = _axis_slicer(0, axis=1, ndim=2)
+        assert len(slicer) == 2
+
+        # axis >= ndim
+        self.assertRaises(Exception, lambda : _axis_slicer(0, axis=1, ndim=1))
+
+        # indexers
+        indexer = np.array([0, 4, 10])
+        slicer = _axis_slicer(indexer, axis=0, ndim=3)
+        assert_array_equal(indexer, slicer[0])
+
+        indexer = np.array([0, 4, 10])
+        slicer = _axis_slicer(indexer, axis=1)
+        assert_array_equal(indexer, slicer[1])
+
+        # slice
+        indexer = slice(10, 20)
+        slicer = _axis_slicer(indexer, axis=1)
+        assert slicer[1] == indexer
+
+        # single
+        slicer = _axis_slicer(3, axis=1)
+        assert slicer[1] == 3
+
+        # start/stop
+        # [:10]
+        slicer = _axis_slicer(None, 10, axis=1)
+        assert slicer[1] == slice(None, 10)
+
+        # [5:10]
+        slicer = _axis_slicer(5, 10, axis=1)
+        assert slicer[1] == slice(5, 10)
+
+        # [5:-10]
+        slicer = _axis_slicer(5, -10, axis=1)
+        assert slicer[1] == slice(5, -10)
+
+        df = pd.DataFrame(np.arange(100).reshape(10,10))
+
+        indexer = np.array([0, 4, 3])
+        correct = df.iloc[:, indexer]
+        test = df.iloc[_axis_slicer(indexer, axis=1)]
+        assert_frame_equal(test, correct)
+
+        indexer = 0
+        correct = df.iloc[:, indexer]
+        test = df.iloc[_axis_slicer(indexer, axis=1)]
+        assert_series_equal(test, correct)
+
+        #[-3:]
+        indexer = slice(-3, None)
+        correct = df.iloc[:, indexer]
+        test = df.iloc[_axis_slicer(indexer, axis=1)]
+        assert_frame_equal(test, correct)
+
+        #[:3]
+        indexer = slice(3)
+        correct = df.iloc[:, indexer]
+        test = df.iloc[_axis_slicer(indexer, axis=1)]
+        assert_frame_equal(test, correct)
+
+        #[-9:-5]
+        correct = df.iloc[:, slice(-9, -5)]
+        test = df.iloc[_axis_slicer(-9, -5, axis=1)]
+        assert_frame_equal(test, correct)
+
+        #[:5]
+        correct = df.iloc[slice(None, 5)]
+        test = df.iloc[_axis_slicer(None, 5, axis=0)]
+        assert_frame_equal(test, correct)
 
 if __name__ == '__main__':
     import nose