From 420fae89ae6d74a575db7102c0d62a0a27f5dbb0 Mon Sep 17 00:00:00 2001 From: jreback Date: Wed, 2 Oct 2013 21:55:53 -0400 Subject: [PATCH] BUG: non-unique indexing in a Panel (GH4960) TST: update Panel tests to iterate by position rather than location (for matching non-unique) --- doc/source/release.rst | 1 + pandas/core/indexing.py | 3 +- pandas/core/internals.py | 9 ++++-- pandas/core/panel.py | 29 ++++++++++++++++--- pandas/sparse/panel.py | 15 ++++++++++ pandas/tests/test_panel.py | 59 ++++++++++++++++++++++++++++++++++++++ pandas/util/testing.py | 14 +++++---- 7 files changed, 117 insertions(+), 13 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 4f4681b112664..9b755c9ad2cda 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -290,6 +290,7 @@ API Changes call with additional keyword args (:issue:`4435`) - Provide __dir__ method (and local context) for tab completion / remove ipython completers code (:issue:`4501`) + - Support non-unique axes in a Panel via indexing operations (:issue:`4960`) Internal Refactoring diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0d19736ed8083..7502b3898d7fb 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -623,8 +623,9 @@ def _getitem_lowerdim(self, tup): # might have been a MultiIndex elif section.ndim == self.ndim: + new_key = tup[:i] + (_NS,) + tup[i + 1:] - # new_key = tup[:i] + tup[i+1:] + else: new_key = tup[:i] + tup[i + 1:] diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6fddc44d7552e..3b451e2a3b196 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2413,12 +2413,17 @@ def _interleave(self, items): return result - def xs(self, key, axis=1, copy=True): + def xs(self, key, axis=1, copy=True, takeable=False): if axis < 1: raise AssertionError('Can only take xs across axis >= 1, got %d' % axis) - loc = self.axes[axis].get_loc(key) + # take by position + if takeable: + loc = key + else: + loc = self.axes[axis].get_loc(key) + slicer = [slice(None, None) for _ in range(self.ndim)] slicer[axis] = loc slicer = tuple(slicer) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b1752f94b8d97..1185e9514f7fc 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -504,6 +504,15 @@ def set_value(self, *args): return result.set_value(*args) def _box_item_values(self, key, values): + if self.ndim == values.ndim: + result = self._constructor(values) + + # a dup selection will yield a full ndim + if result._get_axis(0).is_unique: + result = result[key] + + return result + d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]) return self._constructor_sliced(values, **d) @@ -745,15 +754,27 @@ def xs(self, key, axis=1, copy=True): _xs = xs def _ixs(self, i, axis=0): - # for compatibility with .ix indexing - # Won't work with hierarchical indexing yet + """ + i : int, slice, or sequence of integers + axis : int + """ + key = self._get_axis(axis)[i] # xs cannot handle a non-scalar key, so just reindex here if _is_list_like(key): - return self.reindex(**{self._get_axis_name(axis): key}) + indexer = { self._get_axis_name(axis): key } + return self.reindex(**indexer) + + # a reduction + if axis == 0: + values = self._data.iget(i) + return self._box_item_values(key,values) - return self.xs(key, axis=axis) + # xs by position + self._consolidate_inplace() + new_data = self._data.xs(i, axis=axis, copy=True, takeable=True) + return self._construct_return_type(new_data) def groupby(self, function, axis='major'): """ diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index dd0204f11edfb..65a24dc1bf25f 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -172,6 +172,21 @@ def _set_items(self, new_items): # DataFrame's columns / "items" minor_axis = SparsePanelAxis('_minor_axis', 'columns') + def _ixs(self, i, axis=0): + """ + for compat as we don't support Block Manager here + i : int, slice, or sequence of integers + axis : int + """ + + key = self._get_axis(axis)[i] + + # xs cannot handle a non-scalar key, so just reindex here + if com.is_list_like(key): + return self.reindex(**{self._get_axis_name(axis): key}) + + return self.xs(key, axis=axis) + def _get_item_cache(self, key): return self._frames[key] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 5d3f7b350250d..5c94f378b88ea 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1335,6 +1335,65 @@ def test_to_panel_duplicates(self): idf = df.set_index(['a', 'b']) assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel) + def test_panel_dups(self): + + # GH 4960 + # duplicates in an index + + # items + data = np.random.randn(5, 100, 5) + no_dup_panel = Panel(data, items=list("ABCDE")) + panel = Panel(data, items=list("AACDE")) + + expected = no_dup_panel['A'] + result = panel.iloc[0] + assert_frame_equal(result, expected) + + expected = no_dup_panel['E'] + result = panel.loc['E'] + assert_frame_equal(result, expected) + + expected = no_dup_panel.loc[['A','B']] + expected.items = ['A','A'] + result = panel.loc['A'] + assert_panel_equal(result, expected) + + # major + data = np.random.randn(5, 5, 5) + no_dup_panel = Panel(data, major_axis=list("ABCDE")) + panel = Panel(data, major_axis=list("AACDE")) + + expected = no_dup_panel.loc[:,'A'] + result = panel.iloc[:,0] + assert_frame_equal(result, expected) + + expected = no_dup_panel.loc[:,'E'] + result = panel.loc[:,'E'] + assert_frame_equal(result, expected) + + expected = no_dup_panel.loc[:,['A','B']] + expected.major_axis = ['A','A'] + result = panel.loc[:,'A'] + assert_panel_equal(result, expected) + + # minor + data = np.random.randn(5, 100, 5) + no_dup_panel = Panel(data, minor_axis=list("ABCDE")) + panel = Panel(data, minor_axis=list("AACDE")) + + expected = no_dup_panel.loc[:,:,'A'] + result = panel.iloc[:,:,0] + assert_frame_equal(result, expected) + + expected = no_dup_panel.loc[:,:,'E'] + result = panel.loc[:,:,'E'] + assert_frame_equal(result, expected) + + expected = no_dup_panel.loc[:,:,['A','B']] + expected.minor_axis = ['A','A'] + result = panel.loc[:,:,'A'] + assert_panel_equal(result, expected) + def test_filter(self): pass diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b25f85c961798..946a4d94b6045 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -357,12 +357,14 @@ def assert_panelnd_equal(left, right, right_ind = getattr(right, axis) assert_index_equal(left_ind, right_ind) - for col, series in compat.iteritems(left): - assert col in right, "non-matching column '%s'" % col - assert_func(series, right[col], check_less_precise=check_less_precise) - - for col in right: - assert col in left + for i, item in enumerate(left._get_axis(0)): + assert item in right, "non-matching item (right) '%s'" % item + litem = left.iloc[i] + ritem = right.iloc[i] + assert_func(litem, ritem, check_less_precise=check_less_precise) + + for i, item in enumerate(right._get_axis(0)): + assert item in left, "non-matching item (left) '%s'" % item # TODO: strangely check_names fails in py3 ? _panel_frame_equal = partial(assert_frame_equal, check_names=False)