From 420fae89ae6d74a575db7102c0d62a0a27f5dbb0 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Wed, 2 Oct 2013 21:55:53 -0400
Subject: [PATCH] BUG: non-unique indexing in a Panel (GH4960)

TST: update Panel test helpers to compare items by position rather than by label (needed to match non-unique axes)
---
 doc/source/release.rst     |  1 +
 pandas/core/indexing.py    |  3 +-
 pandas/core/internals.py   |  9 ++++--
 pandas/core/panel.py       | 29 ++++++++++++++++---
 pandas/sparse/panel.py     | 15 ++++++++++
 pandas/tests/test_panel.py | 59 ++++++++++++++++++++++++++++++++++++++
 pandas/util/testing.py     | 14 +++++----
 7 files changed, 117 insertions(+), 13 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 4f4681b112664..9b755c9ad2cda 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -290,6 +290,7 @@ API Changes
     call with additional keyword args (:issue:`4435`)
   - Provide __dir__ method (and local context) for tab completion / remove ipython completers code
     (:issue:`4501`)
+  - Support non-unique axes in a Panel via indexing operations (:issue:`4960`)
 
 
 Internal Refactoring
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 0d19736ed8083..7502b3898d7fb 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -623,8 +623,9 @@ def _getitem_lowerdim(self, tup):
 
                 # might have been a MultiIndex
                 elif section.ndim == self.ndim:
+
                     new_key = tup[:i] + (_NS,) + tup[i + 1:]
-                    # new_key = tup[:i] + tup[i+1:]
+
                 else:
                     new_key = tup[:i] + tup[i + 1:]
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 6fddc44d7552e..3b451e2a3b196 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -2413,12 +2413,17 @@ def _interleave(self, items):
 
         return result
 
-    def xs(self, key, axis=1, copy=True):
+    def xs(self, key, axis=1, copy=True, takeable=False):
         if axis < 1:
             raise AssertionError('Can only take xs across axis >= 1, got %d'
                                  % axis)
 
-        loc = self.axes[axis].get_loc(key)
+        # takeable: key is already a positional indexer, so skip the label lookup
+        if takeable:
+            loc = key
+        else:
+            loc = self.axes[axis].get_loc(key)
+
         slicer = [slice(None, None) for _ in range(self.ndim)]
         slicer[axis] = loc
         slicer = tuple(slicer)
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index b1752f94b8d97..1185e9514f7fc 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -504,6 +504,15 @@ def set_value(self, *args):
             return result.set_value(*args)
 
     def _box_item_values(self, key, values):
+        if self.ndim == values.ndim:
+            result = self._constructor(values)
+
+            # a dup selection yields full-ndim values; only index by key if unique
+            if result._get_axis(0).is_unique:
+                result = result[key]
+
+            return result
+
         d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])
         return self._constructor_sliced(values, **d)
 
@@ -745,15 +754,27 @@ def xs(self, key, axis=1, copy=True):
     _xs = xs
 
     def _ixs(self, i, axis=0):
-        # for compatibility with .ix indexing
-        # Won't work with hierarchical indexing yet
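+        """Select from this object by integer position along ``axis``.
+        i : int, slice, or sequence of integers (positions, not labels)
+        axis : int, the axis number to select along
+        """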
+
         key = self._get_axis(axis)[i]
 
         # xs cannot handle a non-scalar key, so just reindex here
         if _is_list_like(key):
-            return self.reindex(**{self._get_axis_name(axis): key})
+            indexer = { self._get_axis_name(axis): key }
+            return self.reindex(**indexer)
+
+        # a reduction along axis 0: fetch the values by position and box them
+        if axis == 0:
+            values = self._data.iget(i)
+            return self._box_item_values(key,values)
 
-        return self.xs(key, axis=axis)
+        # other axes: positional cross-section (takeable=True bypasses labels)
+        self._consolidate_inplace()
+        new_data = self._data.xs(i, axis=axis, copy=True, takeable=True)
+        return self._construct_return_type(new_data)
 
     def groupby(self, function, axis='major'):
         """
diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py
index dd0204f11edfb..65a24dc1bf25f 100644
--- a/pandas/sparse/panel.py
+++ b/pandas/sparse/panel.py
@@ -172,6 +172,21 @@ def _set_items(self, new_items):
     # DataFrame's columns / "items"
     minor_axis = SparsePanelAxis('_minor_axis', 'columns')
 
+    def _ixs(self, i, axis=0):
+        """
+        Select by position through label lookup (compat: no Block Manager here)
+        i : int, slice, or sequence of integers (positions along ``axis``)
+        axis : int
+        """
+
+        key = self._get_axis(axis)[i]
+
+        # xs cannot handle a non-scalar key, so just reindex here
+        if com.is_list_like(key):
+            return self.reindex(**{self._get_axis_name(axis): key})
+
+        return self.xs(key, axis=axis)
+
     def _get_item_cache(self, key):
         return self._frames[key]
 
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 5d3f7b350250d..5c94f378b88ea 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -1335,6 +1335,65 @@ def test_to_panel_duplicates(self):
         idf = df.set_index(['a', 'b'])
         assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel)
 
+    def test_panel_dups(self):
+
+        # GH 4960
+        # indexing a Panel with duplicate labels along each axis in turn
+
+        # items
+        data = np.random.randn(5, 100, 5)
+        no_dup_panel = Panel(data, items=list("ABCDE"))
+        panel = Panel(data, items=list("AACDE"))
+
+        expected = no_dup_panel['A']
+        result = panel.iloc[0]
+        assert_frame_equal(result, expected)
+
+        expected = no_dup_panel['E']
+        result = panel.loc['E']
+        assert_frame_equal(result, expected)
+
+        expected = no_dup_panel.loc[['A','B']]
+        expected.items = ['A','A']
+        result = panel.loc['A']
+        assert_panel_equal(result, expected)
+
+        # major
+        data = np.random.randn(5, 5, 5)
+        no_dup_panel = Panel(data, major_axis=list("ABCDE"))
+        panel = Panel(data, major_axis=list("AACDE"))
+
+        expected = no_dup_panel.loc[:,'A']
+        result = panel.iloc[:,0]
+        assert_frame_equal(result, expected)
+
+        expected = no_dup_panel.loc[:,'E']
+        result = panel.loc[:,'E']
+        assert_frame_equal(result, expected)
+
+        expected = no_dup_panel.loc[:,['A','B']]
+        expected.major_axis = ['A','A']
+        result = panel.loc[:,'A']
+        assert_panel_equal(result, expected)
+
+        # minor
+        data = np.random.randn(5, 100, 5)
+        no_dup_panel = Panel(data, minor_axis=list("ABCDE"))
+        panel = Panel(data, minor_axis=list("AACDE"))
+
+        expected = no_dup_panel.loc[:,:,'A']
+        result = panel.iloc[:,:,0]
+        assert_frame_equal(result, expected)
+
+        expected = no_dup_panel.loc[:,:,'E']
+        result = panel.loc[:,:,'E']
+        assert_frame_equal(result, expected)
+
+        expected = no_dup_panel.loc[:,:,['A','B']]
+        expected.minor_axis = ['A','A']
+        result = panel.loc[:,:,'A']
+        assert_panel_equal(result, expected)
+
     def test_filter(self):
         pass
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index b25f85c961798..946a4d94b6045 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -357,12 +357,14 @@ def assert_panelnd_equal(left, right,
         right_ind = getattr(right, axis)
         assert_index_equal(left_ind, right_ind)
 
-    for col, series in compat.iteritems(left):
-        assert col in right, "non-matching column '%s'" % col
-        assert_func(series, right[col], check_less_precise=check_less_precise)
-
-    for col in right:
-        assert col in left
+    for i, item in enumerate(left._get_axis(0)):
+        assert item in right, "non-matching item (right) '%s'" % item
+        litem = left.iloc[i]
+        ritem = right.iloc[i]
+        assert_func(litem, ritem, check_less_precise=check_less_precise)
+
+    for i, item in enumerate(right._get_axis(0)):
+        assert item in left, "non-matching item (left) '%s'" % item
 
 # TODO: strangely check_names fails in py3 ?
 _panel_frame_equal = partial(assert_frame_equal, check_names=False)