Skip to content

BUG: non-unique indexing in a Panel (GH4960) #5097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 3, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ API Changes
call with additional keyword args (:issue:`4435`)
- Provide __dir__ method (and local context) for tab completion / remove ipython completers code
(:issue:`4501`)
- Support non-unique axes in a Panel via indexing operations (:issue:`4960`)


Internal Refactoring
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,8 +623,9 @@ def _getitem_lowerdim(self, tup):

# might have been a MultiIndex
elif section.ndim == self.ndim:

new_key = tup[:i] + (_NS,) + tup[i + 1:]
# new_key = tup[:i] + tup[i+1:]

else:
new_key = tup[:i] + tup[i + 1:]

Expand Down
9 changes: 7 additions & 2 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2413,12 +2413,17 @@ def _interleave(self, items):

return result

def xs(self, key, axis=1, copy=True):
def xs(self, key, axis=1, copy=True, takeable=False):
if axis < 1:
raise AssertionError('Can only take xs across axis >= 1, got %d'
% axis)

loc = self.axes[axis].get_loc(key)
# take by position
if takeable:
loc = key
else:
loc = self.axes[axis].get_loc(key)

slicer = [slice(None, None) for _ in range(self.ndim)]
slicer[axis] = loc
slicer = tuple(slicer)
Expand Down
29 changes: 25 additions & 4 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,15 @@ def set_value(self, *args):
return result.set_value(*args)

# Box the raw `values` selected for item `key` into a pandas container.
# NOTE(review): leading indentation was lost in this rendering, so the nesting
# described in the comments below is inferred -- confirm against the real file.
def _box_item_values(self, key, values):
# Values with the same number of dimensions as self are rebuilt with the
# full-dimensional constructor instead of the sliced one.
if self.ndim == values.ndim:
result = self._constructor(values)

# a dup selection will yield a full ndim
# When axis 0 of the rebuilt object is unique, it is indexed by `key`;
# presumably the non-unique case returns the full-ndim object unchanged.
if result._get_axis(0).is_unique:
result = result[key]

return result

# Otherwise box with the sliced constructor, passing the axes dict built from
# every axis order except the first.
d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])
return self._constructor_sliced(values, **d)

Expand Down Expand Up @@ -745,15 +754,27 @@ def xs(self, key, axis=1, copy=True):
_xs = xs

# Select along `axis` using `i`, which indexes into that axis' labels.
# NOTE(review): this span is a diff rendering (the file header reports
# "25 additions & 4 deletions") and appears to contain both the removed and the
# added lines with no +/- markers: the one-line reindex return vs. the
# `indexer` pair, and `return self.xs(key, axis=axis)` vs. the "xs by position"
# lines. Indentation is also lost. Confirm against the merged file before
# relying on the statement order shown here.
def _ixs(self, i, axis=0):
# for compatibility with .ix indexing
# Won't work with hierarchical indexing yet
"""
i : int, slice, or sequence of integers
axis : int
"""

# Label (or labels) found at position(s) `i` on the requested axis.
key = self._get_axis(axis)[i]

# xs cannot handle a non-scalar key, so just reindex here
if _is_list_like(key):
return self.reindex(**{self._get_axis_name(axis): key})
indexer = { self._get_axis_name(axis): key }
return self.reindex(**indexer)

# a reduction
# On axis 0 the values are fetched by position `i` via self._data.iget and then
# boxed by _box_item_values together with the label `key`.
if axis == 0:
values = self._data.iget(i)
return self._box_item_values(key,values)

return self.xs(key, axis=axis)
# xs by position
# The position `i` (not the label `key`) is handed to self._data.xs with
# takeable=True; the data is consolidated in place first.
self._consolidate_inplace()
new_data = self._data.xs(i, axis=axis, copy=True, takeable=True)
return self._construct_return_type(new_data)

def groupby(self, function, axis='major'):
"""
Expand Down
15 changes: 15 additions & 0 deletions pandas/sparse/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,21 @@ def _set_items(self, new_items):
# DataFrame's columns / "items"
minor_axis = SparsePanelAxis('_minor_axis', 'columns')

# Selection along `axis` for the sparse panel: `i` is first turned into the
# label(s) found at that position on the axis, and the selection itself is then
# done by label through reindex (list-like key) or xs (any other key).
# NOTE(review): because the lookup goes back through the label, behaviour when
# that label is duplicated on the axis is not shown here -- confirm.
def _ixs(self, i, axis=0):
"""
for compat as we don't support Block Manager here
i : int, slice, or sequence of integers
axis : int
"""

# Label (or labels) found at position(s) `i` on the requested axis.
key = self._get_axis(axis)[i]

# xs cannot handle a non-scalar key, so just reindex here
if com.is_list_like(key):
return self.reindex(**{self._get_axis_name(axis): key})

# Any key that is not list-like is passed to xs as a label on the same axis.
return self.xs(key, axis=axis)

def _get_item_cache(self, key):
return self._frames[key]

Expand Down
59 changes: 59 additions & 0 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,65 @@ def test_to_panel_duplicates(self):
idf = df.set_index(['a', 'b'])
assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel)

# Checks indexing on a Panel whose axis labels contain a duplicate.
# The same three checks are repeated for items, major_axis and minor_axis:
#   1. selecting position 0 with .iloc matches label 'A' of the unique panel;
#   2. selecting the unique label 'E' with .loc matches the unique panel;
#   3. selecting the duplicated label 'A' with .loc matches the unique panel's
#      ['A', 'B'] selection once its axis is relabelled to ['A', 'A'].
# Both panels in each section are built from the same random `data`; only the
# labels differ ("ABCDE" vs "AACDE").
def test_panel_dups(self):

# GH 4960
# duplicates in an index

# items
data = np.random.randn(5, 100, 5)
no_dup_panel = Panel(data, items=list("ABCDE"))
panel = Panel(data, items=list("AACDE"))

# positional selection of the first item
expected = no_dup_panel['A']
result = panel.iloc[0]
assert_frame_equal(result, expected)

# label selection of a label that is not duplicated
expected = no_dup_panel['E']
result = panel.loc['E']
assert_frame_equal(result, expected)

# label selection of the duplicated label; compared as a Panel, not a frame
expected = no_dup_panel.loc[['A','B']]
expected.items = ['A','A']
result = panel.loc['A']
assert_panel_equal(result, expected)

# major
# data is 5x5x5 here, presumably so the five labels fit the major axis -- confirm
data = np.random.randn(5, 5, 5)
no_dup_panel = Panel(data, major_axis=list("ABCDE"))
panel = Panel(data, major_axis=list("AACDE"))

# positional selection in the second indexer slot
expected = no_dup_panel.loc[:,'A']
result = panel.iloc[:,0]
assert_frame_equal(result, expected)

# label selection of a label that is not duplicated
expected = no_dup_panel.loc[:,'E']
result = panel.loc[:,'E']
assert_frame_equal(result, expected)

# label selection of the duplicated label; compared as a Panel
expected = no_dup_panel.loc[:,['A','B']]
expected.major_axis = ['A','A']
result = panel.loc[:,'A']
assert_panel_equal(result, expected)

# minor
data = np.random.randn(5, 100, 5)
no_dup_panel = Panel(data, minor_axis=list("ABCDE"))
panel = Panel(data, minor_axis=list("AACDE"))

# positional selection in the third indexer slot
expected = no_dup_panel.loc[:,:,'A']
result = panel.iloc[:,:,0]
assert_frame_equal(result, expected)

# label selection of a label that is not duplicated
expected = no_dup_panel.loc[:,:,'E']
result = panel.loc[:,:,'E']
assert_frame_equal(result, expected)

# label selection of the duplicated label; compared as a Panel
expected = no_dup_panel.loc[:,:,['A','B']]
expected.minor_axis = ['A','A']
result = panel.loc[:,:,'A']
assert_panel_equal(result, expected)

def test_filter(self):
pass

Expand Down
14 changes: 8 additions & 6 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,14 @@ def assert_panelnd_equal(left, right,
right_ind = getattr(right, axis)
assert_index_equal(left_ind, right_ind)

for col, series in compat.iteritems(left):
assert col in right, "non-matching column '%s'" % col
assert_func(series, right[col], check_less_precise=check_less_precise)

for col in right:
assert col in left
for i, item in enumerate(left._get_axis(0)):
assert item in right, "non-matching item (right) '%s'" % item
litem = left.iloc[i]
ritem = right.iloc[i]
assert_func(litem, ritem, check_less_precise=check_less_precise)

for i, item in enumerate(right._get_axis(0)):
assert item in left, "non-matching item (left) '%s'" % item

# TODO: strangely check_names fails in py3 ?
_panel_frame_equal = partial(assert_frame_equal, check_names=False)
Expand Down