diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 3160b35386fa2..1289f8a386f9f 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -173,6 +173,7 @@ Bug Fixes - Bug in setitem with list-of-lists and single vs mixed types (:issue:`7551`:) - Bug in timeops with non-aligned Series (:issue:`7500`) - Bug in timedelta inference when assigning an incomplete Series (:issue:`7592`) +- Bug in groupby ``.nth`` with a Series and integer-like column name (:issue:`7559`) - Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c7611d9829308..1a10ad912211b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -467,7 +467,7 @@ def _selected_obj(self): def _set_selection_from_grouper(self): """ we may need create a selection if we have non-level groupers """ grp = self.grouper - if self.as_index and getattr(grp,'groupings',None) is not None: + if self.as_index and getattr(grp,'groupings',None) is not None and self.obj.ndim > 1: ax = self.obj._info_axis groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ] if len(groupers): @@ -759,7 +759,7 @@ def nth(self, n, dropna=None): Examples -------- - >>> DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + >>> df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) >>> g = df.groupby('A') >>> g.nth(0) A B @@ -804,7 +804,10 @@ def nth(self, n, dropna=None): if self.as_index: ax = self.obj._info_axis names = self.grouper.names - if all([ n in ax for n in names ]): + if self.obj.ndim == 1: + # this is a pass-thru + pass + elif all([ n in ax for n in names ]): result.index = Index(self.obj[names][is_nth].values.ravel()).set_names(names) elif self._group_selection is not None: result.index = self.obj._get_axis(self.axis)[is_nth] @@ -821,17 +824,29 @@ def nth(self, n, dropna=None): "(was passed %s)." % (dropna),) # old behaviour, but with all and any support for DataFrames. - + # modified in GH 7559 to have better perf max_len = n if n >= 0 else - 1 - n + dropped = self.obj.dropna(how=dropna, axis=self.axis) - def picker(x): - x = x.dropna(how=dropna) # Note: how is ignored if Series - if len(x) <= max_len: - return np.nan - else: - return x.iloc[n] + # get a new grouper for our dropped obj + grouper, exclusions, obj = _get_grouper(dropped, key=self.keys, axis=self.axis, + level=self.level, sort=self.sort) + + sizes = obj.groupby(grouper).size() + result = obj.groupby(grouper).nth(n) + mask = (sizes