From 1bcfe35d40755ad471695abb4e9b6aab2287c515 Mon Sep 17 00:00:00 2001
From: David Cottrell
Date: Tue, 10 Feb 2015 22:21:42 +0000
Subject: [PATCH] Fix bug in multiindex series groupby where sort argument is ignored (issue 9444). One test added.

---
 doc/source/whatsnew/v0.16.0.txt |  1 +
 pandas/core/groupby.py          |  2 +-
 pandas/tests/test_groupby.py    | 17 +++++++++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index 238a838cf727e..9395d730d99ee 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -268,3 +268,4 @@ Bug Fixes
 
 - Bug in ``read_csv`` with buffer overflows with certain malformed input files (:issue:`9205`)
 - Bug in groupby MultiIndex with missing pair (:issue:`9049`, :issue:`9344`)
+- Fixed bug in ``Series.groupby`` where grouping on ``MultiIndex`` levels would ignore the sort argument (:issue:`9444`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 29bdbe93866ed..440c0966ac066 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1368,7 +1368,7 @@ def _get_compressed_labels(self):
         if len(all_labels) > 1:
             group_index = get_group_index(all_labels, self.shape,
                                           sort=True, xnull=True)
-            return _compress_group_index(group_index)
+            return _compress_group_index(group_index, sort=self.sort)
 
         ping = self.groupings[0]
         return ping.labels, np.arange(len(ping.group_index))
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 1d309e2a6389f..e9a0a6eb73dbe 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3270,6 +3270,23 @@ def test_no_dummy_key_names(self):
                                   self.df['B'].values]).sum()
         self.assertEqual(result.index.names, (None, None))
 
+    def test_groupby_sort_multiindex_series(self):
+        # series multiindex groupby sort argument was not being passed through _compress_group_index
+        # GH 9444
+        index = MultiIndex(levels=[[1, 2], [1, 2]],
+                           labels=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]],
+                           names=['a', 'b'])
+        mseries = Series([0, 1, 2, 3, 4, 5], index=index)
+        index = MultiIndex(levels=[[1, 2], [1, 2]],
+                           labels=[[0, 0, 1], [1, 0, 0]],
+                           names=['a', 'b'])
+        mseries_result = Series([0, 2, 4], index=index)
+
+        result = mseries.groupby(level=['a', 'b'], sort=False).first()
+        assert_series_equal(result, mseries_result)
+        result = mseries.groupby(level=['a', 'b'], sort=True).first()
+        assert_series_equal(result, mseries_result.sort_index())
+
     def test_groupby_categorical(self):
         levels = ['foo', 'bar', 'baz', 'qux']
         codes = np.random.randint(0, 4, size=100)