From dfd3e097fe7b1fe004db7965958c0f1d77ec1a61 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Mon, 3 Oct 2016 20:09:43 -0400 Subject: [PATCH 1/6] Added test case for GH 14334 --- pandas/tests/test_groupby.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index f3791ee1d5c91..b5e2d0d361c91 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -442,6 +442,11 @@ def test_grouper_creation_bug(self): result = g.sum() assert_frame_equal(result, expected) + # GH14334 + g = df.groupby([pd.Grouper(key='A')]) + result = g.sum() + assert_frame_equal(result, expected) + # GH8866 s = Series(np.arange(8, dtype='int64'), index=pd.MultiIndex.from_product( From 9805c306784eb94b9a4b6f7673d1fb08c8860fa3 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Mon, 3 Oct 2016 20:15:13 -0400 Subject: [PATCH 2/6] Fix for GH 14334 --- pandas/core/groupby.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 5223c0ac270f3..5e08f6c3368a6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2208,7 +2208,10 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, index._get_grouper_for_level(self.grouper, level) else: - if isinstance(self.grouper, (list, tuple)): + if self.grouper is None and self.name is not None: + self.grouper = self.obj[self.name] + + elif isinstance(self.grouper, (list, tuple)): self.grouper = com._asarray_tuplesafe(self.grouper) # a passed Categorical @@ -2448,7 +2451,10 @@ def is_in_obj(gpr): elif is_in_axis(gpr): # df.groupby('name') in_axis, name, gpr = True, gpr, obj[gpr] exclusions.append(name) - + elif isinstance(gpr, Grouper) and gpr.key is not None: + # Add key to exclusions + exclusions.append(gpr.key) + in_axis, name = False, None else: in_axis, name = False, None From 14a4ae6fc99bd45e7ee8250f6d8403ac845dba63 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Mon, 3 Oct 2016 20:17:43 -0400 Subject: [PATCH 3/6] Added whatsnew for GH 14334 --- doc/source/whatsnew/v0.20.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7fa9991138fba..4b528be104ff6 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -80,3 +80,4 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``df.groupby`` where ``TypeError`` raised when key-only Grouper is passed in a list (:issue:`14334`) From f9ef05b2238eb173421f620c673af06518913f92 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Wed, 19 Oct 2016 19:55:19 -0400 Subject: [PATCH 4/6] Moved whatsnew to 0.19.1 and clarified description --- doc/source/whatsnew/v0.19.1.txt | 1 + doc/source/whatsnew/v0.20.0.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index b2facd4e2d0ec..61d5a199acc3c 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -54,6 +54,7 @@ Bug Fixes - Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`) +- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`) - Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` is not scalar and ``values`` is not specified (:issue:`14380`) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4b528be104ff6..7fa9991138fba 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -80,4 +80,3 @@ Performance Improvements Bug Fixes ~~~~~~~~~ -- Bug in ``df.groupby`` where ``TypeError`` raised when key-only Grouper is passed in a list (:issue:`14334`) From cee5ce60ab500853d4a80760c6263b22d2ff3477 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Wed, 19 Oct 2016 19:55:56 -0400 Subject: [PATCH 5/6] Added bug description to new test case --- pandas/tests/test_groupby.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index b5e2d0d361c91..dce7d1862b011 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -443,6 +443,7 @@ def test_grouper_creation_bug(self): assert_frame_equal(result, expected) # GH14334 + # pd.Grouper(key=...) may be passed in a list g = df.groupby([pd.Grouper(key='A')]) result = g.sum() assert_frame_equal(result, expected) From 5e9679773630ab18d14f6d0ed4b61a1ba0f910f4 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Thu, 20 Oct 2016 19:42:39 -0400 Subject: [PATCH 6/6] Add tests for grouping on two columns --- pandas/tests/test_groupby.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index dce7d1862b011..89aaafe9b2c02 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -444,10 +444,34 @@ def test_grouper_creation_bug(self): # GH14334 # pd.Grouper(key=...) may be passed in a list + df = DataFrame({'A': [0, 0, 0, 1, 1, 1], + 'B': [1, 1, 2, 2, 3, 3], + 'C': [1, 2, 3, 4, 5, 6]}) + # Group by single column + expected = df.groupby('A').sum() g = df.groupby([pd.Grouper(key='A')]) result = g.sum() assert_frame_equal(result, expected) + # Group by two columns + # using a combination of strings and Grouper objects + expected = df.groupby(['A', 'B']).sum() + + # Group with two Grouper objects + g = df.groupby([pd.Grouper(key='A'), pd.Grouper(key='B')]) + result = g.sum() + assert_frame_equal(result, expected) + + # Group with a string and a Grouper object + g = df.groupby(['A', pd.Grouper(key='B')]) + result = g.sum() + assert_frame_equal(result, expected) + + # Group with a Grouper object and a string + g = df.groupby([pd.Grouper(key='A'), 'B']) + result = g.sum() + assert_frame_equal(result, expected) + # GH8866 s = Series(np.arange(8, dtype='int64'), index=pd.MultiIndex.from_product(