From 66ea20e54e4731e844a0fc8ee4e084e81b5914e0 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 14 Jun 2022 20:15:59 +0200 Subject: [PATCH 1/3] Bug: GroupBy raising error with None in first level of MultiIndex --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/groupby/grouper.py | 3 +++ pandas/tests/groupby/test_groupby.py | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 5891eeea98cbb..7373e3a00b6d0 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -904,6 +904,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) +- Bug in :meth:`DataFrame.GroupBy` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`) - Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) - Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) - Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 05ef155ecbcda..9456fdbb1af4b 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -835,6 +835,9 @@ def get_grouper( # if the actual grouper should be obj[key] def is_in_axis(key) -> bool: + if key is None: + return False + if not _is_label_like(key): # items -> .columns for DataFrame, .index for Series items = obj.axes[-1] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 97e616ef14cef..2652a76bb76d5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2776,3 +2776,14 @@ def test_by_column_values_with_same_starting_value(): ).set_index("Name") tm.assert_frame_equal(result, expected_result) + + +def test_groupby_none_in_first_mi_level(): + # GH#47348 + arr = [[None, 1, 0, 1], [2, 3, 2, 3]] + ser = Series(1, index=MultiIndex.from_arrays(arr, names=["a", "b"])) + result = ser.groupby(level=[0, 1]).sum() + expected = Series( + [1, 2], MultiIndex.from_tuples([(0.0, 2), (1.0, 3)], names=["a", "b"]) + ) + tm.assert_series_equal(result, expected) From c48478dfcfe09d2f87e5076f5801fb4de2012220 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 24 Jun 2022 16:57:17 +0200 Subject: [PATCH 2/3] Add test --- pandas/core/groupby/grouper.py | 5 +++-- pandas/tests/groupby/test_groupby.py | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 9456fdbb1af4b..a0866646ef148 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -835,10 +835,11 @@ def get_grouper( # if the actual grouper should be obj[key] def is_in_axis(key) -> bool: - if key is None: - return False if not _is_label_like(key): + if obj.ndim == 1: + return False + # items -> .columns for DataFrame, .index for Series items = obj.axes[-1] try: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2652a76bb76d5..920b869ef799b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2787,3 +2787,11 @@ def test_groupby_none_in_first_mi_level(): [1, 2], MultiIndex.from_tuples([(0.0, 2), (1.0, 3)], names=["a", "b"]) ) tm.assert_series_equal(result, expected) + + +def test_groupby_none_column_name(): + # GH#47348 + df = DataFrame({None: [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [4, 5, 6, 7]}) + result = df.groupby(by=[None]).sum() + expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None)) + tm.assert_frame_equal(result, expected) From 0fae4f6e6c972dd28b19a0bbf60a0169be0491e5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 25 Jun 2022 16:27:39 +0200 Subject: [PATCH 3/3] Change whatsnew --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d3063f6b0eeda..a572a7141db03 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -917,7 +917,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) -- Bug in :meth:`DataFrame.GroupBy` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`) +- Bug in :meth:`DataFrame.groupby` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`) - Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) - Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) - Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)