diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a76b682f135db..ec2c395ce1393 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -927,6 +927,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) +- Bug in :meth:`DataFrame.groupby` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`) - Bug in :meth:`.GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) - Bug in :meth:`.GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) - Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e524021f3a1b8..0c9b64dc8cec3 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -831,7 +831,11 @@ def get_grouper( # if the actual grouper should be obj[key] def is_in_axis(key) -> bool: + if not _is_label_like(key): + if obj.ndim == 1: + return False + # items -> .columns for DataFrame, .index for Series items = obj.axes[-1] try: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 97e616ef14cef..920b869ef799b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2776,3 +2776,22 @@ def test_by_column_values_with_same_starting_value(): ).set_index("Name") tm.assert_frame_equal(result, expected_result) + + +def test_groupby_none_in_first_mi_level(): + # GH#47348 + arr = [[None, 1, 0, 1], [2, 3, 2, 3]] + ser = Series(1, index=MultiIndex.from_arrays(arr, names=["a", "b"])) + result = ser.groupby(level=[0, 1]).sum() + expected = Series( + [1, 2], MultiIndex.from_tuples([(0.0, 2), (1.0, 3)], names=["a", "b"]) + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_none_column_name(): + # GH#47348 + df = DataFrame({None: [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [4, 5, 6, 7]}) + result = df.groupby(by=[None]).sum() + expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None)) + tm.assert_frame_equal(result, expected)