diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5cb0eac5d9074..18c84d9aa88bf 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1676,7 +1676,9 @@ def _wrap_transformed_output(
 
     def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
         if not self.as_index:
-            index = Index(range(mgr.shape[1]))
+            # GH 41998 - empty mgr always gets index of length 0
+            rows = mgr.shape[1] if mgr.shape[0] > 0 else 0
+            index = Index(range(rows))
             mgr.set_axis(1, index)
             result = self.obj._constructor(mgr)
 
diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index 9e7d6d2a7c5bd..d699d05963b46 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -12,6 +12,11 @@
 )
 
 
+@pytest.fixture(params=[True, False])
+def as_index(request):
+    return request.param
+
+
 @pytest.fixture
 def mframe():
     index = MultiIndex(
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 89944e2a745e4..0181481b29c44 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -15,6 +15,7 @@
     Grouper,
     Index,
     MultiIndex,
+    RangeIndex,
     Series,
     Timestamp,
     date_range,
@@ -2360,19 +2361,21 @@ def test_groupby_all_nan_groups_drop():
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_empty_multi_column():
-    # GH 15106
+@pytest.mark.parametrize("numeric_only", [True, False])
+def test_groupby_empty_multi_column(as_index, numeric_only):
+    # GH 15106 & GH 41998
     df = DataFrame(data=[], columns=["A", "B", "C"])
-    gb = df.groupby(["A", "B"])
-    result = gb.sum(numeric_only=False)
-    expected = DataFrame(
-        [], columns=["C"], index=MultiIndex([[], []], [[], []], names=["A", "B"])
-    )
+    gb = df.groupby(["A", "B"], as_index=as_index)
+    result = gb.sum(numeric_only=numeric_only)
+    if as_index:
+        index = MultiIndex([[], []], [[], []], names=["A", "B"])
+        columns = ["C"] if not numeric_only else []
+    else:
+        index = RangeIndex(0)
+        columns = ["A", "B", "C"] if not numeric_only else ["A", "B"]
+    expected = DataFrame([], columns=columns, index=index)
     tm.assert_frame_equal(result, expected)
 
-    result = gb.sum(numeric_only=True)
-    tm.assert_frame_equal(result, expected[[]])
-
 
 def test_groupby_filtered_df_std():
     # GH 16174