BUG: groupby.agg with numba and as_index=False (#51228)

jbrockmendel · web-flow · commit 2a6d7b73aca3 · 2023-02-08T14:13:07.000-08:00
* BUG: groupby.agg with numba and as_index=False

* smaller-diff implementation

* Whatsnew
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -1301,6 +1301,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list when resampling on time index (:issue:`50840`)
 - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"ngroup"`` argument (:issue:`45986`)
 - Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`)
+- Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`)
 -
 
 Reshaping
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1266,7 +1266,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 data, func, *args, engine_kwargs=engine_kwargs, **kwargs
             )
             index = self.grouper.result_index
-            return self.obj._constructor(result, index=index, columns=data.columns)
+            result = self.obj._constructor(result, index=index, columns=data.columns)
+            if not self.as_index:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
+            return result
 
         relabeling, func, columns, order = reconstruct_func(func, **kwargs)
         func = maybe_mangle_lambdas(func)
diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py
@@ -51,7 +51,8 @@ def incorrect_function(values, index):
 # Filter warnings when parallel=True and the function can't be parallelized by Numba
 @pytest.mark.parametrize("jit", [True, False])
 @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"])
-def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython):
+@pytest.mark.parametrize("as_index", [True, False])
+def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython, as_index):
     def func_numba(values, index):
         return np.mean(values) * 2.7
 
@@ -65,7 +66,7 @@ def func_numba(values, index):
         {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1]
     )
     engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
-    grouped = data.groupby(0)
+    grouped = data.groupby(0, as_index=as_index)
     if pandas_obj == "Series":
         grouped = grouped[1]
 
diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py
@@ -48,7 +48,8 @@ def incorrect_function(values, index):
 # Filter warnings when parallel=True and the function can't be parallelized by Numba
 @pytest.mark.parametrize("jit", [True, False])
 @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"])
-def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython):
+@pytest.mark.parametrize("as_index", [True, False])
+def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython, as_index):
     def func(values, index):
         return values + 1
 
@@ -62,7 +63,7 @@ def func(values, index):
         {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1]
     )
     engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
-    grouped = data.groupby(0)
+    grouped = data.groupby(0, as_index=as_index)
     if pandas_obj == "Series":
         grouped = grouped[1]
 

Original file line number	Diff line number	Diff line change
`@@ -1301,6 +1301,7 @@ Groupby/resample/rolling`
`1301`	`1301`	- Bug in :meth:`.DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list when resampling on time index (:issue:`50840`)
`1302`	`1302`	- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"ngroup"`` argument (:issue:`45986`)
`1303`	`1303`	- Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`)
	`1304`	+- Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`)
`1304`	`1305`	`-`
`1305`	`1306`
`1306`	`1307`	`Reshaping`