From c2330ad518c6b52b0211023f5e640e8aa4e23ab4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 22 Aug 2021 16:56:33 -0700 Subject: [PATCH 1/4] BUG: Pass index data correctly in groupby.transform/agg w/ engine=numba --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/groupby/groupby.py | 8 +++++++- pandas/tests/groupby/aggregate/test_numba.py | 14 ++++++++++++++ pandas/tests/groupby/transform/test_numba.py | 12 ++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 205a49e7786a7..8e06f56fd6f90 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -341,7 +341,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`) - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) - Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) -- +- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ab29dea3190c8..a2d3a0f368ca5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1141,9 +1141,15 @@ def _numba_prep(self, func, data): sorted_ids = algorithms.take_nd(ids, sorted_index, allow_fill=False) sorted_data = data.take(sorted_index, axis=self.axis).to_numpy() + sorted_index_data = data.index.take(sorted_index).to_numpy() starts, ends = lib.generate_slices(sorted_ids, ngroups) - return starts, ends, sorted_index, sorted_data + return ( + starts, + ends, + sorted_index_data, + sorted_data, + ) @final def _transform_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs): diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index ba2d6eeb287c0..4b915cd4c29ae 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -173,3 +173,17 @@ def sum_last(values, index, n): result = grouped_x.agg(sum_last, 2, engine="numba") expected = Series([2.0] * 2, name="x", index=Index([0, 1], name="id")) tm.assert_series_equal(result, expected) + + +@td.skip_if_no("numba", "0.46.0") +def test_index_data_correctly_passed(): + # GH 43133 + def f(values, index): + return np.mean(index) + + df = DataFrame({"group": ["A", "A", "B"], "v": [4, 5, 6]}, index=[-1, -2, -3]) + result = df.groupby("group").aggregate(f, engine="numba") + expected = DataFrame( + [-1.5, -3.0], columns=["v"], index=Index(["A", "B"], name="group") + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 8019071be72f3..b2d72aec0527f 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -164,3 +164,15 @@ def sum_last(values, index, n): result = grouped_x.transform(sum_last, 2, engine="numba") expected = Series([2.0] * 4, name="x") tm.assert_series_equal(result, expected) + + +@td.skip_if_no("numba", "0.46.0") +def test_index_data_correctly_passed(): + # GH 43133 + def f(values, index): + return index - 1 + + df = DataFrame({"group": ["A", "A", "B"], "v": [4, 5, 6]}, index=[-1, -2, -3]) + result = df.groupby("group").transform(f, engine="numba") + expected = DataFrame([-4.0, -3.0, -2.0], columns=["v"], index=[-1, -2, -3]) + tm.assert_frame_equal(result, expected) From 5f02c457f5320454cf5f8acdf6cc6c954251395b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 26 Aug 2021 21:10:58 -0700 Subject: [PATCH 2/4] Move to 1.3.2 --- doc/source/whatsnew/v1.3.2.rst | 1 + doc/source/whatsnew/v1.4.0.rst | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 7a9549affef00..efc517966d45c 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -39,6 +39,7 @@ Bug fixes - :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`) - :meth:`.Styler.set_sticky` has amended CSS to control the column/index names and ensure the correct sticky positions (:issue:`42537`) - Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode (:issue:`42866`) +- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 91c8e35871eb6..9d9483578ab9c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -352,7 +352,6 @@ Groupby/resample/rolling - Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`) - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) - Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) -- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) Reshaping ^^^^^^^^^ From b6208cf7c8822362a8cdfcb312e30d161dcc0fe9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 26 Aug 2021 21:12:19 -0700 Subject: [PATCH 3/4] Move to 1.3.3 --- doc/source/whatsnew/v1.3.2.rst | 1 - doc/source/whatsnew/v1.3.3.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index efc517966d45c..7a9549affef00 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -39,7 +39,6 @@ Bug fixes - :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`) - :meth:`.Styler.set_sticky` has amended CSS to control the column/index names and ensure the correct sticky positions (:issue:`42537`) - Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode (:issue:`42866`) -- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 1340188c3d609..3dee3aa5e7c7a 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -25,7 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) - .. --------------------------------------------------------------------------- From 0b93277ba8a70f50d554c5048966576c0d83db24 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 26 Aug 2021 21:15:20 -0700 Subject: [PATCH 4/4] Add back - --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 9d9483578ab9c..fc488504f1fdf 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -352,6 +352,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`) - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) - Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`) +- Reshaping ^^^^^^^^^