From b472e66abd34e75679ba89e09e3e6b6bd6470065 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Tue, 12 Mar 2019 15:13:44 -0400 Subject: [PATCH 1/5] Replace dicts with OrderedDicts in groupby aggregation functions --- pandas/core/groupby/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 683c21f7bd47a..bdae6f36b5572 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -219,7 +219,7 @@ def _aggregate_generic(self, func, *args, **kwargs): axis = self.axis obj = self._obj_with_exclusions - result = {} + result = collections.OrderedDict() if axis != obj._info_axis_number: try: for name, data in self: @@ -246,7 +246,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): # only for axis==0 obj = self._obj_with_exclusions - result = {} + result = collections.OrderedDict() cannot_agg = [] errors = None for item in obj: @@ -822,7 +822,7 @@ def _aggregate_multiple_funcs(self, arg, _level): columns.append(com.get_callable_name(f)) arg = lzip(columns, arg) - results = {} + results = collections.OrderedDict() for name, func in arg: obj = self if name in results: @@ -899,7 +899,7 @@ def _get_index(): name=self._selection_name) def _aggregate_named(self, func, *args, **kwargs): - result = {} + result = collections.OrderedDict() for name, group in self: group.name = name From 833b98a588b0c6b5fa0ce99adbd2f372ae04c197 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Thu, 14 Mar 2019 11:37:40 -0400 Subject: [PATCH 2/5] Added a test and release note --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/tests/groupby/aggregate/test_aggregate.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 72c40b04a1195..5b30963d8960f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -236,6 +236,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Replaced regular dictionaries with ``OrderedDict`` in the aggregation functions of ``groupby`` to make the ordering consistent in all versions of Python (:issue:`25692`) Reshaping diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 0c2e74c0b735f..20b2336b504bb 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -303,3 +303,14 @@ def test_groupby_agg_coercing_bools(): result = gp['c'].aggregate(lambda x: x.isnull().all()) expected = Series([True, False], index=index, name='c') tm.assert_series_equal(result, expected) + +def test_order_aggregate_multiple_funcs(): + + df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + res = df.groupby('A').agg(['sum', 'max', 'mean', 'ohlc', 'min']) + result = res.columns.levels[1] + + expected = pd.Index(['sum', 'max', 'mean', 'ohlc', 'min']) + + tm.assert_index_equal(result, expected) \ No newline at end of file From df808743fb074143d0e07dbd5e7cdc72216d89af Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Thu, 14 Mar 2019 11:40:35 -0400 Subject: [PATCH 3/5] Flake 8 issues fixed --- pandas/tests/groupby/aggregate/test_aggregate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 20b2336b504bb..ae8ed8db0aa5d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -304,8 +304,9 @@ def test_groupby_agg_coercing_bools(): expected = Series([True, False], index=index, name='c') tm.assert_series_equal(result, expected) -def test_order_aggregate_multiple_funcs(): +def test_order_aggregate_multiple_funcs(): + # GH 25692 df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) res = df.groupby('A').agg(['sum', 'max', 'mean', 'ohlc', 'min']) @@ -313,4 +314,4 @@ def test_order_aggregate_multiple_funcs(): expected = pd.Index(['sum', 'max', 'mean', 'ohlc', 'min']) - tm.assert_index_equal(result, expected) \ No newline at end of file + tm.assert_index_equal(result, expected) From c435b9e3bcb298c98b22b6028583ec68cf56e894 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Thu, 14 Mar 2019 13:03:20 -0400 Subject: [PATCH 4/5] Trailing whitespace fixed in release note --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 5b30963d8960f..d93d0c02c792a 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -236,7 +236,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) -- Replaced regular dictionaries with ``OrderedDict`` in the aggregation functions of ``groupby`` to make the ordering consistent in all versions of Python (:issue:`25692`) +- Replaced regular dictionaries with ``OrderedDict`` in the aggregation functions of ``groupby`` to make the ordering consistent in all versions of Python (:issue:`25692`) Reshaping From 0a6c65de016ecfab2c5d5776138862769363031c Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Mon, 18 Mar 2019 13:16:01 -0400 Subject: [PATCH 5/5] Release note update --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d93d0c02c792a..13802f92dd846 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -236,7 +236,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) -- Replaced regular dictionaries with ``OrderedDict`` in the aggregation functions of ``groupby`` to make the ordering consistent in all versions of Python (:issue:`25692`) +- Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) Reshaping