diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0f8afe14a2369..a711e31c204ca 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -277,6 +277,7 @@ Categorical ^^^^^^^^^^^ - Bug in :meth:`Categorical.set_categories` losing dtype information (:issue:`48812`) - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`) +- Bug in :class:`GroupBy` where grouping by a categorical gave inconsistently ordered results: with a range index and a categorical column as the grouper, the order depended on the ``sort`` argument, but with a categorical index as the grouper it depended on the ``ordered`` attribute of the index (:issue:`49223`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index b11bbf35312c9..777aad8391b60 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -53,7 +53,7 @@ def recode_for_groupby( unique_codes = unique1d(c.codes) take_codes = unique_codes[unique_codes != -1] - if c.ordered: + if sort: take_codes = np.sort(take_codes) # we recode according to the uniques diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 7ae6495f15541..6e831d7a45fd0 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -502,6 +502,17 @@ def __init__( self._group_index, ) = index._get_grouper_for_level(mapper, level=ilevel, dropna=dropna) + if is_categorical_dtype(self.grouping_vector): + self.grouping_vector: Categorical + + self._passed_categorical = True + self._orig_cats = self.grouping_vector.categories + + # Should the sort arg be just `sort` or `sort or self.grouping_vector.ordered`? 
+ self.grouping_vector, self._all_grouper = recode_for_groupby( + self.grouping_vector, sort, observed + ) + # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes elif isinstance(self.grouping_vector, Grouper): @@ -529,6 +540,8 @@ def __init__( self._passed_categorical = True self._orig_cats = self.grouping_vector.categories + + # Should the sort arg be just `sort` or `sort or self.grouping_vector.ordered`? self.grouping_vector, self._all_grouper = recode_for_groupby( self.grouping_vector, sort, observed ) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 092fd4a4d6be0..3402cd6fc5357 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -2046,3 +2046,34 @@ def test_many_categories(as_index, sort, index_kind, ordered): expected = DataFrame({"a": Series(index), "b": data}) tm.assert_frame_equal(result, expected) + + +def test_categorical_vs_range_index_sorting(): + categories = np.arange(4, -1, -1) + for range_index in [True, False]: + df_ordered = DataFrame( + { + "a": Categorical([2, 1, 2, 3], categories=categories, ordered=True), + "b": range(4), + } + ) + df_unordered = DataFrame( + { + "a": Categorical([2, 1, 2, 3], categories=categories, ordered=False), + "b": range(4), + } + ) + + if not range_index: + df_ordered = df_ordered.set_index("a") + df_unordered = df_unordered.set_index("a") + + gb_ordered_sort = df_ordered.groupby("a", sort=True, observed=True) + gb_ordered_nosort = df_ordered.groupby("a", sort=False, observed=True) + gb_unordered_sort = df_unordered.groupby("a", sort=True, observed=True) + gb_unordered_nosort = df_unordered.groupby("a", sort=False, observed=True) + + assert gb_ordered_sort.sum()["b"].tolist() == [3, 2, 1] + assert gb_ordered_nosort.sum()["b"].tolist() == [2, 1, 3] + assert gb_unordered_sort.sum()["b"].tolist() == [3, 2, 1] + assert 
gb_unordered_nosort.sum()["b"].tolist() == [2, 1, 3]