diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ac522fc7863b2..0cb43e3d5d1e5 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1185,8 +1185,6 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): if len(keys) == 0: return DataFrame(index=keys) - key_names = self.grouper.names - # GH12824. def first_not_none(values): try: @@ -1203,27 +1201,9 @@ def first_not_none(values): elif isinstance(v, DataFrame): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) elif self.grouper.groupings is not None: - if len(self.grouper.groupings) > 1: - key_index = self.grouper.result_index - - else: - ping = self.grouper.groupings[0] - if len(keys) == ping.ngroups: - key_index = ping.group_index - key_index.name = key_names[0] - - key_lookup = Index(keys) - indexer = key_lookup.get_indexer(key_index) - - # reorder the values - values = [values[i] for i in indexer] - else: - - key_index = Index(keys, name=key_names[0]) - - # don't use the key indexer - if not self.as_index: - key_index = None + key_index = self.grouper.result_index + if not self.as_index: + key_index = None # make Nones an empty object v = first_not_none(values) @@ -1635,7 +1615,7 @@ def _gotitem(self, key, ndim: int, subset=None): raise AssertionError("invalid ndim for _gotitem") def _wrap_frame_output(self, result, obj) -> DataFrame: - result_index = self.grouper.levels[0] + result_index = self.grouper.result_index if self.axis == 0: return DataFrame(result, index=obj.columns, columns=result_index).T diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 21e171f937de8..dc3df010c1eb0 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -412,7 +412,7 @@ def _make_codes(self) -> None: codes = self.grouper.codes_info uniques = self.grouper.result_index else: - codes, uniques = algorithms.factorize(self.grouper, sort=self.sort) + codes, uniques = algorithms.factorize(self.grouper, sort=True) uniques = Index(uniques, name=self.name) self._codes = codes self._group_index = uniques diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 7259268ac3f2b..c16826b2a9b54 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -39,12 +39,13 @@ from pandas.core.dtypes.missing import _maybe_fill, isna import pandas.core.algorithms as algorithms +from pandas.core.arrays import Categorical from pandas.core.base import SelectionMixin import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base, grouper -from pandas.core.indexes.api import Index, MultiIndex, ensure_index +from pandas.core.indexes.api import CategoricalIndex, Index, MultiIndex, ensure_index from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -141,7 +142,7 @@ def _get_grouper(self): def _get_group_keys(self): if len(self.groupings) == 1: - return self.levels[0] + return self.result_index else: comp_ids, _, ngroups = self.group_info @@ -277,12 +278,13 @@ def codes_info(self) -> np.ndarray: return codes def _get_compressed_codes(self) -> Tuple[np.ndarray, np.ndarray]: + ping = self.groupings[0] all_codes = self.codes - if len(all_codes) > 1: + if len(all_codes) > 1 or not isinstance( + ping.grouper, (Categorical, CategoricalIndex, BinGrouper) + ): group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True) return compress_group_index(group_index, sort=self.sort) - - ping = self.groupings[0] return ping.codes, np.arange(len(ping.group_index)) @cache_readonly @@ -297,14 +299,13 @@ def reconstructed_codes(self) -> List[np.ndarray]: @cache_readonly def result_index(self) -> Index: - if not self.compressed and len(self.groupings) == 1: - return self.groupings[0].result_index.rename(self.names[0]) - codes = self.reconstructed_codes levels = [ping.result_index for ping in self.groupings] result = MultiIndex( levels=levels, codes=codes, verify_integrity=False, names=self.names ) + if not self.compressed and len(self.groupings) == 1: + return result.get_level_values(0) return result def get_group_levels(self): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 5662d41e19885..845af651aa6b1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2057,3 +2057,27 @@ def test_groups_repr_truncates(max_seq_items, expected): result = df.groupby(np.array(df.a)).groups.__repr__() assert result == expected + + +def test_sort_false_multiindex_lexsorted(): + # GH 32259 + d = pd.to_datetime( + [ + "2020-11-02", + "2019-01-02", + "2020-01-02", + "2020-02-04", + "2020-11-03", + "2019-11-03", + "2019-11-13", + "2019-11-13", + ] + ) + a = np.arange(len(d)) + b = np.random.rand(len(d)) + df = pd.DataFrame({"d": d, "a": a, "b": b}) + t = df.groupby(["d", "a"], sort=False).mean() + assert not t.index.is_lexsorted() + + t = df.groupby(["d", "a"], sort=True).mean() + assert t.index.is_lexsorted() diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index efcd22f9c0c82..f9a6ab13d256d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -575,16 +575,12 @@ def test_groupby_args(self, mframe): frame.groupby(by=None, level=None) @pytest.mark.parametrize( - "sort,labels", - [ - [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]], - [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]], - ], + "sort", [True, False], ) - def test_level_preserve_order(self, sort, labels, mframe): + def test_level_preserve_order(self, sort, mframe): # GH 17537 grouped = mframe.groupby(level=0, sort=sort) - exp_labels = np.array(labels, np.intp) + exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], np.intp) tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels) def test_grouping_labels(self, mframe):