From 04872a9d4568bba28b64f83db993408b3e6b0ba2 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Sat, 26 Apr 2025 13:04:18 -0600
Subject: [PATCH 1/2] BinGrouper: Support setting labels when provided with
 IntervalIndex

Removes a pandas limitation that we don't need.

pandas.cut ignores `labels` when `bins` is an IntervalIndex, so the
user-provided labels are applied directly in that case. When no labels
are given, the bin categories are still derived through pandas exactly
as before, so IntervalIndex bins without labels keep working.
---
 xarray/groupers.py           | 11 ++++++++---
 xarray/tests/test_groupby.py |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/xarray/groupers.py b/xarray/groupers.py
index 025f8fae486..4b93aa02ff5 100644
--- a/xarray/groupers.py
+++ b/xarray/groupers.py
@@ -319,7 +319,7 @@ class BinGrouper(Grouper):
         the resulting bins. If False, returns only integer indicators of the
         bins. This affects the type of the output container (see below).
         This argument is ignored when `bins` is an IntervalIndex. If True,
-        raises an error. When `ordered=False`, labels must be provided.
+        raises an error.
     retbins : bool, default False
         Whether to return the bins or not. Useful when bins is provided
         as a scalar.
@@ -394,8 +394,13 @@ def factorize(self, group: T_Group) -> EncodedGroups:
 
         # This seems silly, but it lets us have Pandas handle the complexity
         # of `labels`, `precision`, and `include_lowest`, even when group is a chunked array
-        dummy, _ = self._cut(np.array([0]).astype(group.dtype))
-        full_index = dummy.categories
+        # Pandas ignores labels when IntervalIndex is passed; apply them here only if given
+        if self.labels is None or not isinstance(self.bins, pd.IntervalIndex):
+            dummy, _ = self._cut(np.array([0]).astype(group.dtype))
+            full_index = dummy.categories
+        else:
+            full_index = pd.Index(self.labels)
+
         if not by_is_chunked:
             uniques = np.sort(pd.unique(codes.data.ravel()))
             unique_values = full_index[uniques[uniques != -1]]
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 52ab8c4d232..2b4bdb1281c 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -1062,6 +1062,12 @@ def test_groupby_bins_cut_kwargs(use_flox: bool) -> None:
         ).mean()
     assert_identical(expected, actual)
 
+    with xr.set_options(use_flox=use_flox):
+        bins_index = pd.IntervalIndex.from_breaks(x_bins)
+        labels = ["one", "two", "three"]
+        actual = da.groupby(x=BinGrouper(bins=bins_index, labels=labels)).sum()
+        assert actual.xindexes["x_bins"].index.equals(pd.Index(labels))
+
 
 @pytest.mark.parametrize("indexed_coord", [True, False])
 @pytest.mark.parametrize(

From db7545f480eac459821a76ec6a65c3c780c13faf Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Sat, 26 Apr 2025 14:34:22 -0600
Subject: [PATCH 2/2] ignore type

---
 xarray/tests/test_groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 2b4bdb1281c..e47019f55c8 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -1066,7 +1066,7 @@ def test_groupby_bins_cut_kwargs(use_flox: bool) -> None:
         bins_index = pd.IntervalIndex.from_breaks(x_bins)
         labels = ["one", "two", "three"]
         actual = da.groupby(x=BinGrouper(bins=bins_index, labels=labels)).sum()
-        assert actual.xindexes["x_bins"].index.equals(pd.Index(labels))
+        assert actual.xindexes["x_bins"].index.equals(pd.Index(labels))  # type: ignore[attr-defined]
 
 
 @pytest.mark.parametrize("indexed_coord", [True, False])