From ab952e71a560d377b5175ce5564dbab46cf7ccbf Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 5 Oct 2020 23:13:17 -0700 Subject: [PATCH 1/4] BUG: RollingGroupby not respecting sort --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/groupby/grouper.py | 12 ++++++++++-- pandas/tests/window/test_grouper.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index e752eb54d0c15..05e973a369512 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -44,6 +44,7 @@ Fixed regressions - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) - Fixed regression in :func:`read_table` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) - Fixed regression in :meth:`Series.dt.normalize` when normalizing pre-epoch dates the result was shifted one day (:issue:`36294`) +- Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index a509acb3604e1..255a08557983f 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -563,8 +563,16 @@ def indices(self): if isinstance(self.grouper, ops.BaseGrouper): return self.grouper.indices - values = Categorical(self.grouper) - return values._reverse_indexer() + if self.sort: + values = Categorical(self.grouper) + return values._reverse_indexer() + else: + # GH 36889 + indices_dict = {} + codes, uniques = algorithms.factorize(self.grouper, sort=False) + for i, category in enumerate(uniques): + indices_dict[category] = np.flatnonzero(codes == i) + return indices_dict @property def codes(self) -> np.ndarray: diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 7cfac7c6a752a..38ce868d51011 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -428,3 +428,18 @@ def test_groupby_rolling_empty_frame(self): result = expected.groupby(["s1", "s2"]).rolling(window=1).sum() expected.index = pd.MultiIndex.from_tuples([], names=["s1", "s2", None]) tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_no_sort(self): + # GH 36889 + result = ( + pd.DataFrame({"foo": [2, 1], "bar": [2, 1]}) + .groupby("foo", sort=False) + .rolling(1) + .min() + ) + expected = pd.DataFrame( + np.array([[2.0, 2.0], [1.0, 1.0]]), + columns=["foo", "bar"], + index=pd.MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]), + ) + tm.assert_frame_equal(result, expected) From 615a1aa2c89f015386cd91cfdac758fa6f25b8ec Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 5 Oct 2020 23:19:17 -0700 Subject: [PATCH 2/4] move to 1.1.4 --- doc/source/whatsnew/v1.1.3.rst | 1 - doc/source/whatsnew/v1.1.4.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 05e973a369512..e752eb54d0c15 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -44,7 +44,6 @@ Fixed regressions - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) - Fixed regression in :func:`read_table` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) - Fixed regression in :meth:`Series.dt.normalize` when normalizing pre-epoch dates the result was shifted one day (:issue:`36294`) -- Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index e63912ebc8fee..6e8f03b7e0bc7 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) .. --------------------------------------------------------------------------- From ea510f2a29fc41411d493df2e7b09a1b4960f543 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 6 Oct 2020 18:25:52 -0700 Subject: [PATCH 3/4] Just use factorize --- pandas/core/groupby/grouper.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 255a08557983f..cf19115b28321 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -563,16 +563,12 @@ def indices(self): if isinstance(self.grouper, ops.BaseGrouper): return self.grouper.indices - if self.sort: - values = Categorical(self.grouper) - return values._reverse_indexer() - else: - # GH 36889 - indices_dict = {} - codes, uniques = algorithms.factorize(self.grouper, sort=False) - for i, category in enumerate(uniques): - indices_dict[category] = np.flatnonzero(codes == i) - return indices_dict + # GH 36889 + indices_dict = {} + codes, uniques = algorithms.factorize(self.grouper, sort=self.sort) + for i, category in enumerate(Index(uniques)): + indices_dict[category] = np.flatnonzero(codes == i) + return indices_dict @property def codes(self) -> np.ndarray: From 5fefc30f8972e3b5774aaeec6e03613b465b393a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 8 Oct 2020 14:46:15 -0700 Subject: [PATCH 4/4] Add comments and turn into dict comprenehsion --- pandas/core/groupby/grouper.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index cf19115b28321..9f0d953a2cc71 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -563,12 +563,13 @@ def indices(self): if isinstance(self.grouper, ops.BaseGrouper): return self.grouper.indices - # GH 36889 - indices_dict = {} + # Return a dictionary of {group label: [indices belonging to the group label]} + # respecting whether sort was specified codes, uniques = algorithms.factorize(self.grouper, sort=self.sort) - for i, category in enumerate(Index(uniques)): - indices_dict[category] = np.flatnonzero(codes == i) - return indices_dict + return { + category: np.flatnonzero(codes == i) + for i, category in enumerate(Index(uniques)) + } @property def codes(self) -> np.ndarray: