Skip to content

Commit fd0e2f1

Browse files
authored
BUG: GroupBy.sum,prod,cumsum,cumprod with PeriodDtype (#51040)
* BUG: GroupBy.sum,prod,cumsum,cumprod with PeriodDtype
* GH ref
* update test for int32
* troubleshoot 32bit builds
1 parent 8aad1e7 commit fd0e2f1

File tree

4 files changed

+39
-7
lines changed

4 files changed

+39
-7
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,7 @@ Period
11081108
- Bug in adding a :class:`Period` object to an array of :class:`DateOffset` objects incorrectly raising ``TypeError`` (:issue:`50162`)
11091109
- Bug in :class:`Period` where passing a string with finer resolution than nanosecond would result in a ``KeyError`` instead of dropping the extra precision (:issue:`50417`)
11101110
- Bug in parsing strings representing Week-periods e.g. "2017-01-23/2017-01-29" as minute-frequency instead of week-frequency (:issue:`50803`)
1111+
- Bug in :meth:`GroupBy.sum`, :meth:`GroupBy.cumsum`, :meth:`GroupBy.prod`, :meth:`GroupBy.cumprod` with :class:`PeriodDtype` failing to raise ``TypeError`` (:issue:`51040`)
11111112
-
11121113

11131114
Plotting

pandas/core/groupby/ops.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
is_float_dtype,
5656
is_integer_dtype,
5757
is_numeric_dtype,
58+
is_period_dtype,
5859
is_sparse,
5960
is_timedelta64_dtype,
6061
needs_i8_conversion,
@@ -249,16 +250,17 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
249250
raise NotImplementedError(f"{dtype} dtype not supported")
250251

251252
elif is_sparse(dtype):
252-
# categoricals are only 1d, so we
253-
# are not setup for dim transforming
254253
raise NotImplementedError(f"{dtype} dtype not supported")
255254
elif is_datetime64_any_dtype(dtype):
256-
# TODO: same for period_dtype? no for these methods with Period
257-
# we raise NotImplemented if this is an invalid operation
258-
# entirely, e.g. adding datetimes
255+
# Adding/multiplying datetimes is not valid
259256
if how in ["sum", "prod", "cumsum", "cumprod"]:
260257
raise TypeError(f"datetime64 type does not support {how} operations")
258+
elif is_period_dtype(dtype):
259+
# Adding/multiplying Periods is not valid
260+
if how in ["sum", "prod", "cumsum", "cumprod"]:
261+
raise TypeError(f"Period type does not support {how} operations")
261262
elif is_timedelta64_dtype(dtype):
263+
# timedeltas we can add but not multiply
262264
if how in ["prod", "cumprod"]:
263265
raise TypeError(f"timedelta64 type does not support {how} operations")
264266

pandas/tests/extension/base/groupby.py

-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
is_bool_dtype,
55
is_numeric_dtype,
66
is_object_dtype,
7-
is_period_dtype,
87
is_string_dtype,
98
)
109

@@ -135,7 +134,6 @@ def test_in_numeric_groupby(self, data_for_grouping):
135134
or is_bool_dtype(dtype)
136135
or dtype.name == "decimal"
137136
or is_string_dtype(dtype)
138-
or is_period_dtype(dtype)
139137
or is_object_dtype(dtype)
140138
or dtype.kind == "m" # in particular duration[*][pyarrow]
141139
):

pandas/tests/groupby/test_groupby.py

+31
Original file line numberDiff line numberDiff line change
@@ -2867,3 +2867,34 @@ def test_groupby_method_drop_na(method):
28672867
else:
28682868
expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4])
28692869
tm.assert_frame_equal(result, expected)
2870+
2871+
2872+
def test_groupby_reduce_period():
2873+
# GH#51040
2874+
pi = pd.period_range("2016-01-01", periods=100, freq="D")
2875+
grps = list(range(10)) * 10
2876+
ser = pi.to_series()
2877+
gb = ser.groupby(grps)
2878+
2879+
with pytest.raises(TypeError, match="Period type does not support sum operations"):
2880+
gb.sum()
2881+
with pytest.raises(
2882+
TypeError, match="Period type does not support cumsum operations"
2883+
):
2884+
gb.cumsum()
2885+
with pytest.raises(TypeError, match="Period type does not support prod operations"):
2886+
gb.prod()
2887+
with pytest.raises(
2888+
TypeError, match="Period type does not support cumprod operations"
2889+
):
2890+
gb.cumprod()
2891+
2892+
res = gb.max()
2893+
expected = ser[-10:]
2894+
expected.index = Index(range(10), dtype=np.int_)
2895+
tm.assert_series_equal(res, expected)
2896+
2897+
res = gb.min()
2898+
expected = ser[:10]
2899+
expected.index = Index(range(10), dtype=np.int_)
2900+
tm.assert_series_equal(res, expected)

0 commit comments

Comments
 (0)