diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ffd756bed43b6..8530d30af06a7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -75,7 +75,14 @@ group_selection_context, ) from pandas.core.groupby.numba_ import generate_numba_func, split_for_numba -from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + PeriodIndex, + TimedeltaIndex, + all_indexes_same, +) import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager from pandas.core.series import Series @@ -257,17 +264,27 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if self.grouper.nkeys > 1: return self._python_agg_general(func, *args, **kwargs) - try: - return self._python_agg_general(func, *args, **kwargs) - except (ValueError, KeyError): - # TODO: KeyError is raised in _python_agg_general, - # see see test_groupby.test_basic - result = self._aggregate_named(func, *args, **kwargs) + if isinstance( + self._selected_obj.index, (DatetimeIndex, TimedeltaIndex, PeriodIndex) + ): + # using _python_agg_general would end up incorrectly patching + # _index_data in reduction.pyx + result = self._aggregate_maybe_named(func, *args, **kwargs) + else: + try: + return self._python_agg_general(func, *args, **kwargs) + except (ValueError, KeyError): + # TODO: KeyError is raised in _python_agg_general, + # see test_groupby.test_basic + result = self._aggregate_maybe_named(func, *args, **kwargs) + + index = self.grouper.result_index + assert index.name == self.grouper.names[0] - index = Index(sorted(result), name=self.grouper.names[0]) ret = create_series_with_explicit_dtype( result, index=index, dtype_if_empty=object ) + ret.name = self._selected_obj.name # test_metadata_propagation_indiv if not self.as_index: # pragma: no cover print("Warning, ignoring as_index=True") @@ -470,14 +487,34 @@ def 
_get_index() -> Index: ) return self._reindex_output(result) - def _aggregate_named(self, func, *args, **kwargs): + def _aggregate_maybe_named(self, func, *args, **kwargs): + """ + Try the named-aggregator first, then unnamed, which better matches + what libreduction does. + """ + try: + return self._aggregate_named(func, *args, named=True, **kwargs) + except KeyError: + return self._aggregate_named(func, *args, named=False, **kwargs) + + def _aggregate_named(self, func, *args, named: bool = True, **kwargs): result = {} - for name, group in self: - group.name = name + for name, group in self: # TODO: could we have duplicate names? + if named: + group.name = name + output = func(group, *args, **kwargs) if isinstance(output, (Series, Index, np.ndarray)): - raise ValueError("Must produce aggregated value") + if ( + isinstance(output, Series) + and len(output) == 1 + and name in output.index + ): + # FIXME: kludge for test_resampler_grouper.test_apply + output = output.iloc[0] + else: + raise ValueError("Must produce aggregated value") result[name] = output return result diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e9525f03368fa..054d6165b31aa 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -45,7 +45,7 @@ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base, grouper -from pandas.core.indexes.api import Index, MultiIndex, ensure_index +from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -616,8 +616,10 @@ def agg_series(self, obj: Series, func: F): # TODO: can we get a performant workaround for EAs backed by ndarray? 
return self._aggregate_series_pure_python(obj, func) - elif obj.index._has_complex_internals: + elif obj.index._has_complex_internals or isinstance(obj.index, RangeIndex): # Preempt TypeError in _aggregate_series_fast + # exclude RangeIndex because patching it in libreduction would + # silently be incorrect return self._aggregate_series_pure_python(obj, func) try: diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 28d33ebb23c20..5827b1f456bd7 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -195,14 +195,17 @@ def test_resample_empty_dtypes(index, dtype, resample_method): @all_ts -def test_apply_to_empty_series(empty_series_dti): +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_apply_to_empty_series(empty_series_dti, freq): # GH 14313 s = empty_series_dti - for freq in ["M", "D", "H"]: - result = s.resample(freq).apply(lambda x: 1) - expected = s.resample(freq).apply(np.sum) - tm.assert_series_equal(result, expected, check_dtype=False) + result = s.resample(freq).apply(lambda x: 1) + expected = s.resample(freq).apply(np.sum) + + assert result.index.dtype == expected.index.dtype + + tm.assert_series_equal(result, expected, check_dtype=False) @all_ts