From 205d3b359122e6566929534309aab1a58bab9297 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 3 Jul 2023 13:08:56 -0400 Subject: [PATCH 1/6] REF: Separate groupby, rolling, and window agg/apply list/dict-like --- pandas/core/apply.py | 335 +++++++++++++++++++++++++++---------------- 1 file changed, 213 insertions(+), 122 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 83a3b29bfd7f0..27ba5fb279e13 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -2,14 +2,12 @@ import abc from collections import defaultdict -from contextlib import nullcontext from functools import partial import inspect from typing import ( TYPE_CHECKING, Any, Callable, - ContextManager, DefaultDict, Dict, Hashable, @@ -298,17 +296,50 @@ def agg_list_like(self) -> DataFrame | Series: """ return self.agg_or_apply_list_like(op_name="agg") + def compute_results_list_like(self, op_name, selected_obj, kwargs): + func = cast(List[AggFuncTypeBase], self.func) + obj = self.obj + + results = [] + keys = [] + + # degenerate case + if selected_obj.ndim == 1: + for a in func: + colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) + args = ( + [self.axis, *self.args] + if include_axis(op_name, colg) + else self.args + ) + new_res = getattr(colg, op_name)(a, *args, **kwargs) + results.append(new_res) + + # make sure we find a good name + name = com.get_callable_name(a) or a + keys.append(name) + + else: + indices = [] + for index, col in enumerate(selected_obj): + colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) + args = ( + [self.axis, *self.args] + if include_axis(op_name, colg) + else self.args + ) + new_res = getattr(colg, op_name)(func, *args, **kwargs) + results.append(new_res) + indices.append(index) + keys = selected_obj.columns.take(indices) + + return keys, results + def agg_or_apply_list_like( self, op_name: Literal["agg", "apply"] ) -> DataFrame | Series: - from pandas.core.groupby.generic import ( - DataFrameGroupBy, - 
SeriesGroupBy, - ) - from pandas.core.reshape.concat import concat - obj = self.obj - func = cast(List[AggFuncTypeBase], self.func) + kwargs = self.kwargs if op_name == "apply": if isinstance(self, FrameApply): @@ -323,55 +354,26 @@ def agg_or_apply_list_like( if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") + selected_obj = obj if not isinstance(obj, SelectionMixin): # i.e. obj is Series or DataFrame selected_obj = obj elif obj._selected_obj.ndim == 1: + assert False # For SeriesGroupBy this matches _obj_with_exclusions selected_obj = obj._selected_obj else: + assert False selected_obj = obj._obj_with_exclusions - results = [] - keys = [] - - is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) - - context_manager: ContextManager - if is_groupby: - # When as_index=False, we combine all results using indices - # and adjust index after - context_manager = com.temp_setattr(obj, "as_index", True) - else: - context_manager = nullcontext() - - def include_axis(colg) -> bool: - return isinstance(colg, ABCDataFrame) or ( - isinstance(colg, ABCSeries) and op_name == "agg" - ) - - with context_manager: - # degenerate case - if selected_obj.ndim == 1: - for a in func: - colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - args = [self.axis, *self.args] if include_axis(colg) else self.args - new_res = getattr(colg, op_name)(a, *args, **kwargs) - results.append(new_res) + keys, results = self.compute_results_list_like(op_name, selected_obj, kwargs) + result = self.wrap_results_list_like(keys, results) + return result - # make sure we find a good name - name = com.get_callable_name(a) or a - keys.append(name) + def wrap_results_list_like(self, keys, results): + from pandas.core.reshape.concat import concat - else: - indices = [] - for index, col in enumerate(selected_obj): - colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - args = [self.axis, *self.args] if include_axis(colg) else 
self.args - new_res = getattr(colg, op_name)(func, *args, **kwargs) - results.append(new_res) - indices.append(index) - keys = selected_obj.columns.take(indices) + obj = self.obj try: return concat(results, keys=keys, axis=1, sort=False) @@ -397,20 +399,60 @@ def agg_dict_like(self) -> DataFrame | Series: """ return self.agg_or_apply_dict_like(op_name="agg") + def compute_results_dict_like(self, op_name, selected_obj, selection, kwargs): + obj = self.obj + func = cast(AggFuncTypeDict, self.func) + func = self.normalize_dictlike_arg(op_name, selected_obj, func) + + is_non_unique_col = ( + selected_obj.ndim == 2 + and selected_obj.columns.nunique() < len(selected_obj.columns) + ) + + if selected_obj.ndim == 1: + # key only used for output + colg = obj._gotitem(selection, ndim=1) + result_data = [ + getattr(colg, op_name)(how, **kwargs) for _, how in func.items() + ] + result_index = list(func.keys()) + elif is_non_unique_col: + # key used for column selection and output + # GH#51099 + result_data = [] + result_index = [] + for key, how in func.items(): + indices = selected_obj.columns.get_indexer_for([key]) + labels = selected_obj.columns.take(indices) + label_to_indices = defaultdict(list) + for index, label in zip(indices, labels): + label_to_indices[label].append(index) + + key_data = [ + getattr(selected_obj._ixs(indice, axis=1), op_name)(how, **kwargs) + for label, indices in label_to_indices.items() + for indice in indices + ] + + result_index += [key] * len(key_data) + result_data += key_data + else: + # key used for column selection and output + result_data = [ + getattr(obj._gotitem(key, ndim=1), op_name)(how, **kwargs) + for key, how in func.items() + ] + result_index = list(func.keys()) + + return result_index, result_data + def agg_or_apply_dict_like( self, op_name: Literal["agg", "apply"] ) -> DataFrame | Series: - from pandas import Index - from pandas.core.groupby.generic import ( - DataFrameGroupBy, - SeriesGroupBy, - ) - from 
pandas.core.reshape.concat import concat - assert op_name in ["agg", "apply"] obj = self.obj - func = cast(AggFuncTypeDict, self.func) + kwargs = {} if op_name == "apply": by_row = "_compat" if self.by_row else False @@ -419,78 +461,24 @@ def agg_or_apply_dict_like( if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") - if not isinstance(obj, SelectionMixin): - # i.e. obj is Series or DataFrame - selected_obj = obj - selection = None - else: - selected_obj = obj._selected_obj - selection = obj._selection - - func = self.normalize_dictlike_arg(op_name, selected_obj, func) - - is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) - context_manager: ContextManager - if is_groupby: - # When as_index=False, we combine all results using indices - # and adjust index after - context_manager = com.temp_setattr(obj, "as_index", True) - else: - context_manager = nullcontext() + selected_obj = obj + selection = None - is_non_unique_col = ( - selected_obj.ndim == 2 - and selected_obj.columns.nunique() < len(selected_obj.columns) + result_index, result_data = self.compute_results_dict_like( + op_name, selected_obj, selection, kwargs ) + result = self.wrap_results_dict_like(selected_obj, result_index, result_data) + return result - # Numba Groupby engine/engine-kwargs passthrough - if is_groupby: - engine = self.kwargs.get("engine", None) - engine_kwargs = self.kwargs.get("engine_kwargs", None) - kwargs.update({"engine": engine, "engine_kwargs": engine_kwargs}) - - with context_manager: - if selected_obj.ndim == 1: - # key only used for output - colg = obj._gotitem(selection, ndim=1) - result_data = [ - getattr(colg, op_name)(how, **kwargs) for _, how in func.items() - ] - result_index = list(func.keys()) - elif is_non_unique_col: - # key used for column selection and output - # GH#51099 - result_data = [] - result_index = [] - for key, how in func.items(): - indices = selected_obj.columns.get_indexer_for([key]) - labels = 
selected_obj.columns.take(indices) - label_to_indices = defaultdict(list) - for index, label in zip(indices, labels): - label_to_indices[label].append(index) - - key_data = [ - getattr(selected_obj._ixs(indice, axis=1), op_name)( - how, **kwargs - ) - for label, indices in label_to_indices.items() - for indice in indices - ] - - result_index += [key] * len(key_data) - result_data += key_data - else: - # key used for column selection and output - result_data = [ - getattr(obj._gotitem(key, ndim=1), op_name)(how, **kwargs) - for key, how in func.items() - ] - result_index = list(func.keys()) + def wrap_results_dict_like(self, selected_obj, result_index, result_data): + from pandas import Index + from pandas.core.reshape.concat import concat + + obj = self.obj # Avoid making two isinstance calls in all and any below is_ndframe = [isinstance(r, ABCNDFrame) for r in result_data] - # combine results if all(is_ndframe): results = dict(zip(result_index, result_data)) keys_to_use: Iterable[Hashable] @@ -1281,8 +1269,61 @@ def apply(self): def transform(self): raise NotImplementedError + def agg_or_apply_list_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + obj = self.obj + kwargs = self.kwargs + if op_name == "apply": + kwargs = {**kwargs, "by_row": False} + + if getattr(obj, "axis", 0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + if obj._selected_obj.ndim == 1: + # For SeriesGroupBy this matches _obj_with_exclusions + selected_obj = obj._selected_obj + else: + selected_obj = obj._obj_with_exclusions + + with com.temp_setattr(obj, "as_index", True): + keys, results = self.compute_results_list_like( + op_name, selected_obj, kwargs + ) + result = self.wrap_results_list_like(keys, results) + return result + + def agg_or_apply_dict_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + assert op_name in ["agg", "apply"] + + obj = self.obj + kwargs = {} + if op_name == "apply": + by_row = "_compat" if 
self.by_row else False + kwargs.update({"by_row": by_row}) + + if getattr(obj, "axis", 0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + selected_obj = obj._selected_obj + selection = obj._selection + + # Numba Groupby engine/engine-kwargs passthrough + engine = self.kwargs.get("engine", None) + engine_kwargs = self.kwargs.get("engine_kwargs", None) + kwargs.update({"engine": engine, "engine_kwargs": engine_kwargs}) + + with com.temp_setattr(obj, "as_index", True): + result_index, result_data = self.compute_results_dict_like( + op_name, selected_obj, selection, kwargs + ) + result = self.wrap_results_dict_like(selected_obj, result_index, result_data) + return result + -class ResamplerWindowApply(Apply): +class ResamplerWindowApply(GroupByApply): axis: AxisInt = 0 obj: Resampler | BaseWindow @@ -1294,7 +1335,7 @@ def __init__( args, kwargs, ) -> None: - super().__init__( + super(GroupByApply, self).__init__( obj, func, raw=False, @@ -1309,6 +1350,50 @@ def apply(self): def transform(self): raise NotImplementedError + def agg_or_apply_list_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + obj = self.obj + kwargs = self.kwargs + if op_name == "apply": + kwargs = {**kwargs, "by_row": False} + + if getattr(obj, "axis", 0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + if obj._selected_obj.ndim == 1: + # For SeriesGroupBy this matches _obj_with_exclusions + selected_obj = obj._selected_obj + else: + selected_obj = obj._obj_with_exclusions + + keys, results = self.compute_results_list_like(op_name, selected_obj, kwargs) + result = self.wrap_results_list_like(keys, results) + return result + + def agg_or_apply_dict_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + assert op_name in ["agg", "apply"] + + obj = self.obj + kwargs = {} + if op_name == "apply": + by_row = "_compat" if self.by_row else False + kwargs.update({"by_row": by_row}) + + if getattr(obj, "axis", 
0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + selected_obj = obj._selected_obj + selection = obj._selection + + result_index, result_data = self.compute_results_dict_like( + op_name, selected_obj, selection, kwargs + ) + result = self.wrap_results_dict_like(selected_obj, result_index, result_data) + return result + def reconstruct_func( func: AggFuncType | None, **kwargs @@ -1695,3 +1780,9 @@ def validate_func_kwargs( no_arg_message = "Must provide 'func' or named aggregation **kwargs." raise TypeError(no_arg_message) return columns, func + + +def include_axis(op_name, colg) -> bool: + return isinstance(colg, ABCDataFrame) or ( + isinstance(colg, ABCSeries) and op_name == "agg" + ) From e99ed99a94cc781a7f6f93d08e06e70ee26f1299 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 3 Jul 2023 13:27:03 -0400 Subject: [PATCH 2/6] type-hints --- pandas/core/apply.py | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 27ba5fb279e13..6e012ca58fb41 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -296,7 +296,12 @@ def agg_list_like(self) -> DataFrame | Series: """ return self.agg_or_apply_list_like(op_name="agg") - def compute_results_list_like(self, op_name, selected_obj, kwargs): + def compute_list_like( + self, + op_name: Literal["agg", "apply"], + selected_obj: Series | DataFrame, + kwargs: dict[str, Any], + ) -> tuple[list[Hashable], list[Series | DataFrame]]: func = cast(List[AggFuncTypeBase], self.func) obj = self.obj @@ -366,11 +371,13 @@ def agg_or_apply_list_like( assert False selected_obj = obj._obj_with_exclusions - keys, results = self.compute_results_list_like(op_name, selected_obj, kwargs) + keys, results = self.compute_list_like(op_name, selected_obj, kwargs) result = self.wrap_results_list_like(keys, results) return result - def wrap_results_list_like(self, keys, results): + def wrap_results_list_like( + 
self, keys: list[Hashable], results: list[Series | DataFrame] + ): from pandas.core.reshape.concat import concat obj = self.obj @@ -399,7 +406,13 @@ def agg_dict_like(self) -> DataFrame | Series: """ return self.agg_or_apply_dict_like(op_name="agg") - def compute_results_dict_like(self, op_name, selected_obj, selection, kwargs): + def compute_dict_like( + self, + op_name: Literal["agg", "apply"], + selected_obj: Series | DataFrame, + selection: Hashable | Sequence[Hashable], + kwargs: dict[str, Any], + ) -> tuple[list[Hashable], list[Any]]: obj = self.obj func = cast(AggFuncTypeDict, self.func) func = self.normalize_dictlike_arg(op_name, selected_obj, func) @@ -462,15 +475,21 @@ def agg_or_apply_dict_like( raise NotImplementedError("axis other than 0 is not supported") selected_obj = obj + assert isinstance(selected_obj, (ABCSeries, ABCDataFrame)) selection = None - result_index, result_data = self.compute_results_dict_like( + result_index, result_data = self.compute_dict_like( op_name, selected_obj, selection, kwargs ) result = self.wrap_results_dict_like(selected_obj, result_index, result_data) return result - def wrap_results_dict_like(self, selected_obj, result_index, result_data): + def wrap_results_dict_like( + self, + selected_obj: Series | DataFrame, + result_index: list[Hashable], + result_data: list, + ): from pandas import Index from pandas.core.reshape.concat import concat @@ -1287,9 +1306,7 @@ def agg_or_apply_list_like( selected_obj = obj._obj_with_exclusions with com.temp_setattr(obj, "as_index", True): - keys, results = self.compute_results_list_like( - op_name, selected_obj, kwargs - ) + keys, results = self.compute_list_like(op_name, selected_obj, kwargs) result = self.wrap_results_list_like(keys, results) return result @@ -1316,7 +1333,7 @@ def agg_or_apply_dict_like( kwargs.update({"engine": engine, "engine_kwargs": engine_kwargs}) with com.temp_setattr(obj, "as_index", True): - result_index, result_data = self.compute_results_dict_like( + 
result_index, result_data = self.compute_dict_like( op_name, selected_obj, selection, kwargs ) result = self.wrap_results_dict_like(selected_obj, result_index, result_data) @@ -1367,7 +1384,7 @@ def agg_or_apply_list_like( else: selected_obj = obj._obj_with_exclusions - keys, results = self.compute_results_list_like(op_name, selected_obj, kwargs) + keys, results = self.compute_list_like(op_name, selected_obj, kwargs) result = self.wrap_results_list_like(keys, results) return result @@ -1388,7 +1405,7 @@ def agg_or_apply_dict_like( selected_obj = obj._selected_obj selection = obj._selection - result_index, result_data = self.compute_results_dict_like( + result_index, result_data = self.compute_dict_like( op_name, selected_obj, selection, kwargs ) result = self.wrap_results_dict_like(selected_obj, result_index, result_data) @@ -1782,7 +1799,7 @@ def validate_func_kwargs( return columns, func -def include_axis(op_name, colg) -> bool: +def include_axis(op_name: Literal["agg", "apply"], colg: Series | DataFrame) -> bool: return isinstance(colg, ABCDataFrame) or ( isinstance(colg, ABCSeries) and op_name == "agg" ) From 9e104564c0f1f41057d0f8c2a50e3bfb9cd1fa69 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 5 Jul 2023 09:51:12 -0400 Subject: [PATCH 3/6] cleanup --- pandas/core/apply.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 6e012ca58fb41..7bd52c4afa9d7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -55,7 +55,6 @@ ABCSeries, ) -from pandas.core.base import SelectionMixin import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike @@ -359,19 +358,7 @@ def agg_or_apply_list_like( if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") - selected_obj = obj - if not isinstance(obj, SelectionMixin): - # i.e. 
obj is Series or DataFrame - selected_obj = obj - elif obj._selected_obj.ndim == 1: - assert False - # For SeriesGroupBy this matches _obj_with_exclusions - selected_obj = obj._selected_obj - else: - assert False - selected_obj = obj._obj_with_exclusions - - keys, results = self.compute_list_like(op_name, selected_obj, kwargs) + keys, results = self.compute_list_like(op_name, obj, kwargs) result = self.wrap_results_list_like(keys, results) return result @@ -465,6 +452,7 @@ def agg_or_apply_dict_like( assert op_name in ["agg", "apply"] obj = self.obj + assert isinstance(obj, (ABCSeries, ABCDataFrame)) kwargs = {} if op_name == "apply": @@ -474,14 +462,11 @@ def agg_or_apply_dict_like( if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") - selected_obj = obj - assert isinstance(selected_obj, (ABCSeries, ABCDataFrame)) selection = None - result_index, result_data = self.compute_dict_like( - op_name, selected_obj, selection, kwargs + op_name, obj, selection, kwargs ) - result = self.wrap_results_dict_like(selected_obj, result_index, result_data) + result = self.wrap_results_dict_like(obj, result_index, result_data) return result def wrap_results_dict_like( From fb06909b9da90adec004927c9b823066221a5dc8 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 5 Jul 2023 09:59:55 -0400 Subject: [PATCH 4/6] mypy fixup --- pandas/core/apply.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7bd52c4afa9d7..543b03be46cc1 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -343,6 +343,7 @@ def agg_or_apply_list_like( self, op_name: Literal["agg", "apply"] ) -> DataFrame | Series: obj = self.obj + assert isinstance(obj, (ABCSeries, ABCDataFrame)) kwargs = self.kwargs if op_name == "apply": From 3e181aa43f1abf86ac35a31677f633b3bb20978a Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 5 Jul 2023 21:46:16 -0400 Subject: [PATCH 5/6] Rework --- pandas/core/apply.py | 171 
++++++++++++++++++------------------------ pandas/core/common.py | 14 +++- 2 files changed, 85 insertions(+), 100 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 543b03be46cc1..7fda5b07923a8 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -141,6 +141,18 @@ def __init__( def apply(self) -> DataFrame | Series: pass + @abc.abstractmethod + def agg_or_apply_list_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + pass + + @abc.abstractmethod + def agg_or_apply_dict_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + pass + def agg(self) -> DataFrame | Series | None: """ Provide an implementation for the aggregators. @@ -339,30 +351,6 @@ def compute_list_like( return keys, results - def agg_or_apply_list_like( - self, op_name: Literal["agg", "apply"] - ) -> DataFrame | Series: - obj = self.obj - assert isinstance(obj, (ABCSeries, ABCDataFrame)) - - kwargs = self.kwargs - if op_name == "apply": - if isinstance(self, FrameApply): - by_row = self.by_row - - elif isinstance(self, SeriesApply): - by_row = "_compat" if self.by_row else False - else: - by_row = False - kwargs = {**kwargs, "by_row": by_row} - - if getattr(obj, "axis", 0) == 1: - raise NotImplementedError("axis other than 0 is not supported") - - keys, results = self.compute_list_like(op_name, obj, kwargs) - result = self.wrap_results_list_like(keys, results) - return result - def wrap_results_list_like( self, keys: list[Hashable], results: list[Series | DataFrame] ): @@ -447,29 +435,6 @@ def compute_dict_like( return result_index, result_data - def agg_or_apply_dict_like( - self, op_name: Literal["agg", "apply"] - ) -> DataFrame | Series: - assert op_name in ["agg", "apply"] - - obj = self.obj - assert isinstance(obj, (ABCSeries, ABCDataFrame)) - - kwargs = {} - if op_name == "apply": - by_row = "_compat" if self.by_row else False - kwargs.update({"by_row": by_row}) - - if getattr(obj, "axis", 0) == 1: - raise 
NotImplementedError("axis other than 0 is not supported") - - selection = None - result_index, result_data = self.compute_dict_like( - op_name, obj, selection, kwargs - ) - result = self.wrap_results_dict_like(obj, result_index, result_data) - return result - def wrap_results_dict_like( self, selected_obj: Series | DataFrame, @@ -684,6 +649,50 @@ def index(self) -> Index: def agg_axis(self) -> Index: return self.obj._get_agg_axis(self.axis) + def agg_or_apply_list_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + obj = self.obj + kwargs = self.kwargs + + if op_name == "apply": + if isinstance(self, FrameApply): + by_row = self.by_row + + elif isinstance(self, SeriesApply): + by_row = "_compat" if self.by_row else False + else: + by_row = False + kwargs = {**kwargs, "by_row": by_row} + + if getattr(obj, "axis", 0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + keys, results = self.compute_list_like(op_name, obj, kwargs) + result = self.wrap_results_list_like(keys, results) + return result + + def agg_or_apply_dict_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: + assert op_name in ["agg", "apply"] + obj = self.obj + + kwargs = {} + if op_name == "apply": + by_row = "_compat" if self.by_row else False + kwargs.update({"by_row": by_row}) + + if getattr(obj, "axis", 0) == 1: + raise NotImplementedError("axis other than 0 is not supported") + + selection = None + result_index, result_data = self.compute_dict_like( + op_name, obj, selection, kwargs + ) + result = self.wrap_results_dict_like(obj, result_index, result_data) + return result + class FrameApply(NDFrameApply): obj: DataFrame @@ -1249,6 +1258,8 @@ def curried(x): class GroupByApply(Apply): + obj: GroupBy | Resampler | BaseWindow + def __init__( self, obj: GroupBy[NDFrameT], @@ -1291,7 +1302,9 @@ def agg_or_apply_list_like( else: selected_obj = obj._obj_with_exclusions - with com.temp_setattr(obj, "as_index", True): + with 
com.temp_setattr( + obj, "as_index", True, condition=hasattr(obj, "as_index") + ): keys, results = self.compute_list_like(op_name, selected_obj, kwargs) result = self.wrap_results_list_like(keys, results) return result @@ -1299,6 +1312,11 @@ def agg_or_apply_list_like( def agg_or_apply_dict_like( self, op_name: Literal["agg", "apply"] ) -> DataFrame | Series: + from pandas.core.groupby.generic import ( + DataFrameGroupBy, + SeriesGroupBy, + ) + assert op_name in ["agg", "apply"] obj = self.obj @@ -1313,12 +1331,17 @@ def agg_or_apply_dict_like( selected_obj = obj._selected_obj selection = obj._selection + is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) + # Numba Groupby engine/engine-kwargs passthrough - engine = self.kwargs.get("engine", None) - engine_kwargs = self.kwargs.get("engine_kwargs", None) - kwargs.update({"engine": engine, "engine_kwargs": engine_kwargs}) + if is_groupby: + engine = self.kwargs.get("engine", None) + engine_kwargs = self.kwargs.get("engine_kwargs", None) + kwargs.update({"engine": engine, "engine_kwargs": engine_kwargs}) - with com.temp_setattr(obj, "as_index", True): + with com.temp_setattr( + obj, "as_index", True, condition=hasattr(obj, "as_index") + ): result_index, result_data = self.compute_dict_like( op_name, selected_obj, selection, kwargs ) @@ -1353,50 +1376,6 @@ def apply(self): def transform(self): raise NotImplementedError - def agg_or_apply_list_like( - self, op_name: Literal["agg", "apply"] - ) -> DataFrame | Series: - obj = self.obj - kwargs = self.kwargs - if op_name == "apply": - kwargs = {**kwargs, "by_row": False} - - if getattr(obj, "axis", 0) == 1: - raise NotImplementedError("axis other than 0 is not supported") - - if obj._selected_obj.ndim == 1: - # For SeriesGroupBy this matches _obj_with_exclusions - selected_obj = obj._selected_obj - else: - selected_obj = obj._obj_with_exclusions - - keys, results = self.compute_list_like(op_name, selected_obj, kwargs) - result = 
self.wrap_results_list_like(keys, results) - return result - - def agg_or_apply_dict_like( - self, op_name: Literal["agg", "apply"] - ) -> DataFrame | Series: - assert op_name in ["agg", "apply"] - - obj = self.obj - kwargs = {} - if op_name == "apply": - by_row = "_compat" if self.by_row else False - kwargs.update({"by_row": by_row}) - - if getattr(obj, "axis", 0) == 1: - raise NotImplementedError("axis other than 0 is not supported") - - selected_obj = obj._selected_obj - selection = obj._selection - - result_index, result_data = self.compute_dict_like( - op_name, selected_obj, selection, kwargs - ) - result = self.wrap_results_dict_like(selected_obj, result_index, result_data) - return result - def reconstruct_func( func: AggFuncType | None, **kwargs diff --git a/pandas/core/common.py b/pandas/core/common.py index ee8fe220698b5..3981f288879d6 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -524,23 +524,29 @@ def convert_to_list_like( @contextlib.contextmanager -def temp_setattr(obj, attr: str, value) -> Generator[None, None, None]: +def temp_setattr( + obj, attr: str, value, condition: bool = True +) -> Generator[None, None, None]: """Temporarily set attribute on an object. Args: obj: Object whose attribute will be modified. attr: Attribute to modify. value: Value to temporarily set attribute to. + condition: Whether to set the attribute. Provided in order to not have to + conditionally use this context manager. Yields: obj with modified attribute.
""" - old_value = getattr(obj, attr) - setattr(obj, attr, value) + if condition: + old_value = getattr(obj, attr) + setattr(obj, attr, value) try: yield obj finally: - setattr(obj, attr, old_value) + if condition: + setattr(obj, attr, old_value) def require_length_match(data, index: Index) -> None: From 2d8317dd5127dd033934fd7ae726ca977a300109 Mon Sep 17 00:00:00 2001 From: richard Date: Fri, 7 Jul 2023 10:40:56 -0400 Subject: [PATCH 6/6] docstrings and comment --- pandas/core/apply.py | 66 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7fda5b07923a8..566055bc138ff 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -312,7 +312,27 @@ def compute_list_like( op_name: Literal["agg", "apply"], selected_obj: Series | DataFrame, kwargs: dict[str, Any], - ) -> tuple[list[Hashable], list[Series | DataFrame]]: + ) -> tuple[list[Hashable], list[Any]]: + """ + Compute agg/apply results for list-like input. + + Parameters + ---------- + op_name : {"agg", "apply"} + Operation being performed. + selected_obj : Series or DataFrame + Data to perform operation on. + kwargs : dict + Keyword arguments to pass to the functions. + + Returns + ------- + keys : list[hashable] + Index labels for result. + results : list + Data for result. When aggregating with a Series, this can contain any + Python objects. + """ func = cast(List[AggFuncTypeBase], self.func) obj = self.obj @@ -389,6 +409,28 @@ def compute_dict_like( selection: Hashable | Sequence[Hashable], kwargs: dict[str, Any], ) -> tuple[list[Hashable], list[Any]]: + """ + Compute agg/apply results for dict-like input. + + Parameters + ---------- + op_name : {"agg", "apply"} + Operation being performed. + selected_obj : Series or DataFrame + Data to perform operation on. + selection : hashable or sequence of hashables + Used by GroupBy, Window, and Resample if selection is applied to the object.
+ kwargs : dict + Keyword arguments to pass to the functions. + + Returns + ------- + keys : list[hashable] + Index labels for result. + results : list + Data for result. When aggregating with a Series, this can contain any + Python object. + """ obj = self.obj func = cast(AggFuncTypeDict, self.func) func = self.normalize_dictlike_arg(op_name, selected_obj, func) @@ -401,15 +443,13 @@ def compute_dict_like( if selected_obj.ndim == 1: # key only used for output colg = obj._gotitem(selection, ndim=1) - result_data = [ - getattr(colg, op_name)(how, **kwargs) for _, how in func.items() - ] - result_index = list(func.keys()) + results = [getattr(colg, op_name)(how, **kwargs) for _, how in func.items()] + keys = list(func.keys()) elif is_non_unique_col: # key used for column selection and output # GH#51099 - result_data = [] - result_index = [] + results = [] + keys = [] for key, how in func.items(): indices = selected_obj.columns.get_indexer_for([key]) labels = selected_obj.columns.take(indices) @@ -423,17 +463,17 @@ def compute_dict_like( for indice in indices ] - result_index += [key] * len(key_data) - result_data += key_data + keys += [key] * len(key_data) + results += key_data else: # key used for column selection and output - result_data = [ + results = [ getattr(obj._gotitem(key, ndim=1), op_name)(how, **kwargs) for key, how in func.items() ] - result_index = list(func.keys()) + keys = list(func.keys()) - return result_index, result_data + return keys, results def wrap_results_dict_like( self, @@ -1302,6 +1342,8 @@ def agg_or_apply_list_like( else: selected_obj = obj._obj_with_exclusions + # Only set as_index=True on groupby objects, not Window or Resample + # that inherit from this class. with com.temp_setattr( obj, "as_index", True, condition=hasattr(obj, "as_index") ):