Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 59 additions & 4 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from pandas.core._numba.executor import generate_apply_looper
import pandas.core.common as com
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.reshape.concat import concat
from pandas.core.util.numba_ import (
get_jit_arguments,
prepare_function_arguments,
Expand Down Expand Up @@ -367,7 +368,6 @@ def transform_dict_like(self, func) -> DataFrame:
"""
Compute transform in the case of a dict-like func
"""
from pandas.core.reshape.concat import concat

obj = self.obj
args = self.args
Expand Down Expand Up @@ -483,8 +483,6 @@ def compute_list_like(
def wrap_results_list_like(
self, keys: Iterable[Hashable], results: list[Series | DataFrame]
):
from pandas.core.reshape.concat import concat

obj = self.obj

try:
Expand Down Expand Up @@ -613,7 +611,6 @@ def wrap_results_dict_like(
result_data: list,
):
from pandas import Index
from pandas.core.reshape.concat import concat

obj = self.obj

Expand Down Expand Up @@ -841,10 +838,68 @@ def agg_or_apply_list_like(
if getattr(obj, "axis", 0) == 1:
raise NotImplementedError("axis other than 0 is not supported")

if op_name == "agg" and obj.ndim == 2:
result = self._agg_list_like_frame_reductions()
if result is not None:
return result

keys, results = self.compute_list_like(op_name, obj, kwargs)
result = self.wrap_results_list_like(keys, results)
return result

def _agg_list_like_frame_reductions(self) -> DataFrame | None:
    """
    Aggregate a list of named functions using DataFrame-level reductions.

    Instead of extracting each column as a Series and calling
    Series.agg per column, call DataFrame-level reductions directly.
    Operates per dtype group to preserve per-column dtypes.

    Returns
    -------
    DataFrame or None
        One row per function name and one column per column of
        ``self.obj``, in the original column order.

        None if the fast path cannot be used: the function list is
        empty, contains non-string entries, or names something that is
        not an attribute of the frame; the frame has no columns or has
        duplicate column names; a call raises TypeError; or a call
        returns something other than a Series (i.e. it is not a
        reduction). Returning None lets the caller fall back to the
        general list-like path.
    """
    func = cast("list[AggFuncTypeBase]", self.func)

    # An empty list would leave nothing to concatenate below; defer to
    # the general path instead of raising from concat([]).
    if len(func) == 0 or not all(isinstance(f, str) for f in func):
        return None

    obj = self.obj
    func_names = cast("list[str]", func)

    # Cannot reindex with duplicate column names, and a frame without
    # columns yields no dtype groups to reduce.
    if len(obj.columns) == 0 or not obj.columns.is_unique:
        return None

    # Verify all function names are valid attributes on the DataFrame
    if not all(hasattr(obj, func_name) for func_name in func_names):
        return None

    # Compute reductions per dtype group to preserve per-column dtypes.
    # Using to_frame().T for each result avoids the slow
    # DataFrame(list-of-Series) construction path.
    # NOTE(review): this relies on dtype equality being exact. A
    # third-party extension dtype that misreports equality could be
    # grouped with a different dtype; pandas' own dtypes are expected to
    # compare correctly (including e.g. differing time resolutions).
    groups = obj.columns.groupby(obj.dtypes)  # type: ignore[arg-type]

    pieces = []
    for cols in groups.values():
        sub = obj[cols]
        group_pieces = []
        for func_name in func_names:
            try:
                row = getattr(sub, func_name)(*self.args, **self.kwargs)
            except TypeError:
                # Non-callable attribute or unsupported arguments/dtype
                return None
            if not isinstance(row, ABCSeries):
                # Not a reduction (e.g. returns DataFrame), fall back
                return None
            group_pieces.append(row.to_frame(func_name).T)
        pieces.append(concat(group_pieces))

    result = concat(pieces, axis=1)
    # Restore the original column order, which grouping by dtype scrambles
    return result.reindex(columns=obj.columns)

def agg_or_apply_dict_like(
self, op_name: Literal["agg", "apply"]
) -> DataFrame | Series:
Expand Down
Loading