diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c987588097953..f849aa11990c4 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1197,7 +1197,6 @@ Numeric - Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`) - Bug in :meth:`~arrays.ArrowExtensionArray.mode` where ``dropna=False`` was not respected when there was ``NA`` values (:issue:`50982`) - Bug in :meth:`DataFrame.query` with ``engine="numexpr"`` and column names are ``min`` or ``max`` would raise a ``TypeError`` (:issue:`50937`) -- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with tz-aware data containing ``pd.NaT`` and ``axis=1`` would return incorrect results (:issue:`51242`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 70019030da182..eb1fc01916868 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -102,6 +102,7 @@ is_integer_dtype, is_iterator, is_list_like, + is_object_dtype, is_scalar, is_sequence, needs_i8_conversion, @@ -10917,44 +10918,43 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - # Case with EAs see GH#35881 df = self if numeric_only: df = _get_data() if axis is None: return func(df.values) - elif axis == 1: - if len(df.index) == 0: - # Taking a transpose would result in no columns, losing the dtype. - # In the empty case, reducing along axis 0 or 1 gives the same - # result dtype, so reduce with axis=0 and ignore values - result = df._reduce( - op, - name, - axis=0, - skipna=skipna, - numeric_only=False, - filter_type=filter_type, - **kwds, - ).iloc[:0] - result.index = df.index - return result - df = df.T - - # After possibly _get_data and transposing, we are now in the - # simple case where we can use BlockManager.reduce - res = df._mgr.reduce(blk_func) - out = df._constructor(res).iloc[0] - if out_dtype is not None: - out = out.astype(out_dtype) - elif (df._mgr.get_dtypes() == object).any(): - out = out.astype(object) - elif len(self) == 0 and name in ("sum", "prod"): - # Even if we are object dtype, follow numpy and return - # float64, see test_apply_funcs_over_empty - out = out.astype(np.float64) + elif axis == 0: + res = df._mgr.reduce(blk_func) + out = df._constructor(res).iloc[0] + if out_dtype is not None: + out = out.astype(out_dtype) + elif axis == 0 and len(self) == 0 and name in ["sum", "prod"]: + # Even if we are object dtype, follow numpy and return + # float64, see test_apply_funcs_over_empty + out = out.astype(np.float64) + elif (df._mgr.get_dtypes() == object).any(): + out = out.astype(object) + + return out + + values = df.values + result = func(values) + + if hasattr(result, "dtype"): + if filter_type == "bool" and notna(result).all(): + result = result.astype(np.bool_) + elif (df._mgr.get_dtypes() == object).any(): + result = result.astype(object) + elif filter_type is None and is_object_dtype(result.dtype): + try: + result = result.astype(np.float64) + except (ValueError, TypeError): + # try to coerce to the original dtypes item by item if we can + pass - return out + labels = self._get_agg_axis(axis) + result = self._constructor_sliced(result, index=labels) + return result def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: """ diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 28809e2ecb788..7944063ef23b3 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1464,6 +1464,7 @@ def test_preserve_timezone(self, initial: str, method): result = getattr(df, method)(axis=1) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(reason="GH#51955 - avoid perf regression in axis=1 ops") @pytest.mark.parametrize("method", ["min", "max"]) def test_minmax_tzaware_skipna_axis_1(self, method, skipna): # GH#51242