diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 7555fb50f16af..b954600016d4b 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -21,7 +21,10 @@
 
 import numpy as np
 
-from pandas._config import option_context
+from pandas._config import (
+    get_option,
+    option_context,
+)
 
 from pandas._libs import lib
 from pandas._typing import (
@@ -82,6 +85,7 @@ def frame_apply(
     result_type: str | None = None,
     args=None,
     kwargs=None,
+    renamer=None,
 ) -> FrameApply:
     """construct and return a row or column based frame apply object"""
     axis = obj._get_axis_number(axis)
@@ -98,6 +102,7 @@ def frame_apply(
         result_type=result_type,
         args=args,
         kwargs=kwargs,
+        renamer=renamer,
     )
 
 
@@ -112,6 +117,7 @@ def __init__(
         result_type: str | None,
         args,
         kwargs,
+        renamer=None,
     ):
         self.obj = obj
         self.raw = raw
@@ -141,6 +147,7 @@ def f(x):
 
         self.orig_f: AggFuncType = func
         self.f: AggFuncType = f
+        self.renamer = renamer
 
     @abc.abstractmethod
     def apply(self) -> DataFrame | Series:
@@ -164,10 +171,16 @@ def agg(self) -> DataFrame | Series | None:
             return self.apply_str()
 
         if is_dict_like(arg):
-            return self.agg_dict_like()
+            if get_option("new_udf_methods"):
+                return self.new_dict_like("agg")
+            else:
+                return self.agg_dict_like()
         elif is_list_like(arg):
             # we require a list, but not a 'str'
-            return self.agg_list_like()
+            if get_option("new_udf_methods"):
+                return self.new_list_like("agg")
+            else:
+                return self.agg_list_like()
 
         if callable(arg):
             f = com.get_cython_func(arg)
@@ -408,6 +421,70 @@ def agg_list_like(self) -> DataFrame | Series:
             )
             return concatenated.reindex(full_ordered_index, copy=False)
 
+    def new_list_like(self, method: str) -> DataFrame | Series:
+        """
+        Call ``method`` on ``self.obj`` once per entry of the list-like ``self.f``.
+
+        Returns
+        -------
+        Results combined with ``concat`` (axis=1); a Series if that raises TypeError.
+        """
+        from pandas.core.reshape.concat import concat
+
+        obj = self.obj
+        arg = cast(List[AggFuncTypeBase], self.f)
+
+        results = []
+        keys = []
+        result_dim = None
+
+        for a in arg:
+            name = None
+            try:
+                if isinstance(a, (tuple, list)):
+                    # Handle (name, value) pairs
+                    name, a = a
+                new_res = getattr(obj, method)(a)
+                if result_dim is None:  # first computed result fixes the ndim
+                    result_dim = getattr(new_res, "ndim", 0)
+                elif getattr(new_res, "ndim", 0) != result_dim:
+                    raise ValueError(
+                        "cannot combine transform and aggregation operations"
+                    )
+            except TypeError:  # this entry is dropped; the loop continues
+                pass
+            else:
+                results.append(new_res)
+
+                # label the result: explicit name, else callable name, else entry
+                if name is None:
+                    name = com.get_callable_name(a) or a
+                keys.append(name)
+
+        # nothing was collected: the list was empty or every entry raised TypeError
+        if not len(results):
+            raise ValueError("no results")
+
+        try:
+            concatenated = concat(results, keys=keys, axis=1, sort=False)
+        except TypeError:
+            # we are concatting non-NDFrame objects,
+            # e.g. a list of scalars
+            from pandas import Series
+
+            result = Series(results, index=keys, name=obj.name)
+            return result
+        else:
+            # Concat uses the first index to determine the final indexing order.
+            # The union of a shorter first index with the other indices causes
+            # the index sorting to be different from the order of the aggregating
+            # functions. Reindex if this is the case.
+            index_size = concatenated.index.size
+            full_ordered_index = next(
+                result.index for result in results if result.index.size == index_size
+            )
+            return concatenated.reindex(full_ordered_index, copy=False)
+
     def agg_dict_like(self) -> DataFrame | Series:
         """
         Compute aggregation in the case of a dict-like argument.
@@ -486,6 +563,86 @@ def agg_dict_like(self) -> DataFrame | Series:
 
         return result
 
+    def new_dict_like(self, method: str) -> DataFrame | Series:
+        """
+        Call ``method`` on ``self.obj`` once per entry of the dict-like ``self.f``.
+
+        Keys pick the column (2D input) or only label the output (1D input).
+
+        Returns
+        -------
+        Results combined with ``concat`` (axis=1), or a Series of scalar results.
+        """
+        from pandas import Index
+        from pandas.core.reshape.concat import concat
+
+        obj = self.obj
+        arg = cast(AggFuncTypeDict, self.f)
+
+        if not isinstance(obj, SelectionMixin):
+            # i.e. obj is Series or DataFrame
+            selected_obj = obj
+            selection = None
+        else:
+            selected_obj = obj._selected_obj
+            selection = obj._selection
+
+        # NOTE(review): normalized as "agg" even when method == "apply" - confirm
+        arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
+
+        if selected_obj.ndim == 1:
+            # key only used for output
+            colg = obj._gotitem(selection, ndim=1)
+            results = {key: getattr(colg, method)(how) for key, how in arg.items()}
+
+        else:
+            # key used for column selection and output
+            results = {
+                key: getattr(obj._gotitem(key, ndim=1), method)(how)
+                for key, how in arg.items()
+            }
+            if self.renamer is not None:
+                for key, columns in self.renamer.items():
+                    results[key].columns = columns
+
+        # both branches above build a dict; compute the flags once for all()/any()
+        is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
+
+        # combine results
+        result: DataFrame | Series
+        if all(is_ndframe):
+            keys_to_use: Iterable[Hashable]
+            keys_to_use = [k for k in arg.keys() if not results[k].empty]
+            keys_to_use = keys_to_use if keys_to_use != [] else arg.keys()
+            if selected_obj.ndim == 2:
+                # keys are columns, so we can preserve names
+                ktu = Index(keys_to_use)
+                ktu._set_names(selected_obj.columns.names)
+                keys_to_use = ktu
+            keys = None if selected_obj.ndim == 1 else keys_to_use
+            result = concat({k: results[k] for k in keys_to_use}, keys=keys, axis=1)
+        elif any(is_ndframe):
+            # There is a mix of NDFrames and scalars
+            raise ValueError(
+                "cannot perform both aggregation "
+                "and transformation operations "
+                "simultaneously"
+            )
+        else:
+            from pandas import Series
+
+            # we have a dict of scalars
+            # GH 36212 use name only if obj is a series
+            if obj.ndim == 1:
+                obj = cast("Series", obj)
+                name = obj.name
+            else:
+                name = None
+
+            result = Series(results, index=arg.keys(), name=name)
+
+        return result
+
     def apply_str(self) -> DataFrame | Series:
         """
         Compute apply in case of a string.
@@ -522,6 +679,35 @@ def apply_multiple(self) -> DataFrame | Series:
         """
         return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs)
 
+    def new_apply_multiple(self) -> DataFrame | Series:
+        """
+        Compute apply for a list-like or dict-like ``self.f`` using the new helpers.
+
+        Returns
+        -------
+        Series or DataFrame
+            Output of ``new_dict_like``/``new_list_like``; transposed if axis is 1.
+        """
+        obj = self.obj
+        axis = self.axis
+
+        self.obj = obj if axis == 0 else obj.T  # helpers always run with axis 0
+        self.axis = 0
+
+        try:
+            if is_dict_like(self.f):
+                result = self.new_dict_like("apply")
+            else:
+                result = self.new_list_like("apply")
+        finally:  # put the original object and axis back, even on error
+            self.obj = obj
+            self.axis = axis
+
+        if axis == 1:  # undo the transposition applied to the input
+            result = result.T if result is not None else result
+
+        return result
+
     def normalize_dictlike_arg(
         self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict
     ) -> AggFuncTypeDict:
@@ -661,7 +847,10 @@ def apply(self) -> DataFrame | Series:
         """compute the results"""
         # dispatch to agg
         if is_list_like(self.f):
-            return self.apply_multiple()
+            if get_option("new_udf_methods"):
+                return self.new_apply_multiple()
+            else:
+                return self.apply_multiple()
 
         # all empty
         if len(self.columns) == 0 and len(self.index) == 0:
@@ -1039,7 +1228,10 @@ def apply(self) -> DataFrame | Series:
 
         # dispatch to agg
         if is_list_like(self.f):
-            return self.apply_multiple()
+            if get_option("new_udf_methods"):
+                return self.new_apply_multiple()
+            else:
+                return self.apply_multiple()
 
         if isinstance(self.f, str):
             # if we are a string, try to dispatch
@@ -1172,7 +1364,13 @@ def transform(self):
 
 def reconstruct_func(
     func: AggFuncType | None, **kwargs
-) -> tuple[bool, AggFuncType | None, list[str] | None, list[int] | None]:
+) -> tuple[
+    bool,
+    AggFuncType | None,
+    list[str] | None,
+    list[int] | None,
+    dict[str, list[str]] | None,
+]:
     """
     This is the internal function to reconstruct func given if there is relabeling
     or not and also normalize the keyword to get new order of columns.
@@ -1204,14 +1402,16 @@ def reconstruct_func(
     Examples
     --------
     >>> reconstruct_func(None, **{"foo": ("col", "min")})
-    (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0]))
+    (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0]),
+    defaultdict(<class 'list'>, {'col': ['foo']}))
 
     >>> reconstruct_func("min")
-    (False, 'min', None, None)
+    (False, 'min', None, None, None)
     """
     relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
     columns: list[str] | None = None
     order: list[int] | None = None
+    renamer: dict[str, list[str]] | None = None
 
     if not relabeling:
         if isinstance(func, list) and len(func) > len(set(func)):
@@ -1227,9 +1427,9 @@ def reconstruct_func(
             raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
 
     if relabeling:
-        func, columns, order = normalize_keyword_aggregation(kwargs)
+        func, columns, order, renamer = normalize_keyword_aggregation(kwargs)
 
-    return relabeling, func, columns, order
+    return relabeling, func, columns, order, renamer
 
 
 def is_multi_agg_with_relabel(**kwargs) -> bool:
@@ -1258,7 +1458,9 @@ def is_multi_agg_with_relabel(**kwargs) -> bool:
     )
 
 
-def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[int]]:
+def normalize_keyword_aggregation(
+    kwargs: dict,
+) -> tuple[dict, list[str], list[int], dict[str, list]]:
     """
     Normalize user-provided "named aggregation" kwargs.
     Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
@@ -1280,7 +1482,8 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i
     Examples
     --------
     >>> normalize_keyword_aggregation({"output": ("input", "sum")})
-    (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]))
+    (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]),
+    defaultdict(<class 'list'>, {'input': ['output']}))
     """
     from pandas.core.indexes.base import Index
 
@@ -1290,11 +1493,13 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i
     # May be hitting https://github.com/python/mypy/issues/5958
     # saying it doesn't have an attribute __name__
     aggspec: DefaultDict = defaultdict(list)
+    renamer: DefaultDict = defaultdict(list)
     order = []
     columns, pairs = list(zip(*kwargs.items()))
 
-    for column, aggfunc in pairs:
+    for name, (column, aggfunc) in zip(kwargs, pairs):
         aggspec[column].append(aggfunc)
+        renamer[column].append(name)
         order.append((column, com.get_callable_name(aggfunc) or aggfunc))
 
     # uniquify aggfunc name if duplicated in order list
@@ -1314,7 +1519,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i
     col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
     # error: Incompatible return value type (got "Tuple[defaultdict[Any, Any],
     # Any, ndarray]", expected "Tuple[Dict[Any, Any], List[str], List[int]]")
-    return aggspec, columns, col_idx_order  # type: ignore[return-value]
+    return aggspec, columns, col_idx_order, renamer  # type: ignore[return-value]
 
 
 def _make_unique_kwarg_list(
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index cf41bcff3d0c8..2df98a59cb184 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -511,6 +511,23 @@ def use_inf_as_na_cb(key):
         validator=is_one_of_factory(["block", "array"]),
     )
 
+new_udf_methods = """
+: boolean
+    Whether to use the new UDF method implementations. Currently experimental.
+    Defaults to False.
+"""
+
+
+with cf.config_prefix("mode"):
+    cf.register_option(
+        "new_udf_methods",
+        # Default is True only if PANDAS_NEW_UDF_METHODS is "true" (any case);
+        # otherwise False. This environment variable can be set for testing.
+        os.environ.get("PANDAS_NEW_UDF_METHODS", "false").lower() == "true",
+        new_udf_methods,
+        validator=is_bool,
+    )
+
 
 # user warnings
 chained_assignment = """
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bfdfeabbd389c..853f7009113d5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8609,9 +8609,11 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
 
         axis = self._get_axis_number(axis)
 
-        relabeling, func, columns, order = reconstruct_func(func, **kwargs)
+        relabeling, func, columns, order, renamer = reconstruct_func(func, **kwargs)
 
-        op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
+        op = frame_apply(
+            self, func=func, axis=axis, args=args, kwargs=kwargs, renamer=renamer
+        )
         result = op.agg()
 
         if relabeling:
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 38f1d41494fd2..833d011e4745d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -26,6 +26,8 @@
 
 import numpy as np
 
+from pandas._config import get_option
+
 from pandas._libs import reduction as libreduction
 from pandas._typing import (
     ArrayLike,
@@ -883,7 +885,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
             index = self.grouper.result_index
             return self.obj._constructor(result, index=index, columns=data.columns)
 
-        relabeling, func, columns, order = reconstruct_func(func, **kwargs)
+        relabeling, func, columns, order, _ = reconstruct_func(func, **kwargs)
         func = maybe_mangle_lambdas(func)
 
         op = GroupByApply(self, func, args, kwargs)
@@ -897,49 +899,65 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
             result.columns = columns
 
         if result is None:
-
-            # grouper specific aggregations
-            if self.grouper.nkeys > 1:
-                # test_groupby_as_index_series_scalar gets here with 'not self.as_index'
-                return self._python_agg_general(func, *args, **kwargs)
-            elif args or kwargs:
-                # test_pass_args_kwargs gets here (with and without as_index)
-                # can't return early
-                result = self._aggregate_frame(func, *args, **kwargs)
-
-            elif self.axis == 1:
-                # _aggregate_multiple_funcs does not allow self.axis == 1
-                # Note: axis == 1 precludes 'not self.as_index', see __init__
-                result = self._aggregate_frame(func)
-                return result
-
+            if get_option("new_udf_methods"):
+                if args or kwargs:
+                    # test_pass_args_kwargs gets here (with and without as_index)
+                    # can't return early
+                    result = self._aggregate_frame(func, *args, **kwargs)
+
+                elif self.axis == 1 and self.grouper.nkeys == 1:
+                    # _aggregate_multiple_funcs does not allow self.axis == 1
+                    # Note: axis == 1 precludes 'not self.as_index', see __init__
+                    result = self._aggregate_frame(func)
+                    return result
+                else:
+                    # test_groupby_as_index_series_scalar gets here
+                    # with 'not self.as_index'
+                    return self._python_agg_general(func, *args, **kwargs)
             else:
-
-                # try to treat as if we are passing a list
-                gba = GroupByApply(self, [func], args=(), kwargs={})
-                try:
-                    result = gba.agg()
-
-                except ValueError as err:
-                    if "no results" not in str(err):
-                        # raised directly by _aggregate_multiple_funcs
-                        raise
+                # grouper specific aggregations
+                if self.grouper.nkeys > 1:
+                    # test_groupby_as_index_series_scalar gets here with
+                    # 'not self.as_index'
+                    return self._python_agg_general(func, *args, **kwargs)
+                elif args or kwargs:
+                    # test_pass_args_kwargs gets here (with and without as_index)
+                    # can't return early
+                    result = self._aggregate_frame(func, *args, **kwargs)
+
+                elif self.axis == 1:
+                    # _aggregate_multiple_funcs does not allow self.axis == 1
+                    # Note: axis == 1 precludes 'not self.as_index', see __init__
                     result = self._aggregate_frame(func)
+                    return result
 
                 else:
-                    sobj = self._selected_obj
+                    # try to treat as if we are passing a list
+                    gba = GroupByApply(self, [func], args=(), kwargs={})
+                    try:
+                        result = gba.agg()
+
+                    except ValueError as err:
+                        if "no results" not in str(err):
+                            # raised directly by _aggregate_multiple_funcs
+                            raise
+                        result = self._aggregate_frame(func)
 
-                    if isinstance(sobj, Series):
-                        # GH#35246 test_groupby_as_index_select_column_sum_empty_df
-                        result.columns = self._obj_with_exclusions.columns.copy()
                     else:
-                        # Retain our column names
-                        result.columns._set_names(
-                            sobj.columns.names, level=list(range(sobj.columns.nlevels))
-                        )
-                        # select everything except for the last level, which is the one
-                        # containing the name of the function(s), see GH#32040
-                        result.columns = result.columns.droplevel(-1)
+                        sobj = self._selected_obj
+
+                        if isinstance(sobj, Series):
+                            # GH#35246 test_groupby_as_index_select_column_sum_empty_df
+                            result.columns = self._obj_with_exclusions.columns.copy()
+                        else:
+                            # Retain our column names
+                            result.columns._set_names(
+                                sobj.columns.names,
+                                level=list(range(sobj.columns.nlevels)),
+                            )
+                            # select everything except for the last level, which is the
+                            # one containing the name of the function(s), see GH#32040
+                            result.columns = result.columns.droplevel(-1)
 
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 62983b5327a26..d79317e48bd5b 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -13,6 +13,7 @@
     Series,
     Timestamp,
     date_range,
+    get_option,
 )
 import pandas._testing as tm
 from pandas.tests.frame.common import zip_frames
@@ -639,6 +640,8 @@ def test_apply_dup_names_multi_agg():
     # GH 21063
     df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"])
     expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"])
+    if get_option("mode.new_udf_methods"):
+        expected = expected.T
     result = df.agg(["min"])
 
     tm.assert_frame_equal(result, expected)
@@ -1010,25 +1013,46 @@ def test_agg_transform(axis, float_frame):
         # list-like
         result = float_frame.apply([np.sqrt], axis=axis)
         expected = f_sqrt.copy()
-        if axis in {0, "index"}:
-            expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]])
+        if get_option("mode.new_udf_methods"):
+            if axis in {0, "index"}:
+                expected.columns = MultiIndex.from_product(
+                    [["sqrt"], float_frame.columns]
+                )
+            else:
+                expected.index = MultiIndex.from_product([["sqrt"], float_frame.index])
         else:
-            expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]])
+            if axis in {0, "index"}:
+                expected.columns = MultiIndex.from_product(
+                    [float_frame.columns, ["sqrt"]]
+                )
+            else:
+                expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]])
         tm.assert_frame_equal(result, expected)
 
         # multiple items in list
         # these are in the order as if we are applying both
         # functions per series and then concatting
         result = float_frame.apply([np.abs, np.sqrt], axis=axis)
-        expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
-        if axis in {0, "index"}:
-            expected.columns = MultiIndex.from_product(
-                [float_frame.columns, ["absolute", "sqrt"]]
-            )
+        if get_option("mode.new_udf_methods"):
+            expected = pd.concat([f_abs, f_sqrt], axis=other_axis)
+            if axis in {0, "index"}:
+                expected.columns = MultiIndex.from_product(
+                    [["absolute", "sqrt"], float_frame.columns]
+                )
+            else:
+                expected.index = MultiIndex.from_product(
+                    [["absolute", "sqrt"], float_frame.index]
+                )
         else:
-            expected.index = MultiIndex.from_product(
-                [float_frame.index, ["absolute", "sqrt"]]
-            )
+            expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
+            if axis in {0, "index"}:
+                expected.columns = MultiIndex.from_product(
+                    [float_frame.columns, ["absolute", "sqrt"]]
+                )
+            else:
+                expected.index = MultiIndex.from_product(
+                    [float_frame.index, ["absolute", "sqrt"]]
+                )
         tm.assert_frame_equal(result, expected)
 
 
@@ -1040,6 +1064,8 @@ def test_demo():
     expected = DataFrame(
         {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"]
     )
+    if get_option("mode.new_udf_methods"):
+        expected = expected.T
     tm.assert_frame_equal(result, expected)
 
     result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]})
@@ -1086,18 +1112,29 @@ def test_agg_multiple_mixed_no_warning():
         },
         index=["min", "sum"],
     )
+    klass, match = None, None
+    if get_option("mode.new_udf_methods"):
+        expected = expected.T
+        klass, match = FutureWarning, "Dropping of nuisance columns"
     # sorted index
-    with tm.assert_produces_warning(None):
+    with tm.assert_produces_warning(klass, match=match, check_stacklevel=False):
         result = mdf.agg(["min", "sum"])
 
     tm.assert_frame_equal(result, expected)
 
-    with tm.assert_produces_warning(None):
+    klass, match = None, None
+    if get_option("mode.new_udf_methods"):
+        klass, match = FutureWarning, "Dropping of nuisance columns"
+
+    with tm.assert_produces_warning(klass, match=match, check_stacklevel=False):
         result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
 
     # GH40420: the result of .agg should have an index that is sorted
     # according to the arguments provided to agg.
-    expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"])
+    if get_option("mode.new_udf_methods"):
+        expected = expected.loc[["D", "C", "B", "A"], ["sum", "min"]]
+    else:
+        expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"])
     tm.assert_frame_equal(result, expected)
 
 
@@ -1116,6 +1153,8 @@ def test_agg_reduce(axis, float_frame):
     )
     expected.columns = ["mean", "max", "sum"]
     expected = expected.T if axis in {0, "index"} else expected
+    if get_option("mode.new_udf_methods"):
+        expected = expected.T
 
     result = float_frame.agg(["mean", "max", "sum"], axis=axis)
     tm.assert_frame_equal(result, expected)
@@ -1192,6 +1231,8 @@ def test_nuiscance_columns():
         index=["min"],
         columns=df.columns,
     )
+    if get_option("mode.new_udf_methods"):
+        expected = expected.T
     tm.assert_frame_equal(result, expected)
 
     with tm.assert_produces_warning(
@@ -1205,6 +1246,8 @@ def test_nuiscance_columns():
     expected = DataFrame(
         [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
     )
+    if get_option("mode.new_udf_methods"):
+        expected = expected.T
     tm.assert_frame_equal(result, expected)
 
 
@@ -1244,8 +1287,12 @@ def test_non_callable_aggregates(how):
         }
     )
 
-    tm.assert_frame_equal(result1, result2, check_like=True)
-    tm.assert_frame_equal(result2, expected, check_like=True)
+    if get_option("new_udf_methods"):
+        tm.assert_frame_equal(result2, expected)
+        tm.assert_frame_equal(result1, expected.T)
+    else:
+        tm.assert_frame_equal(result1, result2, check_like=True)
+        tm.assert_frame_equal(result2, expected, check_like=True)
 
     # Just functional string arg is same as calling df.arg()
     result = getattr(df, how)("count")
@@ -1282,7 +1329,9 @@ def func(group_col):
     tm.assert_series_equal(result, expected)
 
     result = df.agg([func])
-    expected = expected.to_frame("func").T
+    expected = expected.to_frame("func")
+    if not get_option("mode.new_udf_methods"):
+        expected = expected.T
     tm.assert_frame_equal(result, expected)
 
 
@@ -1395,14 +1444,20 @@ def test_apply_empty_list_reduce():
     tm.assert_series_equal(result, expected)
 
 
-def test_apply_no_suffix_index():
+def test_apply_no_suffix_index(request):
     # GH36189
     pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"])
-    result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
-    expected = DataFrame(
-        {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "<lambda>", "<lambda>"]
-    )
-
+    result = pdf.apply([np.square, lambda x: x, lambda x: x])
+    if get_option("mode.new_udf_methods"):
+        columns = MultiIndex.from_product(
+            [["square", "<lambda>", "<lambda>"], ["A", "B"]]
+        )
+        expected = DataFrame(3 * [[16, 81, 4, 9, 4, 9]], columns=columns)
+    else:
+        columns = MultiIndex.from_product(
+            [["A", "B"], ["square", "<lambda>", "<lambda>"]]
+        )
+        expected = DataFrame(3 * [[16, 4, 4, 81, 9, 9]], columns=columns)
     tm.assert_frame_equal(result, expected)
 
 
@@ -1434,15 +1489,25 @@ def foo(s):
 
     aggs = ["sum", foo, "count", "min"]
     result = df.agg(aggs)
-    expected = DataFrame(
-        {
-            "item": ["123456", np.nan, 6, "1"],
-            "att1": [21.0, 10.5, 6.0, 1.0],
-            "att2": [18.0, 9.0, 6.0, 0.0],
-            "att3": [17.0, 8.5, 6.0, 0.0],
-        },
-        index=["sum", "foo", "count", "min"],
-    )
+    if get_option("mode.new_udf_methods"):
+        expected = DataFrame(
+            {
+                "sum": ["123456", 21, 18, 17],
+                "count": [6, 6, 6, 6],
+                "min": ["1", 1, 0, 0],
+            },
+            index=["item", "att1", "att2", "att3"],
+        )
+    else:
+        expected = DataFrame(
+            {
+                "item": ["123456", np.nan, 6, "1"],
+                "att1": [21.0, 10.5, 6.0, 1.0],
+                "att2": [18.0, 9.0, 6.0, 0.0],
+                "att3": [17.0, 8.5, 6.0, 0.0],
+            },
+            index=["sum", "foo", "count", "min"],
+        )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
index 2af340f0c1bb9..513f074c7beb5 100644
--- a/pandas/tests/apply/test_series_apply.py
+++ b/pandas/tests/apply/test_series_apply.py
@@ -13,6 +13,7 @@
     MultiIndex,
     Series,
     concat,
+    get_option,
     isna,
     timedelta_range,
 )
@@ -254,10 +255,14 @@ def test_transform(string_series):
         # dict, provide renaming
         expected = concat([f_sqrt, f_abs], axis=1)
         expected.columns = ["foo", "bar"]
-        expected = expected.unstack().rename("series")
+        if not get_option("new_udf_methods"):
+            expected = expected.unstack().rename("series")
 
         result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
-        tm.assert_series_equal(result.reindex_like(expected), expected)
+        if get_option("new_udf_methods"):
+            tm.assert_frame_equal(result, expected)
+        else:
+            tm.assert_series_equal(result.reindex_like(expected), expected)
 
 
 @pytest.mark.parametrize("op", series_transform_kernels)
@@ -364,18 +369,32 @@ def test_with_nested_series(datetime_series):
 def test_replicate_describe(string_series):
     # this also tests a result set that is all scalars
     expected = string_series.describe()
-    result = string_series.apply(
-        {
-            "count": "count",
-            "mean": "mean",
-            "std": "std",
-            "min": "min",
-            "25%": lambda x: x.quantile(0.25),
-            "50%": "median",
-            "75%": lambda x: x.quantile(0.75),
-            "max": "max",
-        }
-    )
+    if get_option("mode.new_udf_methods"):  # new implementation: exercise agg
+        result = string_series.agg(
+            {
+                "count": "count",
+                "mean": "mean",
+                "std": "std",
+                "min": "min",
+                "25%": lambda x: x.quantile(0.25),
+                "50%": "median",
+                "75%": lambda x: x.quantile(0.75),
+                "max": "max",
+            }
+        )
+    else:  # legacy implementation: exercise apply
+        result = string_series.apply(
+            {
+                "count": "count",
+                "mean": "mean",
+                "std": "std",
+                "min": "min",
+                "25%": lambda x: x.quantile(0.25),
+                "50%": "median",
+                "75%": lambda x: x.quantile(0.75),
+                "max": "max",
+            }
+        )
     tm.assert_series_equal(result, expected)
 
 
@@ -410,10 +429,17 @@ def test_non_callable_aggregates(how):
 def test_series_apply_no_suffix_index():
     # GH36189
     s = Series([4] * 3)
+    if get_option("new_udf_methods"):
+        # new behavior: expect a frame with one column per listed function
+        result = s.apply(["sqrt", lambda x: np.sqrt(x), lambda x: np.sqrt(x)])
+        expected = DataFrame([[2.0] * 3] * 3, columns=["sqrt", "<lambda>", "<lambda>"])
+        tm.assert_frame_equal(result, expected)
+        return
+
     result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
     expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])
 
     tm.assert_series_equal(result, expected)
 
 
 def test_map(datetime_series):
@@ -795,10 +818,16 @@ def test_apply_to_timedelta():
 @pytest.mark.parametrize("how", ["agg", "apply"])
 def test_apply_listlike_reducer(string_series, ops, names, how):
     # GH 39140
-    expected = Series({name: op(string_series) for name, op in zip(names, ops)})
-    expected.name = "series"
     result = getattr(string_series, how)(ops)
-    tm.assert_series_equal(result, expected)
+    if get_option("new_udf_methods") and how == "apply":
+        # In this mode apply is expected to return one column per name, each
+        # equal to the input series, instead of a reduced Series.
+        expected = DataFrame({name: string_series for name in names})
+        tm.assert_frame_equal(result, expected)
+    else:
+        expected = Series({name: op(string_series) for name, op in zip(names, ops)})
+        expected.name = "series"
+        tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize(
@@ -813,10 +845,17 @@ def test_apply_listlike_reducer(string_series, ops, names, how):
 @pytest.mark.parametrize("how", ["agg", "apply"])
 def test_apply_dictlike_reducer(string_series, ops, how):
     # GH 39140
-    expected = Series({name: op(string_series) for name, op in ops.items()})
-    expected.name = string_series.name
     result = getattr(string_series, how)(ops)
-    tm.assert_series_equal(result, expected)
+    if get_option("new_udf_methods") and how == "apply":
+        # In this mode apply is expected to return the input series once per
+        # key, laid out as columns; keys() works for both dict and Series ops.
+        names = ops.keys()
+        expected = concat([string_series.rename(name) for name in names], axis=1)
+        tm.assert_frame_equal(result, expected)
+    else:
+        expected = Series({name: op(string_series) for name, op in ops.items()})
+        expected.name = string_series.name
+        tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize(
@@ -849,7 +892,13 @@ def test_apply_listlike_transformer(string_series, ops, names):
 def test_apply_dictlike_transformer(string_series, ops):
     # GH 39140
     with np.errstate(all="ignore"):
-        expected = concat({name: op(string_series) for name, op in ops.items()})
-        expected.name = string_series.name
         result = string_series.apply(ops)
-        tm.assert_series_equal(result, expected)
+        # Build the per-key pieces once; only how they are combined differs.
+        pieces = {name: op(string_series) for name, op in ops.items()}
+        if get_option("new_udf_methods"):
+            expected = concat(pieces, axis=1)
+            tm.assert_frame_equal(result, expected)
+        else:
+            expected = concat(pieces)
+            expected.name = string_series.name
+            tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 4bda0e6ef9872..78194a806f456 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -20,6 +20,7 @@
     MultiIndex,
     Series,
     concat,
+    get_option,
     to_datetime,
 )
 import pandas._testing as tm
@@ -499,12 +500,20 @@ def test_order_aggregate_multiple_funcs():
     # GH 25692
     df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]})
 
+    if get_option("new_udf_methods"):
+        # TODO (GH 35725): This will not raise when agg-must-agg is implemented
+        msg = "Cannot concat indices that do not have the same number of levels"
+        with pytest.raises(AssertionError, match=msg):
+            df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
+        # nothing further to check while the call raises
+        return
+
     res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
     result = res.columns.levels[1]
 
     expected = Index(["sum", "max", "mean", "ohlc", "min"])
 
     tm.assert_index_equal(result, expected)
 
 
 @pytest.mark.parametrize("dtype", [np.int64, np.uint64])
@@ -1207,7 +1214,10 @@ def test_nonagg_agg():
     g = df.groupby("a")
 
     result = g.agg(["cumsum"])
-    result.columns = result.columns.droplevel(-1)
+    if get_option("new_udf_methods"):
+        result.columns = result.columns.droplevel(0)
+    else:
+        result.columns = result.columns.droplevel(-1)
     expected = g.agg("cumsum")
 
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 79990deed261d..d34538a4f5935 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import get_option
+
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -201,13 +203,21 @@ def test_aggregate_api_consistency():
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg([np.sum, np.mean])
-    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
-    expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
+    if get_option("new_udf_methods"):
+        expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]])
+    else:
+        expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped[["D", "C"]].agg([np.sum, np.mean])
-    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
-    expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
+    if get_option("new_udf_methods"):
+        expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]])
+    else:
+        expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
+        expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg({"C": "mean", "D": "sum"})
@@ -393,7 +403,10 @@ def P1(a):
     g = df.groupby("date")
 
     expected = g.agg([P1])
-    expected.columns = expected.columns.levels[0]
+    if get_option("new_udf_methods"):
+        expected.columns = expected.columns.levels[1]
+    else:
+        expected.columns = expected.columns.levels[0]
 
     result = g.agg(P1)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 3ae11847cc06b..caa04d7994223 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -14,6 +14,7 @@
     Series,
     Timestamp,
     date_range,
+    get_option,
 )
 import pandas._testing as tm
 import pandas.core.nanops as nanops
@@ -1138,7 +1139,10 @@ def test_apply_to_nullable_integer_returns_float(values, function):
     tm.assert_frame_equal(result, expected)
 
     result = groups.agg([function])
-    expected.columns = MultiIndex.from_tuples([("b", function)])
+    if get_option("new_udf_methods"):
+        expected.columns = MultiIndex.from_tuples([(function, "b")])
+    else:
+        expected.columns = MultiIndex.from_tuples([("b", function)])
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index b9a6730996a02..1cf36ddbb1772 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -20,6 +20,7 @@
     Timedelta,
     Timestamp,
     date_range,
+    get_option,
     read_csv,
     to_datetime,
 )
@@ -584,11 +585,18 @@ def test_frame_multi_key_function_list():
     grouped = data.groupby(["A", "B"])
     funcs = [np.mean, np.std]
     agged = grouped.agg(funcs)
-    expected = pd.concat(
-        [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)],
-        keys=["D", "E", "F"],
-        axis=1,
-    )
+    if get_option("new_udf_methods"):
+        expected = pd.concat(
+            [grouped.agg(funcs[0]), grouped.agg(funcs[1])],
+            keys=["mean", "std"],
+            axis=1,
+        )
+    else:
+        expected = pd.concat(
+            [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)],
+            keys=["D", "E", "F"],
+            axis=1,
+        )
     assert isinstance(agged.index, MultiIndex)
     assert isinstance(expected.index, MultiIndex)
     tm.assert_frame_equal(agged, expected)
@@ -1985,9 +1993,14 @@ def test_groupby_agg_ohlc_non_first():
         index=date_range("2018-01-01", periods=2, freq="D", name="dti"),
     )
 
-    result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
-
-    tm.assert_frame_equal(result, expected)
+    if get_option("new_udf_methods"):
+        # TODO (GH 35725): This will not raise when agg-must-agg is implemented
+        msg = "Cannot concat indices that do not have the same number of levels"
+        with pytest.raises(AssertionError, match=msg):
+            df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
+    else:
+        result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
+        tm.assert_frame_equal(result, expected)
 
 
 def test_groupby_multiindex_nat():
diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py
index 359c3cea62f9c..ff3cb8d873bb9 100644
--- a/pandas/tests/resample/test_deprecated.py
+++ b/pandas/tests/resample/test_deprecated.py
@@ -10,6 +10,7 @@
 from pandas import (
     DataFrame,
     Series,
+    get_option,
 )
 import pandas._testing as tm
 from pandas.core.indexes.datetimes import date_range
@@ -97,7 +98,10 @@ def test_resample_loffset_arg_type(frame, create_index, arg):
         result_agg = df.resample("2D", loffset="2H").agg(arg)
 
     if isinstance(arg, list):
-        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
+        if get_option("new_udf_methods"):
+            expected.columns = pd.MultiIndex.from_tuples([("mean", "value")])
+        else:
+            expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
 
     tm.assert_frame_equal(result_agg, expected)
 
@@ -216,7 +220,10 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg):
     with tm.assert_produces_warning(FutureWarning):
         result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg)
     if isinstance(agg_arg, list):
-        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
+        if get_option("new_udf_methods"):
+            expected.columns = pd.MultiIndex.from_tuples([("mean", "value")])
+        else:
+            expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
     tm.assert_frame_equal(result_agg, expected)
 
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 3b3bd402e4cc7..d73a99ab80d0d 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -8,6 +8,7 @@
     DataFrame,
     NamedAgg,
     Series,
+    get_option,
 )
 import pandas._testing as tm
 from pandas.core.indexes.datetimes import date_range
@@ -347,15 +348,14 @@ def test_agg():
     b_std = r["B"].std()
     b_sum = r["B"].sum()
 
-    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
-    expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
+    if get_option("new_udf_methods"):
+        expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1)
+        expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]])
+    else:
+        expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
+        expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
     for t in cases:
-        warn = FutureWarning if t in cases[1:3] else None
-        with tm.assert_produces_warning(
-            warn, match="Dropping invalid columns", check_stacklevel=False
-        ):
-            # .var on dt64 column raises and is dropped
-            result = t.aggregate([np.mean, np.std])
+        result = t.aggregate([np.mean, np.std])
         tm.assert_frame_equal(result, expected)
 
     expected = pd.concat([a_mean, b_std], axis=1)
@@ -628,11 +628,22 @@ def test_agg_with_datetime_index_list_agg_func(col_name):
         columns=[col_name],
     )
     result = df.resample("1d").aggregate(["mean"])
-    expected = DataFrame(
-        [47.5, 143.5, 195.5],
-        index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"),
-        columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
-    )
+    if get_option("new_udf_methods"):
+        expected = DataFrame(
+            [47.5, 143.5, 195.5],
+            index=date_range(
+                start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
+            ),
+            columns=pd.MultiIndex(levels=[["mean"], [col_name]], codes=[[0], [0]]),
+        )
+    else:
+        expected = DataFrame(
+            [47.5, 143.5, 195.5],
+            index=date_range(
+                start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
+            ),
+            columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
+        )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 88607f4b036a0..4c13c9733cf68 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import get_option
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -1905,8 +1907,14 @@ def test_pivot_margins_name_unicode(self):
             frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek
         )
         index = Index([1, 2, 3, greek], dtype="object", name="foo")
-        expected = DataFrame(index=index)
-        tm.assert_frame_equal(table, expected)
+
+        if get_option("new_udf_methods"):
+            expected = Series([1, 1, 1, 3], index=index)
+            expected.index.name = None
+            tm.assert_series_equal(table, expected)
+        else:
+            expected = DataFrame(index=index)
+            tm.assert_frame_equal(table, expected)
 
     def test_pivot_string_as_func(self):
         # GH #18713
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index 5cc22249c26f0..0089f092dd439 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -10,6 +10,7 @@
     Timestamp,
     concat,
     date_range,
+    get_option,
     timedelta_range,
 )
 import pandas._testing as tm
@@ -90,8 +91,12 @@ def test_agg():
     b_std = r["B"].std()
 
     result = r.aggregate([np.mean, np.std])
-    expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
-    expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]])
+    if get_option("new_udf_methods"):
+        expected = concat([a_mean, b_mean, a_std, b_std], axis=1)
+        expected.columns = MultiIndex.from_product([["mean", "std"], ["A", "B"]])
+    else:
+        expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
+        expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]])
     tm.assert_frame_equal(result, expected)
 
     result = r.aggregate({"A": np.mean, "B": np.std})
@@ -147,7 +152,10 @@ def test_agg_consistency():
     r = df.rolling(window=3)
 
     result = r.agg([np.sum, np.mean]).columns
-    expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]])
+    if get_option("new_udf_methods"):
+        expected = MultiIndex.from_product([["sum", "mean"], list("AB")])
+    else:
+        expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]])
     tm.assert_index_equal(result, expected)
 
     result = r["A"].agg([np.sum, np.mean]).columns