diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 8198cc532d998..86f03b04fddb3 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -160,6 +160,33 @@ def test_agg_apply_corner(ts, tsframe):
     tm.assert_frame_equal(res, exp_df)
 
 
+def test_with_na_groups(any_real_numpy_dtype):
+    index = Index(np.arange(10))
+    values = Series(np.ones(10), index, dtype=any_real_numpy_dtype)
+    labels = Series(
+        [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"],
+        index=index,
+    )
+
+    # this SHOULD be an int
+    grouped = values.groupby(labels)
+    agged = grouped.agg(len)
+    expected = Series([4, 2], index=["bar", "foo"])
+
+    tm.assert_series_equal(agged, expected, check_dtype=False)
+
+    # assert issubclass(agged.dtype.type, np.integer)
+
+    # explicitly return a float from my function
+    def f(x):
+        return float(len(x))
+
+    agged = grouped.agg(f)
+    expected = Series([4.0, 2.0], index=["bar", "foo"])
+
+    tm.assert_series_equal(agged, expected)
+
+
 def test_agg_grouping_is_list_tuple(ts):
     df = DataFrame(
         np.random.default_rng(2).standard_normal((30, 4)),
@@ -1049,6 +1076,73 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
     tm.assert_frame_equal(result, expected)
 
 
+def test_groupby_as_index_agg(df):
+    grouped = df.groupby("A", as_index=False)
+
+    # single-key
+
+    result = grouped[["C", "D"]].agg("mean")
+    expected = grouped.mean(numeric_only=True)
+    tm.assert_frame_equal(result, expected)
+
+    result2 = grouped.agg({"C": "mean", "D": "sum"})
+    expected2 = grouped.mean(numeric_only=True)
+    expected2["D"] = grouped.sum()["D"]
+    tm.assert_frame_equal(result2, expected2)
+
+    grouped = df.groupby("A", as_index=True)
+
+    msg = r"nested renamer is not supported"
+    with pytest.raises(SpecificationError, match=msg):
+        grouped["C"].agg({"Q": "sum"})
+
+    # multi-key
+
+    grouped = df.groupby(["A", "B"], as_index=False)
+
+    result = grouped.agg("mean")
+    expected = grouped.mean()
+    tm.assert_frame_equal(result, expected)
+
+    result2 = grouped.agg({"C": "mean", "D": "sum"})
+    expected2 = grouped.mean()
+    expected2["D"] = grouped.sum()["D"]
+    tm.assert_frame_equal(result2, expected2)
+
+    expected3 = grouped["C"].sum()
+    expected3 = DataFrame(expected3).rename(columns={"C": "Q"})
+    msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result3 = grouped["C"].agg({"Q": "sum"})
+    tm.assert_frame_equal(result3, expected3)
+
+    # GH7115 & GH8112 & GH8582
+    df = DataFrame(
+        np.random.default_rng(2).integers(0, 100, (50, 3)),
+        columns=["jim", "joe", "jolie"],
+    )
+    ts = Series(np.random.default_rng(2).integers(5, 10, 50), name="jim")
+
+    gr = df.groupby(ts)
+    gr.nth(0)  # invokes set_selection_from_grouper internally
+
+    msg = "The behavior of DataFrame.sum with axis=None is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
+        res = gr.apply(sum)
+    with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
+        alt = df.groupby(ts).apply(sum)
+    tm.assert_frame_equal(res, alt)
+
+    for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
+        gr = df.groupby(ts, as_index=False)
+        left = getattr(gr, attr)()
+
+        gr = df.groupby(ts.values, as_index=True)
+        right = getattr(gr, attr)().reset_index(drop=True)
+
+        tm.assert_frame_equal(left, right)
+
+
 @pytest.mark.parametrize(
     "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)]
 )
@@ -1252,6 +1346,28 @@ def test_agg_multiple_lambda(self):
         tm.assert_frame_equal(result2, expected)
 
 
+def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
+    # go through _aggregate_frame with self.axis == 0 and duplicate columns
+    tsframe.columns = ["A", "B", "A", "C"]
+    gb = tsframe.groupby(lambda x: x.month, as_index=as_index)
+
+    warn = None if as_index else FutureWarning
+    msg = "A grouping .* was excluded from the result"
+    with tm.assert_produces_warning(warn, match=msg):
+        res = gb.agg(np.percentile, 80, axis=0)
+
+    ex_data = {
+        1: tsframe[tsframe.index.month == 1].quantile(0.8),
+        2: tsframe[tsframe.index.month == 2].quantile(0.8),
+    }
+    expected = DataFrame(ex_data).T
+    if not as_index:
+        # TODO: try to get this more consistent?
+        expected.index = Index(range(2))
+
+    tm.assert_frame_equal(res, expected)
+
+
 def test_groupby_get_by_index():
     # GH 33439
     df = DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]})
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index f4b228eb5b326..5de98156b44e1 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1602,3 +1602,75 @@ def test_builtins_apply(keys, f):
     tm.assert_frame_equal(result, expected, check_dtype=False)
 
     tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0))
+
+
+def test_inconsistent_return_type():
+    # GH5592
+    # inconsistent return type
+    df = DataFrame(
+        {
+            "A": ["Tiger", "Tiger", "Tiger", "Lamb", "Lamb", "Pony", "Pony"],
+            "B": Series(np.arange(7), dtype="int64"),
+            "C": pd.date_range("20130101", periods=7),
+        }
+    )
+
+    def f_0(grp):
+        return grp.iloc[0]
+
+    expected = df.groupby("A").first()[["B"]]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_0)[["B"]]
+    tm.assert_frame_equal(result, expected)
+
+    def f_1(grp):
+        if grp.name == "Tiger":
+            return None
+        return grp.iloc[0]
+
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_1)[["B"]]
+    e = expected.copy()
+    e.loc["Tiger"] = np.nan
+    tm.assert_frame_equal(result, e)
+
+    def f_2(grp):
+        if grp.name == "Pony":
+            return None
+        return grp.iloc[0]
+
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_2)[["B"]]
+    e = expected.copy()
+    e.loc["Pony"] = np.nan
+    tm.assert_frame_equal(result, e)
+
+    # 5592 revisited, with datetimes
+    def f_3(grp):
+        if grp.name == "Pony":
+            return None
+        return grp.iloc[0]
+
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_3)[["C"]]
+    e = df.groupby("A").first()[["C"]]
+    e.loc["Pony"] = pd.NaT
+    tm.assert_frame_equal(result, e)
+
+    # scalar outputs
+    def f_4(grp):
+        if grp.name == "Pony":
+            return None
+        return grp.iloc[0].loc["C"]
+
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_4)
+    e = df.groupby("A").first()["C"].copy()
+    e.loc["Pony"] = np.nan
+    e.name = None
+    tm.assert_series_equal(result, e)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 14c5c21d41772..8750dd18b3db4 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -43,99 +43,6 @@ def test_repr():
     assert result == expected
 
 
-def test_groupby_std_datetimelike(warn_copy_on_write):
-    # GH#48481
-    tdi = pd.timedelta_range("1 Day", periods=10000)
-    ser = Series(tdi)
-    ser[::5] *= 2  # get different std for different groups
-
-    df = ser.to_frame("A").copy()
-
-    df["B"] = ser + Timestamp(0)
-    df["C"] = ser + Timestamp(0, tz="UTC")
-    df.iloc[-1] = pd.NaT  # last group includes NaTs
-
-    gb = df.groupby(list(range(5)) * 2000)
-
-    result = gb.std()
-
-    # Note: this does not _exactly_ match what we would get if we did
-    # [gb.get_group(i).std() for i in gb.groups]
-    # but it _does_ match the floating point error we get doing the
-    # same operation on int64 data xref GH#51332
-    td1 = Timedelta("2887 days 11:21:02.326710176")
-    td4 = Timedelta("2886 days 00:42:34.664668096")
-    exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5))
-    expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser})
-    tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"])
-def test_basic_aggregations(dtype):
-    data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype)
-
-    index = np.arange(9)
-    np.random.default_rng(2).shuffle(index)
-    data = data.reindex(index)
-
-    grouped = data.groupby(lambda x: x // 3, group_keys=False)
-
-    for k, v in grouped:
-        assert len(v) == 3
-
-    msg = "using SeriesGroupBy.mean"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        agged = grouped.aggregate(np.mean)
-    assert agged[1] == 1
-
-    msg = "using SeriesGroupBy.mean"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        expected = grouped.agg(np.mean)
-    tm.assert_series_equal(agged, expected)  # shorthand
-    tm.assert_series_equal(agged, grouped.mean())
-    result = grouped.sum()
-    msg = "using SeriesGroupBy.sum"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        expected = grouped.agg(np.sum)
-    tm.assert_series_equal(result, expected)
-
-    expected = grouped.apply(lambda x: x * x.sum())
-    transformed = grouped.transform(lambda x: x * x.sum())
-    assert transformed[7] == 12
-    tm.assert_series_equal(transformed, expected)
-
-    value_grouped = data.groupby(data)
-    msg = "using SeriesGroupBy.mean"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = value_grouped.aggregate(np.mean)
-    tm.assert_series_equal(result, agged, check_index_type=False)
-
-    # complex agg
-    msg = "using SeriesGroupBy.[mean|std]"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        agged = grouped.aggregate([np.mean, np.std])
-
-    msg = r"nested renamer is not supported"
-    with pytest.raises(SpecificationError, match=msg):
-        grouped.aggregate({"one": np.mean, "two": np.std})
-
-    group_constants = {0: 10, 1: 20, 2: 30}
-    msg = (
-        "Pinning the groupby key to each group in SeriesGroupBy.agg is deprecated, "
-        "and cases that relied on it will raise in a future version"
-    )
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        # GH#41090
-        agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
-    assert agged[1] == 21
-
-    # corner cases
-    msg = "Must produce aggregated value"
-    # exception raised is type Exception
-    with pytest.raises(Exception, match=msg):
-        grouped.aggregate(lambda x: x * 2)
-
-
 def test_groupby_nonobject_dtype(multiindex_dataframe_random_data):
     key = multiindex_dataframe_random_data.index.codes[0]
     grouped = multiindex_dataframe_random_data.groupby(key)
@@ -170,78 +77,6 @@ def max_value(group):
     tm.assert_series_equal(result, expected)
 
 
-def test_inconsistent_return_type():
-    # GH5592
-    # inconsistent return type
-    df = DataFrame(
-        {
-            "A": ["Tiger", "Tiger", "Tiger", "Lamb", "Lamb", "Pony", "Pony"],
-            "B": Series(np.arange(7), dtype="int64"),
-            "C": date_range("20130101", periods=7),
-        }
-    )
-
-    def f_0(grp):
-        return grp.iloc[0]
-
-    expected = df.groupby("A").first()[["B"]]
-    msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = df.groupby("A").apply(f_0)[["B"]]
-    tm.assert_frame_equal(result, expected)
-
-    def f_1(grp):
-        if grp.name == "Tiger":
-            return None
-        return grp.iloc[0]
-
-    msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = df.groupby("A").apply(f_1)[["B"]]
-    e = expected.copy()
-    e.loc["Tiger"] = np.nan
-    tm.assert_frame_equal(result, e)
-
-    def f_2(grp):
-        if grp.name == "Pony":
-            return None
-        return grp.iloc[0]
-
-    msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = df.groupby("A").apply(f_2)[["B"]]
-    e = expected.copy()
-    e.loc["Pony"] = np.nan
-    tm.assert_frame_equal(result, e)
-
-    # 5592 revisited, with datetimes
-    def f_3(grp):
-        if grp.name == "Pony":
-            return None
-        return grp.iloc[0]
-
-    msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = df.groupby("A").apply(f_3)[["C"]]
-    e = df.groupby("A").first()[["C"]]
-    e.loc["Pony"] = pd.NaT
-    tm.assert_frame_equal(result, e)
-
-    # scalar outputs
-    def f_4(grp):
-        if grp.name == "Pony":
-            return None
-        return grp.iloc[0].loc["C"]
-
-    msg = "DataFrameGroupBy.apply operated on the grouping columns"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = df.groupby("A").apply(f_4)
-    e = df.groupby("A").first()["C"].copy()
-    e.loc["Pony"] = np.nan
-    e.name = None
-    tm.assert_series_equal(result, e)
-
-
 def test_pass_args_kwargs(ts, tsframe):
     def f(x, q=None, axis=0):
         return np.percentile(x, q, axis=axis)
@@ -295,28 +130,6 @@ def f(x, q=None, axis=0):
     tm.assert_frame_equal(apply_result, expected, check_names=False)
 
 
-def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
-    # go through _aggregate_frame with self.axis == 0 and duplicate columns
-    tsframe.columns = ["A", "B", "A", "C"]
-    gb = tsframe.groupby(lambda x: x.month, as_index=as_index)
-
-    warn = None if as_index else FutureWarning
-    msg = "A grouping .* was excluded from the result"
-    with tm.assert_produces_warning(warn, match=msg):
-        res = gb.agg(np.percentile, 80, axis=0)
-
-    ex_data = {
-        1: tsframe[tsframe.index.month == 1].quantile(0.8),
-        2: tsframe[tsframe.index.month == 2].quantile(0.8),
-    }
-    expected = DataFrame(ex_data).T
-    if not as_index:
-        # TODO: try to get this more consistent?
-        expected.index = Index(range(2))
-
-    tm.assert_frame_equal(res, expected)
-
-
 def test_len():
     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)),
@@ -350,33 +163,6 @@ def test_basic_regression():
     grouped.mean()
 
 
-def test_with_na_groups(any_real_numpy_dtype):
-    index = Index(np.arange(10))
-    values = Series(np.ones(10), index, dtype=any_real_numpy_dtype)
-    labels = Series(
-        [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"],
-        index=index,
-    )
-
-    # this SHOULD be an int
-    grouped = values.groupby(labels)
-    agged = grouped.agg(len)
-    expected = Series([4, 2], index=["bar", "foo"])
-
-    tm.assert_series_equal(agged, expected, check_dtype=False)
-
-    # assert issubclass(agged.dtype.type, np.integer)
-
-    # explicitly return a float from my function
-    def f(x):
-        return float(len(x))
-
-    agged = grouped.agg(f)
-    expected = Series([4.0, 2.0], index=["bar", "foo"])
-
-    tm.assert_series_equal(agged, expected)
-
-
 def test_indices_concatenation_order():
     # GH 2808
 
@@ -761,73 +547,6 @@ def test_groupby_as_index_select_column_sum_empty_df():
     tm.assert_frame_equal(left, expected)
 
 
-def test_groupby_as_index_agg(df):
-    grouped = df.groupby("A", as_index=False)
-
-    # single-key
-
-    result = grouped[["C", "D"]].agg("mean")
-    expected = grouped.mean(numeric_only=True)
-    tm.assert_frame_equal(result, expected)
-
-    result2 = grouped.agg({"C": "mean", "D": "sum"})
-    expected2 = grouped.mean(numeric_only=True)
-    expected2["D"] = grouped.sum()["D"]
-    tm.assert_frame_equal(result2, expected2)
-
-    grouped = df.groupby("A", as_index=True)
-
-    msg = r"nested renamer is not supported"
-    with pytest.raises(SpecificationError, match=msg):
-        grouped["C"].agg({"Q": "sum"})
-
-    # multi-key
-
-    grouped = df.groupby(["A", "B"], as_index=False)
-
-    result = grouped.agg("mean")
-    expected = grouped.mean()
-    tm.assert_frame_equal(result, expected)
-
-    result2 = grouped.agg({"C": "mean", "D": "sum"})
-    expected2 = grouped.mean()
-    expected2["D"] = grouped.sum()["D"]
-    tm.assert_frame_equal(result2, expected2)
-
-    expected3 = grouped["C"].sum()
-    expected3 = DataFrame(expected3).rename(columns={"C": "Q"})
-    msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result3 = grouped["C"].agg({"Q": "sum"})
-    tm.assert_frame_equal(result3, expected3)
-
-    # GH7115 & GH8112 & GH8582
-    df = DataFrame(
-        np.random.default_rng(2).integers(0, 100, (50, 3)),
-        columns=["jim", "joe", "jolie"],
-    )
-    ts = Series(np.random.default_rng(2).integers(5, 10, 50), name="jim")
-
-    gr = df.groupby(ts)
-    gr.nth(0)  # invokes set_selection_from_grouper internally
-
-    msg = "The behavior of DataFrame.sum with axis=None is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
-        res = gr.apply(sum)
-    with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
-        alt = df.groupby(ts).apply(sum)
-    tm.assert_frame_equal(res, alt)
-
-    for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
-        gr = df.groupby(ts, as_index=False)
-        left = getattr(gr, attr)()
-
-        gr = df.groupby(ts.values, as_index=True)
-        right = getattr(gr, attr)().reset_index(drop=True)
-
-        tm.assert_frame_equal(left, right)
-
-
 def test_ops_not_as_index(reduction_func):
     # GH 10355, 21090
     # Using as_index=False should not modify grouped column
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 273734e84d9aa..7530c9ca78cbc 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -20,6 +20,72 @@
 from pandas.util import _test_decorators as td
 
 
+@pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"])
+def test_basic_aggregations(dtype):
+    data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype)
+
+    index = np.arange(9)
+    np.random.default_rng(2).shuffle(index)
+    data = data.reindex(index)
+
+    grouped = data.groupby(lambda x: x // 3, group_keys=False)
+
+    for k, v in grouped:
+        assert len(v) == 3
+
+    msg = "using SeriesGroupBy.mean"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        agged = grouped.aggregate(np.mean)
+    assert agged[1] == 1
+
+    msg = "using SeriesGroupBy.mean"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = grouped.agg(np.mean)
+    tm.assert_series_equal(agged, expected)  # shorthand
+    tm.assert_series_equal(agged, grouped.mean())
+    result = grouped.sum()
+    msg = "using SeriesGroupBy.sum"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = grouped.agg(np.sum)
+    tm.assert_series_equal(result, expected)
+
+    expected = grouped.apply(lambda x: x * x.sum())
+    transformed = grouped.transform(lambda x: x * x.sum())
+    assert transformed[7] == 12
+    tm.assert_series_equal(transformed, expected)
+
+    value_grouped = data.groupby(data)
+    msg = "using SeriesGroupBy.mean"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = value_grouped.aggregate(np.mean)
+    tm.assert_series_equal(result, agged, check_index_type=False)
+
+    # complex agg
+    msg = "using SeriesGroupBy.[mean|std]"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        agged = grouped.aggregate([np.mean, np.std])
+
+    msg = r"nested renamer is not supported"
+    with pytest.raises(pd.errors.SpecificationError, match=msg):
+        grouped.aggregate({"one": np.mean, "two": np.std})
+
+    group_constants = {0: 10, 1: 20, 2: 30}
+    msg = (
+        "Pinning the groupby key to each group in SeriesGroupBy.agg is deprecated, "
+        "and cases that relied on it will raise in a future version"
+    )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        # GH#41090
+        agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
+    assert agged[1] == 21
+
+    # corner cases
+    msg = "Must produce aggregated value"
+    # exception raised is type Exception
+    with pytest.raises(Exception, match=msg):
+        grouped.aggregate(lambda x: x * 2)
+
+
 @pytest.mark.parametrize(
     "vals",
     [
@@ -1071,3 +1137,30 @@ def test_groupby_prod_with_int64_dtype():
     result = df.groupby(["A"]).prod().reset_index()
     expected = DataFrame({"A": [1], "B": [180970905912331920]}, dtype="int64")
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_std_datetimelike(warn_copy_on_write):
+    # GH#48481
+    tdi = pd.timedelta_range("1 Day", periods=10000)
+    ser = Series(tdi)
+    ser[::5] *= 2  # get different std for different groups
+
+    df = ser.to_frame("A").copy()
+
+    df["B"] = ser + Timestamp(0)
+    df["C"] = ser + Timestamp(0, tz="UTC")
+    df.iloc[-1] = pd.NaT  # last group includes NaTs
+
+    gb = df.groupby(list(range(5)) * 2000)
+
+    result = gb.std()
+
+    # Note: this does not _exactly_ match what we would get if we did
+    # [gb.get_group(i).std() for i in gb.groups]
+    # but it _does_ match the floating point error we get doing the
+    # same operation on int64 data xref GH#51332
+    td1 = pd.Timedelta("2887 days 11:21:02.326710176")
+    td4 = pd.Timedelta("2886 days 00:42:34.664668096")
+    exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5))
+    expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser})
+    tm.assert_frame_equal(result, expected)