Skip to content

Commit 9ad6bb6

Browse files
authored
TST: Move tests out of test_groupby (#56859)
1 parent 5f8bfac commit 9ad6bb6

File tree

4 files changed

+281
-281
lines changed

4 files changed

+281
-281
lines changed

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,33 @@ def test_agg_apply_corner(ts, tsframe):
160160
tm.assert_frame_equal(res, exp_df)
161161

162162

163+
def test_with_na_groups(any_real_numpy_dtype):
    """Aggregate a Series grouped by labels that contain NaN.

    The expected results only have the "bar" and "foo" keys, i.e. rows whose
    label is NaN do not form a group of their own.
    """
    positions = Index(np.arange(10))
    ones = Series(np.ones(10), positions, dtype=any_real_numpy_dtype)
    keys = Series(
        [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"],
        index=positions,
    )
    grouped = ones.groupby(keys)

    # Counting with the builtin ``len``: this SHOULD be an int, but the dtype
    # is deliberately left unchecked here.
    counts = grouped.agg(len)
    tm.assert_series_equal(
        counts,
        Series([4, 2], index=["bar", "foo"]),
        check_dtype=False,
    )

    # An aggregator that explicitly returns a float must give a float result
    # (dtype is checked this time).
    def float_len(group):
        return float(len(group))

    float_counts = grouped.agg(float_len)
    tm.assert_series_equal(
        float_counts,
        Series([4.0, 2.0], index=["bar", "foo"]),
    )
188+
189+
163190
def test_agg_grouping_is_list_tuple(ts):
164191
df = DataFrame(
165192
np.random.default_rng(2).standard_normal((30, 4)),
@@ -1049,6 +1076,73 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
10491076
tm.assert_frame_equal(result, expected)
10501077

10511078

1079+
def test_groupby_as_index_agg(df):
    """Compare ``agg`` with the named reductions for ``as_index`` groupbys.

    Covers a single grouping key, two grouping keys, dict-based renaming on a
    SeriesGroupBy, and grouping by a Series whose name matches a column
    (GH7115, GH8112, GH8582).
    """
    # --- single key, as_index=False ---
    by_a = df.groupby("A", as_index=False)

    tm.assert_frame_equal(
        by_a[["C", "D"]].agg("mean"),
        by_a.mean(numeric_only=True),
    )

    mixed_result = by_a.agg({"C": "mean", "D": "sum"})
    mixed_expected = by_a.mean(numeric_only=True)
    mixed_expected["D"] = by_a.sum()["D"]
    tm.assert_frame_equal(mixed_result, mixed_expected)

    # With as_index=True, a renaming dict on a SeriesGroupBy must raise.
    by_a_indexed = df.groupby("A", as_index=True)
    renamer_msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=renamer_msg):
        by_a_indexed["C"].agg({"Q": "sum"})

    # --- two keys, as_index=False ---
    by_ab = df.groupby(["A", "B"], as_index=False)

    tm.assert_frame_equal(by_ab.agg("mean"), by_ab.mean())

    mixed_result = by_ab.agg({"C": "mean", "D": "sum"})
    mixed_expected = by_ab.mean()
    mixed_expected["D"] = by_ab.sum()["D"]
    tm.assert_frame_equal(mixed_result, mixed_expected)

    # With as_index=False the renaming dict still works but warns.
    renamed_expected = DataFrame(by_ab["C"].sum()).rename(columns={"C": "Q"})
    dict_msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=dict_msg):
        renamed_result = by_ab["C"].agg({"Q": "sum"})
    tm.assert_frame_equal(renamed_result, renamed_expected)

    # GH7115 & GH8112 & GH8582
    frame = DataFrame(
        np.random.default_rng(2).integers(0, 100, (50, 3)),
        columns=["jim", "joe", "jolie"],
    )
    key = Series(np.random.default_rng(2).integers(5, 10, 50), name="jim")

    by_key = frame.groupby(key)
    by_key.nth(0)  # invokes set_selection_from_grouper internally

    # Applying the builtin ``sum`` must give the same frame on the groupby
    # that already ran ``nth`` and on a freshly built one.
    sum_msg = "The behavior of DataFrame.sum with axis=None is deprecated"
    with tm.assert_produces_warning(
        FutureWarning, match=sum_msg, check_stacklevel=False
    ):
        reused = by_key.apply(sum)
    with tm.assert_produces_warning(
        FutureWarning, match=sum_msg, check_stacklevel=False
    ):
        fresh = frame.groupby(key).apply(sum)
    tm.assert_frame_equal(reused, fresh)

    # as_index=False on the named Series key vs. as_index=True on its raw
    # values followed by dropping the index: both must agree.
    for method in ("mean", "max", "count", "idxmax", "cumsum", "all"):
        flat = getattr(frame.groupby(key, as_index=False), method)()
        indexed = getattr(frame.groupby(key.values, as_index=True), method)()
        tm.assert_frame_equal(flat, indexed.reset_index(drop=True))
1144+
1145+
10521146
@pytest.mark.parametrize(
10531147
"func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)]
10541148
)
@@ -1252,6 +1346,28 @@ def test_agg_multiple_lambda(self):
12521346
tm.assert_frame_equal(result2, expected)
12531347

12541348

1349+
def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
    """Forward extra args/kwargs through ``agg`` on a frame with duplicate columns.

    ``np.percentile(..., 80, axis=0)`` per month is compared with
    ``quantile(0.8)`` on the rows of months 1 and 2.
    """
    # go through _aggregate_frame with self.axis == 0 and duplicate columns
    tsframe.columns = ["A", "B", "A", "C"]
    by_month = tsframe.groupby(lambda stamp: stamp.month, as_index=as_index)

    # A FutureWarning is expected only when as_index is falsy.
    expected_warning = FutureWarning if not as_index else None
    with tm.assert_produces_warning(
        expected_warning, match="A grouping .* was excluded from the result"
    ):
        result = by_month.agg(np.percentile, 80, axis=0)

    monthly_quantiles = {
        month: tsframe[tsframe.index.month == month].quantile(0.8)
        for month in (1, 2)
    }
    expected = DataFrame(monthly_quantiles).T
    if not as_index:
        # TODO: try to get this more consistent?
        expected.index = Index(range(2))

    tm.assert_frame_equal(result, expected)
1369+
1370+
12551371
def test_groupby_get_by_index():
12561372
# GH 33439
12571373
df = DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]})

pandas/tests/groupby/test_apply.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,3 +1602,75 @@ def test_builtins_apply(keys, f):
16021602
tm.assert_frame_equal(result, expected, check_dtype=False)
16031603

16041604
tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0))
1605+
1606+
1607+
def test_inconsistent_return_type():
    """GH5592: ``apply`` with a function whose return type varies by group.

    The applied functions return the group's first row (or one scalar from
    it) for most groups and ``None`` for one chosen group; the expected
    output holds NaN / NaT for that group.
    """
    frame = DataFrame(
        {
            "A": ["Tiger", "Tiger", "Tiger", "Lamb", "Lamb", "Pony", "Pony"],
            "B": Series(np.arange(7), dtype="int64"),
            "C": pd.date_range("20130101", periods=7),
        }
    )
    warn_msg = "DataFrameGroupBy.apply operated on the grouping columns"

    def first_row(grp):
        return grp.iloc[0]

    def first_row_except(skipped):
        # Build an applier returning None for group ``skipped`` and the
        # first row for every other group.
        def pick(grp):
            if grp.name == skipped:
                return None
            return grp.iloc[0]

        return pick

    # Baseline: every group returns its first row.
    first_b = frame.groupby("A").first()[["B"]]
    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        result = frame.groupby("A").apply(first_row)[["B"]]
    tm.assert_frame_equal(result, first_b)

    # One group returns None: first "Tiger", then "Pony".
    for skipped in ("Tiger", "Pony"):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            result = frame.groupby("A").apply(first_row_except(skipped))[["B"]]
        expected = first_b.copy()
        expected.loc[skipped] = np.nan
        tm.assert_frame_equal(result, expected)

    # 5592 revisited, with datetimes
    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        result = frame.groupby("A").apply(first_row_except("Pony"))[["C"]]
    expected = frame.groupby("A").first()[["C"]]
    expected.loc["Pony"] = pd.NaT
    tm.assert_frame_equal(result, expected)

    # scalar outputs
    def first_c_except_pony(grp):
        if grp.name == "Pony":
            return None
        return grp.iloc[0].loc["C"]

    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        result = frame.groupby("A").apply(first_c_except_pony)
    expected = frame.groupby("A").first()["C"].copy()
    expected.loc["Pony"] = np.nan
    expected.name = None
    tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)