Skip to content

TST: Move tests out of test_groupby #56859

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,33 @@ def test_agg_apply_corner(ts, tsframe):
tm.assert_frame_equal(res, exp_df)


def test_with_na_groups(any_real_numpy_dtype):
    """Aggregate a SeriesGroupBy whose group labels contain NaN.

    Ten values of ones (cast to ``any_real_numpy_dtype``) are grouped by
    labels that mix "foo", "bar" and NaN. Only the "bar" and "foo" groups
    are expected in the aggregated result, with sizes 4 and 2.
    """
    index = Index(np.arange(10))
    values = Series(np.ones(10), index=index, dtype=any_real_numpy_dtype)
    labels = Series(
        [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"],
        index=index,
    )
    grouped = values.groupby(labels)

    # this SHOULD be an int, but the result dtype is not enforced here
    # (check_dtype=False), only the values and the index.
    agged = grouped.agg(len)
    expected = Series([4, 2], index=["bar", "foo"])
    tm.assert_series_equal(agged, expected, check_dtype=False)

    # explicitly return a float from the aggregation function; this time the
    # dtype of the result is compared as well.
    def f(x):
        return float(len(x))

    agged = grouped.agg(f)
    expected = Series([4.0, 2.0], index=["bar", "foo"])
    tm.assert_series_equal(agged, expected)


def test_agg_grouping_is_list_tuple(ts):
df = DataFrame(
np.random.default_rng(2).standard_normal((30, 4)),
Expand Down Expand Up @@ -1049,6 +1076,73 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
tm.assert_frame_equal(result, expected)


def test_groupby_as_index_agg(df):
    """Check ``agg`` against the dedicated reductions for ``as_index`` groupbys.

    Covers single-key and multi-key grouping with ``as_index=False``, the
    dict-renamer behaviour of ``SeriesGroupBy.agg``, and (GH7115, GH8112,
    GH8582) agreement between ``as_index=False`` and ``as_index=True``
    results when grouping by an external Series.

    NOTE(review): assumes the ``df`` fixture provides key columns "A" and "B"
    and numeric columns "C" and "D"; the fixture is defined outside this view.
    """
    grouped = df.groupby("A", as_index=False)

    # single-key

    result = grouped[["C", "D"]].agg("mean")
    expected = grouped.mean(numeric_only=True)
    tm.assert_frame_equal(result, expected)

    result2 = grouped.agg({"C": "mean", "D": "sum"})
    expected2 = grouped.mean(numeric_only=True)
    expected2["D"] = grouped.sum()["D"]
    tm.assert_frame_equal(result2, expected2)

    grouped = df.groupby("A", as_index=True)

    # With as_index=True a renaming dict on a single column must raise.
    msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        grouped["C"].agg({"Q": "sum"})

    # multi-key

    grouped = df.groupby(["A", "B"], as_index=False)

    result = grouped.agg("mean")
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)

    result2 = grouped.agg({"C": "mean", "D": "sum"})
    expected2 = grouped.mean()
    expected2["D"] = grouped.sum()["D"]
    tm.assert_frame_equal(result2, expected2)

    # With as_index=False the same renaming dict only warns and renames.
    expected3 = grouped["C"].sum()
    expected3 = DataFrame(expected3).rename(columns={"C": "Q"})
    msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result3 = grouped["C"].agg({"Q": "sum"})
    tm.assert_frame_equal(result3, expected3)

    # GH7115 & GH8112 & GH8582
    # Distinct local names are used so the ``df`` fixture is not rebound.
    frame = DataFrame(
        np.random.default_rng(2).integers(0, 100, (50, 3)),
        columns=["jim", "joe", "jolie"],
    )
    keys = Series(np.random.default_rng(2).integers(5, 10, 50), name="jim")

    gr = frame.groupby(keys)
    gr.nth(0)  # invokes set_selection_from_grouper internally

    msg = "The behavior of DataFrame.sum with axis=None is deprecated"
    warn_kwargs = {"match": msg, "check_stacklevel": False}
    with tm.assert_produces_warning(FutureWarning, **warn_kwargs):
        res = gr.apply(sum)
    with tm.assert_produces_warning(FutureWarning, **warn_kwargs):
        alt = frame.groupby(keys).apply(sum)
    tm.assert_frame_equal(res, alt)

    for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
        # Fresh groupby objects are built on every iteration, as before.
        gr = frame.groupby(keys, as_index=False)
        left = getattr(gr, attr)()

        gr = frame.groupby(keys.values, as_index=True)
        right = getattr(gr, attr)().reset_index(drop=True)

        tm.assert_frame_equal(left, right)


@pytest.mark.parametrize(
"func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)]
)
Expand Down Expand Up @@ -1252,6 +1346,28 @@ def test_agg_multiple_lambda(self):
tm.assert_frame_equal(result2, expected)


def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
    """Pass positional and keyword args through ``agg`` with duplicate columns.

    The frame is grouped by the month of its index and aggregated with
    ``np.percentile(..., 80, axis=0)``; the result is compared against the
    per-month 0.8 quantile.

    NOTE(review): assumes the ``tsframe`` fixture has four columns and a
    datetime-like index covering months 1 and 2; the fixture is defined
    outside this view. The fixture's columns are overwritten in place.
    """
    # go through _aggregate_frame with self.axis == 0 and duplicate columns
    tsframe.columns = ["A", "B", "A", "C"]
    gb = tsframe.groupby(lambda x: x.month, as_index=as_index)

    # A warning is only expected when as_index is falsy.
    warn = None if as_index else FutureWarning
    msg = "A grouping .* was excluded from the result"
    with tm.assert_produces_warning(warn, match=msg):
        res = gb.agg(np.percentile, 80, axis=0)

    months = tsframe.index.month
    ex_data = {month: tsframe[months == month].quantile(0.8) for month in (1, 2)}
    expected = DataFrame(ex_data).T
    if not as_index:
        # TODO: try to get this more consistent?
        expected.index = Index(range(2))

    tm.assert_frame_equal(res, expected)


def test_groupby_get_by_index():
# GH 33439
df = DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]})
Expand Down
72 changes: 72 additions & 0 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1602,3 +1602,75 @@ def test_builtins_apply(keys, f):
tm.assert_frame_equal(result, expected, check_dtype=False)

tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0))


def test_inconsistent_return_type():
# GH5592
# inconsistent return type
df = DataFrame(
{
"A": ["Tiger", "Tiger", "Tiger", "Lamb", "Lamb", "Pony", "Pony"],
"B": Series(np.arange(7), dtype="int64"),
"C": pd.date_range("20130101", periods=7),
}
)

def f_0(grp):
return grp.iloc[0]

expected = df.groupby("A").first()[["B"]]
msg = "DataFrameGroupBy.apply operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby("A").apply(f_0)[["B"]]
tm.assert_frame_equal(result, expected)

def f_1(grp):
if grp.name == "Tiger":
return None
return grp.iloc[0]

msg = "DataFrameGroupBy.apply operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby("A").apply(f_1)[["B"]]
e = expected.copy()
e.loc["Tiger"] = np.nan
tm.assert_frame_equal(result, e)

def f_2(grp):
if grp.name == "Pony":
return None
return grp.iloc[0]

msg = "DataFrameGroupBy.apply operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby("A").apply(f_2)[["B"]]
e = expected.copy()
e.loc["Pony"] = np.nan
tm.assert_frame_equal(result, e)

# 5592 revisited, with datetimes
def f_3(grp):
if grp.name == "Pony":
return None
return grp.iloc[0]

msg = "DataFrameGroupBy.apply operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby("A").apply(f_3)[["C"]]
e = df.groupby("A").first()[["C"]]
e.loc["Pony"] = pd.NaT
tm.assert_frame_equal(result, e)

# scalar outputs
def f_4(grp):
if grp.name == "Pony":
return None
return grp.iloc[0].loc["C"]

msg = "DataFrameGroupBy.apply operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby("A").apply(f_4)
e = df.groupby("A").first()["C"].copy()
e.loc["Pony"] = np.nan
e.name = None
tm.assert_series_equal(result, e)
Loading