Skip to content

TST (string) fix xfailed groupby tests (3) #59642

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions pandas/tests/groupby/methods/test_describe.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -73,7 +71,6 @@ def test_series_describe_as_index(as_index, keys):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_frame_describe_multikey(tsframe):
grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
result = grouped.describe()
Expand All @@ -82,7 +79,7 @@ def test_frame_describe_multikey(tsframe):
group = grouped[col].describe()
# GH 17464 - Remove duplicate MultiIndex levels
group_col = MultiIndex(
levels=[[col], group.columns],
levels=[Index([col], dtype=tsframe.columns.dtype), group.columns],
codes=[[0] * len(group.columns), range(len(group.columns))],
)
group = DataFrame(group.values, columns=group_col, index=group.index)
Expand Down Expand Up @@ -249,7 +246,6 @@ def test_describe_non_cython_paths():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", [int, float, object])
@pytest.mark.parametrize(
"kwargs",
Expand All @@ -271,5 +267,5 @@ def test_groupby_empty_dataset(dtype, kwargs):

result = df.iloc[:0].groupby("A").B.describe(**kwargs)
expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0]
expected.index = Index([])
expected.index = Index([], dtype=df.columns.dtype)
tm.assert_frame_equal(result, expected)
6 changes: 2 additions & 4 deletions pandas/tests/groupby/methods/test_nth.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -679,14 +677,14 @@ def test_first_multi_key_groupby_categorical():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("method", ["first", "last", "nth"])
def test_groupby_last_first_nth_with_none(method, nulls_fixture):
# GH29645
expected = Series(["y"])
expected = Series(["y"], dtype=object)
data = Series(
[nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
index=[0, 0, 0, 0, 0],
dtype=object,
).groupby(level=0)

if method == "nth":
Expand Down
16 changes: 0 additions & 16 deletions pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
from pandas.compat.pyarrow import pa_version_under10p1

from pandas.core.dtypes.missing import na_value_for_dtype
Expand All @@ -13,9 +12,6 @@
from pandas.tests.groupby import get_groupby_method_args


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
@pytest.mark.parametrize(
"dropna, tuples, outputs",
[
Expand Down Expand Up @@ -59,9 +55,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group(
tm.assert_frame_equal(grouped, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
@pytest.mark.parametrize(
"dropna, tuples, outputs",
[
Expand Down Expand Up @@ -138,9 +131,6 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
tm.assert_frame_equal(grouped, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
@pytest.mark.parametrize(
"dropna, idx, expected",
[
Expand Down Expand Up @@ -215,9 +205,6 @@ def test_groupby_dataframe_slice_then_transform(dropna, index):
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
@pytest.mark.parametrize(
"dropna, tuples, outputs",
[
Expand Down Expand Up @@ -299,9 +286,6 @@ def test_groupby_dropna_datetime_like_data(
tm.assert_frame_equal(grouped, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
@pytest.mark.parametrize(
"dropna, data, selected_data, levels",
[
Expand Down
29 changes: 20 additions & 9 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.common import ensure_platform_int

Expand Down Expand Up @@ -372,8 +373,7 @@ def test_transform_select_columns(df):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_transform_nuisance_raises(df):
def test_transform_nuisance_raises(df, using_infer_string):
# case that goes through _transform_item_by_item

df.columns = ["A", "B", "B", "D"]
Expand All @@ -383,10 +383,16 @@ def test_transform_nuisance_raises(df):
grouped = df.groupby("A")

gbc = grouped["B"]
with pytest.raises(TypeError, match="Could not convert"):
msg = "Could not convert"
if using_infer_string:
if df.columns.dtype.storage == "pyarrow":
msg = "with dtype str does not support operation 'mean'"
else:
msg = "Cannot perform reduction 'mean' with string dtype"
with pytest.raises(TypeError, match=msg):
gbc.transform(lambda x: np.mean(x))

with pytest.raises(TypeError, match="Could not convert"):
with pytest.raises(TypeError, match=msg):
df.groupby("A").transform(lambda x: np.mean(x))


Expand Down Expand Up @@ -445,8 +451,7 @@ def test_transform_coercion():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_groupby_transform_with_int():
def test_groupby_transform_with_int(using_infer_string):
# GH 3740, make sure that we might upcast on item-by-item transform

# floats
Expand Down Expand Up @@ -476,8 +481,14 @@ def test_groupby_transform_with_int():
"D": "foo",
}
)
msg = "Could not convert"
if using_infer_string:
if HAS_PYARROW:
msg = "with dtype str does not support operation 'mean'"
else:
msg = "Cannot perform reduction 'mean' with string dtype"
with np.errstate(all="ignore"):
with pytest.raises(TypeError, match="Could not convert"):
with pytest.raises(TypeError, match=msg):
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
result = df.groupby("A")[["B", "C"]].transform(
lambda x: (x - x.mean()) / x.std()
Expand All @@ -489,7 +500,7 @@ def test_groupby_transform_with_int():
s = Series([2, 3, 4, 10, 5, -1])
df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
with np.errstate(all="ignore"):
with pytest.raises(TypeError, match="Could not convert"):
with pytest.raises(TypeError, match=msg):
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
result = df.groupby("A")[["B", "C"]].transform(
lambda x: (x - x.mean()) / x.std()
Expand Down Expand Up @@ -705,7 +716,6 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.slow
@pytest.mark.parametrize(
"op, args, targop",
Expand Down Expand Up @@ -757,6 +767,7 @@ def test_cython_transform_frame_column(
"does not support operation",
".* is not supported for object dtype",
"is not implemented for this dtype",
".* is not supported for str dtype",
]
)
with pytest.raises(TypeError, match=msg):
Expand Down