Skip to content

Commit 53cadbb

Browse files
TST (string dtype): adjust pandas/tests/reshape tests (#59762)
1 parent b0593e2 commit 53cadbb

File tree

5 files changed

+34
-50
lines changed

5 files changed

+34
-50
lines changed

pandas/tests/reshape/concat/test_concat.py

+2 -9
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010
import numpy as np
1111
import pytest
1212

13-
from pandas._config import using_string_dtype
14-
1513
from pandas.errors import InvalidIndexError
1614

1715
import pandas as pd
@@ -47,18 +45,11 @@ def test_append_concat(self):
4745
assert isinstance(result.index, PeriodIndex)
4846
assert result.index[0] == s1.index[0]
4947

50-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
5148
def test_concat_copy(self):
5249
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
5350
df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1))
5451
df3 = DataFrame({5: "foo"}, index=range(4))
5552

56-
# These are actual copies.
57-
result = concat([df, df2, df3], axis=1)
58-
for block in result._mgr.blocks:
59-
assert block.values.base is not None
60-
61-
# These are the same.
6253
result = concat([df, df2, df3], axis=1)
6354

6455
for block in result._mgr.blocks:
@@ -69,6 +60,8 @@ def test_concat_copy(self):
6960
assert arr.base is df2._mgr.blocks[0].values.base
7061
elif arr.dtype == object:
7162
assert arr.base is not None
63+
elif arr.dtype == "string":
64+
tm.shares_memory(arr, df3._mgr.blocks[0].values)
7265

7366
# Float block was consolidated.
7467
df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1)))

pandas/tests/reshape/merge/test_merge_asof.py

+2 -8
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas.util._test_decorators as td
97

108
import pandas as pd
@@ -3064,12 +3062,8 @@ def test_on_float_by_int(self):
30643062

30653063
tm.assert_frame_equal(result, expected)
30663064

3067-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
3068-
def test_merge_datatype_error_raises(self, using_infer_string):
3069-
if using_infer_string:
3070-
msg = "incompatible merge keys"
3071-
else:
3072-
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
3065+
def test_merge_datatype_error_raises(self):
3066+
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
30733067

30743068
left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
30753069
right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})

pandas/tests/reshape/test_get_dummies.py

+3 -7
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._config import using_string_dtype
8-
97
import pandas.util._test_decorators as td
108

119
from pandas.core.dtypes.common import is_integer_dtype
@@ -216,11 +214,10 @@ def test_dataframe_dummies_all_obj(self, df, sparse):
216214

217215
tm.assert_frame_equal(result, expected)
218216

219-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
220-
def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
217+
def test_dataframe_dummies_string_dtype(self, df, any_string_dtype):
221218
# GH44965
222219
df = df[["A", "B"]]
223-
df = df.astype({"A": "object", "B": "string"})
220+
df = df.astype({"A": "str", "B": any_string_dtype})
224221
result = get_dummies(df)
225222
expected = DataFrame(
226223
{
@@ -231,8 +228,7 @@ def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
231228
},
232229
dtype=bool,
233230
)
234-
if not using_infer_string:
235-
# infer_string returns numpy bools
231+
if any_string_dtype == "string" and any_string_dtype.na_value is pd.NA:
236232
expected[["B_b", "B_c"]] = expected[["B_b", "B_c"]].astype("boolean")
237233
tm.assert_frame_equal(result, expected)
238234

pandas/tests/reshape/test_melt.py

+9 -16
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas as pd
97
from pandas import (
108
DataFrame,
@@ -21,7 +19,7 @@
2119
def df():
2220
res = DataFrame(
2321
np.random.default_rng(2).standard_normal((10, 4)),
24-
columns=Index(list("ABCD"), dtype=object),
22+
columns=Index(list("ABCD")),
2523
index=date_range("2000-01-01", periods=10, freq="B"),
2624
)
2725
res["id1"] = (res["A"] > 0).astype(np.int64)
@@ -83,7 +81,6 @@ def test_default_col_names(self, df):
8381
result2 = df.melt(id_vars=["id1", "id2"])
8482
assert result2.columns.tolist() == ["id1", "id2", "variable", "value"]
8583

86-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
8784
def test_value_vars(self, df):
8885
result3 = df.melt(id_vars=["id1", "id2"], value_vars="A")
8986
assert len(result3) == 10
@@ -100,7 +97,6 @@ def test_value_vars(self, df):
10097
)
10198
tm.assert_frame_equal(result4, expected4)
10299

103-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
104100
@pytest.mark.parametrize("type_", (tuple, list, np.array))
105101
def test_value_vars_types(self, type_, df):
106102
# GH 15348
@@ -178,7 +174,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
178174
with pytest.raises(ValueError, match=msg):
179175
df1.melt(id_vars=id_vars, value_vars=value_vars)
180176

181-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
182177
def test_custom_var_name(self, df, var_name):
183178
result5 = df.melt(var_name=var_name)
184179
assert result5.columns.tolist() == ["var", "value"]
@@ -206,7 +201,6 @@ def test_custom_var_name(self, df, var_name):
206201
)
207202
tm.assert_frame_equal(result9, expected9)
208203

209-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
210204
def test_custom_value_name(self, df, value_name):
211205
result10 = df.melt(value_name=value_name)
212206
assert result10.columns.tolist() == ["variable", "val"]
@@ -236,7 +230,6 @@ def test_custom_value_name(self, df, value_name):
236230
)
237231
tm.assert_frame_equal(result14, expected14)
238232

239-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
240233
def test_custom_var_and_value_name(self, df, value_name, var_name):
241234
result15 = df.melt(var_name=var_name, value_name=value_name)
242235
assert result15.columns.tolist() == ["var", "val"]
@@ -361,14 +354,15 @@ def test_melt_missing_columns_raises(self):
361354
with pytest.raises(KeyError, match=msg):
362355
df.melt(["A"], ["F"], col_level=0)
363356

364-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
365357
def test_melt_mixed_int_str_id_vars(self):
366358
# GH 29718
367359
df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]})
368360
result = melt(df, id_vars=[0, "a"], value_vars=["b", "d"])
369361
expected = DataFrame(
370362
{0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]}
371363
)
364+
# the df's columns are mixed type and thus object -> preserves object dtype
365+
expected["variable"] = expected["variable"].astype(object)
372366
tm.assert_frame_equal(result, expected)
373367

374368
def test_melt_mixed_int_str_value_vars(self):
@@ -1222,12 +1216,10 @@ def test_raise_of_column_name_value(self):
12221216
):
12231217
df.melt(id_vars="value", value_name="value")
12241218

1225-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
1226-
@pytest.mark.parametrize("dtype", ["O", "string"])
1227-
def test_missing_stubname(self, dtype):
1219+
def test_missing_stubname(self, any_string_dtype):
12281220
# GH46044
12291221
df = DataFrame({"id": ["1", "2"], "a-1": [100, 200], "a-2": [300, 400]})
1230-
df = df.astype({"id": dtype})
1222+
df = df.astype({"id": any_string_dtype})
12311223
result = wide_to_long(
12321224
df,
12331225
stubnames=["a", "b"],
@@ -1243,12 +1235,13 @@ def test_missing_stubname(self, dtype):
12431235
{"a": [100, 200, 300, 400], "b": [np.nan] * 4},
12441236
index=index,
12451237
)
1246-
new_level = expected.index.levels[0].astype(dtype)
1238+
new_level = expected.index.levels[0].astype(any_string_dtype)
1239+
if any_string_dtype == "object":
1240+
new_level = expected.index.levels[0].astype("str")
12471241
expected.index = expected.index.set_levels(new_level, level=0)
12481242
tm.assert_frame_equal(result, expected)
12491243

12501244

1251-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
12521245
def test_wide_to_long_pyarrow_string_columns():
12531246
# GH 57066
12541247
pytest.importorskip("pyarrow")
@@ -1267,7 +1260,7 @@ def test_wide_to_long_pyarrow_string_columns():
12671260
)
12681261
expected = DataFrame(
12691262
[[1, 1], [1, 1], [1, 2]],
1270-
columns=Index(["D", "R"], dtype=object),
1263+
columns=Index(["D", "R"]),
12711264
index=pd.MultiIndex.from_arrays(
12721265
[
12731266
[1, 1, 1],

pandas/tests/reshape/test_pivot.py

+18 -10
Original file line number | Diff line number | Diff line change
@@ -1068,7 +1068,6 @@ def test_margins_dtype_len(self, data):
10681068

10691069
tm.assert_frame_equal(expected, result)
10701070

1071-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
10721071
@pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
10731072
def test_pivot_table_multiindex_only(self, cols):
10741073
# GH 17038
@@ -1078,7 +1077,7 @@ def test_pivot_table_multiindex_only(self, cols):
10781077
expected = DataFrame(
10791078
[[4.0, 5.0, 6.0]],
10801079
columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
1081-
index=Index(["v"], dtype=object),
1080+
index=Index(["v"], dtype="str" if cols == ("a", "b") else "object"),
10821081
)
10831082

10841083
tm.assert_frame_equal(result, expected)
@@ -2570,13 +2569,16 @@ def test_pivot_empty(self):
25702569
expected = DataFrame(index=[], columns=[])
25712570
tm.assert_frame_equal(result, expected, check_names=False)
25722571

2573-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
2574-
@pytest.mark.parametrize("dtype", [object, "string"])
2575-
def test_pivot_integer_bug(self, dtype):
2576-
df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype)
2572+
def test_pivot_integer_bug(self, any_string_dtype):
2573+
df = DataFrame(
2574+
data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=any_string_dtype
2575+
)
25772576

25782577
result = df.pivot(index=1, columns=0, values=2)
2579-
tm.assert_index_equal(result.columns, Index(["A", "B"], name=0, dtype=dtype))
2578+
expected_columns = Index(["A", "B"], name=0, dtype=any_string_dtype)
2579+
if any_string_dtype == "object":
2580+
expected_columns = expected_columns.astype("str")
2581+
tm.assert_index_equal(result.columns, expected_columns)
25802582

25812583
def test_pivot_index_none(self):
25822584
# GH#3962
@@ -2658,7 +2660,9 @@ def test_pivot_columns_not_given(self):
26582660
with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
26592661
df.pivot()
26602662

2661-
@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
2663+
@pytest.mark.xfail(
2664+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2665+
)
26622666
def test_pivot_columns_is_none(self):
26632667
# GH#48293
26642668
df = DataFrame({None: [1], "b": 2, "c": 3})
@@ -2674,7 +2678,9 @@ def test_pivot_columns_is_none(self):
26742678
expected = DataFrame({1: 3}, index=Index([2], name="b"))
26752679
tm.assert_frame_equal(result, expected)
26762680

2677-
@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
2681+
@pytest.mark.xfail(
2682+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2683+
)
26782684
def test_pivot_index_is_none(self):
26792685
# GH#48293
26802686
df = DataFrame({None: [1], "b": 2, "c": 3})
@@ -2688,7 +2694,9 @@ def test_pivot_index_is_none(self):
26882694
expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
26892695
tm.assert_frame_equal(result, expected)
26902696

2691-
@pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
2697+
@pytest.mark.xfail(
2698+
using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
2699+
)
26922700
def test_pivot_values_is_none(self):
26932701
# GH#48293
26942702
df = DataFrame({None: [1], "b": 2, "c": 3})

0 commit comments

Comments
 (0)