3
3
import numpy as np
4
4
import pytest
5
5
6
- from pandas ._config import using_string_dtype
7
-
8
6
import pandas as pd
9
7
from pandas import (
10
8
DataFrame ,
21
19
def df ():
22
20
res = DataFrame (
23
21
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
24
- columns = Index (list ("ABCD" ), dtype = object ),
22
+ columns = Index (list ("ABCD" )),
25
23
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
26
24
)
27
25
res ["id1" ] = (res ["A" ] > 0 ).astype (np .int64 )
@@ -83,7 +81,6 @@ def test_default_col_names(self, df):
83
81
result2 = df .melt (id_vars = ["id1" , "id2" ])
84
82
assert result2 .columns .tolist () == ["id1" , "id2" , "variable" , "value" ]
85
83
86
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
87
84
def test_value_vars (self , df ):
88
85
result3 = df .melt (id_vars = ["id1" , "id2" ], value_vars = "A" )
89
86
assert len (result3 ) == 10
@@ -100,7 +97,6 @@ def test_value_vars(self, df):
100
97
)
101
98
tm .assert_frame_equal (result4 , expected4 )
102
99
103
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
104
100
@pytest .mark .parametrize ("type_" , (tuple , list , np .array ))
105
101
def test_value_vars_types (self , type_ , df ):
106
102
# GH 15348
@@ -178,7 +174,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
178
174
with pytest .raises (ValueError , match = msg ):
179
175
df1 .melt (id_vars = id_vars , value_vars = value_vars )
180
176
181
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
182
177
def test_custom_var_name (self , df , var_name ):
183
178
result5 = df .melt (var_name = var_name )
184
179
assert result5 .columns .tolist () == ["var" , "value" ]
@@ -206,7 +201,6 @@ def test_custom_var_name(self, df, var_name):
206
201
)
207
202
tm .assert_frame_equal (result9 , expected9 )
208
203
209
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
210
204
def test_custom_value_name (self , df , value_name ):
211
205
result10 = df .melt (value_name = value_name )
212
206
assert result10 .columns .tolist () == ["variable" , "val" ]
@@ -236,7 +230,6 @@ def test_custom_value_name(self, df, value_name):
236
230
)
237
231
tm .assert_frame_equal (result14 , expected14 )
238
232
239
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
240
233
def test_custom_var_and_value_name (self , df , value_name , var_name ):
241
234
result15 = df .melt (var_name = var_name , value_name = value_name )
242
235
assert result15 .columns .tolist () == ["var" , "val" ]
@@ -361,14 +354,15 @@ def test_melt_missing_columns_raises(self):
361
354
with pytest .raises (KeyError , match = msg ):
362
355
df .melt (["A" ], ["F" ], col_level = 0 )
363
356
364
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
365
357
def test_melt_mixed_int_str_id_vars (self ):
366
358
# GH 29718
367
359
df = DataFrame ({0 : ["foo" ], "a" : ["bar" ], "b" : [1 ], "d" : [2 ]})
368
360
result = melt (df , id_vars = [0 , "a" ], value_vars = ["b" , "d" ])
369
361
expected = DataFrame (
370
362
{0 : ["foo" ] * 2 , "a" : ["bar" ] * 2 , "variable" : list ("bd" ), "value" : [1 , 2 ]}
371
363
)
364
+ # the df's column labels are mixed int/str and thus object dtype -> "variable" preserves object dtype
365
+ expected ["variable" ] = expected ["variable" ].astype (object )
372
366
tm .assert_frame_equal (result , expected )
373
367
374
368
def test_melt_mixed_int_str_value_vars (self ):
@@ -1222,12 +1216,10 @@ def test_raise_of_column_name_value(self):
1222
1216
):
1223
1217
df .melt (id_vars = "value" , value_name = "value" )
1224
1218
1225
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
1226
- @pytest .mark .parametrize ("dtype" , ["O" , "string" ])
1227
- def test_missing_stubname (self , dtype ):
1219
+ def test_missing_stubname (self , any_string_dtype ):
1228
1220
# GH46044
1229
1221
df = DataFrame ({"id" : ["1" , "2" ], "a-1" : [100 , 200 ], "a-2" : [300 , 400 ]})
1230
- df = df .astype ({"id" : dtype })
1222
+ df = df .astype ({"id" : any_string_dtype })
1231
1223
result = wide_to_long (
1232
1224
df ,
1233
1225
stubnames = ["a" , "b" ],
@@ -1243,12 +1235,13 @@ def test_missing_stubname(self, dtype):
1243
1235
{"a" : [100 , 200 , 300 , 400 ], "b" : [np .nan ] * 4 },
1244
1236
index = index ,
1245
1237
)
1246
- new_level = expected .index .levels [0 ].astype (dtype )
1238
+ new_level = expected .index .levels [0 ].astype (any_string_dtype )
1239
+ if any_string_dtype == "object" :
1240
+ new_level = expected .index .levels [0 ].astype ("str" )
1247
1241
expected .index = expected .index .set_levels (new_level , level = 0 )
1248
1242
tm .assert_frame_equal (result , expected )
1249
1243
1250
1244
1251
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
1252
1245
def test_wide_to_long_pyarrow_string_columns ():
1253
1246
# GH 57066
1254
1247
pytest .importorskip ("pyarrow" )
@@ -1267,7 +1260,7 @@ def test_wide_to_long_pyarrow_string_columns():
1267
1260
)
1268
1261
expected = DataFrame (
1269
1262
[[1 , 1 ], [1 , 1 ], [1 , 2 ]],
1270
- columns = Index (["D" , "R" ], dtype = object ),
1263
+ columns = Index (["D" , "R" ]),
1271
1264
index = pd .MultiIndex .from_arrays (
1272
1265
[
1273
1266
[1 , 1 , 1 ],
0 commit comments