Skip to content

Commit 9592c2d

Browse files
committed
TST (string dtype): resolve xfails for frame methods
1 parent fae3e80 commit 9592c2d

10 files changed

+17
-43
lines changed

pandas/core/frame.py

+5
Original file line number | Diff line number | Diff line change
@@ -6251,6 +6251,7 @@ class max type
62516251
new_obj = self
62526252
else:
62536253
new_obj = self.copy(deep=False)
6254+
62546255
if allow_duplicates is not lib.no_default:
62556256
allow_duplicates = validate_bool_kwarg(allow_duplicates, "allow_duplicates")
62566257

@@ -6273,6 +6274,10 @@ class max type
62736274
else:
62746275
to_insert = ((self.index, None),)
62756276

6277+
if len(new_obj.columns) == 0 and names:
6278+
target_dtype = Index(names).dtype
6279+
new_obj.columns = new_obj.columns.astype(target_dtype)
6280+
62766281
multi_col = isinstance(self.columns, MultiIndex)
62776282
for j, (lev, lab) in enumerate(to_insert, start=1):
62786283
i = self.index.nlevels - j

pandas/core/internals/blocks.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -2362,5 +2362,6 @@ def external_values(values: ArrayLike) -> ArrayLike:
23622362
values.flags.writeable = False
23632363

23642364
# TODO(CoW) we should also mark our ExtensionArrays as read-only
2365-
2365+
if isinstance(values, ExtensionArray):
2366+
... # this is why test_to_dict_of_blocks_item_cache fails
23662367
return values

pandas/tests/frame/methods/test_astype.py

+3 -9
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas.util._test_decorators as td
97

108
import pandas as pd
@@ -745,7 +743,6 @@ def test_astype_tz_object_conversion(self, tz):
745743
result = result.astype({"tz": "datetime64[ns, Europe/London]"})
746744
tm.assert_frame_equal(result, expected)
747745

748-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
749746
def test_astype_dt64_to_string(
750747
self, frame_or_series, tz_naive_fixture, using_infer_string
751748
):
@@ -757,6 +754,7 @@ def test_astype_dt64_to_string(
757754
dta[0] = NaT
758755

759756
obj = frame_or_series(dta)
757+
760758
result = obj.astype("string")
761759

762760
# Check that Series/DataFrame.astype matches DatetimeArray.astype
@@ -767,13 +765,9 @@ def test_astype_dt64_to_string(
767765
if frame_or_series is DataFrame:
768766
item = item.iloc[0]
769767
if using_infer_string:
770-
assert item is np.nan
771-
else:
772768
assert item is pd.NA
773-
774-
# For non-NA values, we should match what we get for non-EA str
775-
alt = obj.astype(str)
776-
assert np.all(alt.iloc[1:] == result.iloc[1:])
769+
else:
770+
assert item is np.nan
777771

778772
def test_astype_td64_to_string(self, frame_or_series):
779773
# GH#41409

pandas/tests/frame/methods/test_combine_first.py

+1 -6
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.core.dtypes.cast import find_common_type
97
from pandas.core.dtypes.common import is_dtype_equal
108

@@ -32,7 +30,6 @@ def test_combine_first_mixed(self):
3230
combined = f.combine_first(g)
3331
tm.assert_frame_equal(combined, exp)
3432

35-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
3633
def test_combine_first(self, float_frame, using_infer_string):
3734
# disjoint
3835
head, tail = float_frame[:5], float_frame[5:]
@@ -79,9 +76,7 @@ def test_combine_first(self, float_frame, using_infer_string):
7976
tm.assert_series_equal(combined["A"].reindex(g.index), g["A"])
8077

8178
# corner cases
82-
warning = FutureWarning if using_infer_string else None
83-
with tm.assert_produces_warning(warning, match="empty entries"):
84-
comb = float_frame.combine_first(DataFrame())
79+
comb = float_frame.combine_first(DataFrame())
8580
tm.assert_frame_equal(comb, float_frame)
8681

8782
comb = DataFrame().combine_first(float_frame)

pandas/tests/frame/methods/test_cov_corr.py

+1 -6
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas.util._test_decorators as td
75

86
import pandas as pd
@@ -320,7 +318,6 @@ def test_corrwith_non_timeseries_data(self):
320318
for row in index[:4]:
321319
tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
322320

323-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
324321
def test_corrwith_with_objects(self, using_infer_string):
325322
df1 = DataFrame(
326323
np.random.default_rng(2).standard_normal((10, 4)),
@@ -334,9 +331,7 @@ def test_corrwith_with_objects(self, using_infer_string):
334331
df2["obj"] = "bar"
335332

336333
if using_infer_string:
337-
import pyarrow as pa
338-
339-
with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
334+
with pytest.raises(TypeError, match="Cannot perform reduction"):
340335
df1.corrwith(df2)
341336
else:
342337
with pytest.raises(TypeError, match="Could not convert"):

pandas/tests/frame/methods/test_dropna.py

+1 -5
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._config import using_string_dtype
8-
97
import pandas as pd
108
from pandas import (
119
DataFrame,
@@ -184,13 +182,11 @@ def test_dropna_multiple_axes(self):
184182
with pytest.raises(TypeError, match="supplying multiple axes"):
185183
inp.dropna(how="all", axis=(0, 1), inplace=True)
186184

187-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
188185
def test_dropna_tz_aware_datetime(self):
189186
# GH13407
190-
df = DataFrame()
191187
dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc())
192188
dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc())
193-
df["Time"] = [dt1]
189+
df = DataFrame({"Time": [dt1]})
194190
result = df.dropna(axis=0)
195191
expected = DataFrame({"Time": [dt1]})
196192
tm.assert_frame_equal(result, expected)

pandas/tests/frame/methods/test_dtypes.py

+1 -7
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.core.dtypes.dtypes import DatetimeTZDtype
97

108
import pandas as pd
@@ -135,13 +133,9 @@ def test_dtypes_timedeltas(self):
135133
)
136134
tm.assert_series_equal(result, expected)
137135

138-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
139136
def test_frame_apply_np_array_return_type(self, using_infer_string):
140137
# GH 35517
141138
df = DataFrame([["foo"]])
142139
result = df.apply(lambda col: np.array("bar"))
143-
if using_infer_string:
144-
expected = Series([np.array(["bar"])])
145-
else:
146-
expected = Series(["bar"])
140+
expected = Series(np.array(["bar"]), dtype=object)
147141
tm.assert_series_equal(result, expected)

pandas/tests/frame/methods/test_interpolate.py

-1
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,6 @@ def test_interpolate_inplace(self, frame_or_series, request):
6464
assert np.shares_memory(orig, obj.values)
6565
assert orig.squeeze()[1] == 1.5
6666

67-
# TODO(infer_string) raise proper TypeError in case of string dtype
6867
@pytest.mark.xfail(
6968
using_string_dtype(), reason="interpolate doesn't work for string"
7069
)

pandas/tests/frame/methods/test_reset_index.py

+3 -5
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._config import using_string_dtype
8-
97
from pandas.core.dtypes.common import (
108
is_float_dtype,
119
is_integer_dtype,
@@ -644,7 +642,6 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
644642
tm.assert_frame_equal(res, expected)
645643

646644

647-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
648645
@pytest.mark.parametrize(
649646
"array, dtype",
650647
[
@@ -660,10 +657,11 @@ def test_reset_index_dtypes_on_empty_frame_with_multiindex(
660657
):
661658
# GH 19602 - Preserve dtype on empty DataFrame with MultiIndex
662659
idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
663-
result = DataFrame(index=idx)[:0].reset_index().dtypes
660+
result = DataFrame(index=idx)[:0]
661+
result = result.reset_index().dtypes
664662
if using_infer_string and dtype == object:
665663
dtype = pd.StringDtype(na_value=np.nan)
666-
expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype})
664+
expected = Series({"level_0": int, "level_1": float, "level_2": dtype})
667665
tm.assert_series_equal(result, expected)
668666

669667

pandas/tests/frame/methods/test_to_dict_of_blocks.py

-3
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
DataFrame,
86
MultiIndex,
@@ -27,7 +25,6 @@ def test_no_copy_blocks(self, float_frame):
2725
assert _last_df is not None and not _last_df[column].equals(df[column])
2826

2927

30-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
3128
def test_to_dict_of_blocks_item_cache():
3229
# Calling to_dict_of_blocks should not poison item_cache
3330
df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})

0 commit comments

Comments
 (0)