diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 4e1ea07907cdb..aed8287926810 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5481,11 +5481,7 @@ def equals(self, other: Any) -> bool:
             # quickly return if the lengths are different
             return False
 
-        if (
-            isinstance(self.dtype, StringDtype)
-            and self.dtype.na_value is np.nan
-            and other.dtype != self.dtype
-        ):
+        if isinstance(self.dtype, StringDtype) and other.dtype != self.dtype:
             # TODO(infer_string) can we avoid this special case?
             # special case for object behavior
             return other.equals(self.astype(object))
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index bc69ec388bf0c..e6a86dad8abf6 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -11,7 +11,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import HAS_PYARROW
+import pandas.util._test_decorators as td
 
 import pandas as pd
 from pandas import (
@@ -2183,19 +2183,28 @@ def test_enum_column_equality():
     tm.assert_series_equal(result, expected)
 
 
-def test_mixed_col_index_dtype(using_infer_string):
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        "string[python]",
+        pytest.param(
+            "string[pyarrow]",
+            marks=td.skip_if_no("pyarrow"),
+        ),
+        pytest.param(
+            "str",
+            marks=td.skip_if_no("pyarrow"),
+        ),
+    ],
+)
+def test_mixed_col_index_dtype(dtype):
     # GH 47382
     df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
     df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
-    df1.columns = df2.columns.astype("string")
+    df1.columns = df2.columns.astype(dtype)
     result = df1 + df2
     expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
-    if using_infer_string:
-        # df2.columns.dtype will be "str" instead of object,
-        # so the aligned result will be "string", not object
-        if HAS_PYARROW:
-            dtype = "string[pyarrow]"
-        else:
-            dtype = "string"
-        expected.columns = expected.columns.astype(dtype)
+
+    expected.columns = expected.columns.astype(dtype)
+
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 5b75bd9afd6df..fa3510996b3b7 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -40,6 +40,7 @@
     ensure_index,
     ensure_index_from_sequences,
 )
+from pandas.testing import assert_series_equal
 
 
 class TestIndex:
@@ -1717,3 +1718,51 @@ def test_is_monotonic_pyarrow_list_type():
     idx = Index([[1], [2, 3]], dtype=pd.ArrowDtype(pa.list_(pa.int64())))
     assert not idx.is_monotonic_increasing
     assert not idx.is_monotonic_decreasing
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        "string[python]",
+        pytest.param(
+            "string[pyarrow]",
+            marks=td.skip_if_no("pyarrow"),
+        ),
+        pytest.param(
+            "str",
+            marks=td.skip_if_no("pyarrow"),
+        ),
+    ],
+)
+def test_index_equals_different_string_dtype(dtype):
+    # GH 61099
+    idx_obj = Index(["a", "b", "c"])
+    idx_str = Index(["a", "b", "c"], dtype=dtype)
+
+    assert idx_obj.equals(idx_str)
+    assert idx_str.equals(idx_obj)
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        "string[python]",
+        pytest.param(
+            "string[pyarrow]",
+            marks=td.skip_if_no("pyarrow"),
+        ),
+        pytest.param(
+            "str",
+            marks=td.skip_if_no("pyarrow"),
+        ),
+    ],
+)
+def test_index_comparison_different_string_dtype(dtype):
+    # GH 61099
+    idx = Index(["a", "b", "c"])
+    s_obj = Series([1, 2, 3], index=idx)
+    s_str = Series([4, 5, 6], index=idx.astype(dtype))
+
+    expected = Series([True, True, True], index=["a", "b", "c"])
+    result = s_obj < s_str
+    assert_series_equal(result, expected)