diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index d1881bf04826f..528226502da33 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -102,7 +102,7 @@ Conversion Strings ^^^^^^^ -- +- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`) - Interval diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 67114815341b6..7c359d1a3132b 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -350,9 +350,7 @@ def _str_replace( fallback_performancewarning() return super()._str_replace(pat, repl, n, case, flags, regex) - func = pc.replace_substring_regex if regex else pc.replace_substring - result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n) - return type(self)(result) + return ArrowExtensionArray._str_replace(self, pat, repl, n, case, flags, regex) def _str_repeat(self, repeats: int | Sequence[int]): if not isinstance(repeats, int): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index cdfac633e2ec1..3dbdda388d035 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1855,6 +1855,17 @@ def test_str_replace_negative_n(): expected = pd.Series(["bc", ""], dtype=ArrowDtype(pa.string())) tm.assert_series_equal(expected, actual) + # Same bug for pyarrow-backed StringArray GH#59628 + ser2 = ser.astype(pd.StringDtype(storage="pyarrow")) + actual2 = ser2.str.replace("a", "", -3, True) + expected2 = expected.astype(ser2.dtype) + tm.assert_series_equal(expected2, actual2) + + ser3 = ser.astype(pd.StringDtype(storage="pyarrow", na_value=np.nan)) + actual3 = ser3.str.replace("a", "", -3, True) + expected3 = expected.astype(ser3.dtype) + tm.assert_series_equal(expected3, actual3) + def test_str_repeat_unsupported(): ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))