From 507851b9be96834118968bdcd7b8e99277ed537d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Aug 2024 13:47:00 -0700 Subject: [PATCH 1/2] BUG (string): str.replace with negative n --- doc/source/whatsnew/v2.3.0.rst | 2 +- pandas/core/arrays/string_arrow.py | 4 +--- pandas/tests/extension/test_arrow.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index d1881bf04826f..e8e1be833e15e 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -102,7 +102,7 @@ Conversion Strings ^^^^^^^ -- +- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`56404`) - Interval diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 67114815341b6..7c359d1a3132b 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -350,9 +350,7 @@ def _str_replace( fallback_performancewarning() return super()._str_replace(pat, repl, n, case, flags, regex) - func = pc.replace_substring_regex if regex else pc.replace_substring - result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n) - return type(self)(result) + return ArrowExtensionArray._str_replace(self, pat, repl, n, case, flags, regex) def _str_repeat(self, repeats: int | Sequence[int]): if not isinstance(repeats, int): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index cdfac633e2ec1..d3f76c80081ea 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1855,6 +1855,16 @@ def test_str_replace_negative_n(): expected = pd.Series(["bc", ""], dtype=ArrowDtype(pa.string())) tm.assert_series_equal(expected, actual) + ser2 = ser.astype(pd.StringDtype(storage="pyarrow")) + actual2 = ser2.str.replace("a", "", -3, True) + expected2 = expected.astype(ser2.dtype) + tm.assert_series_equal(expected2, actual2) + + ser3 = ser.astype(pd.StringDtype(storage="pyarrow", na_value=np.nan)) + actual3 = ser3.str.replace("a", "", -3, True) + expected3 = expected.astype(ser3.dtype) + tm.assert_series_equal(expected3, actual3) + def test_str_repeat_unsupported(): ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string())) From b00da7f747e19935fc6bde93546a015c16f41e70 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Aug 2024 13:48:04 -0700 Subject: [PATCH 2/2] update GH ref --- doc/source/whatsnew/v2.3.0.rst | 2 +- pandas/tests/extension/test_arrow.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index e8e1be833e15e..528226502da33 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -102,7 +102,7 @@ Conversion Strings ^^^^^^^ -- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`56404`) +- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`) - Interval diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index d3f76c80081ea..3dbdda388d035 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1855,6 +1855,7 @@ def test_str_replace_negative_n(): expected = pd.Series(["bc", ""], dtype=ArrowDtype(pa.string())) tm.assert_series_equal(expected, actual) + # Same bug for pyarrow-backed StringArray GH#59628 ser2 = ser.astype(pd.StringDtype(storage="pyarrow")) actual2 = ser2.str.replace("a", "", -3, True) expected2 = expected.astype(ser2.dtype)