Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ Conversion

Strings
^^^^^^^
- Bug in :meth:`Series.str.replace` where a replacement string containing group references (e.g., ``\g<name>`` or ``\1``) would raise an error with ``string[pyarrow]`` dtype (:issue:`57636`)
- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
-

Expand Down
14 changes: 12 additions & 2 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,20 @@ def _str_replace(
flags: int = 0,
regex: bool = True,
) -> Self:
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
if (
isinstance(pat, re.Pattern)
or callable(repl)
or not case
or flags
or (
isinstance(repl, str)
and (r"\g<" in repl or re.search(r"\\\d", repl) is not None)
)
):
raise NotImplementedError(
"replace is not supported with a re.Pattern, callable repl, "
"case=False, or flags!=0"
"case=False, flags!=0, or when the replacement string contains "
"named group references (\\g<...>, \\d+)"
)

func = pc.replace_substring_regex if regex else pc.replace_substring
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,17 @@ def _str_replace(
flags: int = 0,
regex: bool = True,
):
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
if (
isinstance(pat, re.Pattern)
or callable(repl)
or not case
or flags
or ( # substitution contains a named group pattern
# https://docs.python.org/3/library/re.html
isinstance(repl, str)
and (r"\g<" in repl or re.search(r"\\\d", repl) is not None)
)
):
return super()._str_replace(pat, repl, n, case, flags, regex)

return ArrowStringArrayMixin._str_replace(
Expand Down
62 changes: 62 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,68 @@ def test_replace_callable_raises(any_string_dtype, repl):
values.str.replace("a", repl, regex=True)


@pytest.mark.parametrize(
    "repl, expected_list",
    [
        pytest.param(
            r"\g<three> \g<two> \g<one>",
            ["Three Two One", "Baz Bar Foo"],
            id="named_groups_full_swap",
        ),
        pytest.param(
            r"\g<3> \g<2> \g<1>",
            ["Three Two One", "Baz Bar Foo"],
            id="numbered_groups_full_swap",
        ),
        pytest.param(
            r"\g<2>0",
            ["Two0", "Bar0"],
            id="single_group_with_literal",
        ),
        pytest.param(
            r"\g<2>0 \1",
            ["Two0 One", "Bar0 Foo"],
            id="mixed_group_reference_with_literal",
        ),
    ],
)
@pytest.mark.parametrize("use_compile", [True, False])
def test_replace_named_groups_regex_swap(
    any_string_dtype, use_compile, repl, expected_list
):
    """Check ``str.replace`` with group references in the replacement string.

    Each case applies a three-group pattern (groups ``one``, ``two`` and
    ``three``) and a replacement template that refers back to those groups
    by name (``\\g<name>``), by number (``\\g<N>``) or with a plain
    backreference (``\\N``).  The pattern is passed both as a string and as
    a compiled ``re.Pattern``.
    """
    # GH#57636
    raw_pattern = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
    pat = re.compile(raw_pattern) if use_compile else raw_pattern

    values = Series(["One Two Three", "Foo Bar Baz"], dtype=any_string_dtype)
    expected = Series(expected_list, dtype=any_string_dtype)

    result = values.str.replace(pat, repl, regex=True)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("repl", [r"\g<20>", r"\20"])
@pytest.mark.parametrize("use_compile", [True, False])
def test_replace_named_groups_regex_swap_expected_fail(
    any_string_dtype, repl, use_compile
):
    """Check that a reference to a non-existent group raises ``re.error``.

    The pattern defines only three groups, while both replacement templates
    refer to group 20; the call is expected to fail with an
    "invalid group reference" error for string and compiled patterns alike.
    """
    # GH#57636
    raw_pattern = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
    pat = re.compile(raw_pattern) if use_compile else raw_pattern
    values = Series(["One Two Three", "Foo Bar Baz"], dtype=any_string_dtype)

    with pytest.raises(re.error, match="invalid group reference"):
        values.str.replace(pat, repl, regex=True)


def test_replace_callable_named_groups(any_string_dtype):
# test regex named groups
ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
Expand Down
Loading