Skip to content

Commit 0f172d8

Browse files
merge master and fix conflict
1 parent 129aaea commit 0f172d8

File tree

4 files changed

+103
-27
lines changed

4 files changed

+103
-27
lines changed

doc/source/whatsnew/v0.25.0.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ Other Deprecations
495495
Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`).
496496
- The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version.
497497
Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`).
498-
498+
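- In :func:`Series.str.replace`, when ``pat`` is a single special regex character (such as ``.``, ``|`` or ``\``) and ``regex`` is not specified, ``pat`` is currently treated as a literal string (as if ``regex=False``) and a ``FutureWarning`` is issued; this default may change in a future version so that ``pat`` is interpreted as a regex (:issue:`24804`)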
499499

500500
.. _whatsnew_0250.prior_deprecations:
501501

@@ -605,10 +605,14 @@ Conversion
605605

606606
Strings
607607
^^^^^^^
608+
- Bug in :func:`Series.str.replace` not applying regex in patterns of length 1 (:issue:`24804`)
608609

609612
- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`)
610613
-
611614
-
612616

613617

614618
Interval

pandas/core/reshape/melt.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,8 @@ def melt_stub(df, stub, i, j, value_vars, sep):
413413
newdf = melt(df, id_vars=i, value_vars=value_vars,
414414
value_name=stub.rstrip(sep), var_name=j)
415415
newdf[j] = Categorical(newdf[j])
416-
newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "")
416+
newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "",
417+
regex=True)
417418

418419
# GH17627 Cast numerics suffixes to int/float
419420
newdf[j] = to_numeric(newdf[j], errors='ignore')

pandas/core/strings.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ def str_endswith(arr, pat, na=np.nan):
421421
return _na_map(f, arr, na, dtype=bool)
422422

423423

424-
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
424+
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=None):
425425
r"""
426426
Replace occurrences of pattern/regex in the Series/Index with
427427
some other string. Equivalent to :meth:`str.replace` or
@@ -452,9 +452,13 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
452452
flags : int, default 0 (no flags)
453453
- re module flags, e.g. re.IGNORECASE
454454
- Cannot be set if `pat` is a compiled regex
455-
regex : bool, default True
455+
regex : boolean, default None
456456
- If True, assumes the passed-in pattern is a regular expression.
457457
- If False, treats the pattern as a literal string
458+
- If `pat` is a single character and `regex` is not specified, `pat`
459+
is interpreted as a string literal. If `pat` is also a regular
460+
expression symbol, a warning is issued that in the future `pat`
461+
will be interpreted as a regex, rather than a literal.
458462
- Cannot be set to False if `pat` is a compiled regex or `repl` is
459463
a callable.
460464
@@ -561,7 +565,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
561565
# add case flag, if provided
562566
if case is False:
563567
flags |= re.IGNORECASE
564-
if is_compiled_re or len(pat) > 1 or flags or callable(repl):
568+
if is_compiled_re or pat or flags or callable(repl):
565569
n = n if n >= 0 else 0
566570
compiled = re.compile(pat, flags=flags)
567571
f = lambda x: compiled.sub(repl=repl, string=x, count=n)
@@ -574,6 +578,12 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
574578
if callable(repl):
575579
raise ValueError("Cannot use a callable replacement when "
576580
"regex=False")
581+
# if regex is default None, and a single special character is given
582+
# in pat, still take it as a literal, and raise the Future warning
583+
if regex is None and len(pat) == 1 and pat in list(r"[\^$.|?*+()]"):
584+
warnings.warn("'{}' is interpreted as a literal in ".format(pat) +
585+
"default, not regex. It will change in the future.",
586+
FutureWarning)
577587
f = lambda x: x.replace(pat, repl, n)
578588

579589
return _na_map(f, arr)

pandas/tests/test_strings.py

+83-22
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from numpy.random import randint
77
import pytest
88

9+
import pandas.compat as compat
10+
911
from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, notna
1012
import pandas.core.strings as strings
1113
import pandas.util.testing as tm
@@ -892,27 +894,39 @@ def test_casemethods(self):
892894
def test_replace(self):
893895
values = Series(['fooBAD__barBAD', NA])
894896

895-
result = values.str.replace('BAD[_]*', '')
897+
result = values.str.replace('BAD[_]*', '', regex=True)
896898
exp = Series(['foobar', NA])
897899
tm.assert_series_equal(result, exp)
898900

899-
result = values.str.replace('BAD[_]*', '', n=1)
901+
result = values.str.replace('BAD[_]*', '', regex=True, n=1)
900902
exp = Series(['foobarBAD', NA])
901903
tm.assert_series_equal(result, exp)
902904

903905
# mixed
904906
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
905907
None, 1, 2.])
906908

907-
rs = Series(mixed).str.replace('BAD[_]*', '')
909+
rs = Series(mixed).str.replace('BAD[_]*', '', regex=True)
908910
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
909911
assert isinstance(rs, Series)
910912
tm.assert_almost_equal(rs, xp)
911913

914+
# unicode
915+
values = Series([u'fooBAD__barBAD', NA])
916+
917+
result = values.str.replace('BAD[_]*', '', regex=True)
918+
exp = Series([u'foobar', NA])
919+
tm.assert_series_equal(result, exp)
920+
921+
result = values.str.replace('BAD[_]*', '', n=1, regex=True)
922+
exp = Series([u'foobarBAD', NA])
923+
tm.assert_series_equal(result, exp)
924+
912925
# flags + unicode
913926
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
914927
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
915-
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE)
928+
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", regex=True,
929+
flags=re.UNICODE)
916930
tm.assert_series_equal(result, exp)
917931

918932
# GH 13438
@@ -930,7 +944,7 @@ def test_replace_callable(self):
930944

931945
# test with callable
932946
repl = lambda m: m.group(0).swapcase()
933-
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2)
947+
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2, regex=True)
934948
exp = Series(['foObaD__baRbaD', NA])
935949
tm.assert_series_equal(result, exp)
936950

@@ -940,21 +954,21 @@ def test_replace_callable(self):
940954

941955
repl = lambda: None
942956
with pytest.raises(TypeError, match=p_err):
943-
values.str.replace('a', repl)
957+
values.str.replace('a', repl, regex=True)
944958

945959
repl = lambda m, x: None
946960
with pytest.raises(TypeError, match=p_err):
947-
values.str.replace('a', repl)
961+
values.str.replace('a', repl, regex=True)
948962

949963
repl = lambda m, x, y=None: None
950964
with pytest.raises(TypeError, match=p_err):
951-
values.str.replace('a', repl)
965+
values.str.replace('a', repl, regex=True)
952966

953967
# test regex named groups
954968
values = Series(['Foo Bar Baz', NA])
955969
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
956970
repl = lambda m: m.group('middle').swapcase()
957-
result = values.str.replace(pat, repl)
971+
result = values.str.replace(pat, repl, regex=True)
958972
exp = Series(['bAR', NA])
959973
tm.assert_series_equal(result, exp)
960974

@@ -964,28 +978,39 @@ def test_replace_compiled_regex(self):
964978

965979
# test with compiled regex
966980
pat = re.compile(r'BAD[_]*')
967-
result = values.str.replace(pat, '')
981+
result = values.str.replace(pat, '', regex=True)
968982
exp = Series(['foobar', NA])
969983
tm.assert_series_equal(result, exp)
970984

971-
result = values.str.replace(pat, '', n=1)
985+
result = values.str.replace(pat, '', n=1, regex=True)
972986
exp = Series(['foobarBAD', NA])
973987
tm.assert_series_equal(result, exp)
974988

975989
# mixed
976990
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
977991
None, 1, 2.])
978992

979-
rs = Series(mixed).str.replace(pat, '')
993+
rs = Series(mixed).str.replace(pat, '', regex=True)
980994
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
981995
assert isinstance(rs, Series)
982996
tm.assert_almost_equal(rs, xp)
983997

998+
# unicode
999+
values = Series([u'fooBAD__barBAD', NA])
1000+
1001+
result = values.str.replace(pat, '', regex=True)
1002+
exp = Series([u'foobar', NA])
1003+
tm.assert_series_equal(result, exp)
1004+
1005+
result = values.str.replace(pat, '', n=1, regex=True)
1006+
exp = Series([u'foobarBAD', NA])
1007+
tm.assert_series_equal(result, exp)
1008+
9841009
# flags + unicode
9851010
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
9861011
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
9871012
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
988-
result = values.str.replace(pat, ", ")
1013+
result = values.str.replace(pat, ", ", regex=True)
9891014
tm.assert_series_equal(result, exp)
9901015

9911016
# case and flags provided to str.replace will have no effect
@@ -995,29 +1020,30 @@ def test_replace_compiled_regex(self):
9951020

9961021
with pytest.raises(ValueError,
9971022
match="case and flags cannot be"):
998-
result = values.str.replace(pat, '', flags=re.IGNORECASE)
1023+
result = values.str.replace(pat, '', flags=re.IGNORECASE,
1024+
regex=True)
9991025

10001026
with pytest.raises(ValueError,
10011027
match="case and flags cannot be"):
1002-
result = values.str.replace(pat, '', case=False)
1028+
result = values.str.replace(pat, '', case=False, regex=True)
10031029

10041030
with pytest.raises(ValueError,
10051031
match="case and flags cannot be"):
1006-
result = values.str.replace(pat, '', case=True)
1032+
result = values.str.replace(pat, '', case=True, regex=True)
10071033

10081034
# test with callable
10091035
values = Series(['fooBAD__barBAD', NA])
10101036
repl = lambda m: m.group(0).swapcase()
10111037
pat = re.compile('[a-z][A-Z]{2}')
1012-
result = values.str.replace(pat, repl, n=2)
1038+
result = values.str.replace(pat, repl, n=2, regex=True)
10131039
exp = Series(['foObaD__baRbaD', NA])
10141040
tm.assert_series_equal(result, exp)
10151041

10161042
def test_replace_literal(self):
10171043
# GH16808 literal replace (regex=False vs regex=True)
10181044
values = Series(['f.o', 'foo', NA])
10191045
exp = Series(['bao', 'bao', NA])
1020-
result = values.str.replace('f.', 'ba')
1046+
result = values.str.replace('f.', 'ba', regex=True)
10211047
tm.assert_series_equal(result, exp)
10221048

10231049
exp = Series(['bao', 'foo', NA])
@@ -2710,6 +2736,7 @@ def test_partition_deprecation(self):
27102736
result = values.str.rpartition(pat='_')
27112737
tm.assert_frame_equal(result, expected)
27122738

2739+
@pytest.mark.filterwarnings("ignore: '|' is interpreted as a literal")
27132740
def test_pipe_failures(self):
27142741
# #2119
27152742
s = Series(['A|B|C'])
@@ -2719,7 +2746,7 @@ def test_pipe_failures(self):
27192746

27202747
tm.assert_series_equal(result, exp)
27212748

2722-
result = s.str.replace('|', ' ')
2749+
result = s.str.replace('|', ' ', regex=None)
27232750
exp = Series(['A B C'])
27242751

27252752
tm.assert_series_equal(result, exp)
@@ -2980,17 +3007,17 @@ def test_replace_moar(self):
29803007
s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA',
29813008
'dog', 'cat'])
29823009

2983-
result = s.str.replace('A', 'YYY')
3010+
result = s.str.replace('A', 'YYY', regex=True)
29843011
expected = Series(['YYY', 'B', 'C', 'YYYaba', 'Baca', '', NA,
29853012
'CYYYBYYY', 'dog', 'cat'])
29863013
assert_series_equal(result, expected)
29873014

2988-
result = s.str.replace('A', 'YYY', case=False)
3015+
result = s.str.replace('A', 'YYY', case=False, regex=True)
29893016
expected = Series(['YYY', 'B', 'C', 'YYYYYYbYYY', 'BYYYcYYY', '', NA,
29903017
'CYYYBYYY', 'dog', 'cYYYt'])
29913018
assert_series_equal(result, expected)
29923019

2993-
result = s.str.replace('^.a|dog', 'XX-XX ', case=False)
3020+
result = s.str.replace('^.a|dog', 'XX-XX ', case=False, regex=True)
29943021
expected = Series(['A', 'B', 'C', 'XX-XX ba', 'XX-XX ca', '', NA,
29953022
'XX-XX BA', 'XX-XX ', 'XX-XX t'])
29963023
assert_series_equal(result, expected)
@@ -3162,6 +3189,40 @@ def test_method_on_bytes(self):
31623189
match="Cannot use .str.cat with values of.*"):
31633190
lhs.str.cat(rhs)
31643191

3192+
@pytest.mark.filterwarnings("ignore: '.' is interpreted as a literal")
3193+
@pytest.mark.parametrize("regex, expected_array", [
3194+
(True, ['foofoofoo', 'foofoofoo']),
3195+
(False, ['abc', '123']),
3196+
(None, ['abc', '123'])
3197+
])
3198+
def test_replace_single_pattern(self, regex, expected_array):
3199+
values = Series(['abc', '123'])
3200+
# GH: 24804
3201+
result = values.str.replace('.', 'foo', regex=regex)
3202+
expected = Series(expected_array)
3203+
tm.assert_series_equal(result, expected)
3204+
3205+
@pytest.mark.parametrize("input_array, single_char, replace_char, "
3206+
"expect_array, warn",
3207+
[("a.c", ".", "b", "abc", True),
3208+
("a@c", "@", "at", "aatc", False)]
3209+
)
3210+
def test_replace_warning_single_character(self, input_array,
3211+
single_char, replace_char,
3212+
expect_array, warn):
3213+
# GH: 24804
3214+
values = Series([input_array])
3215+
if warn:
3216+
with tm.assert_produces_warning(FutureWarning,
3217+
check_stacklevel=False):
3218+
result = values.str.replace(single_char, replace_char)
3219+
else:
3220+
result = values.str.replace(single_char, replace_char)
3221+
3222+
expected = Series([expect_array])
3223+
tm.assert_series_equal(result, expected)
3224+
3225+
@pytest.mark.skipif(compat.PY2, reason='not in python2')
31653226
def test_casefold(self):
31663227
# GH25405
31673228
expected = Series(['ss', NA, 'case', 'ssd'])

0 commit comments

Comments
 (0)