6
6
from numpy .random import randint
7
7
import pytest
8
8
9
+ import pandas .compat as compat
10
+
9
11
from pandas import DataFrame , Index , MultiIndex , Series , concat , isna , notna
10
12
import pandas .core .strings as strings
11
13
import pandas .util .testing as tm
@@ -892,27 +894,39 @@ def test_casemethods(self):
892
894
def test_replace (self ):
893
895
values = Series (['fooBAD__barBAD' , NA ])
894
896
895
- result = values .str .replace ('BAD[_]*' , '' )
897
+ result = values .str .replace ('BAD[_]*' , '' , regex = True )
896
898
exp = Series (['foobar' , NA ])
897
899
tm .assert_series_equal (result , exp )
898
900
899
- result = values .str .replace ('BAD[_]*' , '' , n = 1 )
901
+ result = values .str .replace ('BAD[_]*' , '' , regex = True , n = 1 )
900
902
exp = Series (['foobarBAD' , NA ])
901
903
tm .assert_series_equal (result , exp )
902
904
903
905
# mixed
904
906
mixed = Series (['aBAD' , NA , 'bBAD' , True , datetime .today (), 'fooBAD' ,
905
907
None , 1 , 2. ])
906
908
907
- rs = Series (mixed ).str .replace ('BAD[_]*' , '' )
909
+ rs = Series (mixed ).str .replace ('BAD[_]*' , '' , regex = True )
908
910
xp = Series (['a' , NA , 'b' , NA , NA , 'foo' , NA , NA , NA ])
909
911
assert isinstance (rs , Series )
910
912
tm .assert_almost_equal (rs , xp )
911
913
914
+ # unicode
915
+ values = Series ([u'fooBAD__barBAD' , NA ])
916
+
917
+ result = values .str .replace ('BAD[_]*' , '' , regex = True )
918
+ exp = Series ([u'foobar' , NA ])
919
+ tm .assert_series_equal (result , exp )
920
+
921
+ result = values .str .replace ('BAD[_]*' , '' , n = 1 , regex = True )
922
+ exp = Series ([u'foobarBAD' , NA ])
923
+ tm .assert_series_equal (result , exp )
924
+
912
925
# flags + unicode
913
926
values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
914
927
exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
915
- result = values .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE )
928
+ result = values .str .replace (r"(?<=\w),(?=\w)" , ", " , regex = True ,
929
+ flags = re .UNICODE )
916
930
tm .assert_series_equal (result , exp )
917
931
918
932
# GH 13438
@@ -930,7 +944,7 @@ def test_replace_callable(self):
930
944
931
945
# test with callable
932
946
repl = lambda m : m .group (0 ).swapcase ()
933
- result = values .str .replace ('[a-z][A-Z]{2}' , repl , n = 2 )
947
+ result = values .str .replace ('[a-z][A-Z]{2}' , repl , n = 2 , regex = True )
934
948
exp = Series (['foObaD__baRbaD' , NA ])
935
949
tm .assert_series_equal (result , exp )
936
950
@@ -940,21 +954,21 @@ def test_replace_callable(self):
940
954
941
955
repl = lambda : None
942
956
with pytest .raises (TypeError , match = p_err ):
943
- values .str .replace ('a' , repl )
957
+ values .str .replace ('a' , repl , regex = True )
944
958
945
959
repl = lambda m , x : None
946
960
with pytest .raises (TypeError , match = p_err ):
947
- values .str .replace ('a' , repl )
961
+ values .str .replace ('a' , repl , regex = True )
948
962
949
963
repl = lambda m , x , y = None : None
950
964
with pytest .raises (TypeError , match = p_err ):
951
- values .str .replace ('a' , repl )
965
+ values .str .replace ('a' , repl , regex = True )
952
966
953
967
# test regex named groups
954
968
values = Series (['Foo Bar Baz' , NA ])
955
969
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
956
970
repl = lambda m : m .group ('middle' ).swapcase ()
957
- result = values .str .replace (pat , repl )
971
+ result = values .str .replace (pat , repl , regex = True )
958
972
exp = Series (['bAR' , NA ])
959
973
tm .assert_series_equal (result , exp )
960
974
@@ -964,28 +978,39 @@ def test_replace_compiled_regex(self):
964
978
965
979
# test with compiled regex
966
980
pat = re .compile (r'BAD[_]*' )
967
- result = values .str .replace (pat , '' )
981
+ result = values .str .replace (pat , '' , regex = True )
968
982
exp = Series (['foobar' , NA ])
969
983
tm .assert_series_equal (result , exp )
970
984
971
- result = values .str .replace (pat , '' , n = 1 )
985
+ result = values .str .replace (pat , '' , n = 1 , regex = True )
972
986
exp = Series (['foobarBAD' , NA ])
973
987
tm .assert_series_equal (result , exp )
974
988
975
989
# mixed
976
990
mixed = Series (['aBAD' , NA , 'bBAD' , True , datetime .today (), 'fooBAD' ,
977
991
None , 1 , 2. ])
978
992
979
- rs = Series (mixed ).str .replace (pat , '' )
993
+ rs = Series (mixed ).str .replace (pat , '' , regex = True )
980
994
xp = Series (['a' , NA , 'b' , NA , NA , 'foo' , NA , NA , NA ])
981
995
assert isinstance (rs , Series )
982
996
tm .assert_almost_equal (rs , xp )
983
997
998
+ # unicode
999
+ values = Series ([u'fooBAD__barBAD' , NA ])
1000
+
1001
+ result = values .str .replace (pat , '' , regex = True )
1002
+ exp = Series ([u'foobar' , NA ])
1003
+ tm .assert_series_equal (result , exp )
1004
+
1005
+ result = values .str .replace (pat , '' , n = 1 , regex = True )
1006
+ exp = Series ([u'foobarBAD' , NA ])
1007
+ tm .assert_series_equal (result , exp )
1008
+
984
1009
# flags + unicode
985
1010
values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
986
1011
exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
987
1012
pat = re .compile (r"(?<=\w),(?=\w)" , flags = re .UNICODE )
988
- result = values .str .replace (pat , ", " )
1013
+ result = values .str .replace (pat , ", " , regex = True )
989
1014
tm .assert_series_equal (result , exp )
990
1015
991
1016
# case and flags provided to str.replace will have no effect
@@ -995,29 +1020,30 @@ def test_replace_compiled_regex(self):
995
1020
996
1021
with pytest .raises (ValueError ,
997
1022
match = "case and flags cannot be" ):
998
- result = values .str .replace (pat , '' , flags = re .IGNORECASE )
1023
+ result = values .str .replace (pat , '' , flags = re .IGNORECASE ,
1024
+ regex = True )
999
1025
1000
1026
with pytest .raises (ValueError ,
1001
1027
match = "case and flags cannot be" ):
1002
- result = values .str .replace (pat , '' , case = False )
1028
+ result = values .str .replace (pat , '' , case = False , regex = True )
1003
1029
1004
1030
with pytest .raises (ValueError ,
1005
1031
match = "case and flags cannot be" ):
1006
- result = values .str .replace (pat , '' , case = True )
1032
+ result = values .str .replace (pat , '' , case = True , regex = True )
1007
1033
1008
1034
# test with callable
1009
1035
values = Series (['fooBAD__barBAD' , NA ])
1010
1036
repl = lambda m : m .group (0 ).swapcase ()
1011
1037
pat = re .compile ('[a-z][A-Z]{2}' )
1012
- result = values .str .replace (pat , repl , n = 2 )
1038
+ result = values .str .replace (pat , repl , n = 2 , regex = True )
1013
1039
exp = Series (['foObaD__baRbaD' , NA ])
1014
1040
tm .assert_series_equal (result , exp )
1015
1041
1016
1042
def test_replace_literal (self ):
1017
1043
# GH16808 literal replace (regex=False vs regex=True)
1018
1044
values = Series (['f.o' , 'foo' , NA ])
1019
1045
exp = Series (['bao' , 'bao' , NA ])
1020
- result = values .str .replace ('f.' , 'ba' )
1046
+ result = values .str .replace ('f.' , 'ba' , regex = True )
1021
1047
tm .assert_series_equal (result , exp )
1022
1048
1023
1049
exp = Series (['bao' , 'foo' , NA ])
@@ -2710,6 +2736,7 @@ def test_partition_deprecation(self):
2710
2736
result = values .str .rpartition (pat = '_' )
2711
2737
tm .assert_frame_equal (result , expected )
2712
2738
2739
+ @pytest .mark .filterwarnings ("ignore: '|' is interpreted as a literal" )
2713
2740
def test_pipe_failures (self ):
2714
2741
# #2119
2715
2742
s = Series (['A|B|C' ])
@@ -2719,7 +2746,7 @@ def test_pipe_failures(self):
2719
2746
2720
2747
tm .assert_series_equal (result , exp )
2721
2748
2722
- result = s .str .replace ('|' , ' ' )
2749
+ result = s .str .replace ('|' , ' ' , regex = None )
2723
2750
exp = Series (['A B C' ])
2724
2751
2725
2752
tm .assert_series_equal (result , exp )
@@ -2980,17 +3007,17 @@ def test_replace_moar(self):
2980
3007
s = Series (['A' , 'B' , 'C' , 'Aaba' , 'Baca' , '' , NA , 'CABA' ,
2981
3008
'dog' , 'cat' ])
2982
3009
2983
- result = s .str .replace ('A' , 'YYY' )
3010
+ result = s .str .replace ('A' , 'YYY' , regex = True )
2984
3011
expected = Series (['YYY' , 'B' , 'C' , 'YYYaba' , 'Baca' , '' , NA ,
2985
3012
'CYYYBYYY' , 'dog' , 'cat' ])
2986
3013
assert_series_equal (result , expected )
2987
3014
2988
- result = s .str .replace ('A' , 'YYY' , case = False )
3015
+ result = s .str .replace ('A' , 'YYY' , case = False , regex = True )
2989
3016
expected = Series (['YYY' , 'B' , 'C' , 'YYYYYYbYYY' , 'BYYYcYYY' , '' , NA ,
2990
3017
'CYYYBYYY' , 'dog' , 'cYYYt' ])
2991
3018
assert_series_equal (result , expected )
2992
3019
2993
- result = s .str .replace ('^.a|dog' , 'XX-XX ' , case = False )
3020
+ result = s .str .replace ('^.a|dog' , 'XX-XX ' , case = False , regex = True )
2994
3021
expected = Series (['A' , 'B' , 'C' , 'XX-XX ba' , 'XX-XX ca' , '' , NA ,
2995
3022
'XX-XX BA' , 'XX-XX ' , 'XX-XX t' ])
2996
3023
assert_series_equal (result , expected )
@@ -3162,6 +3189,40 @@ def test_method_on_bytes(self):
3162
3189
match = "Cannot use .str.cat with values of.*" ):
3163
3190
lhs .str .cat (rhs )
3164
3191
3192
@pytest.mark.filterwarnings("ignore: '.' is interpreted as a literal")
@pytest.mark.parametrize("regex, expected_array", [
    (True, ['foofoofoo', 'foofoofoo']),
    (False, ['abc', '123']),
    (None, ['abc', '123'])
])
def test_replace_single_pattern(self, regex, expected_array):
    """Replace the one-character pattern '.' under each ``regex`` setting.

    The parametrized table pins the expected outcome: with ``regex=True``
    every character is substituted by 'foo', while ``regex=False`` and
    ``regex=None`` leave these dot-free strings untouched.
    """
    # GH: 24804
    source = Series(['abc', '123'])
    replaced = source.str.replace('.', 'foo', regex=regex)
    tm.assert_series_equal(replaced, Series(expected_array))
3204
+
3205
@pytest.mark.parametrize("input_array, single_char, replace_char, "
                         "expect_array, warn",
                         [("a.c", ".", "b", "abc", True),
                          ("a@c", "@", "at", "aatc", False)]
                         )
def test_replace_warning_single_character(self, input_array,
                                          single_char, replace_char,
                                          expect_array, warn):
    """Check single-character ``str.replace`` without an explicit ``regex``.

    When ``warn`` is true the call must emit a FutureWarning (stack level
    is not checked); otherwise the call is made with no warning assertion.
    In both cases the replaced Series must equal ``expect_array``.
    """
    # GH: 24804
    subject = Series([input_array])

    def do_replace():
        # Same call in both branches; only the warning check differs.
        return subject.str.replace(single_char, replace_char)

    if not warn:
        result = do_replace()
    else:
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = do_replace()

    tm.assert_series_equal(result, Series([expect_array]))
3224
+
3225
+ @pytest .mark .skipif (compat .PY2 , reason = 'not in python2' )
3165
3226
def test_casefold (self ):
3166
3227
# GH25405
3167
3228
expected = Series (['ss' , NA , 'case' , 'ssd' ])
0 commit comments