From d21e4f0f69e0f0c9e3ef87c6421485bbb8ab074f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 12 Aug 2020 20:48:56 -0500 Subject: [PATCH 1/7] REGR: Don't ignore compiled patterns in replace --- doc/source/whatsnew/v1.1.1.rst | 1 + pandas/core/internals/managers.py | 2 +- pandas/tests/frame/methods/test_replace.py | 8 ++++++++ pandas/tests/series/methods/test_replace.py | 10 ++++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index cdc244ca193b4..d170e7c06f07c 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -23,6 +23,7 @@ Fixed regressions - Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`) - Fixed regression in :meth:`DataFrame.apply` where functions that altered the input in-place only operated on a single row (:issue:`35462`) - Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`) +- Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` where compiled regular expressions would be ignored during replacement (:issue:`35680`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 371b721f08b27..5101acea7d713 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1949,7 +1949,7 @@ def _check_comparison_types( else: op = np.vectorize( lambda x: bool(re.search(b, x)) - if isinstance(x, str) and isinstance(b, str) + if isinstance(x, str) and isinstance(b, (str, re.Pattern)) else False ) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index a3f056dbf9648..8603bff0587b6 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1573,3 +1573,11 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) tm.assert_frame_equal(result, expected) + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + df = pd.DataFrame(["a", "b", "c"]) + regex = re.compile("^a$") + result = df.replace({regex: "z"}, regex=True) + expected = pd.DataFrame(["z", "b", "c"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 11802c59a29da..f78a28c66e946 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -415,3 +417,11 @@ def test_replace_extension_other(self): # https://github.com/pandas-dev/pandas/issues/34530 ser = pd.Series(pd.array([1, 2, 3], dtype="Int64")) ser.replace("", "") # no exception + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + s = pd.Series(["a", "b", "c"]) + regex = re.compile("^a$") + result = s.replace({regex: "z"}, regex=True) + expected = pd.Series(["z", "b", "c"]) + tm.assert_series_equal(result, expected) From 5006980843a382cd27291e75a8173ec70c00fda6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 12 Aug 2020 21:50:08 -0500 Subject: [PATCH 2/7] Type --- pandas/core/internals/managers.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5101acea7d713..3d6bf751dd82c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2,7 +2,17 @@ import itertools import operator import re -from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import ( + DefaultDict, + Dict, + List, + Optional, + Pattern, + Sequence, + Tuple, + TypeVar, + Union, +) import warnings import numpy as np @@ -1907,7 +1917,10 @@ def _merge_blocks( def _compare_or_regex_search( - a: ArrayLike, b: Scalar, regex: bool = False, mask: Optional[ArrayLike] = None + a: ArrayLike, + b: Union[Scalar, Pattern], + regex: bool = False, + mask: Optional[ArrayLike] = None, ) -> Union[ArrayLike, bool]: """ Compare two array_like inputs of the same shape or two scalar values From 40bb67c8bc4bedcad4c9a04a6eba0d96377e0ac2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 12 Aug 2020 22:11:16 -0500 Subject: [PATCH 3/7] Revert "Type" This reverts commit 5006980843a382cd27291e75a8173ec70c00fda6. --- pandas/core/internals/managers.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3d6bf751dd82c..5101acea7d713 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2,17 +2,7 @@ import itertools import operator import re -from typing import ( - DefaultDict, - Dict, - List, - Optional, - Pattern, - Sequence, - Tuple, - TypeVar, - Union, -) +from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union import warnings import numpy as np @@ -1917,10 +1907,7 @@ def _merge_blocks( def _compare_or_regex_search( - a: ArrayLike, - b: Union[Scalar, Pattern], - regex: bool = False, - mask: Optional[ArrayLike] = None, + a: ArrayLike, b: Scalar, regex: bool = False, mask: Optional[ArrayLike] = None ) -> Union[ArrayLike, bool]: """ Compare two array_like inputs of the same shape or two scalar values From a05f3b224325dc10e191bcb489ff0c8765e75f9a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Aug 2020 08:45:39 -0500 Subject: [PATCH 4/7] Type --- pandas/core/internals/managers.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5101acea7d713..bd03055be8ef2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2,7 +2,17 @@ import itertools import operator import re -from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import ( + DefaultDict, + Dict, + List, + Optional, + Pattern, + Sequence, + Tuple, + TypeVar, + Union, +) import warnings import numpy as np @@ -1949,7 +1959,7 @@ def _check_comparison_types( else: op = np.vectorize( lambda x: bool(re.search(b, x)) - if isinstance(x, str) and isinstance(b, (str, re.Pattern)) + if isinstance(x, str) and isinstance(b, (str, Pattern)) else False ) From 01e90cb59924673a4d345ec086dfa4fc16e31943 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Aug 2020 13:50:12 -0500 Subject: [PATCH 5/7] Type and docstring --- pandas/core/internals/managers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bd03055be8ef2..5a215c4cd5fa3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1917,7 +1917,10 @@ def _merge_blocks( def _compare_or_regex_search( - a: ArrayLike, b: Scalar, regex: bool = False, mask: Optional[ArrayLike] = None + a: ArrayLike, + b: Union[Scalar, Pattern], + regex: bool = False, + mask: Optional[ArrayLike] = None, ) -> Union[ArrayLike, bool]: """ Compare two array_like inputs of the same shape or two scalar values @@ -1928,7 +1931,7 @@ def _compare_or_regex_search( Parameters ---------- a : array_like - b : scalar + b : scalar or regex pattern regex : bool, default False mask : array_like or None (default) @@ -1938,7 +1941,7 @@ def _compare_or_regex_search( """ def _check_comparison_types( - result: Union[ArrayLike, bool], a: ArrayLike, b: Scalar, + result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern], ): """ Raises an error if the two arrays (a,b) cannot be compared. From 264d17d309093e25f6e8bfc55add3a2c3965495f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Aug 2020 19:22:41 -0500 Subject: [PATCH 6/7] Fix --- pandas/core/internals/managers.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5a215c4cd5fa3..3a014ae58fcef 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1917,21 +1917,21 @@ def _merge_blocks( def _compare_or_regex_search( - a: ArrayLike, - b: Union[Scalar, Pattern], + a: Union[ArrayLike, Scalar], + b: Union[ArrayLike, Scalar, Pattern], regex: bool = False, mask: Optional[ArrayLike] = None, ) -> Union[ArrayLike, bool]: """ - Compare two array_like inputs of the same shape or two scalar values + Compare two array-like or scalar inputs. Calls operator.eq or re.search, depending on regex argument. If regex is True, perform an element-wise regex matching. Parameters ---------- - a : array_like - b : scalar or regex pattern + a : array-like or scalar + b : array-like, scalar, or regex pattern regex : bool, default False mask : array_like or None (default) @@ -1941,7 +1941,9 @@ def _compare_or_regex_search( """ def _check_comparison_types( - result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern], + result: Union[ArrayLike, bool], + a: Union[ArrayLike, Scalar], + b: Union[ArrayLike, Scalar, Pattern], ): """ Raises an error if the two arrays (a,b) cannot be compared. From ae6f53f40677fde41da0a66b96232e027589d8fd Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 16 Aug 2020 08:16:52 -0500 Subject: [PATCH 7/7] Revert "Fix" This reverts commit 264d17d309093e25f6e8bfc55add3a2c3965495f. --- pandas/core/internals/managers.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3a014ae58fcef..5a215c4cd5fa3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1917,21 +1917,21 @@ def _merge_blocks( def _compare_or_regex_search( - a: Union[ArrayLike, Scalar], - b: Union[ArrayLike, Scalar, Pattern], + a: ArrayLike, + b: Union[Scalar, Pattern], regex: bool = False, mask: Optional[ArrayLike] = None, ) -> Union[ArrayLike, bool]: """ - Compare two array-like or scalar inputs. + Compare two array_like inputs of the same shape or two scalar values Calls operator.eq or re.search, depending on regex argument. If regex is True, perform an element-wise regex matching. Parameters ---------- - a : array-like or scalar - b : array-like, scalar, or regex pattern + a : array_like + b : scalar or regex pattern regex : bool, default False mask : array_like or None (default) @@ -1941,9 +1941,7 @@ def _compare_or_regex_search( """ def _check_comparison_types( - result: Union[ArrayLike, bool], - a: Union[ArrayLike, Scalar], - b: Union[ArrayLike, Scalar, Pattern], + result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern], ): """ Raises an error if the two arrays (a,b) cannot be compared.