From b035dd1e0dcc891a00a4d99b67424aea7a05b890 Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Sun, 21 Mar 2021 18:09:42 +0300 Subject: [PATCH 1/4] BUG: Fix behavior of replace_list with mixed types. --- pandas/core/dtypes/cast.py | 2 +- pandas/tests/frame/methods/test_replace.py | 14 ++++++++++++++ pandas/tests/series/methods/test_replace.py | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7a2175a364a8a..61fd928ba1299 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2184,7 +2184,7 @@ def can_hold_element(dtype: np.dtype, element: Any) -> bool: if dtype.kind in ["i", "u"]: if tipo is not None: - return tipo.kind in ["i", "u"] and dtype.itemsize >= tipo.itemsize + return tipo.kind in ["f", "i", "u"] and dtype.itemsize >= tipo.itemsize # We have not inferred an integer from the dtype # check if we have a builtin int or a float equal to an int diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 56750da7c90b2..33006762cc5ed 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1659,3 +1659,17 @@ def test_replace_bytes(self, frame_or_series): expected = obj.copy() obj = obj.replace({None: np.nan}) tm.assert_equal(obj, expected) + + @pytest.mark.parametrize( + "s, to_replace, value, expected", + [ + (DataFrame([1]), np.array([1.0]), [0], DataFrame([0])), + (DataFrame([1]), np.array([1]), [0], DataFrame([0])), + (DataFrame([1.0]), np.array([1.0]), [0], DataFrame([0.0])), + (DataFrame([1.0]), np.array([1]), [0], DataFrame([0.0])), + ], + ) + def test_replace_list_with_mixed_type(self, s, to_replace, value, expected): + # GH#40371 + result = s.replace(to_replace, value) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 69dd7d083119f..ebaed1031cec1 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -460,3 +460,17 @@ def test_str_replace_regex_default_raises_warning(self, pattern): with tm.assert_produces_warning(FutureWarning) as w: s.str.replace(pattern, "") assert re.match(msg, str(w[0].message)) + + @pytest.mark.parametrize( + "s, to_replace, value, expected", + [ + (pd.Series([1]), np.array([1.0]), [0], pd.Series([0])), + (pd.Series([1]), np.array([1]), [0], pd.Series([0])), + (pd.Series([1.0]), np.array([1.0]), [0], pd.Series([0.0])), + (pd.Series([1.0]), np.array([1]), [0], pd.Series([0.0])), + ], + ) + def test_replace_list_with_mixed_type(self, s, to_replace, value, expected): + # GH#40371 + result = s.replace(to_replace, value) + tm.assert_series_equal(result, expected) From fddddf63d7c032e71fac1e502284a34cd0f019ba Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Sun, 21 Mar 2021 22:19:36 +0300 Subject: [PATCH 2/4] BUG: Revert changes in cast.py and refactor tests --- pandas/core/dtypes/cast.py | 2 +- pandas/tests/frame/methods/test_replace.py | 21 ++++++++++++++------- pandas/tests/series/methods/test_replace.py | 14 -------------- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 61fd928ba1299..7a2175a364a8a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2184,7 +2184,7 @@ def can_hold_element(dtype: np.dtype, element: Any) -> bool: if dtype.kind in ["i", "u"]: if tipo is not None: - return tipo.kind in ["f", "i", "u"] and dtype.itemsize >= tipo.itemsize + return tipo.kind in ["i", "u"] and dtype.itemsize >= tipo.itemsize # We have not inferred an integer from the dtype # check if we have a builtin int or a float equal to an int diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 33006762cc5ed..5c87618ade2a4 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1546,7 +1546,6 @@ def test_replace_ea_ignore_float(self, frame_or_series, value): obj = DataFrame({"Per": [value] * 3}) if frame_or_series is not DataFrame: obj = obj["Per"] - expected = obj.copy() result = obj.replace(1.0, 0.0) tm.assert_equal(expected, result) @@ -1663,13 +1662,21 @@ def test_replace_bytes(self, frame_or_series): @pytest.mark.parametrize( "s, to_replace, value, expected", [ - (DataFrame([1]), np.array([1.0]), [0], DataFrame([0])), - (DataFrame([1]), np.array([1]), [0], DataFrame([0])), - (DataFrame([1.0]), np.array([1.0]), [0], DataFrame([0.0])), - (DataFrame([1.0]), np.array([1]), [0], DataFrame([0.0])), + ([1], np.array([1.0]), [0], [0]), + ([1], np.array([1]), [0], [0]), + ([1.0], np.array([1.0]), [0], [0.0]), + ([1.0], np.array([1]), [0], [0.0]), + ([1], [1.0], [0], [0]), + ([1], [1], [0], [0]), + ([1.0], [1.0], [0], [0.0]), + ([1.0], [1], [0], [0.0]), ], ) - def test_replace_list_with_mixed_type(self, s, to_replace, value, expected): + def test_replace_list_with_mixed_type( + self, s, to_replace, value, expected, frame_or_series + ): # GH#40371 + s = frame_or_series(s) + expected = frame_or_series(expected) result = s.replace(to_replace, value) - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index ebaed1031cec1..69dd7d083119f 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -460,17 +460,3 @@ def test_str_replace_regex_default_raises_warning(self, pattern): with tm.assert_produces_warning(FutureWarning) as w: s.str.replace(pattern, "") assert re.match(msg, str(w[0].message)) - - @pytest.mark.parametrize( - "s, to_replace, value, expected", - [ - (pd.Series([1]), np.array([1.0]), [0], pd.Series([0])), - (pd.Series([1]), np.array([1]), [0], pd.Series([0])), - (pd.Series([1.0]), np.array([1.0]), [0], pd.Series([0.0])), - (pd.Series([1.0]), np.array([1]), [0], pd.Series([0.0])), - ], - ) - def test_replace_list_with_mixed_type(self, s, to_replace, value, expected): - # GH#40371 - result = s.replace(to_replace, value) - tm.assert_series_equal(result, expected) From 71dd789603e1e9f88870b556a6fe66d3a43e6eb1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 24 May 2021 14:50:22 +0100 Subject: [PATCH 3/4] add patch, release note and rename test variables --- doc/source/whatsnew/v1.2.5.rst | 2 +- pandas/core/internals/blocks.py | 14 ++++++++++++-- pandas/tests/frame/methods/test_replace.py | 8 ++++---- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.5.rst b/doc/source/whatsnew/v1.2.5.rst index 60e146b2212eb..e936519383520 100644 --- a/doc/source/whatsnew/v1.2.5.rst +++ b/doc/source/whatsnew/v1.2.5.rst @@ -16,7 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :func:`concat` between two :class:`DataFrames` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`) - Regression in :func:`read_csv` when using ``memory_map=True`` with an non-UTF8 encoding (:issue:`40986`) -- +- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace is a NumPy float array (:issue:`40371`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4f1b16e747394..73e439bc46027 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -6,6 +6,8 @@ TYPE_CHECKING, Any, Callable, + Iterable, + Sequence, cast, ) import warnings @@ -763,8 +765,8 @@ def _replace_regex( @final def _replace_list( self, - src_list: list[Any], - dest_list: list[Any], + src_list: Iterable[Any], + dest_list: Sequence[Any], inplace: bool = False, regex: bool = False, ) -> list[Block]: @@ -779,6 +781,14 @@ def _replace_list( # so un-tile here return self.replace(src_list, dest_list[0], inplace, regex) + # https://github.com/pandas-dev/pandas/issues/40371 + # the following pairs check code caused a regression so we catch that case here + # until the issue is fixed properly in can_hold_element + + # error: "Iterable[Any]" has no attribute "tolist" + if hasattr(src_list, "tolist"): + src_list = src_list.tolist() # type: ignore[attr-defined] + # Exclude anything that we know we won't contain pairs = [ (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 28a43bb6338e9..46d8d27559b33 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1428,7 +1428,7 @@ def test_replace_bytes(self, frame_or_series): tm.assert_equal(obj, expected) @pytest.mark.parametrize( - "s, to_replace, value, expected", + "data, to_replace, value, expected", [ ([1], np.array([1.0]), [0], [0]), ([1], np.array([1]), [0], [0]), @@ -1441,12 +1441,12 @@ def test_replace_bytes(self, frame_or_series): ], ) def test_replace_list_with_mixed_type( - self, s, to_replace, value, expected, frame_or_series + self, data, to_replace, value, expected, frame_or_series ): # GH#40371 - s = frame_or_series(s) + obj = frame_or_series(data) expected = frame_or_series(expected) - result = s.replace(to_replace, value) + result = obj.replace(to_replace, value) tm.assert_equal(result, expected) From 18d2b79e384479b2f8cb03d293f829d09a3ee480 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 24 May 2021 17:56:53 +0100 Subject: [PATCH 4/4] parametrize --- pandas/tests/frame/methods/test_replace.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 46d8d27559b33..46a5a47e091dd 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1314,6 +1314,7 @@ def test_replace_ea_ignore_float(self, frame_or_series, value): obj = DataFrame({"Per": [value] * 3}) if frame_or_series is not DataFrame: obj = obj["Per"] + expected = obj.copy() result = obj.replace(1.0, 0.0) tm.assert_equal(expected, result) @@ -1430,23 +1431,20 @@ def test_replace_bytes(self, frame_or_series): @pytest.mark.parametrize( "data, to_replace, value, expected", [ - ([1], np.array([1.0]), [0], [0]), - ([1], np.array([1]), [0], [0]), - ([1.0], np.array([1.0]), [0], [0.0]), - ([1.0], np.array([1]), [0], [0.0]), ([1], [1.0], [0], [0]), ([1], [1], [0], [0]), ([1.0], [1.0], [0], [0.0]), ([1.0], [1], [0], [0.0]), ], ) + @pytest.mark.parametrize("box", [list, tuple, np.array]) def test_replace_list_with_mixed_type( - self, data, to_replace, value, expected, frame_or_series + self, data, to_replace, value, expected, box, frame_or_series ): # GH#40371 obj = frame_or_series(data) expected = frame_or_series(expected) - result = obj.replace(to_replace, value) + result = obj.replace(box(to_replace), value) tm.assert_equal(result, expected)