From c74b60e776df7a7c31264ae0a9cd9c675228f460 Mon Sep 17 00:00:00 2001 From: shubham11941140 <63910248+shubham11941140@users.noreply.github.com> Date: Sun, 17 Oct 2021 16:34:05 +0530 Subject: [PATCH] Backport PR #43199: BUG: convert_dtypes incorrectly converts byte strings to strings in 1.3+ --- doc/source/whatsnew/v1.3.4.rst | 1 + pandas/core/dtypes/cast.py | 2 +- pandas/tests/series/methods/test_convert_dtypes.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst index c99f9e28e7fdf..22e15ed9f5d71 100644 --- a/doc/source/whatsnew/v1.3.4.rst +++ b/doc/source/whatsnew/v1.3.4.rst @@ -14,6 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.convert_dtypes` incorrectly converting byte strings to strings (:issue:`43183`) - Fixed regression in :meth:`.GroupBy.agg` where it was failing silently with mixed data types along ``axis=1`` and :class:`MultiIndex` (:issue:`43209`) - Fixed regression in :func:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`) - Fixed regression in :meth:`DataFrame.corr` raising ``ValueError`` with ``method="spearman"`` on 32-bit platforms (:issue:`43588`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 66f835212212b..49f31ac82ff8d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1419,7 +1419,7 @@ def convert_dtypes( inferred_dtype = input_array.dtype if is_string_dtype(inferred_dtype): - if not convert_string: + if not convert_string or inferred_dtype == "bytes": return input_array.dtype else: return pandas_dtype("string") diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 81203b944fa92..1e88ddf3cd943 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -226,3 
+226,12 @@ def test_convert_bool_dtype(self): # GH32287 df = pd.DataFrame({"A": pd.array([True])}) tm.assert_frame_equal(df, df.convert_dtypes()) + + def test_convert_byte_string_dtype(self): + # GH-43183 + byte_str = b"binary-string" + + df = pd.DataFrame(data={"A": byte_str}, index=[0]) + result = df.convert_dtypes() + expected = df + tm.assert_frame_equal(result, expected)