From 802a1dcdd1a4261bc0d4be62bbeeaae18e7286b5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 16 Jun 2021 03:46:04 +0200 Subject: [PATCH] Backport PR #42030: Regression raising Error when having dup cols with single dtype for read csv --- pandas/_libs/parsers.pyx | 2 ++ pandas/io/parsers/python_parser.py | 2 ++ pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 9 +++++++++ 3 files changed, 13 insertions(+) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 7d7074988e5f0..e5e61e409c320 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -108,6 +108,7 @@ from pandas.core.dtypes.common import ( is_object_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.inference import is_dict_like cdef: float64_t INF = np.inf @@ -689,6 +690,7 @@ cdef class TextReader: count = counts.get(name, 0) if ( self.dtype is not None + and is_dict_like(self.dtype) and self.dtype.get(old_name) is not None and self.dtype.get(name) is None ): diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 670868c6f4261..af25a4166d5a6 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -25,6 +25,7 @@ ) from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.inference import is_dict_like from pandas.io.parsers.base_parser import ( ParserBase, @@ -424,6 +425,7 @@ def _infer_columns(self): cur_count = counts[col] if ( self.dtype is not None + and is_dict_like(self.dtype) and self.dtype.get(old_col) is not None and self.dtype.get(col) is None ): diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 59fd3de60e0bf..bc20f1d1eea5f 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -248,3 +248,12 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): result = parser.read_csv(StringIO(data), dtype={"a": str, **dtypes}) expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) tm.assert_frame_equal(result, expected) + + +def test_dtype_mangle_dup_cols_single_dtype(all_parsers): + # GH#42022 + parser = all_parsers + data = """a,a\n1,1""" + result = parser.read_csv(StringIO(data), dtype=str) + expected = DataFrame({"a": ["1"], "a.1": ["1"]}) + tm.assert_frame_equal(result, expected)