Skip to content

Commit 8153feb

Browse files
committed
Add warning if both converter and dtype specified
1 parent ab7e1e8 commit 8153feb

File tree

3 files changed

+41
-30
lines changed

3 files changed

+41
-30
lines changed

pandas/io/parsers.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1314,7 +1314,12 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13141314

13151315
if conv_f is not None:
13161316
# conv_f applied to data before inference
1317-
# dtype isn't used if a converted specified
1317+
if cast_type is not None:
1318+
warnings.warn(("Both a converter and dtype were specified "
1319+
"for column {0} - only the converter will "
1320+
"be used").format(c), ParserWarning,
1321+
stacklevel=7)
1322+
13181323
try:
13191324
values = lib.map_infer(values, conv_f)
13201325
except ValueError:

pandas/io/tests/parser/dtypes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas import DataFrame, Series, Index, MultiIndex, Categorical
1313
from pandas.compat import StringIO
1414
from pandas.types.dtypes import CategoricalDtype
15+
from pandas.io.common import ParserWarning
1516

1617

1718
class DtypeTests(object):
@@ -219,8 +220,9 @@ def test_dtype_with_converter(self):
219220
data = """a,b
220221
1.1,2.2
221222
1.2,2.3"""
222-
result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
223-
converters={'a': lambda x: str(x)})
224223
# dtype spec ignored if converter specified
224+
with tm.assert_produces_warning(ParserWarning):
225+
result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
226+
converters={'a': lambda x: str(x)})
225227
expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
226228
tm.assert_frame_equal(result, expected)

pandas/parser.pyx

+31-27
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,
1313
PyUnicode_Check, PyUnicode_AsUTF8String,
1414
PyErr_Occurred, PyErr_Fetch)
1515
from cpython.ref cimport PyObject, Py_XDECREF
16-
from io.common import CParserError, DtypeWarning, EmptyDataError
16+
from io.common import CParserError, DtypeWarning, EmptyDataError, ParserWarning
1717

1818

1919
cdef extern from "Python.h":
@@ -982,7 +982,7 @@ cdef class TextReader:
982982
Py_ssize_t i, nused
983983
kh_str_t *na_hashset = NULL
984984
int start, end
985-
object name, na_flist
985+
object name, na_flist, col_dtype = None
986986
bint na_filter = 0
987987
Py_ssize_t num_cols
988988

@@ -1038,14 +1038,33 @@ cdef class TextReader:
10381038
else:
10391039
na_filter = 0
10401040

1041+
col_dtype = None
1042+
if self.dtype is not None:
1043+
if isinstance(self.dtype, dict):
1044+
if name in self.dtype:
1045+
col_dtype = self.dtype[name]
1046+
elif i in self.dtype:
1047+
col_dtype = self.dtype[i]
1048+
else:
1049+
if self.dtype.names:
1050+
# structured array
1051+
col_dtype = np.dtype(self.dtype.descr[i][1])
1052+
else:
1053+
col_dtype = self.dtype
1054+
10411055
if conv:
1056+
if col_dtype is not None:
1057+
warnings.warn(("Both a converter and dtype were specified "
1058+
"for column {0} - only the converter will "
1059+
"be used").format(name), ParserWarning,
1060+
stacklevel=5)
10421061
results[i] = _apply_converter(conv, self.parser, i, start, end,
10431062
self.c_encoding)
10441063
continue
10451064

10461065
# Should return as the desired dtype (inferred or specified)
10471066
col_res, na_count = self._convert_tokens(
1048-
i, start, end, name, na_filter, na_hashset, na_flist)
1067+
i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
10491068

10501069
if na_filter:
10511070
self._free_na_set(na_hashset)
@@ -1070,32 +1089,17 @@ cdef class TextReader:
10701089
cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,
10711090
object name, bint na_filter,
10721091
kh_str_t *na_hashset,
1073-
object na_flist):
1074-
cdef:
1075-
object col_dtype = None
1076-
1077-
if self.dtype is not None:
1078-
if isinstance(self.dtype, dict):
1079-
if name in self.dtype:
1080-
col_dtype = self.dtype[name]
1081-
elif i in self.dtype:
1082-
col_dtype = self.dtype[i]
1083-
else:
1084-
if self.dtype.names:
1085-
# structured array
1086-
col_dtype = np.dtype(self.dtype.descr[i][1])
1087-
else:
1088-
col_dtype = self.dtype
1092+
object na_flist, object col_dtype):
10891093

1090-
if col_dtype is not None:
1091-
col_res, na_count = self._convert_with_dtype(
1092-
col_dtype, i, start, end, na_filter,
1093-
1, na_hashset, na_flist)
1094+
if col_dtype is not None:
1095+
col_res, na_count = self._convert_with_dtype(
1096+
col_dtype, i, start, end, na_filter,
1097+
1, na_hashset, na_flist)
10941098

1095-
# Fallback on the parse (e.g. we requested int dtype,
1096-
# but its actually a float).
1097-
if col_res is not None:
1098-
return col_res, na_count
1099+
# Fallback on the parse (e.g. we requested int dtype,
1100+
# but it's actually a float).
1101+
if col_res is not None:
1102+
return col_res, na_count
10991103

11001104
if i in self.noconvert:
11011105
return self._string_convert(i, start, end, na_filter, na_hashset)

0 commit comments

Comments
 (0)