115
115
dtype : Type name or dict of column -> type, default None
116
116
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
117
117
Use `str` or `object` to preserve and not interpret dtype.
118
- If converters are specified, they will be applied AFTER
119
- dtype conversion.
118
+ If converters are specified, they will be applied INSTEAD
119
+ of dtype conversion.
120
+
121
+ .. versionadded:: 0.20.0 support for the Python parser.
122
+
120
123
%s
121
124
converters : dict, default None
122
125
Dict of functions for converting values in certain columns. Keys can either
@@ -1292,20 +1295,6 @@ def _agg_index(self, index, try_parse_dates=True):
1292
1295
1293
1296
return index
1294
1297
1295
- def _apply_converter (self , values , conv_f , na_values , col_na_values ,
1296
- col_na_fvalues ):
1297
- """ apply converter function to values, respecting NAs """
1298
- try :
1299
- values = lib .map_infer (values , conv_f )
1300
- except ValueError :
1301
- mask = lib .ismember (values , na_values ).view (np .uint8 )
1302
- values = lib .map_infer_mask (values , conv_f , mask )
1303
-
1304
- cvals , na_count = self ._infer_types (
1305
- values , set (col_na_values ) | col_na_fvalues ,
1306
- try_num_bool = False )
1307
- return cvals , na_count
1308
-
1309
1298
def _convert_to_ndarrays (self , dct , na_values , na_fvalues , verbose = False ,
1310
1299
converters = None , dtypes = None ):
1311
1300
result = {}
@@ -1323,45 +1312,58 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
1323
1312
else :
1324
1313
col_na_values , col_na_fvalues = set (), set ()
1325
1314
1326
- if conv_f is not None and cast_type is None :
1327
- # if type is not specified, apply the conversion first, without
1328
- # inference
1329
- cvals , na_count = self ._apply_converter (
1330
- values , conv_f , na_values ,
1331
- col_na_values , col_na_fvalues )
1315
+ if conv_f is not None :
1316
+ # conv_f applied to data before inference
1317
+ # dtype isn't used if a converter is specified
1318
+ try :
1319
+ values = lib .map_infer (values , conv_f )
1320
+ except ValueError :
1321
+ mask = lib .ismember (values , na_values ).view (np .uint8 )
1322
+ values = lib .map_infer_mask (values , conv_f , mask )
1323
+
1324
+ cvals , na_count = self ._infer_types (
1325
+ values , set (col_na_values ) | col_na_fvalues ,
1326
+ try_num_bool = False )
1332
1327
else :
1333
- try_num_bool = True
1334
- if cast_type and is_object_dtype (cast_type ):
1335
- # skip inference if specified dtype is object
1336
- try_num_bool = False
1328
+ # skip inference if specified dtype is object
1329
+ try_num_bool = not (cast_type and is_object_dtype (cast_type ))
1337
1330
1338
1331
# general type inference and conversion
1339
1332
cvals , na_count = self ._infer_types (
1340
1333
values , set (col_na_values ) | col_na_fvalues ,
1341
1334
try_num_bool )
1342
1335
1336
+ # type specified in dtype param
1337
+ if cast_type and not is_dtype_equal (cvals , cast_type ):
1338
+ cvals = self ._cast_types (cvals , cast_type , c )
1339
+
1343
1340
if issubclass (cvals .dtype .type , np .integer ) and self .compact_ints :
1344
1341
cvals = lib .downcast_int64 (
1345
1342
cvals , _parser .na_values ,
1346
1343
self .use_unsigned )
1347
1344
1348
- if cast_type and not is_dtype_equal (cvals , cast_type ):
1349
- # type specificed in dtype param
1350
-
1351
- cvals = self ._cast_types (cvals , cast_type , c )
1352
- # for consistency with c-parser, if a converter and dtype are
1353
- # specified, apply the converter last
1354
- if conv_f is not None :
1355
- values , na_count = self ._apply_converter (
1356
- values , conv_f , na_values ,
1357
- col_na_values , col_na_fvalues )
1358
-
1359
1345
result [c ] = cvals
1360
1346
if verbose and na_count :
1361
1347
print ('Filled %d NA values in column %s' % (na_count , str (c )))
1362
1348
return result
1363
1349
1364
1350
def _infer_types (self , values , na_values , try_num_bool = True ):
1351
+ """
1352
+ Infer types of values, possibly casting
1353
+
1354
+ Parameters
1355
+ ----------
1356
+ values : ndarray
1357
+ na_values : set
1358
+ try_num_bool : bool, default True
1359
+ try to cast values to numeric (first preference) or boolean
1360
+
1361
+ Returns
1362
+ -------
1363
+ converted : ndarray
1364
+ na_count : int
1365
+ """
1366
+
1365
1367
na_count = 0
1366
1368
if issubclass (values .dtype .type , (np .number , np .bool_ )):
1367
1369
mask = lib .ismember (values , na_values )
@@ -1393,7 +1395,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
1393
1395
return result , na_count
1394
1396
1395
1397
def _cast_types (self , values , cast_type , column ):
1396
- """ cast column to type specified in dtypes= param """
1398
+ """
1399
+ Cast values to specified type
1400
+
1401
+ Parameters
1402
+ ----------
1403
+ values : ndarray
1404
+ cast_type : string or np.dtype
1405
+ dtype to cast values to
1406
+ column : string
1407
+ column name - used only for error reporting
1408
+
1409
+ Returns
1410
+ -------
1411
+ converted : ndarray
1412
+ """
1413
+
1397
1414
if is_categorical_dtype (cast_type ):
1398
1415
# XXX this is for consistency with
1399
1416
# c-parser which parses all categories
0 commit comments