From 960441ac44078dc05c477083662fc97b9759a78b Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sat, 24 Sep 2016 13:03:41 -0500
Subject: [PATCH 01/23] API: add dtype= option to python parser

---
 pandas/io/parsers.py                    | 126 ++++++++++----
 pandas/io/tests/parser/c_parser_only.py | 193 +--------------------
 pandas/io/tests/parser/dtypes.py        | 217 ++++++++++++++++++++++++
 pandas/io/tests/parser/test_parsers.py  |   4 +-
 4 files changed, 315 insertions(+), 225 deletions(-)
 create mode 100644 pandas/io/tests/parser/dtypes.py

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 3fe5e5e826ebd..37f6a02906a63 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -17,10 +17,14 @@
                            zip, string_types, map, u)
 from pandas.types.common import (is_integer, _ensure_object,
                                  is_list_like, is_integer_dtype,
-                                 is_float,
-                                 is_scalar)
+                                 is_float, is_dtype_equal,
+                                 is_object_dtype,
+                                 is_scalar, is_categorical_dtype)
+from pandas.types.missing import isnull
+from pandas.types.cast import _astype_nansafe
 from pandas.core.index import Index, MultiIndex, RangeIndex
 from pandas.core.frame import DataFrame
+from pandas.core.categorical import Categorical
 from pandas.core.common import AbstractMethodError
 from pandas.core.config import get_option
 from pandas.io.date_converters import generic_parser
@@ -110,8 +114,9 @@
     are duplicate names in the columns.
 dtype : Type name or dict of column -> type, default None
     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
-    (Unsupported with engine='python'). Use `str` or `object` to preserve and
-    not interpret dtype.
+    Use `str` or `object` to preserve and not interpret dtype.
+    If converters are specified, they will be applied AFTER
+    dtype conversion.
 %s
 converters : dict, default None
     Dict of functions for converting values in certain columns. Keys can either
@@ -420,6 +425,7 @@ def _read(filepath_or_buffer, kwds):
     'true_values': None,
     'false_values': None,
     'converters': None,
+    'dtype': None,
     'skipfooter': 0,
 
     'keep_default_na': True,
@@ -460,7 +466,6 @@ def _read(filepath_or_buffer, kwds):
     'buffer_lines': None,
     'error_bad_lines': True,
     'warn_bad_lines': True,
-    'dtype': None,
     'float_precision': None
 }
 
@@ -475,7 +480,6 @@ def _read(filepath_or_buffer, kwds):
     'buffer_lines',
     'error_bad_lines',
     'warn_bad_lines',
-    'dtype',
     'float_precision',
 ])
 _deprecated_args = set([
@@ -833,9 +837,6 @@ def _clean_options(self, options, engine):
                            " ignored as it is not supported by the 'python'"
                            " engine.").format(reason=fallback_reason,
                                               option=arg)
-                    if arg == 'dtype':
-                        msg += " (Note the 'converters' option provides"\
-                               " similar functionality.)"
                     raise ValueError(msg)
                 del result[arg]
 
@@ -1284,18 +1285,37 @@ def _agg_index(self, index, try_parse_dates=True):
                     col_na_values, col_na_fvalues = _get_na_values(
                         col_name, self.na_values, self.na_fvalues)
 
-            arr, _ = self._convert_types(arr, col_na_values | col_na_fvalues)
+            arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
             arrays.append(arr)
 
         index = MultiIndex.from_arrays(arrays, names=self.index_names)
 
         return index
 
+    def _apply_converter(self, values, conv_f, na_values, col_na_values,
+                         col_na_fvalues):
+        """ apply converter function to values, respecting NAs """
+        try:
+            values = lib.map_infer(values, conv_f)
+        except ValueError:
+            mask = lib.ismember(values, na_values).view(np.uint8)
+            values = lib.map_infer_mask(values, conv_f, mask)
+
+        cvals, na_count = self._infer_types(
+            values, set(col_na_values) | col_na_fvalues,
+            try_numeric=False)
+        return cvals, na_count
+
     def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
-                             converters=None):
+                             converters=None, dtypes=None):
         result = {}
         for c, values in compat.iteritems(dct):
             conv_f = None if converters is None else converters.get(c, None)
+            if isinstance(dtypes, dict):
+                cast_type = dtypes.get(c, None)
+            else:
+                # single dtype or None
+                cast_type = dtypes
 
             if self.na_filter:
                 col_na_values, col_na_fvalues = _get_na_values(
@@ -1303,29 +1323,40 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
             else:
                 col_na_values, col_na_fvalues = set(), set()
 
-            coerce_type = True
-            if conv_f is not None:
-                try:
-                    values = lib.map_infer(values, conv_f)
-                except ValueError:
-                    mask = lib.ismember(values, na_values).view(np.uint8)
-                    values = lib.map_infer_mask(values, conv_f, mask)
-                coerce_type = False
-
-            cvals, na_count = self._convert_types(
-                values, set(col_na_values) | col_na_fvalues, coerce_type)
+            if conv_f is not None and cast_type is None:
+                # if type is not specified, apply the conversion first, without
+                # inference
+                cvals, na_count = self._apply_converter(
+                    values, conv_f, na_values,
+                    col_na_values, col_na_fvalues)
+            else:
+                # general type inference and conversion
+                cvals, na_count = self._infer_types(
+                    values, set(col_na_values) | col_na_fvalues,
+                    try_numeric=True)
 
             if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
                 cvals = lib.downcast_int64(
                     cvals, _parser.na_values,
                     self.use_unsigned)
 
+            if cast_type and not is_dtype_equal(cvals, cast_type):
+                # type specificed in dtype param
+
+                cvals = self._cast_types(cvals, cast_type, c)
+                # for consistency with c-parser, if a converter and dtype are
+                # specified, apply the converter last
+                if conv_f is not None:
+                    values, na_count = self._apply_converter(
+                        values, conv_f, na_values,
+                        col_na_values, col_na_fvalues)
+
             result[c] = cvals
             if verbose and na_count:
                 print('Filled %d NA values in column %s' % (na_count, str(c)))
         return result
 
-    def _convert_types(self, values, na_values, try_num_bool=True):
+    def _infer_types(self, values, na_values, try_numeric=True):
         na_count = 0
         if issubclass(values.dtype.type, (np.number, np.bool_)):
             mask = lib.ismember(values, na_values)
@@ -1336,9 +1367,10 @@ def _convert_types(self, values, na_values, try_num_bool=True):
                 np.putmask(values, mask, np.nan)
             return values, na_count
 
-        if try_num_bool:
+        if try_numeric:
             try:
                 result = lib.maybe_convert_numeric(values, na_values, False)
+                na_count = isnull(result).sum()
             except Exception:
                 result = values
                 if values.dtype == np.object_:
@@ -1348,13 +1380,30 @@ def _convert_types(self, values, na_values, try_num_bool=True):
             if values.dtype == np.object_:
                 na_count = lib.sanitize_objects(values, na_values, False)
 
-        if result.dtype == np.object_ and try_num_bool:
+        if result.dtype == np.object_ and try_numeric:
             result = lib.maybe_convert_bool(values,
                                             true_values=self.true_values,
                                             false_values=self.false_values)
 
         return result, na_count
 
+    def _cast_types(self, values, cast_type, column):
+        """ cast column to type specified in dtypes= param """
+        if is_categorical_dtype(cast_type):
+            # XXX this is for consistency with
+            # c-parser which parses all categories
+            # as strings
+            if not is_object_dtype(values):
+                values = _astype_nansafe(values, str)
+            values = Categorical(values)
+        else:
+            try:
+                values = _astype_nansafe(values, cast_type, copy=True)
+            except ValueError:
+                raise ValueError("Unable to convert column %s to "
+                                 "type %s" % (column, cast_type))
+        return values
+
     def _do_date_conversions(self, names, data):
         # returns data, columns
         if self.parse_dates is not None:
@@ -1783,6 +1832,7 @@ def __init__(self, f, **kwds):
 
         self.verbose = kwds['verbose']
         self.converters = kwds['converters']
+        self.dtype = kwds['dtype']
 
         self.compact_ints = kwds['compact_ints']
         self.use_unsigned = kwds['use_unsigned']
@@ -1981,7 +2031,7 @@ def read(self, rows=None):
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
             index, columns, col_dict = _get_empty_meta(
-                names, self.index_col, self.index_names)
+                names, self.index_col, self.index_names, self.dtype)
             columns = self._maybe_make_multi_index_columns(
                 columns, self.col_names)
             return index, columns, col_dict
@@ -2032,15 +2082,25 @@ def get_chunk(self, size=None):
 
     def _convert_data(self, data):
         # apply converters
-        clean_conv = {}
-
-        for col, f in compat.iteritems(self.converters):
-            if isinstance(col, int) and col not in self.orig_names:
-                col = self.orig_names[col]
-            clean_conv[col] = f
+        def _clean_mapping(mapping):
+            "converts col numbers to names"
+            clean = {}
+            for col, v in compat.iteritems(mapping):
+                if isinstance(col, int) and col not in self.orig_names:
+                    col = self.orig_names[col]
+                clean[col] = v
+            return clean
+
+        clean_conv = _clean_mapping(self.converters)
+        if not isinstance(self.dtype, dict):
+            # handles single dtype applied to all columns
+            clean_dtypes = self.dtype
+        else:
+            clean_dtypes = _clean_mapping(self.dtype)
 
         return self._convert_to_ndarrays(data, self.na_values, self.na_fvalues,
-                                         self.verbose, clean_conv)
+                                         self.verbose, clean_conv,
+                                         clean_dtypes)
 
     def _to_recarray(self, data, columns):
         dtypes = []
diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index 75b99654dbf89..0f23155464ad2 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -100,29 +100,13 @@ def test_dtype_and_names_error(self):
             self.read_csv(StringIO(data), sep=r'\s+', header=None,
                           names=['a', 'b'], dtype={'a': np.int32})
 
-    def test_passing_dtype(self):
-        # see gh-6607
+    def test_unsupported_dtype(self):
         df = DataFrame(np.random.rand(5, 2), columns=list(
             'AB'), index=['1A', '1B', '1C', '1D', '1E'])
 
-        with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
+        with tm.ensure_clean('__unsupported_dtype__.csv') as path:
             df.to_csv(path)
 
-            # see gh-3795: passing 'str' as the dtype
-            result = self.read_csv(path, dtype=str, index_col=0)
-            tm.assert_series_equal(result.dtypes, Series(
-                {'A': 'object', 'B': 'object'}))
-
-            # we expect all object columns, so need to
-            # convert to test for equivalence
-            result = result.astype(float)
-            tm.assert_frame_equal(result, df)
-
-            # invalid dtype
-            self.assertRaises(TypeError, self.read_csv, path,
-                              dtype={'A': 'foo', 'B': 'float64'},
-                              index_col=0)
-
             # valid but we don't support it (date)
             self.assertRaises(TypeError, self.read_csv, path,
                               dtype={'A': 'datetime64', 'B': 'float64'},
@@ -141,11 +125,6 @@ def test_passing_dtype(self):
                               dtype={'A': 'U8'},
                               index_col=0)
 
-        # see gh-12048: empty frame
-        actual = self.read_csv(StringIO('A,B'), dtype=str)
-        expected = DataFrame({'A': [], 'B': []}, index=[], dtype=str)
-        tm.assert_frame_equal(actual, expected)
-
     def test_precise_conversion(self):
         # see gh-8002
         tm._skip_if_32bit()
@@ -178,104 +157,6 @@ def error(val):
         self.assertTrue(sum(precise_errors) <= sum(normal_errors))
         self.assertTrue(max(precise_errors) <= max(normal_errors))
 
-    def test_pass_dtype(self):
-        data = """\
-one,two
-1,2.5
-2,3.5
-3,4.5
-4,5.5"""
-
-        result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'})
-        self.assertEqual(result['one'].dtype, 'u1')
-        self.assertEqual(result['two'].dtype, 'object')
-
-    def test_categorical_dtype(self):
-        # GH 10153
-        data = """a,b,c
-1,a,3.4
-1,a,3.4
-2,b,4.5"""
-        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
-                                 'b': Categorical(['a', 'a', 'b']),
-                                 'c': Categorical(['3.4', '3.4', '4.5'])})
-        actual = self.read_csv(StringIO(data), dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype=CategoricalDtype())
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype={'a': 'category',
-                                                      'b': 'category',
-                                                      'c': CategoricalDtype()})
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype={'b': 'category'})
-        expected = pd.DataFrame({'a': [1, 1, 2],
-                                 'b': Categorical(['a', 'a', 'b']),
-                                 'c': [3.4, 3.4, 4.5]})
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype={1: 'category'})
-        tm.assert_frame_equal(actual, expected)
-
-        # unsorted
-        data = """a,b,c
-1,b,3.4
-1,b,3.4
-2,a,4.5"""
-        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
-                                 'b': Categorical(['b', 'b', 'a']),
-                                 'c': Categorical(['3.4', '3.4', '4.5'])})
-        actual = self.read_csv(StringIO(data), dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-        # missing
-        data = """a,b,c
-1,b,3.4
-1,nan,3.4
-2,a,4.5"""
-        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
-                                 'b': Categorical(['b', np.nan, 'a']),
-                                 'c': Categorical(['3.4', '3.4', '4.5'])})
-        actual = self.read_csv(StringIO(data), dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-    def test_categorical_dtype_encoding(self):
-        # GH 10153
-        pth = tm.get_data_path('unicode_series.csv')
-        encoding = 'latin-1'
-        expected = self.read_csv(pth, header=None, encoding=encoding)
-        expected[1] = Categorical(expected[1])
-        actual = self.read_csv(pth, header=None, encoding=encoding,
-                               dtype={1: 'category'})
-        tm.assert_frame_equal(actual, expected)
-
-        pth = tm.get_data_path('utf16_ex.txt')
-        encoding = 'utf-16'
-        expected = self.read_table(pth, encoding=encoding)
-        expected = expected.apply(Categorical)
-        actual = self.read_table(pth, encoding=encoding, dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-    def test_categorical_dtype_chunksize(self):
-        # GH 10153
-        data = """a,b
-1,a
-1,b
-1,b
-2,c"""
-        expecteds = [pd.DataFrame({'a': [1, 1],
-                                   'b': Categorical(['a', 'b'])}),
-                     pd.DataFrame({'a': [1, 2],
-                                   'b': Categorical(['b', 'c'])},
-                                  index=[2, 3])]
-        actuals = self.read_csv(StringIO(data), dtype={'b': 'category'},
-                                chunksize=2)
-
-        for actual, expected in zip(actuals, expecteds):
-            tm.assert_frame_equal(actual, expected)
-
     def test_pass_dtype_as_recarray(self):
         if compat.is_platform_windows() and self.low_memory:
             raise nose.SkipTest(
@@ -295,66 +176,6 @@ def test_pass_dtype_as_recarray(self):
             self.assertEqual(result['one'].dtype, 'u1')
             self.assertEqual(result['two'].dtype, 'S1')
 
-    def test_empty_pass_dtype(self):
-        data = 'one,two'
-        result = self.read_csv(StringIO(data), dtype={'one': 'u1'})
-
-        expected = DataFrame({'one': np.empty(0, dtype='u1'),
-                              'two': np.empty(0, dtype=np.object)})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_index_pass_dtype(self):
-        data = 'one,two'
-        result = self.read_csv(StringIO(data), index_col=['one'],
-                               dtype={'one': 'u1', 1: 'f'})
-
-        expected = DataFrame({'two': np.empty(0, dtype='f')},
-                             index=Index([], dtype='u1', name='one'))
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_multiindex_pass_dtype(self):
-        data = 'one,two,three'
-        result = self.read_csv(StringIO(data), index_col=['one', 'two'],
-                               dtype={'one': 'u1', 1: 'f8'})
-
-        exp_idx = MultiIndex.from_arrays([np.empty(0, dtype='u1'),
-                                          np.empty(0, dtype='O')],
-                                         names=['one', 'two'])
-        expected = DataFrame(
-            {'three': np.empty(0, dtype=np.object)}, index=exp_idx)
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_mangled_column_pass_dtype_by_names(self):
-        data = 'one,one'
-        result = self.read_csv(StringIO(data), dtype={
-            'one': 'u1', 'one.1': 'f'})
-
-        expected = DataFrame(
-            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_mangled_column_pass_dtype_by_indexes(self):
-        data = 'one,one'
-        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
-
-        expected = DataFrame(
-            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_dup_column_pass_dtype_by_indexes(self):
-        # see gh-9424
-        expected = pd.concat([Series([], name='one', dtype='u1'),
-                              Series([], name='one.1', dtype='f')], axis=1)
-
-        data = 'one,one'
-        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-        data = ''
-        result = self.read_csv(StringIO(data), names=['one', 'one'],
-                               dtype={0: 'u1', 1: 'f'})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
     def test_usecols_dtypes(self):
         data = """\
 1,2,3
@@ -400,16 +221,6 @@ def test_custom_lineterminator(self):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_raise_on_passed_int_dtype_with_nas(self):
-        # see gh-2631
-        data = """YEAR, DOY, a
-2001,106380451,10
-2001,,11
-2001,106380451,67"""
-        self.assertRaises(ValueError, self.read_csv, StringIO(data),
-                          sep=",", skipinitialspace=True,
-                          dtype={'DOY': np.int64})
-
     def test_parse_ragged_csv(self):
         data = """1,2,3
 1,2,3,4
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
new file mode 100644
index 0000000000000..4d796f00eec91
--- /dev/null
+++ b/pandas/io/tests/parser/dtypes.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+
+"""
+Tests dtype specification during parsing
+for all of the parsers defined in parsers.py
+"""
+
+from datetime import datetime
+
+import nose
+
+import numpy as np
+import pandas as pd
+import pandas.util.testing as tm
+
+from pandas.lib import Timestamp
+from pandas import DataFrame, Series, Index, MultiIndex, Categorical
+from pandas.compat import parse_date, StringIO, lmap
+from pandas.types.dtypes import CategoricalDtype
+
+
+class DtypeTests(object):
+    def test_passing_dtype(self):
+        # see gh-6607
+        df = DataFrame(np.random.rand(5, 2), columns=list(
+            'AB'), index=['1A', '1B', '1C', '1D', '1E'])
+
+        with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
+            df.to_csv(path)
+
+            # see gh-3795: passing 'str' as the dtype
+            result = self.read_csv(path, dtype=str, index_col=0)
+            tm.assert_series_equal(result.dtypes, Series(
+                {'A': 'object', 'B': 'object'}))
+
+            # we expect all object columns, so need to
+            # convert to test for equivalence
+            result = result.astype(float)
+            tm.assert_frame_equal(result, df)
+
+            # invalid dtype
+            self.assertRaises(TypeError, self.read_csv, path,
+                              dtype={'A': 'foo', 'B': 'float64'},
+                              index_col=0)
+
+        # see gh-12048: empty frame
+        actual = self.read_csv(StringIO('A,B'), dtype=str)
+        expected = DataFrame({'A': [], 'B': []}, index=[], dtype=str)
+        tm.assert_frame_equal(actual, expected)
+
+    def test_pass_dtype(self):
+        data = """\
+one,two
+1,2.5
+2,3.5
+3,4.5
+4,5.5"""
+
+        result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'})
+        self.assertEqual(result['one'].dtype, 'u1')
+        self.assertEqual(result['two'].dtype, 'object')
+
+    def test_categorical_dtype(self):
+        # GH 10153
+        data = """a,b,c
+1,a,3.4
+1,a,3.4
+2,b,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['a', 'a', 'b']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype=CategoricalDtype())
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={'a': 'category',
+                                                      'b': 'category',
+                                                      'c': CategoricalDtype()})
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={'b': 'category'})
+        expected = pd.DataFrame({'a': [1, 1, 2],
+                                 'b': Categorical(['a', 'a', 'b']),
+                                 'c': [3.4, 3.4, 4.5]})
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={1: 'category'})
+        tm.assert_frame_equal(actual, expected)
+
+        # unsorted
+        data = """a,b,c
+1,b,3.4
+1,b,3.4
+2,a,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['b', 'b', 'a']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+        # missing
+        data = """a,b,c
+1,b,3.4
+1,nan,3.4
+2,a,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['b', np.nan, 'a']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+    def test_categorical_dtype_encoding(self):
+        # GH 10153
+        pth = tm.get_data_path('unicode_series.csv')
+        encoding = 'latin-1'
+        expected = self.read_csv(pth, header=None, encoding=encoding)
+        expected[1] = Categorical(expected[1])
+        actual = self.read_csv(pth, header=None, encoding=encoding,
+                               dtype={1: 'category'})
+        tm.assert_frame_equal(actual, expected)
+
+        pth = tm.get_data_path('utf16_ex.txt')
+        encoding = 'utf-16'
+        expected = self.read_table(pth, encoding=encoding)
+        expected = expected.apply(Categorical)
+        actual = self.read_table(pth, encoding=encoding, dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+    def test_categorical_dtype_chunksize(self):
+        # GH 10153
+        data = """a,b
+1,a
+1,b
+1,b
+2,c"""
+        expecteds = [pd.DataFrame({'a': [1, 1],
+                                   'b': Categorical(['a', 'b'])}),
+                     pd.DataFrame({'a': [1, 2],
+                                   'b': Categorical(['b', 'c'])},
+                                  index=[2, 3])]
+        actuals = self.read_csv(StringIO(data), dtype={'b': 'category'},
+                                chunksize=2)
+
+        for actual, expected in zip(actuals, expecteds):
+            tm.assert_frame_equal(actual, expected)
+
+    def test_empty_pass_dtype(self):
+        data = 'one,two'
+        result = self.read_csv(StringIO(data), dtype={'one': 'u1'})
+
+        expected = DataFrame({'one': np.empty(0, dtype='u1'),
+                              'two': np.empty(0, dtype=np.object)})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_index_pass_dtype(self):
+        data = 'one,two'
+        result = self.read_csv(StringIO(data), index_col=['one'],
+                               dtype={'one': 'u1', 1: 'f'})
+
+        expected = DataFrame({'two': np.empty(0, dtype='f')},
+                             index=Index([], dtype='u1', name='one'))
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_multiindex_pass_dtype(self):
+        data = 'one,two,three'
+        result = self.read_csv(StringIO(data), index_col=['one', 'two'],
+                               dtype={'one': 'u1', 1: 'f8'})
+
+        exp_idx = MultiIndex.from_arrays([np.empty(0, dtype='u1'),
+                                          np.empty(0, dtype='O')],
+                                         names=['one', 'two'])
+        expected = DataFrame(
+            {'three': np.empty(0, dtype=np.object)}, index=exp_idx)
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_mangled_column_pass_dtype_by_names(self):
+        data = 'one,one'
+        result = self.read_csv(StringIO(data), dtype={
+            'one': 'u1', 'one.1': 'f'})
+
+        expected = DataFrame(
+            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_mangled_column_pass_dtype_by_indexes(self):
+        data = 'one,one'
+        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
+
+        expected = DataFrame(
+            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_dup_column_pass_dtype_by_indexes(self):
+        # see gh-9424
+        expected = pd.concat([Series([], name='one', dtype='u1'),
+                              Series([], name='one.1', dtype='f')], axis=1)
+
+        data = 'one,one'
+        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+        data = ''
+        result = self.read_csv(StringIO(data), names=['one', 'one'],
+                               dtype={0: 'u1', 1: 'f'})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_raise_on_passed_int_dtype_with_nas(self):
+        # see gh-2631
+        data = """YEAR, DOY, a
+2001,106380451,10
+2001,,11
+2001,106380451,67"""
+        self.assertRaises(ValueError, self.read_csv, StringIO(data),
+                          sep=",", skipinitialspace=True,
+                          dtype={'DOY': np.int64})
diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py
index 6001c85ae76b1..6cca2e35e1135 100644
--- a/pandas/io/tests/parser/test_parsers.py
+++ b/pandas/io/tests/parser/test_parsers.py
@@ -22,6 +22,7 @@
 from .compression import CompressionTests
 from .multithread import MultithreadTests
 from .python_parser_only import PythonParserTests
+from .dtypes import DtypeTests
 
 
 class BaseParser(CommentTests, CompressionTests,
@@ -29,7 +30,8 @@ class BaseParser(CommentTests, CompressionTests,
                  IndexColTests, MultithreadTests,
                  NAvaluesTests, ParseDatesTests,
                  ParserTests, SkipRowsTests,
-                 UsecolsTests, QuotingTests):
+                 UsecolsTests, QuotingTests,
+                 DtypeTests):
     def read_csv(self, *args, **kwargs):
         raise NotImplementedError
 

From 7be7b423c1e673304e4e35a3bd4889cbc9ffc3af Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sat, 24 Sep 2016 14:09:32 -0500
Subject: [PATCH 02/23] TST: remove checks that dtype is rejected by python engine

---
 pandas/io/tests/parser/test_unsupported.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
index 5d60c20854a83..ffd1cfa9a2538 100644
--- a/pandas/io/tests/parser/test_unsupported.py
+++ b/pandas/io/tests/parser/test_unsupported.py
@@ -44,16 +44,6 @@ def test_c_engine(self):
         data = 'a b c\n1 2 3'
         msg = 'does not support'
 
-        # specify C-unsupported options with python-unsupported option
-        # (options will be ignored on fallback, raise)
-        with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), sep=None,
-                       delim_whitespace=False, dtype={'a': float})
-        with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), sep=r'\s', dtype={'a': float})
-        with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), skipfooter=1, dtype={'a': float})
-
         # specify C engine with unsupported options (raise)
         with tm.assertRaisesRegexp(ValueError, msg):
             read_table(StringIO(data), engine='c',

From 65a94ae85772ae5e32011739790a1551924bb4b1 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 25 Sep 2016 09:10:13 -0500
Subject: [PATCH 03/23] add test/fix for dtype=object

---
 pandas/io/parsers.py             | 15 ++++++++++-----
 pandas/io/tests/parser/dtypes.py | 15 +++++++--------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 37f6a02906a63..ee20965ec50fb 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1303,7 +1303,7 @@ def _apply_converter(self, values, conv_f, na_values, col_na_values,
 
         cvals, na_count = self._infer_types(
             values, set(col_na_values) | col_na_fvalues,
-            try_numeric=False)
+            try_num_bool=False)
         return cvals, na_count
 
     def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
@@ -1330,10 +1330,15 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                     values, conv_f, na_values,
                     col_na_values, col_na_fvalues)
             else:
+                try_num_bool = True
+                if cast_type and is_object_dtype(cast_type):
+                    # skip inference if specified dtype is object
+                    try_num_bool = False
+
                 # general type inference and conversion
                 cvals, na_count = self._infer_types(
                     values, set(col_na_values) | col_na_fvalues,
-                    try_numeric=True)
+                    try_num_bool)
 
             if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
                 cvals = lib.downcast_int64(
@@ -1356,7 +1361,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                 print('Filled %d NA values in column %s' % (na_count, str(c)))
         return result
 
-    def _infer_types(self, values, na_values, try_numeric=True):
+    def _infer_types(self, values, na_values, try_num_bool=True):
         na_count = 0
         if issubclass(values.dtype.type, (np.number, np.bool_)):
             mask = lib.ismember(values, na_values)
@@ -1367,7 +1372,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
                 np.putmask(values, mask, np.nan)
             return values, na_count
 
-        if try_numeric:
+        if try_num_bool:
             try:
                 result = lib.maybe_convert_numeric(values, na_values, False)
                 na_count = isnull(result).sum()
@@ -1380,7 +1385,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
             if values.dtype == np.object_:
                 na_count = lib.sanitize_objects(values, na_values, False)
 
-        if result.dtype == np.object_ and try_numeric:
+        if result.dtype == np.object_ and try_num_bool:
             result = lib.maybe_convert_bool(values,
                                             true_values=self.true_values,
                                             false_values=self.false_values)
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index 4d796f00eec91..a0a3b43279475 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -5,17 +5,12 @@
 for all of the parsers defined in parsers.py
 """
 
-from datetime import datetime
-
-import nose
-
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
 
-from pandas.lib import Timestamp
 from pandas import DataFrame, Series, Index, MultiIndex, Categorical
-from pandas.compat import parse_date, StringIO, lmap
+from pandas.compat import StringIO
 from pandas.types.dtypes import CategoricalDtype
 
 
@@ -30,8 +25,12 @@ def test_passing_dtype(self):
 
             # see gh-3795: passing 'str' as the dtype
             result = self.read_csv(path, dtype=str, index_col=0)
-            tm.assert_series_equal(result.dtypes, Series(
-                {'A': 'object', 'B': 'object'}))
+            expected = df.astype(str)
+            tm.assert_frame_equal(result, expected)
+
+            # for parsing, interpret object as str
+            result = self.read_csv(path, dtype=object, index_col=0)
+            tm.assert_frame_equal(result, expected)
 
             # we expect all object columns, so need to
             # convert to test for equivalence

From 68535879314b7d90dbd47a46e28d06cd732da58c Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 25 Sep 2016 10:24:55 -0500
Subject: [PATCH 04/23] float precision...

---
 pandas/io/tests/parser/c_parser_only.py | 3 +--
 pandas/io/tests/parser/dtypes.py        | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index 0f23155464ad2..c781b0549ee60 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -12,10 +12,9 @@
 
 import pandas as pd
 import pandas.util.testing as tm
-from pandas import DataFrame, Series, Index, MultiIndex, Categorical
+from pandas import DataFrame
 from pandas import compat
 from pandas.compat import StringIO, range, lrange
-from pandas.types.dtypes import CategoricalDtype
 
 
 class CParserTests(object):
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index a0a3b43279475..cf37dd97b9fc9 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -21,11 +21,11 @@ def test_passing_dtype(self):
             'AB'), index=['1A', '1B', '1C', '1D', '1E'])
 
         with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
-            df.to_csv(path)
+            df.to_csv(path, float_format='%.12f')
 
             # see gh-3795: passing 'str' as the dtype
             result = self.read_csv(path, dtype=str, index_col=0)
-            expected = df.astype(str)
+            expected = df.applymap(lambda x: '%.12f' % (x,))
             tm.assert_frame_equal(result, expected)
 
             # for parsing, interpret object as str

From 3024177264f2066c6cf8d9a5e5ade1695d53f7dc Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Mon, 26 Sep 2016 19:29:15 -0500
Subject: [PATCH 05/23] float precision fix

---
 pandas/io/tests/parser/dtypes.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index cf37dd97b9fc9..cba293965e56b 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -17,15 +17,15 @@
 class DtypeTests(object):
     def test_passing_dtype(self):
         # see gh-6607
-        df = DataFrame(np.random.rand(5, 2), columns=list(
+        df = DataFrame(np.random.rand(5, 2).round(4), columns=list(
             'AB'), index=['1A', '1B', '1C', '1D', '1E'])
 
         with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
-            df.to_csv(path, float_format='%.12f')
+            df.to_csv(path)
 
             # see gh-3795: passing 'str' as the dtype
             result = self.read_csv(path, dtype=str, index_col=0)
-            expected = df.applymap(lambda x: '%.12f' % (x,))
+            expected = df.astype(str)
             tm.assert_frame_equal(result, expected)
 
             # for parsing, interpret object as str

From f9ff10edb9e21dd88b52802b77f92ef9eccfd4a3 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 30 Oct 2016 16:33:30 -0500
Subject: [PATCH 06/23] add docs; test for conv cast

---
 doc/source/io.rst                |  9 ++--
 doc/source/whatsnew/v0.20.0.txt  |  9 ++++
 pandas/io/parsers.py             | 93 +++++++++++++++++++-------------
 pandas/io/tests/parser/dtypes.py | 10 ++++
 4 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index ee319092c6dd5..03210ce3231b9 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -157,6 +157,9 @@ dtype : Type name or dict of column -> type, default ``None``
   Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}``
   (unsupported with ``engine='python'``). Use `str` or `object` to preserve and
   not interpret dtype.
+
+  .. versionadded:: 0.20.0 support for the Python parser.
+
 engine : {``'c'``, ``'python'``}
   Parser engine to use. The C engine is faster while the python engine is
   currently more feature-complete.
@@ -473,10 +476,8 @@ However, if you wanted for all the data to be coerced, no matter the type, then
 using the ``converters`` argument of :func:`~pandas.read_csv` would certainly be
 worth trying.
 
-.. note::
-    The ``dtype`` option is currently only supported by the C engine.
-    Specifying ``dtype`` with ``engine`` other than 'c' raises a
-    ``ValueError``.
+  .. versionadded:: 0.20.0 support for the Python parser.
+     The ``dtype`` option is supported by the 'python' engine
 
 .. note::
    In some cases, reading in abnormal data with columns containing mixed dtypes
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 581106924c77e..62000139234c8 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -32,6 +32,15 @@ Other enhancements
 
 - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
 
+- The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
+ is now supported with the ``'python'`` engine.  See the :ref:`io docs <io.dtypes>` for more information.
+
+.. ipython:: python
+
+   from io import StringIO
+   data = "a,b\n1,2\n3,4"
+   pd.read_csv(StringIO(data), engine='python').dtypes
+   pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
 
 .. _whatsnew_0200.api_breaking:
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index ee20965ec50fb..b3142b3ef740e 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -115,8 +115,11 @@
 dtype : Type name or dict of column -> type, default None
     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
     Use `str` or `object` to preserve and not interpret dtype.
-    If converters are specified, they will be applied AFTER
-    dtype conversion.
+    If converters are specified, they will be applied INSTEAD
+    of dtype conversion.
+
+  .. versionadded:: 0.20.0 support for the Python parser.
+
 %s
 converters : dict, default None
     Dict of functions for converting values in certain columns. Keys can either
@@ -1292,20 +1295,6 @@ def _agg_index(self, index, try_parse_dates=True):
 
         return index
 
-    def _apply_converter(self, values, conv_f, na_values, col_na_values,
-                         col_na_fvalues):
-        """ apply converter function to values, respecting NAs """
-        try:
-            values = lib.map_infer(values, conv_f)
-        except ValueError:
-            mask = lib.ismember(values, na_values).view(np.uint8)
-            values = lib.map_infer_mask(values, conv_f, mask)
-
-        cvals, na_count = self._infer_types(
-            values, set(col_na_values) | col_na_fvalues,
-            try_num_bool=False)
-        return cvals, na_count
-
     def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                              converters=None, dtypes=None):
         result = {}
@@ -1323,45 +1312,58 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
             else:
                 col_na_values, col_na_fvalues = set(), set()
 
-            if conv_f is not None and cast_type is None:
-                # if type is not specified, apply the conversion first, without
-                # inference
-                cvals, na_count = self._apply_converter(
-                    values, conv_f, na_values,
-                    col_na_values, col_na_fvalues)
+            if conv_f is not None:
+                # conv_f applied to data before inference
+                # dtype isn't used if a converted specified
+                try:
+                    values = lib.map_infer(values, conv_f)
+                except ValueError:
+                    mask = lib.ismember(values, na_values).view(np.uint8)
+                    values = lib.map_infer_mask(values, conv_f, mask)
+
+                cvals, na_count = self._infer_types(
+                    values, set(col_na_values) | col_na_fvalues,
+                    try_num_bool=False)
             else:
-                try_num_bool = True
-                if cast_type and is_object_dtype(cast_type):
-                    # skip inference if specified dtype is object
-                    try_num_bool = False
+                # skip inference if specified dtype is object
+                try_num_bool = not (cast_type and is_object_dtype(cast_type))
 
                 # general type inference and conversion
                 cvals, na_count = self._infer_types(
                     values, set(col_na_values) | col_na_fvalues,
                     try_num_bool)
 
+                # type specified in dtype param
+                if cast_type and not is_dtype_equal(cvals, cast_type):
+                    cvals = self._cast_types(cvals, cast_type, c)
+
             if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
                 cvals = lib.downcast_int64(
                     cvals, _parser.na_values,
                     self.use_unsigned)
 
-            if cast_type and not is_dtype_equal(cvals, cast_type):
-                # type specificed in dtype param
-
-                cvals = self._cast_types(cvals, cast_type, c)
-                # for consistency with c-parser, if a converter and dtype are
-                # specified, apply the converter last
-                if conv_f is not None:
-                    values, na_count = self._apply_converter(
-                        values, conv_f, na_values,
-                        col_na_values, col_na_fvalues)
-
             result[c] = cvals
             if verbose and na_count:
                 print('Filled %d NA values in column %s' % (na_count, str(c)))
         return result
 
     def _infer_types(self, values, na_values, try_num_bool=True):
+        """
+        Infer types of values, possibly casting
+
+        Parameters
+        ----------
+        values : ndarray
+        na_values : set
+        try_num_bool : bool, default True
+           try to cast values to numeric (first preference) or boolean
+
+        Returns
+        -------
+        converted : ndarray
+        na_count : int
+        """
+
         na_count = 0
         if issubclass(values.dtype.type, (np.number, np.bool_)):
             mask = lib.ismember(values, na_values)
@@ -1393,7 +1395,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
         return result, na_count
 
     def _cast_types(self, values, cast_type, column):
-        """ cast column to type specified in dtypes= param """
+        """
+        Cast values to specified type
+
+        Parameters
+        ----------
+        values : ndarray
+        cast_type : string or np.dtype
+           dtype to cast values to
+        column : string
+            column name - used only for error reporting
+
+        Returns
+        -------
+        converted : ndarray
+        """
+
         if is_categorical_dtype(cast_type):
             # XXX this is for consistency with
             # c-parser which parses all categories
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index cba293965e56b..510efac80ee78 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -214,3 +214,13 @@ def test_raise_on_passed_int_dtype_with_nas(self):
         self.assertRaises(ValueError, self.read_csv, StringIO(data),
                           sep=",", skipinitialspace=True,
                           dtype={'DOY': np.int64})
+
+    def test_dtype_with_converter(self):
+        data = """a,b
+1.1,2.2
+1.2,2.3"""
+        result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
+                               converters={'a': lambda x: str(x)})
+        # dtype spec ignored if converted specified
+        expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
+        tm.assert_frame_equal(result, expected)

From f5b23a67b5490e181326eb533cb728a9a5832d71 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sat, 5 Nov 2016 12:22:48 -0500
Subject: [PATCH 07/23] Add warning if both converter and dtype specified

---
 pandas/io/parsers.py             |  7 +++-
 pandas/io/tests/parser/dtypes.py |  6 ++--
 pandas/parser.pyx                | 58 +++++++++++++++++---------------
 3 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index b3142b3ef740e..8187b129d2702 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1314,7 +1314,12 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
 
             if conv_f is not None:
                 # conv_f applied to data before inference
-                # dtype isn't used if a converted specified
+                if cast_type is not None:
+                    warnings.warn(("Both a converter and dtype were specified "
+                                   "for column {0} - only the converter will "
+                                   "be used").format(c), ParserWarning,
+                                  stacklevel=7)
+
                 try:
                     values = lib.map_infer(values, conv_f)
                 except ValueError:
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index 510efac80ee78..a2163aaf31ea8 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -12,6 +12,7 @@
 from pandas import DataFrame, Series, Index, MultiIndex, Categorical
 from pandas.compat import StringIO
 from pandas.types.dtypes import CategoricalDtype
+from pandas.io.common import ParserWarning
 
 
 class DtypeTests(object):
@@ -219,8 +220,9 @@ def test_dtype_with_converter(self):
         data = """a,b
 1.1,2.2
 1.2,2.3"""
-        result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
-                               converters={'a': lambda x: str(x)})
         # dtype spec ignored if converted specified
+        with tm.assert_produces_warning(ParserWarning):
+            result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
+                                converters={'a': lambda x: str(x)})
         expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 6b43dfbabc4a0..ca9b34c06f025 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,
                       PyUnicode_Check, PyUnicode_AsUTF8String,
                       PyErr_Occurred, PyErr_Fetch)
 from cpython.ref cimport PyObject, Py_XDECREF
-from io.common import ParserError, DtypeWarning, EmptyDataError
+from io.common import ParserError, DtypeWarning, EmptyDataError, ParserWarning
 
 # Import CParserError as alias of ParserError for backwards compatibility.
 # Ultimately, we want to remove this import. See gh-12665 and gh-14479.
@@ -987,7 +987,7 @@ cdef class TextReader:
             Py_ssize_t i, nused
             kh_str_t *na_hashset = NULL
             int start, end
-            object name, na_flist
+            object name, na_flist, col_dtype = None
             bint na_filter = 0
             Py_ssize_t num_cols
 
@@ -1043,14 +1043,33 @@ cdef class TextReader:
             else:
                 na_filter = 0
 
+            col_dtype = None
+            if self.dtype is not None:
+                if isinstance(self.dtype, dict):
+                    if name in self.dtype:
+                        col_dtype = self.dtype[name]
+                    elif i in self.dtype:
+                        col_dtype = self.dtype[i]
+                else:
+                    if self.dtype.names:
+                        # structured array
+                        col_dtype = np.dtype(self.dtype.descr[i][1])
+                    else:
+                        col_dtype = self.dtype
+
             if conv:
+                if col_dtype is not None:
+                    warnings.warn(("Both a converter and dtype were specified "
+                                   "for column {0} - only the converter will "
+                                   "be used").format(name), ParserWarning,
+                                  stacklevel=5)
                 results[i] = _apply_converter(conv, self.parser, i, start, end,
                                               self.c_encoding)
                 continue
 
             # Should return as the desired dtype (inferred or specified)
             col_res, na_count = self._convert_tokens(
-                i, start, end, name, na_filter, na_hashset, na_flist)
+                i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
 
             if na_filter:
                 self._free_na_set(na_hashset)
@@ -1075,32 +1094,17 @@ cdef class TextReader:
     cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,
                                 object name, bint na_filter,
                                 kh_str_t *na_hashset,
-                                object na_flist):
-        cdef:
-            object col_dtype = None
-
-        if self.dtype is not None:
-            if isinstance(self.dtype, dict):
-                if name in self.dtype:
-                    col_dtype = self.dtype[name]
-                elif i in self.dtype:
-                    col_dtype = self.dtype[i]
-            else:
-                if self.dtype.names:
-                    # structured array
-                    col_dtype = np.dtype(self.dtype.descr[i][1])
-                else:
-                    col_dtype = self.dtype
+                                object na_flist, object col_dtype):
 
-            if col_dtype is not None:
-                col_res, na_count = self._convert_with_dtype(
-                    col_dtype, i, start, end, na_filter,
-                    1, na_hashset, na_flist)
+        if col_dtype is not None:
+            col_res, na_count = self._convert_with_dtype(
+                col_dtype, i, start, end, na_filter,
+                1, na_hashset, na_flist)
 
-                # Fallback on the parse (e.g. we requested int dtype,
-                # but its actually a float).
-                if col_res is not None:
-                    return col_res, na_count
+            # Fallback on the parse (e.g. we requested int dtype,
+            # but its actually a float).
+            if col_res is not None:
+                return col_res, na_count
 
         if i in self.noconvert:
             return self._string_convert(i, start, end, na_filter, na_hashset)

From e0e5ae817a72e1fc8179dbeefd05f1881588a53b Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 13 Nov 2016 09:18:15 -0600
Subject: [PATCH 08/23] doc comments

---
 doc/source/whatsnew/v0.20.0.txt | 1 -
 pandas/io/parsers.py            | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 62000139234c8..bef90b5392418 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -37,7 +37,6 @@ Other enhancements
 
 .. ipython:: python
 
-   from io import StringIO
    data = "a,b\n1,2\n3,4"
    pd.read_csv(StringIO(data), engine='python').dtypes
    pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 8187b129d2702..31b3ea1ebf3c0 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -117,9 +117,6 @@
     Use `str` or `object` to preserve and not interpret dtype.
     If converters are specified, they will be applied INSTEAD
     of dtype conversion.
-
-  .. versionadded:: 0.20.0 support for the Python parser.
-
 %s
 converters : dict, default None
     Dict of functions for converting values in certain columns. Keys can either

From a5821d3dfba3232395ee4b48267236691f202d6c Mon Sep 17 00:00:00 2001
From: Christopher Bartak <cbartak@gmail.com>
Date: Wed, 23 Nov 2016 08:39:10 -0600
Subject: [PATCH 09/23] doc updates

---
 doc/source/io.rst               |  1 +
 doc/source/whatsnew/v0.20.0.txt | 17 +++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 03210ce3231b9..b1c151def26af 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -477,6 +477,7 @@ using the ``converters`` argument of :func:`~pandas.read_csv` would certainly be
 worth trying.
 
   .. versionadded:: 0.20.0 support for the Python parser.
+
      The ``dtype`` option is supported by the 'python' engine
 
 .. note::
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index bef90b5392418..30c80ca0a7523 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -22,8 +22,17 @@ New features
 ~~~~~~~~~~~~
 
 
+``read_csv`` supports ``dtype`` keyword for python engine
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
+is now supported with the ``'python'`` engine.  See the :ref:`io docs <io.dtypes>` for more information.
 
+.. ipython:: python
+
+   data = "a,b\n1,2\n3,4"
+   pd.read_csv(StringIO(data), engine='python').dtypes
+   pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
 
 .. _whatsnew_0200.enhancements.other:
 
@@ -32,14 +41,6 @@ Other enhancements
 
 - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
 
-- The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
- is now supported with the ``'python'`` engine.  See the :ref:`io docs <io.dtypes>` for more information.
-
-.. ipython:: python
-
-   data = "a,b\n1,2\n3,4"
-   pd.read_csv(StringIO(data), engine='python').dtypes
-   pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
 
 .. _whatsnew_0200.api_breaking:
 

From 7c703fe632c0a9a98ef648509d3c84ff0bc1a292 Mon Sep 17 00:00:00 2001
From: Christopher Bartak <cbartak@gmail.com>
Date: Wed, 23 Nov 2016 09:34:53 -0600
Subject: [PATCH 10/23] lint

---
 pandas/io/tests/parser/dtypes.py | 2 +-
 pandas/parser.pyx                | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index a2163aaf31ea8..058bfea7ae330 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -223,6 +223,6 @@ def test_dtype_with_converter(self):
         # dtype spec ignored if converted specified
         with tm.assert_produces_warning(ParserWarning):
             result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
-                                converters={'a': lambda x: str(x)})
+                                   converters={'a': lambda x: str(x)})
         expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index ca9b34c06f025..6760e822960f1 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -1069,7 +1069,8 @@ cdef class TextReader:
 
             # Should return as the desired dtype (inferred or specified)
             col_res, na_count = self._convert_tokens(
-                i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
+                i, start, end, name, na_filter, na_hashset,
+                na_flist, col_dtype)
 
             if na_filter:
                 self._free_na_set(na_hashset)

From d790bdf1779a2f248aa290234d23abb1fec03043 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sat, 24 Sep 2016 13:03:41 -0500
Subject: [PATCH 11/23] API: add dtype= option to python parser

---
 pandas/io/parsers.py                    | 126 ++++++++++----
 pandas/io/tests/parser/c_parser_only.py | 193 +--------------------
 pandas/io/tests/parser/dtypes.py        | 217 ++++++++++++++++++++++++
 pandas/io/tests/parser/test_parsers.py  |   4 +-
 4 files changed, 315 insertions(+), 225 deletions(-)
 create mode 100644 pandas/io/tests/parser/dtypes.py

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 929b360854d5b..d4e1a70240bb2 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -17,11 +17,15 @@
                            zip, string_types, map, u)
 from pandas.types.common import (is_integer, _ensure_object,
                                  is_list_like, is_integer_dtype,
-                                 is_float,
-                                 is_scalar)
+                                 is_float, is_dtype_equal,
+                                 is_object_dtype,
+                                 is_scalar, is_categorical_dtype)
+from pandas.types.missing import isnull
+from pandas.types.cast import _astype_nansafe
 from pandas.core.index import Index, MultiIndex, RangeIndex
 from pandas.core.series import Series
 from pandas.core.frame import DataFrame
+from pandas.core.categorical import Categorical
 from pandas.core.common import AbstractMethodError
 from pandas.core.config import get_option
 from pandas.io.date_converters import generic_parser
@@ -111,8 +115,9 @@
     are duplicate names in the columns.
 dtype : Type name or dict of column -> type, default None
     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
-    (Unsupported with engine='python'). Use `str` or `object` to preserve and
-    not interpret dtype.
+    Use `str` or `object` to preserve and not interpret dtype.
+    If converters are specified, they will be applied AFTER
+    dtype conversion.
 %s
 converters : dict, default None
     Dict of functions for converting values in certain columns. Keys can either
@@ -421,6 +426,7 @@ def _read(filepath_or_buffer, kwds):
     'true_values': None,
     'false_values': None,
     'converters': None,
+    'dtype': None,
     'skipfooter': 0,
 
     'keep_default_na': True,
@@ -461,7 +467,6 @@ def _read(filepath_or_buffer, kwds):
     'buffer_lines': None,
     'error_bad_lines': True,
     'warn_bad_lines': True,
-    'dtype': None,
     'float_precision': None
 }
 
@@ -476,7 +481,6 @@ def _read(filepath_or_buffer, kwds):
     'buffer_lines',
     'error_bad_lines',
     'warn_bad_lines',
-    'dtype',
     'float_precision',
 ])
 _deprecated_args = set([
@@ -834,9 +838,6 @@ def _clean_options(self, options, engine):
                            " ignored as it is not supported by the 'python'"
                            " engine.").format(reason=fallback_reason,
                                               option=arg)
-                    if arg == 'dtype':
-                        msg += " (Note the 'converters' option provides"\
-                               " similar functionality.)"
                     raise ValueError(msg)
                 del result[arg]
 
@@ -1285,18 +1286,37 @@ def _agg_index(self, index, try_parse_dates=True):
                     col_na_values, col_na_fvalues = _get_na_values(
                         col_name, self.na_values, self.na_fvalues)
 
-            arr, _ = self._convert_types(arr, col_na_values | col_na_fvalues)
+            arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
             arrays.append(arr)
 
         index = MultiIndex.from_arrays(arrays, names=self.index_names)
 
         return index
 
+    def _apply_converter(self, values, conv_f, na_values, col_na_values,
+                         col_na_fvalues):
+        """ apply converter function to values, respecting NAs """
+        try:
+            values = lib.map_infer(values, conv_f)
+        except ValueError:
+            mask = lib.ismember(values, na_values).view(np.uint8)
+            values = lib.map_infer_mask(values, conv_f, mask)
+
+        cvals, na_count = self._infer_types(
+            values, set(col_na_values) | col_na_fvalues,
+            try_numeric=False)
+        return cvals, na_count
+
     def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
-                             converters=None):
+                             converters=None, dtypes=None):
         result = {}
         for c, values in compat.iteritems(dct):
             conv_f = None if converters is None else converters.get(c, None)
+            if isinstance(dtypes, dict):
+                cast_type = dtypes.get(c, None)
+            else:
+                # single dtype or None
+                cast_type = dtypes
 
             if self.na_filter:
                 col_na_values, col_na_fvalues = _get_na_values(
@@ -1304,29 +1324,40 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
             else:
                 col_na_values, col_na_fvalues = set(), set()
 
-            coerce_type = True
-            if conv_f is not None:
-                try:
-                    values = lib.map_infer(values, conv_f)
-                except ValueError:
-                    mask = lib.ismember(values, na_values).view(np.uint8)
-                    values = lib.map_infer_mask(values, conv_f, mask)
-                coerce_type = False
-
-            cvals, na_count = self._convert_types(
-                values, set(col_na_values) | col_na_fvalues, coerce_type)
+            if conv_f is not None and cast_type is None:
+                # if type is not specified, apply the conversion first, without
+                # inference
+                cvals, na_count = self._apply_converter(
+                    values, conv_f, na_values,
+                    col_na_values, col_na_fvalues)
+            else:
+                # general type inference and conversion
+                cvals, na_count = self._infer_types(
+                    values, set(col_na_values) | col_na_fvalues,
+                    try_numeric=True)
 
             if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
                 cvals = lib.downcast_int64(
                     cvals, _parser.na_values,
                     self.use_unsigned)
 
+            if cast_type and not is_dtype_equal(cvals, cast_type):
+                # type specified in dtype param
+
+                cvals = self._cast_types(cvals, cast_type, c)
+                # for consistency with c-parser, if a converter and dtype are
+                # specified, apply the converter last
+                if conv_f is not None:
+                    values, na_count = self._apply_converter(
+                        values, conv_f, na_values,
+                        col_na_values, col_na_fvalues)
+
             result[c] = cvals
             if verbose and na_count:
                 print('Filled %d NA values in column %s' % (na_count, str(c)))
         return result
 
-    def _convert_types(self, values, na_values, try_num_bool=True):
+    def _infer_types(self, values, na_values, try_numeric=True):
         na_count = 0
         if issubclass(values.dtype.type, (np.number, np.bool_)):
             mask = lib.ismember(values, na_values)
@@ -1337,9 +1368,10 @@ def _convert_types(self, values, na_values, try_num_bool=True):
                 np.putmask(values, mask, np.nan)
             return values, na_count
 
-        if try_num_bool:
+        if try_numeric:
             try:
                 result = lib.maybe_convert_numeric(values, na_values, False)
+                na_count = isnull(result).sum()
             except Exception:
                 result = values
                 if values.dtype == np.object_:
@@ -1349,13 +1381,30 @@ def _convert_types(self, values, na_values, try_num_bool=True):
             if values.dtype == np.object_:
                 na_count = lib.sanitize_objects(values, na_values, False)
 
-        if result.dtype == np.object_ and try_num_bool:
+        if result.dtype == np.object_ and try_numeric:
             result = lib.maybe_convert_bool(values,
                                             true_values=self.true_values,
                                             false_values=self.false_values)
 
         return result, na_count
 
+    def _cast_types(self, values, cast_type, column):
+        """ cast column to type specified in dtypes= param """
+        if is_categorical_dtype(cast_type):
+            # XXX this is for consistency with
+            # c-parser which parses all categories
+            # as strings
+            if not is_object_dtype(values):
+                values = _astype_nansafe(values, str)
+            values = Categorical(values)
+        else:
+            try:
+                values = _astype_nansafe(values, cast_type, copy=True)
+            except ValueError:
+                raise ValueError("Unable to convert column %s to "
+                                 "type %s" % (column, cast_type))
+        return values
+
     def _do_date_conversions(self, names, data):
         # returns data, columns
         if self.parse_dates is not None:
@@ -1784,6 +1833,7 @@ def __init__(self, f, **kwds):
 
         self.verbose = kwds['verbose']
         self.converters = kwds['converters']
+        self.dtype = kwds['dtype']
 
         self.compact_ints = kwds['compact_ints']
         self.use_unsigned = kwds['use_unsigned']
@@ -1982,7 +2032,7 @@ def read(self, rows=None):
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
             index, columns, col_dict = _get_empty_meta(
-                names, self.index_col, self.index_names)
+                names, self.index_col, self.index_names, self.dtype)
             columns = self._maybe_make_multi_index_columns(
                 columns, self.col_names)
             return index, columns, col_dict
@@ -2033,15 +2083,25 @@ def get_chunk(self, size=None):
 
     def _convert_data(self, data):
         # apply converters
-        clean_conv = {}
-
-        for col, f in compat.iteritems(self.converters):
-            if isinstance(col, int) and col not in self.orig_names:
-                col = self.orig_names[col]
-            clean_conv[col] = f
+        def _clean_mapping(mapping):
+            "converts col numbers to names"
+            clean = {}
+            for col, v in compat.iteritems(mapping):
+                if isinstance(col, int) and col not in self.orig_names:
+                    col = self.orig_names[col]
+                clean[col] = v
+            return clean
+
+        clean_conv = _clean_mapping(self.converters)
+        if not isinstance(self.dtype, dict):
+            # handles single dtype applied to all columns
+            clean_dtypes = self.dtype
+        else:
+            clean_dtypes = _clean_mapping(self.dtype)
 
         return self._convert_to_ndarrays(data, self.na_values, self.na_fvalues,
-                                         self.verbose, clean_conv)
+                                         self.verbose, clean_conv,
+                                         clean_dtypes)
 
     def _to_recarray(self, data, columns):
         dtypes = []
diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index 9cbe88d4032a3..d8926855ddca7 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -100,29 +100,13 @@ def test_dtype_and_names_error(self):
             self.read_csv(StringIO(data), sep=r'\s+', header=None,
                           names=['a', 'b'], dtype={'a': np.int32})
 
-    def test_passing_dtype(self):
-        # see gh-6607
+    def test_unsupported_dtype(self):
         df = DataFrame(np.random.rand(5, 2), columns=list(
             'AB'), index=['1A', '1B', '1C', '1D', '1E'])
 
-        with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
+        with tm.ensure_clean('__unsupported_dtype__.csv') as path:
             df.to_csv(path)
 
-            # see gh-3795: passing 'str' as the dtype
-            result = self.read_csv(path, dtype=str, index_col=0)
-            tm.assert_series_equal(result.dtypes, Series(
-                {'A': 'object', 'B': 'object'}))
-
-            # we expect all object columns, so need to
-            # convert to test for equivalence
-            result = result.astype(float)
-            tm.assert_frame_equal(result, df)
-
-            # invalid dtype
-            self.assertRaises(TypeError, self.read_csv, path,
-                              dtype={'A': 'foo', 'B': 'float64'},
-                              index_col=0)
-
             # valid but we don't support it (date)
             self.assertRaises(TypeError, self.read_csv, path,
                               dtype={'A': 'datetime64', 'B': 'float64'},
@@ -141,11 +125,6 @@ def test_passing_dtype(self):
                               dtype={'A': 'U8'},
                               index_col=0)
 
-        # see gh-12048: empty frame
-        actual = self.read_csv(StringIO('A,B'), dtype=str)
-        expected = DataFrame({'A': [], 'B': []}, index=[], dtype=str)
-        tm.assert_frame_equal(actual, expected)
-
     def test_precise_conversion(self):
         # see gh-8002
         tm._skip_if_32bit()
@@ -178,104 +157,6 @@ def error(val):
         self.assertTrue(sum(precise_errors) <= sum(normal_errors))
         self.assertTrue(max(precise_errors) <= max(normal_errors))
 
-    def test_pass_dtype(self):
-        data = """\
-one,two
-1,2.5
-2,3.5
-3,4.5
-4,5.5"""
-
-        result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'})
-        self.assertEqual(result['one'].dtype, 'u1')
-        self.assertEqual(result['two'].dtype, 'object')
-
-    def test_categorical_dtype(self):
-        # GH 10153
-        data = """a,b,c
-1,a,3.4
-1,a,3.4
-2,b,4.5"""
-        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
-                                 'b': Categorical(['a', 'a', 'b']),
-                                 'c': Categorical(['3.4', '3.4', '4.5'])})
-        actual = self.read_csv(StringIO(data), dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype=CategoricalDtype())
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype={'a': 'category',
-                                                      'b': 'category',
-                                                      'c': CategoricalDtype()})
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype={'b': 'category'})
-        expected = pd.DataFrame({'a': [1, 1, 2],
-                                 'b': Categorical(['a', 'a', 'b']),
-                                 'c': [3.4, 3.4, 4.5]})
-        tm.assert_frame_equal(actual, expected)
-
-        actual = self.read_csv(StringIO(data), dtype={1: 'category'})
-        tm.assert_frame_equal(actual, expected)
-
-        # unsorted
-        data = """a,b,c
-1,b,3.4
-1,b,3.4
-2,a,4.5"""
-        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
-                                 'b': Categorical(['b', 'b', 'a']),
-                                 'c': Categorical(['3.4', '3.4', '4.5'])})
-        actual = self.read_csv(StringIO(data), dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-        # missing
-        data = """a,b,c
-1,b,3.4
-1,nan,3.4
-2,a,4.5"""
-        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
-                                 'b': Categorical(['b', np.nan, 'a']),
-                                 'c': Categorical(['3.4', '3.4', '4.5'])})
-        actual = self.read_csv(StringIO(data), dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-    def test_categorical_dtype_encoding(self):
-        # GH 10153
-        pth = tm.get_data_path('unicode_series.csv')
-        encoding = 'latin-1'
-        expected = self.read_csv(pth, header=None, encoding=encoding)
-        expected[1] = Categorical(expected[1])
-        actual = self.read_csv(pth, header=None, encoding=encoding,
-                               dtype={1: 'category'})
-        tm.assert_frame_equal(actual, expected)
-
-        pth = tm.get_data_path('utf16_ex.txt')
-        encoding = 'utf-16'
-        expected = self.read_table(pth, encoding=encoding)
-        expected = expected.apply(Categorical)
-        actual = self.read_table(pth, encoding=encoding, dtype='category')
-        tm.assert_frame_equal(actual, expected)
-
-    def test_categorical_dtype_chunksize(self):
-        # GH 10153
-        data = """a,b
-1,a
-1,b
-1,b
-2,c"""
-        expecteds = [pd.DataFrame({'a': [1, 1],
-                                   'b': Categorical(['a', 'b'])}),
-                     pd.DataFrame({'a': [1, 2],
-                                   'b': Categorical(['b', 'c'])},
-                                  index=[2, 3])]
-        actuals = self.read_csv(StringIO(data), dtype={'b': 'category'},
-                                chunksize=2)
-
-        for actual, expected in zip(actuals, expecteds):
-            tm.assert_frame_equal(actual, expected)
-
     def test_pass_dtype_as_recarray(self):
         if compat.is_platform_windows() and self.low_memory:
             raise nose.SkipTest(
@@ -295,66 +176,6 @@ def test_pass_dtype_as_recarray(self):
             self.assertEqual(result['one'].dtype, 'u1')
             self.assertEqual(result['two'].dtype, 'S1')
 
-    def test_empty_pass_dtype(self):
-        data = 'one,two'
-        result = self.read_csv(StringIO(data), dtype={'one': 'u1'})
-
-        expected = DataFrame({'one': np.empty(0, dtype='u1'),
-                              'two': np.empty(0, dtype=np.object)})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_index_pass_dtype(self):
-        data = 'one,two'
-        result = self.read_csv(StringIO(data), index_col=['one'],
-                               dtype={'one': 'u1', 1: 'f'})
-
-        expected = DataFrame({'two': np.empty(0, dtype='f')},
-                             index=Index([], dtype='u1', name='one'))
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_multiindex_pass_dtype(self):
-        data = 'one,two,three'
-        result = self.read_csv(StringIO(data), index_col=['one', 'two'],
-                               dtype={'one': 'u1', 1: 'f8'})
-
-        exp_idx = MultiIndex.from_arrays([np.empty(0, dtype='u1'),
-                                          np.empty(0, dtype='O')],
-                                         names=['one', 'two'])
-        expected = DataFrame(
-            {'three': np.empty(0, dtype=np.object)}, index=exp_idx)
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_mangled_column_pass_dtype_by_names(self):
-        data = 'one,one'
-        result = self.read_csv(StringIO(data), dtype={
-            'one': 'u1', 'one.1': 'f'})
-
-        expected = DataFrame(
-            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_mangled_column_pass_dtype_by_indexes(self):
-        data = 'one,one'
-        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
-
-        expected = DataFrame(
-            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_empty_with_dup_column_pass_dtype_by_indexes(self):
-        # see gh-9424
-        expected = pd.concat([Series([], name='one', dtype='u1'),
-                              Series([], name='one.1', dtype='f')], axis=1)
-
-        data = 'one,one'
-        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
-        data = ''
-        result = self.read_csv(StringIO(data), names=['one', 'one'],
-                               dtype={0: 'u1', 1: 'f'})
-        tm.assert_frame_equal(result, expected, check_index_type=False)
-
     def test_usecols_dtypes(self):
         data = """\
 1,2,3
@@ -400,16 +221,6 @@ def test_custom_lineterminator(self):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_raise_on_passed_int_dtype_with_nas(self):
-        # see gh-2631
-        data = """YEAR, DOY, a
-2001,106380451,10
-2001,,11
-2001,106380451,67"""
-        self.assertRaises(ValueError, self.read_csv, StringIO(data),
-                          sep=",", skipinitialspace=True,
-                          dtype={'DOY': np.int64})
-
     def test_parse_ragged_csv(self):
         data = """1,2,3
 1,2,3,4
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
new file mode 100644
index 0000000000000..4d796f00eec91
--- /dev/null
+++ b/pandas/io/tests/parser/dtypes.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+
+"""
+Tests dtype specification during parsing
+for all of the parsers defined in parsers.py
+"""
+
+from datetime import datetime
+
+import nose
+
+import numpy as np
+import pandas as pd
+import pandas.util.testing as tm
+
+from pandas.lib import Timestamp
+from pandas import DataFrame, Series, Index, MultiIndex, Categorical
+from pandas.compat import parse_date, StringIO, lmap
+from pandas.types.dtypes import CategoricalDtype
+
+
+class DtypeTests(object):
+    def test_passing_dtype(self):
+        # see gh-6607
+        df = DataFrame(np.random.rand(5, 2), columns=list(
+            'AB'), index=['1A', '1B', '1C', '1D', '1E'])
+
+        with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
+            df.to_csv(path)
+
+            # see gh-3795: passing 'str' as the dtype
+            result = self.read_csv(path, dtype=str, index_col=0)
+            tm.assert_series_equal(result.dtypes, Series(
+                {'A': 'object', 'B': 'object'}))
+
+            # we expect all object columns, so need to
+            # convert to test for equivalence
+            result = result.astype(float)
+            tm.assert_frame_equal(result, df)
+
+            # invalid dtype
+            self.assertRaises(TypeError, self.read_csv, path,
+                              dtype={'A': 'foo', 'B': 'float64'},
+                              index_col=0)
+
+        # see gh-12048: empty frame
+        actual = self.read_csv(StringIO('A,B'), dtype=str)
+        expected = DataFrame({'A': [], 'B': []}, index=[], dtype=str)
+        tm.assert_frame_equal(actual, expected)
+
+    def test_pass_dtype(self):
+        data = """\
+one,two
+1,2.5
+2,3.5
+3,4.5
+4,5.5"""
+
+        result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'})
+        self.assertEqual(result['one'].dtype, 'u1')
+        self.assertEqual(result['two'].dtype, 'object')
+
+    def test_categorical_dtype(self):
+        # GH 10153
+        data = """a,b,c
+1,a,3.4
+1,a,3.4
+2,b,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['a', 'a', 'b']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype=CategoricalDtype())
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={'a': 'category',
+                                                      'b': 'category',
+                                                      'c': CategoricalDtype()})
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={'b': 'category'})
+        expected = pd.DataFrame({'a': [1, 1, 2],
+                                 'b': Categorical(['a', 'a', 'b']),
+                                 'c': [3.4, 3.4, 4.5]})
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={1: 'category'})
+        tm.assert_frame_equal(actual, expected)
+
+        # unsorted
+        data = """a,b,c
+1,b,3.4
+1,b,3.4
+2,a,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['b', 'b', 'a']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+        # missing
+        data = """a,b,c
+1,b,3.4
+1,nan,3.4
+2,a,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['b', np.nan, 'a']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+    def test_categorical_dtype_encoding(self):
+        # GH 10153
+        pth = tm.get_data_path('unicode_series.csv')
+        encoding = 'latin-1'
+        expected = self.read_csv(pth, header=None, encoding=encoding)
+        expected[1] = Categorical(expected[1])
+        actual = self.read_csv(pth, header=None, encoding=encoding,
+                               dtype={1: 'category'})
+        tm.assert_frame_equal(actual, expected)
+
+        pth = tm.get_data_path('utf16_ex.txt')
+        encoding = 'utf-16'
+        expected = self.read_table(pth, encoding=encoding)
+        expected = expected.apply(Categorical)
+        actual = self.read_table(pth, encoding=encoding, dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+    def test_categorical_dtype_chunksize(self):
+        # GH 10153
+        data = """a,b
+1,a
+1,b
+1,b
+2,c"""
+        expecteds = [pd.DataFrame({'a': [1, 1],
+                                   'b': Categorical(['a', 'b'])}),
+                     pd.DataFrame({'a': [1, 2],
+                                   'b': Categorical(['b', 'c'])},
+                                  index=[2, 3])]
+        actuals = self.read_csv(StringIO(data), dtype={'b': 'category'},
+                                chunksize=2)
+
+        for actual, expected in zip(actuals, expecteds):
+            tm.assert_frame_equal(actual, expected)
+
+    def test_empty_pass_dtype(self):
+        data = 'one,two'
+        result = self.read_csv(StringIO(data), dtype={'one': 'u1'})
+
+        expected = DataFrame({'one': np.empty(0, dtype='u1'),
+                              'two': np.empty(0, dtype=np.object)})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_index_pass_dtype(self):
+        data = 'one,two'
+        result = self.read_csv(StringIO(data), index_col=['one'],
+                               dtype={'one': 'u1', 1: 'f'})
+
+        expected = DataFrame({'two': np.empty(0, dtype='f')},
+                             index=Index([], dtype='u1', name='one'))
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_multiindex_pass_dtype(self):
+        data = 'one,two,three'
+        result = self.read_csv(StringIO(data), index_col=['one', 'two'],
+                               dtype={'one': 'u1', 1: 'f8'})
+
+        exp_idx = MultiIndex.from_arrays([np.empty(0, dtype='u1'),
+                                          np.empty(0, dtype='O')],
+                                         names=['one', 'two'])
+        expected = DataFrame(
+            {'three': np.empty(0, dtype=np.object)}, index=exp_idx)
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_mangled_column_pass_dtype_by_names(self):
+        data = 'one,one'
+        result = self.read_csv(StringIO(data), dtype={
+            'one': 'u1', 'one.1': 'f'})
+
+        expected = DataFrame(
+            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_mangled_column_pass_dtype_by_indexes(self):
+        data = 'one,one'
+        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
+
+        expected = DataFrame(
+            {'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_empty_with_dup_column_pass_dtype_by_indexes(self):
+        # see gh-9424
+        expected = pd.concat([Series([], name='one', dtype='u1'),
+                              Series([], name='one.1', dtype='f')], axis=1)
+
+        data = 'one,one'
+        result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+        data = ''
+        result = self.read_csv(StringIO(data), names=['one', 'one'],
+                               dtype={0: 'u1', 1: 'f'})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+
+    def test_raise_on_passed_int_dtype_with_nas(self):
+        # see gh-2631
+        data = """YEAR, DOY, a
+2001,106380451,10
+2001,,11
+2001,106380451,67"""
+        self.assertRaises(ValueError, self.read_csv, StringIO(data),
+                          sep=",", skipinitialspace=True,
+                          dtype={'DOY': np.int64})
diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py
index 6001c85ae76b1..6cca2e35e1135 100644
--- a/pandas/io/tests/parser/test_parsers.py
+++ b/pandas/io/tests/parser/test_parsers.py
@@ -22,6 +22,7 @@
 from .compression import CompressionTests
 from .multithread import MultithreadTests
 from .python_parser_only import PythonParserTests
+from .dtypes import DtypeTests
 
 
 class BaseParser(CommentTests, CompressionTests,
@@ -29,7 +30,8 @@ class BaseParser(CommentTests, CompressionTests,
                  IndexColTests, MultithreadTests,
                  NAvaluesTests, ParseDatesTests,
                  ParserTests, SkipRowsTests,
-                 UsecolsTests, QuotingTests):
+                 UsecolsTests, QuotingTests,
+                 DtypeTests):
     def read_csv(self, *args, **kwargs):
         raise NotImplementedError
 

From 5462774229b0ace0651951b7a84f4e33e9b715ec Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sat, 24 Sep 2016 14:09:32 -0500
Subject: [PATCH 12/23] remove unsupported test

---
 pandas/io/tests/parser/test_unsupported.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
index 5d60c20854a83..ffd1cfa9a2538 100644
--- a/pandas/io/tests/parser/test_unsupported.py
+++ b/pandas/io/tests/parser/test_unsupported.py
@@ -44,16 +44,6 @@ def test_c_engine(self):
         data = 'a b c\n1 2 3'
         msg = 'does not support'
 
-        # specify C-unsupported options with python-unsupported option
-        # (options will be ignored on fallback, raise)
-        with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), sep=None,
-                       delim_whitespace=False, dtype={'a': float})
-        with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), sep=r'\s', dtype={'a': float})
-        with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), skipfooter=1, dtype={'a': float})
-
         # specify C engine with unsupported options (raise)
         with tm.assertRaisesRegexp(ValueError, msg):
             read_table(StringIO(data), engine='c',

From 64c7214b457157913ea938682d3e50900265045e Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 25 Sep 2016 09:10:13 -0500
Subject: [PATCH 13/23] add test/fix for dtype=object

---
 pandas/io/parsers.py             | 15 ++++++++++-----
 pandas/io/tests/parser/dtypes.py | 15 +++++++--------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index d4e1a70240bb2..b4e203f9d0236 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1304,7 +1304,7 @@ def _apply_converter(self, values, conv_f, na_values, col_na_values,
 
         cvals, na_count = self._infer_types(
             values, set(col_na_values) | col_na_fvalues,
-            try_numeric=False)
+            try_num_bool=False)
         return cvals, na_count
 
     def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
@@ -1331,10 +1331,15 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                     values, conv_f, na_values,
                     col_na_values, col_na_fvalues)
             else:
+                try_num_bool = True
+                if cast_type and is_object_dtype(cast_type):
+                    # skip inference if specified dtype is object
+                    try_num_bool = False
+
                 # general type inference and conversion
                 cvals, na_count = self._infer_types(
                     values, set(col_na_values) | col_na_fvalues,
-                    try_numeric=True)
+                    try_num_bool)
 
             if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
                 cvals = lib.downcast_int64(
@@ -1357,7 +1362,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                 print('Filled %d NA values in column %s' % (na_count, str(c)))
         return result
 
-    def _infer_types(self, values, na_values, try_numeric=True):
+    def _infer_types(self, values, na_values, try_num_bool=True):
         na_count = 0
         if issubclass(values.dtype.type, (np.number, np.bool_)):
             mask = lib.ismember(values, na_values)
@@ -1368,7 +1373,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
                 np.putmask(values, mask, np.nan)
             return values, na_count
 
-        if try_numeric:
+        if try_num_bool:
             try:
                 result = lib.maybe_convert_numeric(values, na_values, False)
                 na_count = isnull(result).sum()
@@ -1381,7 +1386,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
             if values.dtype == np.object_:
                 na_count = lib.sanitize_objects(values, na_values, False)
 
-        if result.dtype == np.object_ and try_numeric:
+        if result.dtype == np.object_ and try_num_bool:
             result = lib.maybe_convert_bool(values,
                                             true_values=self.true_values,
                                             false_values=self.false_values)
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index 4d796f00eec91..a0a3b43279475 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -5,17 +5,12 @@
 for all of the parsers defined in parsers.py
 """
 
-from datetime import datetime
-
-import nose
-
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
 
-from pandas.lib import Timestamp
 from pandas import DataFrame, Series, Index, MultiIndex, Categorical
-from pandas.compat import parse_date, StringIO, lmap
+from pandas.compat import StringIO
 from pandas.types.dtypes import CategoricalDtype
 
 
@@ -30,8 +25,12 @@ def test_passing_dtype(self):
 
             # see gh-3795: passing 'str' as the dtype
             result = self.read_csv(path, dtype=str, index_col=0)
-            tm.assert_series_equal(result.dtypes, Series(
-                {'A': 'object', 'B': 'object'}))
+            expected = df.astype(str)
+            tm.assert_frame_equal(result, expected)
+
+            # for parsing, interpret object as str
+            result = self.read_csv(path, dtype=object, index_col=0)
+            tm.assert_frame_equal(result, expected)
 
             # we expect all object columns, so need to
             # convert to test for equivalence

From 26f42c2c43366da2fb9f6f7af4fb2b43fb2b081f Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 25 Sep 2016 10:24:55 -0500
Subject: [PATCH 14/23] float precision...

---
 pandas/io/tests/parser/c_parser_only.py | 3 +--
 pandas/io/tests/parser/dtypes.py        | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index d8926855ddca7..2f2a3ab507f8f 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -12,10 +12,9 @@
 
 import pandas as pd
 import pandas.util.testing as tm
-from pandas import DataFrame, Series, Index, MultiIndex, Categorical
+from pandas import DataFrame
 from pandas import compat
 from pandas.compat import StringIO, range, lrange
-from pandas.types.dtypes import CategoricalDtype
 
 
 class CParserTests(object):
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index a0a3b43279475..cf37dd97b9fc9 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -21,11 +21,11 @@ def test_passing_dtype(self):
             'AB'), index=['1A', '1B', '1C', '1D', '1E'])
 
         with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
-            df.to_csv(path)
+            df.to_csv(path, float_format='%.12f')
 
             # see gh-3795: passing 'str' as the dtype
             result = self.read_csv(path, dtype=str, index_col=0)
-            expected = df.astype(str)
+            expected = df.applymap(lambda x: '%.12f' % (x,))
             tm.assert_frame_equal(result, expected)
 
             # for parsing, interpret object as str

From 7fbe0a3dc53a3121fdad6b12e14718ca48b20af6 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Mon, 26 Sep 2016 19:29:15 -0500
Subject: [PATCH 15/23] float precision fix

---
 pandas/io/tests/parser/dtypes.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index cf37dd97b9fc9..cba293965e56b 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -17,15 +17,15 @@
 class DtypeTests(object):
     def test_passing_dtype(self):
         # see gh-6607
-        df = DataFrame(np.random.rand(5, 2), columns=list(
+        df = DataFrame(np.random.rand(5, 2).round(4), columns=list(
             'AB'), index=['1A', '1B', '1C', '1D', '1E'])
 
         with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
-            df.to_csv(path, float_format='%.12f')
+            df.to_csv(path)
 
             # see gh-3795: passing 'str' as the dtype
             result = self.read_csv(path, dtype=str, index_col=0)
-            expected = df.applymap(lambda x: '%.12f' % (x,))
+            expected = df.astype(str)
             tm.assert_frame_equal(result, expected)
 
             # for parsing, interpret object as str

From 08315b81883e8a46c20c6290eb56317c925ba7d8 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 30 Oct 2016 16:33:30 -0500
Subject: [PATCH 16/23] add docs; test for conv cast

---
 doc/source/io.rst                |  9 ++--
 doc/source/whatsnew/v0.20.0.txt  |  9 ++++
 pandas/io/parsers.py             | 93 +++++++++++++++++++-------------
 pandas/io/tests/parser/dtypes.py | 10 ++++
 4 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index ee319092c6dd5..03210ce3231b9 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -157,6 +157,9 @@ dtype : Type name or dict of column -> type, default ``None``
   Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}``
   (unsupported with ``engine='python'``). Use `str` or `object` to preserve and
   not interpret dtype.
+
+  .. versionadded:: 0.20.0 support for the Python parser.
+
 engine : {``'c'``, ``'python'``}
   Parser engine to use. The C engine is faster while the python engine is
   currently more feature-complete.
@@ -473,10 +476,8 @@ However, if you wanted for all the data to be coerced, no matter the type, then
 using the ``converters`` argument of :func:`~pandas.read_csv` would certainly be
 worth trying.
 
-.. note::
-    The ``dtype`` option is currently only supported by the C engine.
-    Specifying ``dtype`` with ``engine`` other than 'c' raises a
-    ``ValueError``.
+  .. versionadded:: 0.20.0 support for the Python parser.
+     The ``dtype`` option is supported by the 'python' engine
 
 .. note::
    In some cases, reading in abnormal data with columns containing mixed dtypes
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 65b62601c7022..ce36d4cf53601 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -32,6 +32,15 @@ Other enhancements
 
 - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
 
+- The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
+ is now supported with the ``'python'`` engine.  See the :ref:`io docs <io.dtypes>` for more information.
+
+.. ipython:: python
+
+   from io import StringIO
+   data = "a,b\n1,2\n3,4"
+   pd.read_csv(StringIO(data), engine='python').dtypes
+   pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
 
 .. _whatsnew_0200.api_breaking:
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index b4e203f9d0236..d7a300ef7095a 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -116,8 +116,11 @@
 dtype : Type name or dict of column -> type, default None
     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
     Use `str` or `object` to preserve and not interpret dtype.
-    If converters are specified, they will be applied AFTER
-    dtype conversion.
+    If converters are specified, they will be applied INSTEAD
+    of dtype conversion.
+
+  .. versionadded:: 0.20.0 support for the Python parser.
+
 %s
 converters : dict, default None
     Dict of functions for converting values in certain columns. Keys can either
@@ -1293,20 +1296,6 @@ def _agg_index(self, index, try_parse_dates=True):
 
         return index
 
-    def _apply_converter(self, values, conv_f, na_values, col_na_values,
-                         col_na_fvalues):
-        """ apply converter function to values, respecting NAs """
-        try:
-            values = lib.map_infer(values, conv_f)
-        except ValueError:
-            mask = lib.ismember(values, na_values).view(np.uint8)
-            values = lib.map_infer_mask(values, conv_f, mask)
-
-        cvals, na_count = self._infer_types(
-            values, set(col_na_values) | col_na_fvalues,
-            try_num_bool=False)
-        return cvals, na_count
-
     def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                              converters=None, dtypes=None):
         result = {}
@@ -1324,45 +1313,58 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
             else:
                 col_na_values, col_na_fvalues = set(), set()
 
-            if conv_f is not None and cast_type is None:
-                # if type is not specified, apply the conversion first, without
-                # inference
-                cvals, na_count = self._apply_converter(
-                    values, conv_f, na_values,
-                    col_na_values, col_na_fvalues)
+            if conv_f is not None:
+                # conv_f applied to data before inference
+                # dtype isn't used if a converted specified
+                try:
+                    values = lib.map_infer(values, conv_f)
+                except ValueError:
+                    mask = lib.ismember(values, na_values).view(np.uint8)
+                    values = lib.map_infer_mask(values, conv_f, mask)
+
+                cvals, na_count = self._infer_types(
+                    values, set(col_na_values) | col_na_fvalues,
+                    try_num_bool=False)
             else:
-                try_num_bool = True
-                if cast_type and is_object_dtype(cast_type):
-                    # skip inference if specified dtype is object
-                    try_num_bool = False
+                # skip inference if specified dtype is object
+                try_num_bool = not (cast_type and is_object_dtype(cast_type))
 
                 # general type inference and conversion
                 cvals, na_count = self._infer_types(
                     values, set(col_na_values) | col_na_fvalues,
                     try_num_bool)
 
+                # type specified in dtype param
+                if cast_type and not is_dtype_equal(cvals, cast_type):
+                    cvals = self._cast_types(cvals, cast_type, c)
+
             if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
                 cvals = lib.downcast_int64(
                     cvals, _parser.na_values,
                     self.use_unsigned)
 
-            if cast_type and not is_dtype_equal(cvals, cast_type):
-                # type specificed in dtype param
-
-                cvals = self._cast_types(cvals, cast_type, c)
-                # for consistency with c-parser, if a converter and dtype are
-                # specified, apply the converter last
-                if conv_f is not None:
-                    values, na_count = self._apply_converter(
-                        values, conv_f, na_values,
-                        col_na_values, col_na_fvalues)
-
             result[c] = cvals
             if verbose and na_count:
                 print('Filled %d NA values in column %s' % (na_count, str(c)))
         return result
 
     def _infer_types(self, values, na_values, try_num_bool=True):
+        """
+        Infer types of values, possibly casting
+
+        Parameters
+        ----------
+        values : ndarray
+        na_values : set
+        try_num_bool : bool, default True
+           try to cast values to numeric (first preference) or boolean
+
+        Returns
+        -------
+        converted : ndarray
+        na_count : int
+        """
+
         na_count = 0
         if issubclass(values.dtype.type, (np.number, np.bool_)):
             mask = lib.ismember(values, na_values)
@@ -1394,7 +1396,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
         return result, na_count
 
     def _cast_types(self, values, cast_type, column):
-        """ cast column to type specified in dtypes= param """
+        """
+        Cast values to specified type
+
+        Parameters
+        ----------
+        values : ndarray
+        cast_type : string or np.dtype
+           dtype to cast values to
+        column : string
+            column name - used only for error reporting
+
+        Returns
+        -------
+        converted : ndarray
+        """
+
         if is_categorical_dtype(cast_type):
             # XXX this is for consistency with
             # c-parser which parses all categories
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index cba293965e56b..510efac80ee78 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -214,3 +214,13 @@ def test_raise_on_passed_int_dtype_with_nas(self):
         self.assertRaises(ValueError, self.read_csv, StringIO(data),
                           sep=",", skipinitialspace=True,
                           dtype={'DOY': np.int64})
+
+    def test_dtype_with_converter(self):
+        data = """a,b
+1.1,2.2
+1.2,2.3"""
+        result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
+                               converters={'a': lambda x: str(x)})
+        # dtype spec ignored if converted specified
+        expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
+        tm.assert_frame_equal(result, expected)

From 810e750e2c19c4abd6ddd2253636e519cda6fee1 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sat, 5 Nov 2016 12:22:48 -0500
Subject: [PATCH 17/23] Add warning if both converter and dtype specified

---
 pandas/io/parsers.py             |  7 +++-
 pandas/io/tests/parser/dtypes.py |  6 ++--
 pandas/parser.pyx                | 58 +++++++++++++++++---------------
 3 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index d7a300ef7095a..3f6a2e53343f3 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1315,7 +1315,12 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
 
             if conv_f is not None:
                 # conv_f applied to data before inference
-                # dtype isn't used if a converted specified
+                if cast_type is not None:
+                    warnings.warn(("Both a converter and dtype were specified "
+                                   "for column {0} - only the converter will "
+                                   "be used").format(c), ParserWarning,
+                                  stacklevel=7)
+
                 try:
                     values = lib.map_infer(values, conv_f)
                 except ValueError:
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index 510efac80ee78..a2163aaf31ea8 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -12,6 +12,7 @@
 from pandas import DataFrame, Series, Index, MultiIndex, Categorical
 from pandas.compat import StringIO
 from pandas.types.dtypes import CategoricalDtype
+from pandas.io.common import ParserWarning
 
 
 class DtypeTests(object):
@@ -219,8 +220,9 @@ def test_dtype_with_converter(self):
         data = """a,b
 1.1,2.2
 1.2,2.3"""
-        result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
-                               converters={'a': lambda x: str(x)})
         # dtype spec ignored if converted specified
+        with tm.assert_produces_warning(ParserWarning):
+            result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
+                                converters={'a': lambda x: str(x)})
         expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 6b43dfbabc4a0..ca9b34c06f025 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -13,7 +13,7 @@ from cpython cimport (PyObject, PyBytes_FromString,
                       PyUnicode_Check, PyUnicode_AsUTF8String,
                       PyErr_Occurred, PyErr_Fetch)
 from cpython.ref cimport PyObject, Py_XDECREF
-from io.common import ParserError, DtypeWarning, EmptyDataError
+from io.common import ParserError, DtypeWarning, EmptyDataError, ParserWarning
 
 # Import CParserError as alias of ParserError for backwards compatibility.
 # Ultimately, we want to remove this import. See gh-12665 and gh-14479.
@@ -987,7 +987,7 @@ cdef class TextReader:
             Py_ssize_t i, nused
             kh_str_t *na_hashset = NULL
             int start, end
-            object name, na_flist
+            object name, na_flist, col_dtype = None
             bint na_filter = 0
             Py_ssize_t num_cols
 
@@ -1043,14 +1043,33 @@ cdef class TextReader:
             else:
                 na_filter = 0
 
+            col_dtype = None
+            if self.dtype is not None:
+                if isinstance(self.dtype, dict):
+                    if name in self.dtype:
+                        col_dtype = self.dtype[name]
+                    elif i in self.dtype:
+                        col_dtype = self.dtype[i]
+                else:
+                    if self.dtype.names:
+                        # structured array
+                        col_dtype = np.dtype(self.dtype.descr[i][1])
+                    else:
+                        col_dtype = self.dtype
+
             if conv:
+                if col_dtype is not None:
+                    warnings.warn(("Both a converter and dtype were specified "
+                                   "for column {0} - only the converter will "
+                                   "be used").format(name), ParserWarning,
+                                  stacklevel=5)
                 results[i] = _apply_converter(conv, self.parser, i, start, end,
                                               self.c_encoding)
                 continue
 
             # Should return as the desired dtype (inferred or specified)
             col_res, na_count = self._convert_tokens(
-                i, start, end, name, na_filter, na_hashset, na_flist)
+                i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
 
             if na_filter:
                 self._free_na_set(na_hashset)
@@ -1075,32 +1094,17 @@ cdef class TextReader:
     cdef inline _convert_tokens(self, Py_ssize_t i, int start, int end,
                                 object name, bint na_filter,
                                 kh_str_t *na_hashset,
-                                object na_flist):
-        cdef:
-            object col_dtype = None
-
-        if self.dtype is not None:
-            if isinstance(self.dtype, dict):
-                if name in self.dtype:
-                    col_dtype = self.dtype[name]
-                elif i in self.dtype:
-                    col_dtype = self.dtype[i]
-            else:
-                if self.dtype.names:
-                    # structured array
-                    col_dtype = np.dtype(self.dtype.descr[i][1])
-                else:
-                    col_dtype = self.dtype
+                                object na_flist, object col_dtype):
 
-            if col_dtype is not None:
-                col_res, na_count = self._convert_with_dtype(
-                    col_dtype, i, start, end, na_filter,
-                    1, na_hashset, na_flist)
+        if col_dtype is not None:
+            col_res, na_count = self._convert_with_dtype(
+                col_dtype, i, start, end, na_filter,
+                1, na_hashset, na_flist)
 
-                # Fallback on the parse (e.g. we requested int dtype,
-                # but its actually a float).
-                if col_res is not None:
-                    return col_res, na_count
+            # Fallback on the parse (e.g. we requested int dtype,
+            # but its actually a float).
+            if col_res is not None:
+                return col_res, na_count
 
         if i in self.noconvert:
             return self._string_convert(i, start, end, na_filter, na_hashset)

From 10f5be3516ab8e40c726e989107f8ed0bc90f228 Mon Sep 17 00:00:00 2001
From: Chris <cbartak@gmail.com>
Date: Sun, 13 Nov 2016 09:18:15 -0600
Subject: [PATCH 18/23] doc comments

---
 doc/source/whatsnew/v0.20.0.txt | 1 -
 pandas/io/parsers.py            | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index ce36d4cf53601..d6470d9e8fb52 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -37,7 +37,6 @@ Other enhancements
 
 .. ipython:: python
 
-   from io import StringIO
    data = "a,b\n1,2\n3,4"
    pd.read_csv(StringIO(data), engine='python').dtypes
    pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 3f6a2e53343f3..0736535ce2d67 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -118,9 +118,6 @@
     Use `str` or `object` to preserve and not interpret dtype.
     If converters are specified, they will be applied INSTEAD
     of dtype conversion.
-
-  .. versionadded:: 0.20.0 support for the Python parser.
-
 %s
 converters : dict, default None
     Dict of functions for converting values in certain columns. Keys can either

From b2f7b94457eaa603137e8ff8a6e77f5b4319637c Mon Sep 17 00:00:00 2001
From: Christopher Bartak <cbartak@gmail.com>
Date: Wed, 23 Nov 2016 08:39:10 -0600
Subject: [PATCH 19/23] doc updates

---
 doc/source/io.rst               |  1 +
 doc/source/whatsnew/v0.20.0.txt | 17 +++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 03210ce3231b9..b1c151def26af 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -477,6 +477,7 @@ using the ``converters`` argument of :func:`~pandas.read_csv` would certainly be
 worth trying.
 
   .. versionadded:: 0.20.0 support for the Python parser.
+
      The ``dtype`` option is supported by the 'python' engine
 
 .. note::
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index d6470d9e8fb52..5a51887d4f983 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -22,8 +22,17 @@ New features
 ~~~~~~~~~~~~
 
 
+``read_csv`` supports ``dtype`` keyword for python engine
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
+ is now supported with the ``'python'`` engine.  See the :ref:`io docs <io.dtypes>` for more information.
 
+.. ipython:: python
+
+   data = "a,b\n1,2\n3,4"
+   pd.read_csv(StringIO(data), engine='python').dtypes
+   pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
 
 .. _whatsnew_0200.enhancements.other:
 
@@ -32,14 +41,6 @@ Other enhancements
 
 - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
 
-- The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
- is now supported with the ``'python'`` engine.  See the :ref:`io docs <io.dtypes>` for more information.
-
-.. ipython:: python
-
-   data = "a,b\n1,2\n3,4"
-   pd.read_csv(StringIO(data), engine='python').dtypes
-   pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
 
 .. _whatsnew_0200.api_breaking:
 

From be2b43bf3bf998953a2a7dc3e30c285bcae92b70 Mon Sep 17 00:00:00 2001
From: Christopher Bartak <cbartak@gmail.com>
Date: Wed, 23 Nov 2016 09:34:53 -0600
Subject: [PATCH 20/23] lint

---
 pandas/io/tests/parser/dtypes.py | 2 +-
 pandas/parser.pyx                | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index a2163aaf31ea8..058bfea7ae330 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -223,6 +223,6 @@ def test_dtype_with_converter(self):
         # dtype spec ignored if converted specified
         with tm.assert_produces_warning(ParserWarning):
             result = self.read_csv(StringIO(data), dtype={'a': 'i8'},
-                                converters={'a': lambda x: str(x)})
+                                   converters={'a': lambda x: str(x)})
         expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index ca9b34c06f025..6760e822960f1 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -1069,7 +1069,8 @@ cdef class TextReader:
 
             # Should return as the desired dtype (inferred or specified)
             col_res, na_count = self._convert_tokens(
-                i, start, end, name, na_filter, na_hashset, na_flist, col_dtype)
+                i, start, end, name, na_filter, na_hashset,
+                na_flist, col_dtype)
 
             if na_filter:
                 self._free_na_set(na_hashset)

From 47669d3c8c379640a82c7f65341c80ecf540e743 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 24 Nov 2016 22:24:36 +0100
Subject: [PATCH 21/23] TST: move empty dtype tests from c_parser_only to dtype
 tests

---
 pandas/io/tests/parser/c_parser_only.py | 46 -------------------------
 pandas/io/tests/parser/dtypes.py        | 46 +++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index 2f2a3ab507f8f..c781b0549ee60 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -371,49 +371,3 @@ def test_internal_null_byte(self):
 
         result = self.read_csv(StringIO(data), names=names)
         tm.assert_frame_equal(result, expected)
-
-    def test_empty_dtype(self):
-        # see gh-14712
-        data = 'a,b'
-
-        expected = pd.DataFrame(columns=['a', 'b'], dtype=np.float64)
-        result = self.read_csv(StringIO(data), header=0, dtype=np.float64)
-        tm.assert_frame_equal(result, expected)
-
-        expected = pd.DataFrame({'a': pd.Categorical([]),
-                                 'b': pd.Categorical([])},
-                                index=[])
-        result = self.read_csv(StringIO(data), header=0,
-                               dtype='category')
-        tm.assert_frame_equal(result, expected)
-
-        expected = pd.DataFrame(columns=['a', 'b'], dtype='datetime64[ns]')
-        result = self.read_csv(StringIO(data), header=0,
-                               dtype='datetime64[ns]')
-        tm.assert_frame_equal(result, expected)
-
-        expected = pd.DataFrame({'a': pd.Series([], dtype='timedelta64[ns]'),
-                                 'b': pd.Series([], dtype='timedelta64[ns]')},
-                                index=[])
-        result = self.read_csv(StringIO(data), header=0,
-                               dtype='timedelta64[ns]')
-        tm.assert_frame_equal(result, expected)
-
-        expected = pd.DataFrame(columns=['a', 'b'])
-        expected['a'] = expected['a'].astype(np.float64)
-        result = self.read_csv(StringIO(data), header=0,
-                               dtype={'a': np.float64})
-        tm.assert_frame_equal(result, expected)
-
-        expected = pd.DataFrame(columns=['a', 'b'])
-        expected['a'] = expected['a'].astype(np.float64)
-        result = self.read_csv(StringIO(data), header=0,
-                               dtype={0: np.float64})
-        tm.assert_frame_equal(result, expected)
-
-        expected = pd.DataFrame(columns=['a', 'b'])
-        expected['a'] = expected['a'].astype(np.int32)
-        expected['b'] = expected['b'].astype(np.float64)
-        result = self.read_csv(StringIO(data), header=0,
-                               dtype={'a': np.int32, 1: np.float64})
-        tm.assert_frame_equal(result, expected)
diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index 058bfea7ae330..18c37b31f6480 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -226,3 +226,49 @@ def test_dtype_with_converter(self):
                                    converters={'a': lambda x: str(x)})
         expected = DataFrame({'a': ['1.1', '1.2'], 'b': [2.2, 2.3]})
         tm.assert_frame_equal(result, expected)
+
+    def test_empty_dtype(self):
+        # see gh-14712
+        data = 'a,b'
+
+        expected = pd.DataFrame(columns=['a', 'b'], dtype=np.float64)
+        result = self.read_csv(StringIO(data), header=0, dtype=np.float64)
+        tm.assert_frame_equal(result, expected)
+
+        expected = pd.DataFrame({'a': pd.Categorical([]),
+                                 'b': pd.Categorical([])},
+                                index=[])
+        result = self.read_csv(StringIO(data), header=0,
+                               dtype='category')
+        tm.assert_frame_equal(result, expected)
+
+        expected = pd.DataFrame(columns=['a', 'b'], dtype='datetime64[ns]')
+        result = self.read_csv(StringIO(data), header=0,
+                               dtype='datetime64[ns]')
+        tm.assert_frame_equal(result, expected)
+
+        expected = pd.DataFrame({'a': pd.Series([], dtype='timedelta64[ns]'),
+                                 'b': pd.Series([], dtype='timedelta64[ns]')},
+                                index=[])
+        result = self.read_csv(StringIO(data), header=0,
+                               dtype='timedelta64[ns]')
+        tm.assert_frame_equal(result, expected)
+
+        expected = pd.DataFrame(columns=['a', 'b'])
+        expected['a'] = expected['a'].astype(np.float64)
+        result = self.read_csv(StringIO(data), header=0,
+                               dtype={'a': np.float64})
+        tm.assert_frame_equal(result, expected)
+
+        expected = pd.DataFrame(columns=['a', 'b'])
+        expected['a'] = expected['a'].astype(np.float64)
+        result = self.read_csv(StringIO(data), header=0,
+                               dtype={0: np.float64})
+        tm.assert_frame_equal(result, expected)
+
+        expected = pd.DataFrame(columns=['a', 'b'])
+        expected['a'] = expected['a'].astype(np.int32)
+        expected['b'] = expected['b'].astype(np.float64)
+        result = self.read_csv(StringIO(data), header=0,
+                               dtype={'a': np.int32, 1: np.float64})
+        tm.assert_frame_equal(result, expected)

From 1706b39ad64ce75896bb680606fcd3aa4de3fffe Mon Sep 17 00:00:00 2001
From: Christopher Bartak <cbartak@gmail.com>
Date: Fri, 25 Nov 2016 09:15:53 -0600
Subject: [PATCH 22/23] issue ref

---
 doc/source/whatsnew/v0.20.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 5a51887d4f983..6e3559bee728d 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -26,7 +26,7 @@ New features
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
- is now supported with the ``'python'`` engine.  See the :ref:`io docs <io.dtypes>` for more information.
+is now supported with the ``'python'`` engine (:issue:`14295`). See the :ref:`io docs <io.dtypes>` for more information.
 
 .. ipython:: python
 

From 3abb0bd6e46e78557c1fd480ac173881dc5d530b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 25 Nov 2016 21:36:01 +0100
Subject: [PATCH 23/23] fix merge conflict leftover

---
 pandas/io/tests/parser/dtypes.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py
index c0189050ee90b..18c37b31f6480 100644
--- a/pandas/io/tests/parser/dtypes.py
+++ b/pandas/io/tests/parser/dtypes.py
@@ -272,4 +272,3 @@ def test_empty_dtype(self):
         result = self.read_csv(StringIO(data), header=0,
                                dtype={'a': np.int32, 1: np.float64})
         tm.assert_frame_equal(result, expected)
->>>>>>> 47669d3c8c379640a82c7f65341c80ecf540e743