diff --git a/doc/source/io.rst b/doc/source/io.rst index b1c151def26af..f524d37d0de60 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1268,11 +1268,22 @@ is whitespace). df = pd.read_fwf('bar.csv', header=None, index_col=0) df +.. versionadded:: 0.20.0 + +``read_fwf`` supports the ``dtype`` parameter for specifying the types of +parsed columns when they should differ from the inferred types. + +.. ipython:: python + + pd.read_fwf('bar.csv', header=None, index_col=0).dtypes + pd.read_fwf('bar.csv', header=None, dtype={2: 'object'}).dtypes + .. ipython:: python :suppress: os.remove('bar.csv') + Indexes ''''''' diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ff086380fdb05..6fe0ad8092a03 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -34,6 +34,15 @@ The ``dtype`` keyword argument in the :func:`read_csv` function for specifying t pd.read_csv(StringIO(data), engine='python').dtypes pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes +The ``dtype`` keyword argument is also now supported in the :func:`read_fwf` function for parsing +fixed-width text files. + +.. ipython:: python + + data = "a b\n1 2\n3 4" + pd.read_fwf(StringIO(data)).dtypes + pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes + .. 
_whatsnew_0200.enhancements.other: Other enhancements diff --git a/pandas/io/tests/parser/test_read_fwf.py b/pandas/io/tests/parser/test_read_fwf.py index 11b10211650d6..42b1116280a1e 100644 --- a/pandas/io/tests/parser/test_read_fwf.py +++ b/pandas/io/tests/parser/test_read_fwf.py @@ -345,3 +345,23 @@ def test_variable_width_unicode(self): header=None, encoding='utf8') tm.assert_frame_equal(expected, read_fwf( BytesIO(test.encode('utf8')), header=None, encoding='utf8')) + + def test_dtype(self): + data = ''' a b c +1 2 3.2 +3 4 5.2 +''' + colspecs = [(0, 5), (5, 10), (10, None)] + result = pd.read_fwf(StringIO(data), colspecs=colspecs) + expected = pd.DataFrame({ + 'a': [1, 3], + 'b': [2, 4], + 'c': [3.2, 5.2]}, columns=['a', 'b', 'c']) + tm.assert_frame_equal(result, expected) + + expected['a'] = expected['a'].astype('float64') + expected['b'] = expected['b'].astype(str) + expected['c'] = expected['c'].astype('int32') + result = pd.read_fwf(StringIO(data), colspecs=colspecs, + dtype={'a': 'float64', 'b': str, 'c': 'int32'}) + tm.assert_frame_equal(result, expected)