pandas-dev · gfyoung · May 29, 2016
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -349,6 +349,7 @@ Bug Fixes
 
 
 - Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`)
+- Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`)
 
 
 

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -2226,14 +2226,16 @@ def _get_index_name(self, columns):
         return index_name, orig_names, columns
 
     def _rows_to_cols(self, content):
-        zipped_content = list(lib.to_object_array(content).T)
-
         col_len = self.num_original_columns
-        zip_len = len(zipped_content)
 
         if self._implicit_index:
             col_len += len(self.index_col)
 
+        # see gh-13320
+        zipped_content = list(lib.to_object_array(
+            content, min_width=col_len).T)
+        zip_len = len(zipped_content)
+
         if self.skip_footer < 0:
             raise ValueError('skip footer cannot be negative')
 

diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
@@ -360,15 +360,6 @@ def test_raise_on_passed_int_dtype_with_nas(self):
                           sep=",", skipinitialspace=True,
                           dtype={'DOY': np.int64})
 
-    def test_na_trailing_columns(self):
-        data = """Date,Currenncy,Symbol,Type,Units,UnitPrice,Cost,Tax
-2012-03-14,USD,AAPL,BUY,1000
-2012-05-12,USD,SBUX,SELL,500"""
-
-        result = self.read_csv(StringIO(data))
-        self.assertEqual(result['Date'][1], '2012-05-12')
-        self.assertTrue(result['UnitPrice'].isnull().all())
-
     def test_parse_ragged_csv(self):
         data = """1,2,3
 1,2,3,4

diff --git a/pandas/io/tests/parser/na_values.py b/pandas/io/tests/parser/na_values.py
@@ -241,3 +241,12 @@ def test_na_values_na_filter_override(self):
                              columns=['A', 'B'])
         out = self.read_csv(StringIO(data), na_values=['B'], na_filter=False)
         tm.assert_frame_equal(out, expected)
+
+    def test_na_trailing_columns(self):
+        data = """Date,Currenncy,Symbol,Type,Units,UnitPrice,Cost,Tax
+2012-03-14,USD,AAPL,BUY,1000
+2012-05-12,USD,SBUX,SELL,500"""
+
+        result = self.read_csv(StringIO(data))
+        self.assertEqual(result['Date'][1], '2012-05-12')
+        self.assertTrue(result['UnitPrice'].isnull().all())
diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx
@@ -1132,15 +1132,32 @@ def map_infer(ndarray arr, object f, bint convert=1):
     return result
 
 
-def to_object_array(list rows):
+def to_object_array(list rows, int min_width=0):
+    """
+    Convert a list of lists into an object array.
+
+    Parameters
+    ----------
+    rows : 2-d array (N, K)
+        A list of lists to be converted into an array
+    min_width : int
+        The minimum width of the object array. If a list
+        in `rows` contains fewer than `width` elements,
+        the remaining elements in the corresponding row
+        will all be `NaN`.
+
+    Returns
+    -------
+    obj_array : numpy array of the object dtype
+    """
     cdef:
         Py_ssize_t i, j, n, k, tmp
         ndarray[object, ndim=2] result
         list row
 
     n = len(rows)
 
-    k = 0
+    k = min_width
     for i from 0 <= i < n:
         tmp = len(rows[i])
         if tmp > k:

diff --git a/pandas/tests/test_infer_and_convert.py b/pandas/tests/test_infer_and_convert.py
@@ -201,6 +201,23 @@ def test_to_object_array_tuples(self):
         except ImportError:
             pass
 
+    def test_to_object_array_width(self):
+        # see gh-13320
+        rows = [[1, 2, 3], [4, 5, 6]]
+
+        expected = np.array(rows, dtype=object)
+        out = lib.to_object_array(rows)
+        tm.assert_numpy_array_equal(out, expected)
+
+        expected = np.array(rows, dtype=object)
+        out = lib.to_object_array(rows, min_width=1)
+        tm.assert_numpy_array_equal(out, expected)
+
+        expected = np.array([[1, 2, 3, None, None],
+                             [4, 5, 6, None, None]], dtype=object)
+        out = lib.to_object_array(rows, min_width=5)
+        tm.assert_numpy_array_equal(out, expected)
+
     def test_object(self):
 
         # GH 7431
Original file line number	Diff line number	Diff line change
Expand Up		@@ -349,6 +349,7 @@ Bug Fixes


		- Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`)
		- Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`)



Expand Down