From d98c6fd6694f12e914c4869345ab4fc4c8942315 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 19 Dec 2020 22:22:12 +0100
Subject: [PATCH 01/16] ENH: Raise ParserWarning when length of names does not
 match length of data

---
 doc/source/whatsnew/v1.3.0.rst        |  1 +
 pandas/_libs/parsers.pyx              |  9 ++++++++-
 pandas/io/parsers.py                  |  8 ++++++++
 pandas/tests/io/parser/test_common.py | 10 +++++++---
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 7671962018144..98bfa5e912a52 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -41,6 +41,7 @@ Other enhancements
 
 - Added :meth:`MultiIndex.dtypes` (:issue:`37062`)
 - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
+- :func:`read_csv` now raising ``ParserWarning`` if length of header or given names does not match length of data when usecols is not specified (:issue:`21768`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 4995252d7aafd..fe932de95cdce 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -729,7 +729,9 @@ cdef class TextReader:
                 field_count = max(field_count, len(self.names))
 
             passed_count = len(header[0])
-
+            print(self.allow_leading_cols)
+            print(passed_count)
+            print(field_count)
             if (self.has_usecols and self.allow_leading_cols and
                     not callable(self.usecols)):
                 nuse = len(self.usecols)
@@ -743,6 +745,11 @@ cdef class TextReader:
             # oh boy, #2442, #2981
             elif self.allow_leading_cols and passed_count < field_count:
                 self.leading_cols = field_count - passed_count
+            elif not self.allow_leading_cols and passed_count < field_count:
+                warnings.warn(
+                    "Length of header or names does not match length of data. This leads "
+                    "to a loss of data with index_col=False.", ParserWarning, stacklevel=6,
+                )
 
         return header, field_count, unnamed_cols
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index d670821c98520..7bb12d9f37b83 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2505,6 +2505,14 @@ def _exclude_implicit_index(self, alldata):
         if self._col_indices is not None and len(names) != len(self._col_indices):
             names = [names[i] for i in sorted(self._col_indices)]
 
+        if not self.index_col and len(names) != len(alldata) and names:
+            warnings.warn(
+                "Length of header or names does not match length of data. This leads "
+                "to a loss of data with index_col=False.",
+                ParserWarning,
+                stacklevel=6,
+            )
+
         return {name: alldata[i + offset] for i, name in enumerate(names)}, names
 
     # legacy
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index df20db0c7ee84..29e0d3b15d16e 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -15,7 +15,7 @@
 import pytest
 
 from pandas._libs.tslib import Timestamp
-from pandas.errors import DtypeWarning, EmptyDataError, ParserError
+from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning
 import pandas.util._test_decorators as td
 
 from pandas import DataFrame, Index, MultiIndex, Series, compat, concat, option_context
@@ -1062,6 +1062,7 @@ def test_skip_initial_space(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Lenght of header:pandas.errors.ParserWarning")
 def test_trailing_delimiters(all_parsers):
     # see gh-2442
     data = """A,B,C
@@ -1069,7 +1070,9 @@ def test_trailing_delimiters(all_parsers):
 4,5,6,
 7,8,9,"""
     parser = all_parsers
-    result = parser.read_csv(StringIO(data), index_col=False)
+
+    with tm.assert_produces_warning(ParserWarning):
+        result = parser.read_csv(StringIO(data), index_col=False)
 
     expected = DataFrame({"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]})
     tm.assert_frame_equal(result, expected)
@@ -2178,7 +2181,8 @@ def test_no_header_two_extra_columns(all_parsers):
     ref = DataFrame([["foo", "bar", "baz"]], columns=column_names)
     stream = StringIO("foo,bar,baz,bam,blah")
     parser = all_parsers
-    df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
+    with tm.assert_produces_warning(ParserWarning):
+        df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
     tm.assert_frame_equal(df, ref)
 
 

From 26b07b2db2413c28ce7410625ed883fe850dc8ff Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 19 Dec 2020 22:31:51 +0100
Subject: [PATCH 02/16] Fix bugs from strg+z

---
 doc/source/whatsnew/v1.3.0.rst        | 2 +-
 pandas/_libs/parsers.pyx              | 4 +---
 pandas/tests/io/parser/test_common.py | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 98bfa5e912a52..f647a1acc357e 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -41,7 +41,7 @@ Other enhancements
 
 - Added :meth:`MultiIndex.dtypes` (:issue:`37062`)
 - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
-- :func:`read_csv` now raising ``ParserWarning`` if length of header or given names does not match length of data when usecols is not specified (:issue:`21768`)
+- :func:`read_csv` now raises ``ParserWarning`` if the length of the header or given names does not match the length of the data when ``usecols`` is not specified (:issue:`21768`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index fe932de95cdce..18a188261d750 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -729,9 +729,7 @@ cdef class TextReader:
                 field_count = max(field_count, len(self.names))
 
             passed_count = len(header[0])
-            print(self.allow_leading_cols)
-            print(passed_count)
-            print(field_count)
+
             if (self.has_usecols and self.allow_leading_cols and
                     not callable(self.usecols)):
                 nuse = len(self.usecols)
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 29e0d3b15d16e..97ecce07e80c5 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -1062,7 +1062,6 @@ def test_skip_initial_space(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Lenght of header:pandas.errors.ParserWarning")
 def test_trailing_delimiters(all_parsers):
     # see gh-2442
     data = """A,B,C

From 7dd3f1b1e1c7baa6a4cdbdab44542a261f31aa21 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 19 Dec 2020 22:33:01 +0100
Subject: [PATCH 03/16] Refactor code

---
 pandas/_libs/parsers.pyx | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 18a188261d750..3e0bcf5a7b0b5 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -745,8 +745,10 @@ cdef class TextReader:
                 self.leading_cols = field_count - passed_count
             elif not self.allow_leading_cols and passed_count < field_count:
                 warnings.warn(
-                    "Length of header or names does not match length of data. This leads "
-                    "to a loss of data with index_col=False.", ParserWarning, stacklevel=6,
+                    "Length of header or names does not match length of data. This "
+                    "leads to a loss of data with index_col=False.",
+                    ParserWarning,
+                    stacklevel=6,
                 )
 
         return header, field_count, unnamed_cols

From 70d5c1c0834811529c75de9da834f6d40913dea1 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 19 Dec 2020 22:33:55 +0100
Subject: [PATCH 04/16] Refactor if else

---
 pandas/_libs/parsers.pyx | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 3e0bcf5a7b0b5..6bbfcb9c31ee4 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -741,15 +741,16 @@ cdef class TextReader:
                     raise ValueError('Number of passed names did not match number of '
                                      'header fields in the file')
             # oh boy, #2442, #2981
-            elif self.allow_leading_cols and passed_count < field_count:
-                self.leading_cols = field_count - passed_count
-            elif not self.allow_leading_cols and passed_count < field_count:
-                warnings.warn(
-                    "Length of header or names does not match length of data. This "
-                    "leads to a loss of data with index_col=False.",
-                    ParserWarning,
-                    stacklevel=6,
-                )
+            if passed_count < field_count:
+                if self.allow_leading_cols:
+                    self.leading_cols = field_count - passed_count
+                else:
+                    warnings.warn(
+                        "Length of header or names does not match length of data. This "
+                        "leads to a loss of data with index_col=False.",
+                        ParserWarning,
+                        stacklevel=6,
+                    )
 
         return header, field_count, unnamed_cols
 

From 76abd33d662d03011b76919ce5933dbe32e138bc Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 19 Dec 2020 23:16:44 +0100
Subject: [PATCH 05/16] Add okwarning

---
 doc/source/user_guide/io.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index b04abf512fbeb..aae9fd73cc361 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -753,6 +753,7 @@ the end of each data line, confusing the parser. To explicitly disable the
 index column inference and discard the last column, pass ``index_col=False``:
 
 .. ipython:: python
+    :okwarning:
 
     data = "a,b,c\n4,apple,bat,\n8,orange,cow,"
     print(data)

From 5b688f7366e13ddb3cea29bc5912d6b3185b2616 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sun, 3 Jan 2021 20:59:08 +0100
Subject: [PATCH 06/16] Allow trailing commas

---
 pandas/_libs/parsers.pyx              | 12 ++--------
 pandas/io/parsers.py                  | 32 +++++++++++++++++++++------
 pandas/tests/io/parser/test_common.py |  4 +---
 3 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 6bbfcb9c31ee4..4995252d7aafd 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -741,16 +741,8 @@ cdef class TextReader:
                     raise ValueError('Number of passed names did not match number of '
                                      'header fields in the file')
             # oh boy, #2442, #2981
-            if passed_count < field_count:
-                if self.allow_leading_cols:
-                    self.leading_cols = field_count - passed_count
-                else:
-                    warnings.warn(
-                        "Length of header or names does not match length of data. This "
-                        "leads to a loss of data with index_col=False.",
-                        ParserWarning,
-                        stacklevel=6,
-                    )
+            elif self.allow_leading_cols and passed_count < field_count:
+                self.leading_cols = field_count - passed_count
 
         return header, field_count, unnamed_cols
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 17e6b41ba4052..c139869a07a46 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1844,6 +1844,28 @@ def _do_date_conversions(self, names, data):
 
         return names, data
 
+    def _check_data_length(self, columns: List[str], data: List[np.ndarray]):
+        """Checks if length of data is equal to length of column names. One set of
+        trailing commas is allowed.
+
+        Parameters
+        ----------
+        columns: list of column names
+        data: list of array-likes containing the data column-wise
+
+        """
+        if not self.index_col and len(columns) != len(data) and columns:
+            if len(columns) == len(data) - 1 and np.all(
+                (data[-1] == "") | isna(data[-1])
+            ):
+                return
+            warnings.warn(
+                "Length of header or names does not match length of data. This leads "
+                "to a loss of data with index_col=False.",
+                ParserWarning,
+                stacklevel=6,
+            )
+
 
 class CParserWrapper(ParserBase):
     def __init__(self, src: FilePathOrBuffer, **kwds):
@@ -2128,6 +2150,8 @@ def read(self, nrows=None):
 
             # columns as list
             alldata = [x[1] for x in data]
+            if self.usecols is None:
+                self._check_data_length(names, alldata)
 
             data = {k: v for k, (i, v) in zip(names, data)}
 
@@ -2511,13 +2535,7 @@ def _exclude_implicit_index(self, alldata):
         if self._col_indices is not None and len(names) != len(self._col_indices):
             names = [names[i] for i in sorted(self._col_indices)]
 
-        if not self.index_col and len(names) != len(alldata) and names:
-            warnings.warn(
-                "Length of header or names does not match length of data. This leads "
-                "to a loss of data with index_col=False.",
-                ParserWarning,
-                stacklevel=6,
-            )
+        self._check_data_length(names, alldata)
 
         return {name: alldata[i + offset] for i, name in enumerate(names)}, names
 
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index b4032aec737a8..594440890bd7a 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -1071,9 +1071,7 @@ def test_trailing_delimiters(all_parsers):
 7,8,9,"""
     parser = all_parsers
 
-    with tm.assert_produces_warning(ParserWarning):
-        result = parser.read_csv(StringIO(data), index_col=False)
-
+    result = parser.read_csv(StringIO(data), index_col=False)
     expected = DataFrame({"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]})
     tm.assert_frame_equal(result, expected)
 

From 56cdd189f15ed22da60bb632b983eb795e9e2d28 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Mon, 4 Jan 2021 01:11:02 +0100
Subject: [PATCH 07/16] Fix dtype bug

---
 pandas/io/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index c139869a07a46..59485b8d4a0eb 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1856,7 +1856,7 @@ def _check_data_length(self, columns: List[str], data: List[np.ndarray]):
         """
         if not self.index_col and len(columns) != len(data) and columns:
             if len(columns) == len(data) - 1 and np.all(
-                (data[-1] == "") | isna(data[-1])
+                (np.isin(data[-1], [""])) | isna(data[-1])
             ):
                 return
             warnings.warn(

From ac15a3086c5b1110c96a71a8aa93402607656540 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Mon, 4 Jan 2021 14:58:49 +0100
Subject: [PATCH 08/16] Fix npdev bug

---
 pandas/io/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 59485b8d4a0eb..8eef5ba6bc48b 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1856,7 +1856,7 @@ def _check_data_length(self, columns: List[str], data: List[np.ndarray]):
         """
         if not self.index_col and len(columns) != len(data) and columns:
             if len(columns) == len(data) - 1 and np.all(
-                (np.isin(data[-1], [""])) | isna(data[-1])
+                (is_object_dtype(data[-1]) and data[-1] == "") | isna(data[-1])
             ):
                 return
             warnings.warn(

From 387b5fa1484575c543b69d038bd0fd8af63722c6 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Mon, 4 Jan 2021 15:00:45 +0100
Subject: [PATCH 09/16] Add missing init file

---
 pandas/tests/io/parser/common/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 pandas/tests/io/parser/common/__init__.py

diff --git a/pandas/tests/io/parser/common/__init__.py b/pandas/tests/io/parser/common/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d

From 53cac930a63d084c3088ca46cb0dc2968f7579a1 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Mon, 4 Jan 2021 15:01:25 +0100
Subject: [PATCH 10/16] Remove empty file

---
 pandas/tests/io/parser/test_common.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 pandas/tests/io/parser/test_common.py

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000

From 5d142febef76ee2cc16febcdd58693adc568388a Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Mon, 4 Jan 2021 15:02:42 +0100
Subject: [PATCH 11/16] Add warning

---
 pandas/tests/io/parser/common/test_common_basic.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index 4fd754bf79ba2..fa53215cf11eb 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -11,7 +11,7 @@
 import pytest
 
 from pandas._libs.tslib import Timestamp
-from pandas.errors import EmptyDataError, ParserError
+from pandas.errors import EmptyDataError, ParserError, ParserWarning
 
 from pandas import DataFrame, Index, Series, compat
 import pandas._testing as tm
@@ -660,7 +660,8 @@ def test_no_header_two_extra_columns(all_parsers):
     ref = DataFrame([["foo", "bar", "baz"]], columns=column_names)
     stream = StringIO("foo,bar,baz,bam,blah")
     parser = all_parsers
-    df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
+    with tm.assert_produces_warning(ParserWarning):
+        df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
     tm.assert_frame_equal(df, ref)
 
 

From b21b7955ef48085a7a284b7578a6192b0b754377 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 19 Feb 2021 21:01:09 +0100
Subject: [PATCH 12/16] Merge master

---
 pandas/io/parsers.py                  |  0
 pandas/io/parsers/base_parser.py      | 20 ++++++++++++++++++++
 pandas/io/parsers/c_parser_wrapper.py |  2 ++
 pandas/io/parsers/python_parser.py    |  2 ++
 4 files changed, 24 insertions(+)
 delete mode 100644 pandas/io/parsers.py

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 2d17978b60327..23ac2e7e4db61 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -767,6 +767,26 @@ def _do_date_conversions(self, names, data):
 
         return names, data
 
+    def _check_data_length(self, columns: List[str], data: List[np.ndarray]):
+        """Checks if length of data is equal to length of column names. One set of
+        trailing commas is allowed.
+        Parameters
+        ----------
+        columns: list of column names
+        data: list of array-likes containing the data column-wise
+        """
+        if not self.index_col and len(columns) != len(data) and columns:
+            if len(columns) == len(data) - 1 and np.all(
+                (is_object_dtype(data[-1]) and data[-1] == "") | isna(data[-1])
+            ):
+                return
+            warnings.warn(
+                "Length of header or names does not match length of data. This leads "
+                "to a loss of data with index_col=False.",
+                ParserWarning,
+                stacklevel=6,
+            )
+
     def _evaluate_usecols(self, usecols, names):
         """
         Check whether or not the 'usecols' parameter
diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
index 135e093cdc1e0..4dea9e60238c6 100644
--- a/pandas/io/parsers/c_parser_wrapper.py
+++ b/pandas/io/parsers/c_parser_wrapper.py
@@ -234,6 +234,8 @@ def read(self, nrows=None):
 
             # columns as list
             alldata = [x[1] for x in data]
+            if self.usecols is None:
+                self._check_data_length(names, alldata)
 
             data = {k: v for k, (i, v) in zip(names, data)}
 
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 37f553c724c9e..dba9a8a9eccdf 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -292,6 +292,8 @@ def _exclude_implicit_index(self, alldata):
         if self._col_indices is not None and len(names) != len(self._col_indices):
             names = [names[i] for i in sorted(self._col_indices)]
 
+        self._check_data_length(names, alldata)
+
         return {name: alldata[i + offset] for i, name in enumerate(names)}, names
 
     # legacy

From eb771576132be42228ae6a6e677edc8ff111e698 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 21 Apr 2021 00:10:23 +0200
Subject: [PATCH 13/16] Fix typing

---
 pandas/io/parsers/base_parser.py                | 3 ++-
 pandas/tests/io/parser/common/test_chunksize.py | 5 +----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 9911233a14f65..ec729cce550e3 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -25,6 +25,7 @@
 from pandas._libs.parsers import STR_NA_VALUES
 from pandas._libs.tslibs import parsing
 from pandas._typing import (
+    ArrayLike,
     DtypeArg,
     FilePathOrBuffer,
 )
@@ -778,7 +779,7 @@ def _do_date_conversions(self, names, data):
 
         return names, data
 
-    def _check_data_length(self, columns: List[str], data: List[np.ndarray]):
+    def _check_data_length(self, columns: List[str], data: List[ArrayLike]):
         """Checks if length of data is equal to length of column names. One set of
         trailing commas is allowed.
         Parameters
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
index 6d5aeaa713687..4bc3f3c38f506 100644
--- a/pandas/tests/io/parser/common/test_chunksize.py
+++ b/pandas/tests/io/parser/common/test_chunksize.py
@@ -143,10 +143,7 @@ def test_read_chunksize_jagged_names(all_parsers):
     parser = all_parsers
     data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)])
 
-    # error: List item 0 has incompatible type "float"; expected "int"
-    expected = DataFrame(
-        [[0] + [np.nan] * 9] * 7 + [[0] * 10]  # type: ignore[list-item]
-    )
+    expected = DataFrame([[0] + [np.nan] * 9] * 7 + [[0] * 10])
     with parser.read_csv(StringIO(data), names=range(10), chunksize=4) as reader:
         result = concat(reader)
     tm.assert_frame_equal(result, expected)

From 16faf35a92be6e3825c386e8ab5d6bd4df11839a Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 14 May 2021 23:56:26 +0200
Subject: [PATCH 14/16] Change test

---
 pandas/tests/io/parser/usecols/test_usecols_basic.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
index b86dc5ef85fc6..16649be5b8a58 100644
--- a/pandas/tests/io/parser/usecols/test_usecols_basic.py
+++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -383,7 +383,9 @@ def test_usecols_indices_out_of_bounds(all_parsers, names):
 a,b
 1,2
     """
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+    with tm.assert_produces_warning(
+        FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
+    ):
         result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
     expected = DataFrame({"a": [1], "b": [None]})
     if names is None and parser.engine == "python":

From 4b3f63a763d5cb9cded5aa004c57dba910b630f4 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 15 May 2021 00:40:48 +0200
Subject: [PATCH 15/16] Remove warning

---
 doc/source/user_guide/io.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index a89ae5a38d8a5..7f0cd613726dc 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -757,7 +757,6 @@ the end of each data line, confusing the parser. To explicitly disable the
 index column inference and discard the last column, pass ``index_col=False``:
 
 .. ipython:: python
-    :okwarning:
 
     data = "a,b,c\n4,apple,bat,\n8,orange,cow,"
     print(data)

From fa6fed09e1d472dc8ddf9962e75f7e594b47a200 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Mon, 24 May 2021 00:43:09 +0200
Subject: [PATCH 16/16] Address comments

---
 pandas/io/parsers/base_parser.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index bf955a724bb21..6031d84d2b8ec 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -793,13 +793,16 @@ def _do_date_conversions(self, names, data):
 
         return names, data
 
-    def _check_data_length(self, columns: List[str], data: List[ArrayLike]):
-        """Checks if length of data is equal to length of column names. One set of
-        trailing commas is allowed.
+    def _check_data_length(self, columns: list[str], data: list[ArrayLike]) -> None:
+        """Checks if length of data is equal to length of column names.
+
+        One set of trailing commas is allowed. If self.index_col is not False,
+        a ParserError has already been raised when the lengths do not match.
+
         Parameters
         ----------
         columns: list of column names
-        data: list of array-likes containing the data column-wise
+        data: list of array-likes containing the data column-wise.
         """
         if not self.index_col and len(columns) != len(data) and columns:
             if len(columns) == len(data) - 1 and np.all(