Skip to content

BUG: read_csv with engine pyarrow parsing multiple date columns #50056

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ I/O
- Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
-
- Fixed issue where :func:`read_csv` would error when ``parse_dates`` was set to a list or dictionary with ``engine="pyarrow"`` (:issue:`47961`)

Period
^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
multi_index_named = False
frame.columns = self.names
# we only need the frame not the names
frame.columns, frame = self._do_date_conversions(frame.columns, frame)
_, frame = self._do_date_conversions(frame.columns, frame)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this give us back the frame with the column names already changed?

Copy link
Member Author

@lithomas1 lithomas1 Dec 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes — _do_date_conversions mutates the column names in the data_dict/frame as well.

I'm actually not sure why names is returned from this function at all; it may date from before dicts preserved insertion order.
EDIT: It's probably related to building a MultiIndex from the columns for the other engines. Since _do_date_conversions can always fix the frame directly, the returned names aren't needed for the pyarrow engine.

if self.index_col is not None:
for i, item in enumerate(self.index_col):
if is_integer(item):
Expand Down
15 changes: 9 additions & 6 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from enum import Enum
import itertools
from typing import (
TYPE_CHECKING,
Any,
Callable,
DefaultDict,
Expand Down Expand Up @@ -71,7 +70,11 @@
)
from pandas.core.dtypes.missing import isna

from pandas import StringDtype
from pandas import (
DataFrame,
StringDtype,
concat,
)
from pandas.core import algorithms
from pandas.core.arrays import (
ArrowExtensionArray,
Expand All @@ -89,9 +92,6 @@
from pandas.core.series import Series
from pandas.core.tools import datetimes as tools

if TYPE_CHECKING:
from pandas import DataFrame


class ParserBase:
class BadLineHandleMethod(Enum):
Expand Down Expand Up @@ -1264,7 +1264,10 @@ def _isindex(colspec):
new_cols.append(new_name)
date_cols.update(old_names)

data_dict.update(new_data)
if isinstance(data_dict, DataFrame):
data_dict = concat([DataFrame(new_data), data_dict], axis=1)
else:
data_dict.update(new_data)
new_cols.extend(columns)

if not keep_date_col:
Expand Down
31 changes: 19 additions & 12 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,8 @@ def test_separator_date_conflict(all_parsers):
tm.assert_frame_equal(df, expected)


@xfail_pyarrow
@pytest.mark.parametrize("keep_date_col", [True, False])
def test_multiple_date_col_custom(all_parsers, keep_date_col):
def test_multiple_date_col_custom(all_parsers, keep_date_col, request):
data = """\
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
Expand All @@ -151,6 +150,14 @@ def test_multiple_date_col_custom(all_parsers, keep_date_col):
"""
parser = all_parsers

if keep_date_col and parser.engine == "pyarrow":
            # For this test to pass, we would need to disable auto-inference on the
            # date columns in parse_dates, but there is currently no way to do so.
mark = pytest.mark.xfail(
reason="pyarrow doesn't support disabling auto-inference on column numbers."
)
request.node.add_marker(mark)

def date_parser(*date_cols):
"""
Test date parser.
Expand Down Expand Up @@ -293,9 +300,8 @@ def test_concat_date_col_fail(container, dim):
parsing.concat_date_cols(date_cols)


@xfail_pyarrow
@pytest.mark.parametrize("keep_date_col", [True, False])
def test_multiple_date_col(all_parsers, keep_date_col):
def test_multiple_date_col(all_parsers, keep_date_col, request):
data = """\
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
Expand All @@ -305,6 +311,15 @@ def test_multiple_date_col(all_parsers, keep_date_col):
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
parser = all_parsers

if keep_date_col and parser.engine == "pyarrow":
        # For this test to pass, we would need to disable auto-inference on the
        # date columns in parse_dates, but there is currently no way to do so.
mark = pytest.mark.xfail(
reason="pyarrow doesn't support disabling auto-inference on column numbers."
)
request.node.add_marker(mark)

kwds = {
"header": None,
"parse_dates": [[1, 2], [1, 3]],
Expand Down Expand Up @@ -461,7 +476,6 @@ def test_date_col_as_index_col(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_multiple_date_cols_int_cast(all_parsers):
data = (
"KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
Expand Down Expand Up @@ -1132,7 +1146,6 @@ def test_multiple_date_cols_chunked(all_parsers):
tm.assert_frame_equal(chunks[2], expected[4:])


@xfail_pyarrow
def test_multiple_date_col_named_index_compat(all_parsers):
parser = all_parsers
data = """\
Expand All @@ -1156,7 +1169,6 @@ def test_multiple_date_col_named_index_compat(all_parsers):
tm.assert_frame_equal(with_indices, with_names)


@xfail_pyarrow
def test_multiple_date_col_multiple_index_compat(all_parsers):
parser = all_parsers
data = """\
Expand Down Expand Up @@ -1301,7 +1313,6 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@pytest.mark.parametrize(
"data,kwargs,expected",
[
Expand Down Expand Up @@ -1385,7 +1396,6 @@ def test_parse_date_time(all_parsers, data, kwargs, expected):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_parse_date_fields(all_parsers):
parser = all_parsers
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
Expand All @@ -1403,7 +1413,6 @@ def test_parse_date_fields(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_parse_date_all_fields(all_parsers):
parser = all_parsers
data = """\
Expand All @@ -1427,7 +1436,6 @@ def test_parse_date_all_fields(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_datetime_fractional_seconds(all_parsers):
parser = all_parsers
data = """\
Expand All @@ -1451,7 +1459,6 @@ def test_datetime_fractional_seconds(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_generic(all_parsers):
parser = all_parsers
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
Expand Down