BUG: to_datetime with ``format`` raised ValueError on pandas.NA (GH#42957)

array_strptime now tests for missing values with checknull_with_nat_and_na
instead of checknull_with_nat, so a pandas.NA element is stored as NPY_NAT
rather than being passed on to string parsing.  Adds the whatsnew entry and a
parametrized regression test that feeds pd.NA (alone, mixed with date strings,
and next to None/np.nan) to to_datetime with and without an explicit format.

NOTE(review): indentation of the context lines was reconstructed from the
syntax of each file; confirm against the target tree before applying.

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index f6e90a3341424..01ad7d69cddc7 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -378,6 +378,7 @@ Categorical
 Datetimelike
 ^^^^^^^^^^^^
 - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
+- Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`)
 - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
 -
 
diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx
index e7fb38db2aa17..d214694fb659d 100644
--- a/pandas/_libs/tslibs/strptime.pyx
+++ b/pandas/_libs/tslibs/strptime.pyx
@@ -20,10 +20,10 @@ from numpy cimport (
     ndarray,
 )
 
+from pandas._libs.missing cimport checknull_with_nat_and_na
 from pandas._libs.tslibs.nattype cimport (
     NPY_NAT,
     c_nat_strings as nat_strings,
-    checknull_with_nat,
 )
 from pandas._libs.tslibs.np_datetime cimport (
     check_dts_bounds,
@@ -134,7 +134,7 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='
                 iresult[i] = NPY_NAT
                 continue
         else:
-            if checknull_with_nat(val):
+            if checknull_with_nat_and_na(val):
                 iresult[i] = NPY_NAT
                 continue
             else:
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index a38affbc7f723..850ce6df21b7f 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -177,6 +177,28 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
         result = to_datetime(input_s, format="%Y%m%d", errors="coerce")
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "data, format, expected",
+        [
+            ([pd.NA], "%Y%m%d%H%M%S", DatetimeIndex(["NaT"])),
+            ([pd.NA], None, DatetimeIndex(["NaT"])),
+            (
+                [pd.NA, "20210202202020"],
+                "%Y%m%d%H%M%S",
+                DatetimeIndex(["NaT", "2021-02-02 20:20:20"]),
+            ),
+            (["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
+            (["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
+            (["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])),
+            ([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])),
+            ([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])),
+        ],
+    )
+    def test_to_datetime_with_NA(self, data, format, expected):
+        # GH#42957
+        result = to_datetime(data, format=format)
+        tm.assert_index_equal(result, expected)
+
     @pytest.mark.parametrize("cache", [True, False])
     def test_to_datetime_format_integer(self, cache):
         # GH 10178