Skip to content

Commit 63e11e4

Browse files
BUG: pd.to_datetime with format doesn't work with pd.NA (#42982)
1 parent 8a61d30 commit 63e11e4

File tree

3 files changed

+25
-2
lines changed

3 files changed

+25
-2
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ Categorical
378378
Datetimelike
379379
^^^^^^^^^^^^
380380
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
381+
- Bug in :func:`to_datetime` raising ``ValueError`` when ``format`` is specified and the input contains ``pandas.NA`` (:issue:`42957`)
381382
- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
382383
-
383384

pandas/_libs/tslibs/strptime.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ from numpy cimport (
2020
ndarray,
2121
)
2222

23+
from pandas._libs.missing cimport checknull_with_nat_and_na
2324
from pandas._libs.tslibs.nattype cimport (
2425
NPY_NAT,
2526
c_nat_strings as nat_strings,
26-
checknull_with_nat,
2727
)
2828
from pandas._libs.tslibs.np_datetime cimport (
2929
check_dts_bounds,
@@ -134,7 +134,7 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='
134134
iresult[i] = NPY_NAT
135135
continue
136136
else:
137-
if checknull_with_nat(val):
137+
if checknull_with_nat_and_na(val):
138138
iresult[i] = NPY_NAT
139139
continue
140140
else:

pandas/tests/tools/test_to_datetime.py

+22
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,28 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
177177
result = to_datetime(input_s, format="%Y%m%d", errors="coerce")
178178
tm.assert_series_equal(result, expected)
179179

180+
@pytest.mark.parametrize(
181+
"data, format, expected",
182+
[
183+
([pd.NA], "%Y%m%d%H%M%S", DatetimeIndex(["NaT"])),
184+
([pd.NA], None, DatetimeIndex(["NaT"])),
185+
(
186+
[pd.NA, "20210202202020"],
187+
"%Y%m%d%H%M%S",
188+
DatetimeIndex(["NaT", "2021-02-02 20:20:20"]),
189+
),
190+
(["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
191+
(["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
192+
(["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])),
193+
([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])),
194+
([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])),
195+
],
196+
)
197+
def test_to_datetime_with_NA(self, data, format, expected):
198+
# GH#42957
199+
result = to_datetime(data, format=format)
200+
tm.assert_index_equal(result, expected)
201+
180202
@pytest.mark.parametrize("cache", [True, False])
181203
def test_to_datetime_format_integer(self, cache):
182204
# GH 10178

0 commit comments

Comments
 (0)