Skip to content

Commit 3de2331

Browse files
author
MarcoGorelli
committed
fixup
1 parent 0971f55 commit 3de2331

File tree

7 files changed

+123
-21
lines changed

7 files changed

+123
-21
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ repos:
6363
'--extensions=c,h',
6464
'--headers=h',
6565
--recursive,
66-
'--filter=-readability/casting,-runtime/int,-build/include_subdir'
66+
'--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
6767
]
6868
- repo: https://github.com/PyCQA/flake8
6969
rev: 6.0.0

pandas/_libs/tslib.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ cnp.import_array()
2828

2929
from pandas._libs.tslibs.np_datetime cimport (
3030
NPY_DATETIMEUNIT,
31+
Exact,
3132
NPY_FR_ns,
3233
check_dts_bounds,
3334
get_datetime64_value,
@@ -411,7 +412,7 @@ cpdef array_to_datetime(
411412
bint utc=False,
412413
bint require_iso8601=False,
413414
format: str | None=None,
414-
bint exact=True,
415+
Exact exact=Exact.EXACT_MATCH,
415416
):
416417
"""
417418
Converts a 1D array of date-like values to a numpy array of either:

pandas/_libs/tslibs/np_datetime.pxd

+7-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ cdef int string_to_dts(
9696
int* out_tzoffset,
9797
bint want_exc,
9898
format: str | None = *,
99-
bint exact = *
99+
Exact exact = *
100100
) except? -1
101101

102102
cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)
@@ -120,3 +120,9 @@ cdef int64_t convert_reso(
120120
NPY_DATETIMEUNIT to_reso,
121121
bint round_ok,
122122
) except? -1
123+
124+
cdef extern from "src/datetime/np_datetime_strings.h":
125+
cdef enum Exact:
126+
PARTIAL_MATCH
127+
EXACT_MATCH
128+
NO_MATCH

pandas/_libs/tslibs/np_datetime.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ cdef extern from "src/datetime/np_datetime_strings.h":
5353
npy_datetimestruct *out,
5454
NPY_DATETIMEUNIT *out_bestunit,
5555
int *out_local, int *out_tzoffset,
56-
const char *format, int format_len, int exact)
56+
const char *format, int format_len, Exact exact)
5757

5858

5959
# ----------------------------------------------------------------------
@@ -279,7 +279,7 @@ cdef int string_to_dts(
279279
int* out_tzoffset,
280280
bint want_exc,
281281
format: str | None=None,
282-
bint exact=True,
282+
Exact exact=EXACT_MATCH,
283283
) except? -1:
284284
cdef:
285285
Py_ssize_t length
@@ -291,7 +291,7 @@ cdef int string_to_dts(
291291
if format is None:
292292
format_buf = b""
293293
format_length = 0
294-
exact = False
294+
exact = NO_MATCH
295295
else:
296296
format_buf = get_c_string_buf_and_size(format, &format_length)
297297
return parse_iso_8601_datetime(buf, length, want_exc,

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

+62-14
Original file line numberDiff line numberDiff line change
@@ -71,20 +71,13 @@ This file implements string parsing and creation for NumPy datetime.
7171
// and decrement characters_remaining by n on success
7272
// On failure will return -1 without incrementing
7373
static int compare_format(const char **format, int *characters_remaining,
74-
const char *compare_to, int n, const int exact) {
74+
const char *compare_to, int n, const enum Exact exact) {
75+
if (exact == NO_MATCH) {
76+
return 0;
77+
}
7578
if (*characters_remaining < n) {
76-
if (exact) {
77-
// TODO(pandas-dev): in the future we should set a PyErr here
78-
// to be very clear about what went wrong
79-
return -1;
80-
} else if (*characters_remaining) {
81-
// TODO(pandas-dev): same return value in this function as
82-
// above branch, but stub out a future where
83-
// we have a better error message
84-
return -1;
85-
} else {
86-
return 0;
87-
}
79+
// TODO(pandas-dev): PyErr to differentiate what went wrong
80+
return -1;
8881
} else {
8982
if (strncmp(*format, compare_to, n)) {
9083
// TODO(pandas-dev): PyErr to differentiate what went wrong
@@ -102,7 +95,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
10295
npy_datetimestruct *out,
10396
NPY_DATETIMEUNIT *out_bestunit,
10497
int *out_local, int *out_tzoffset,
105-
const char* format, int format_len, int exact) {
98+
const char* format, int format_len,
99+
enum Exact exact) {
106100
if (len < 0 || format_len < 0)
107101
goto parse_error;
108102
int year_leap = 0;
@@ -139,6 +133,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
139133
while (sublen > 0 && isspace(*substr)) {
140134
++substr;
141135
--sublen;
136+
if (exact == PARTIAL_MATCH && !format_len) {
137+
goto finish;
138+
}
142139
if (compare_format(&format, &format_len, " ", 1, exact)) {
143140
goto parse_error;
144141
}
@@ -155,6 +152,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
155152
}
156153

157154
/* PARSE THE YEAR (4 digits) */
155+
if (exact == PARTIAL_MATCH && !format_len) {
156+
goto finish;
157+
}
158158
if (compare_format(&format, &format_len, "%Y", 2, exact)) {
159159
goto parse_error;
160160
}
@@ -202,6 +202,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
202202
++substr;
203203
--sublen;
204204

205+
if (exact == PARTIAL_MATCH && !format_len) {
206+
goto finish;
207+
}
205208
if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) {
206209
goto parse_error;
207210
}
@@ -212,6 +215,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
212215
}
213216

214217
/* PARSE THE MONTH */
218+
if (exact == PARTIAL_MATCH && !format_len) {
219+
goto finish;
220+
}
215221
if (compare_format(&format, &format_len, "%m", 2, exact)) {
216222
goto parse_error;
217223
}
@@ -258,12 +264,18 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
258264
}
259265
++substr;
260266
--sublen;
267+
if (exact == PARTIAL_MATCH && !format_len) {
268+
goto finish;
269+
}
261270
if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) {
262271
goto parse_error;
263272
}
264273
}
265274

266275
/* PARSE THE DAY */
276+
if (exact == PARTIAL_MATCH && !format_len) {
277+
goto finish;
278+
}
267279
if (compare_format(&format, &format_len, "%d", 2, exact)) {
268280
goto parse_error;
269281
}
@@ -306,13 +318,19 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
306318
if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
307319
goto parse_error;
308320
}
321+
if (exact == PARTIAL_MATCH && !format_len) {
322+
goto finish;
323+
}
309324
if (compare_format(&format, &format_len, substr, 1, exact)) {
310325
goto parse_error;
311326
}
312327
++substr;
313328
--sublen;
314329

315330
/* PARSE THE HOURS */
331+
if (exact == PARTIAL_MATCH && !format_len) {
332+
goto finish;
333+
}
316334
if (compare_format(&format, &format_len, "%H", 2, exact)) {
317335
goto parse_error;
318336
}
@@ -359,6 +377,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
359377
if (sublen == 0 || !isdigit(*substr)) {
360378
goto parse_error;
361379
}
380+
if (exact == PARTIAL_MATCH && !format_len) {
381+
goto finish;
382+
}
362383
if (compare_format(&format, &format_len, ":", 1, exact)) {
363384
goto parse_error;
364385
}
@@ -370,6 +391,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
370391
}
371392

372393
/* PARSE THE MINUTES */
394+
if (exact == PARTIAL_MATCH && !format_len) {
395+
goto finish;
396+
}
373397
if (compare_format(&format, &format_len, "%M", 2, exact)) {
374398
goto parse_error;
375399
}
@@ -405,6 +429,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
405429
/* If we make it through this condition block, then the next
406430
* character is a digit. */
407431
if (has_hms_sep && *substr == ':') {
432+
if (exact == PARTIAL_MATCH && !format_len) {
433+
goto finish;
434+
}
408435
if (compare_format(&format, &format_len, ":", 1, exact)) {
409436
goto parse_error;
410437
}
@@ -420,6 +447,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
420447
}
421448

422449
/* PARSE THE SECONDS */
450+
if (exact == PARTIAL_MATCH && !format_len) {
451+
goto finish;
452+
}
423453
if (compare_format(&format, &format_len, "%S", 2, exact)) {
424454
goto parse_error;
425455
}
@@ -448,6 +478,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
448478
if (sublen > 0 && *substr == '.') {
449479
++substr;
450480
--sublen;
481+
if (exact == PARTIAL_MATCH && !format_len) {
482+
goto finish;
483+
}
451484
if (compare_format(&format, &format_len, ".", 1, exact)) {
452485
goto parse_error;
453486
}
@@ -457,6 +490,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
457490
}
458491

459492
/* PARSE THE MICROSECONDS (0 to 6 digits) */
493+
if (exact == PARTIAL_MATCH && !format_len) {
494+
goto finish;
495+
}
460496
if (compare_format(&format, &format_len, "%f", 2, exact)) {
461497
goto parse_error;
462498
}
@@ -524,6 +560,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
524560
while (sublen > 0 && isspace(*substr)) {
525561
++substr;
526562
--sublen;
563+
if (exact == PARTIAL_MATCH && !format_len) {
564+
goto finish;
565+
}
527566
if (compare_format(&format, &format_len, " ", 1, exact)) {
528567
goto parse_error;
529568
}
@@ -539,6 +578,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
539578

540579
/* UTC specifier */
541580
if (*substr == 'Z') {
581+
if (exact == PARTIAL_MATCH && !format_len) {
582+
goto finish;
583+
}
542584
if (compare_format(&format, &format_len, "%z", 2, exact)) {
543585
goto parse_error;
544586
}
@@ -561,6 +603,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
561603
--sublen;
562604
}
563605
} else if (*substr == '-' || *substr == '+') {
606+
if (exact == PARTIAL_MATCH && !format_len) {
607+
goto finish;
608+
}
564609
if (compare_format(&format, &format_len, "%z", 2, exact)) {
565610
goto parse_error;
566611
}
@@ -647,6 +692,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
647692
while (sublen > 0 && isspace(*substr)) {
648693
++substr;
649694
--sublen;
695+
if (exact == PARTIAL_MATCH && !format_len) {
696+
goto finish;
697+
}
650698
if (compare_format(&format, &format_len, " ", 1, exact)) {
651699
goto parse_error;
652700
}

pandas/_libs/tslibs/src/datetime/np_datetime_strings.h

+17-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,22 @@ This file implements string parsing and creation for NumPy datetime.
2626
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
2727
#endif // NPY_NO_DEPRECATED_API
2828

29+
/* 'exact' can be one of three values:
30+
* * PARTIAL_MATCH : Only require a partial match with 'format'.
31+
* For example, if the string is '2020-01-01 05:00:00' and
32+
* 'format' is '%Y-%m-%d', then only '2020-01-01' is parsed;
33+
* * EXACT_MATCH : Require an exact match with 'format'. If the
34+
* string is '2020-01-01', then the only format which will
35+
* be able to parse it without error is '%Y-%m-%d';
36+
* * NO_MATCH : Don't require any match - parse without comparing
37+
* with 'format'.
38+
*/
39+
enum Exact {
40+
PARTIAL_MATCH,
41+
EXACT_MATCH,
42+
NO_MATCH
43+
};
44+
2945
/*
3046
* Parses (almost) standard ISO 8601 date strings. The differences are:
3147
*
@@ -61,7 +77,7 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc,
6177
int *out_tzoffset,
6278
const char* format,
6379
int format_len,
64-
int exact);
80+
enum Exact exact);
6581

6682
/*
6783
* Provides a string length to use for converting datetime

pandas/tests/tools/test_to_datetime.py

+31
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,37 @@ def test_to_datetime_with_non_exact(self, cache):
353353
)
354354
tm.assert_series_equal(result, expected)
355355

356+
@pytest.mark.parametrize(
357+
"format, expected",
358+
[
359+
("%Y-%m-%d", Timestamp(2000, 1, 3)),
360+
("%Y-%d-%m", Timestamp(2000, 3, 1)),
361+
("%Y-%m-%d %H", Timestamp(2000, 1, 3, 12)),
362+
("%Y-%d-%m %H", Timestamp(2000, 3, 1, 12)),
363+
("%Y-%m-%d %H:%M", Timestamp(2000, 1, 3, 12, 34)),
364+
("%Y-%d-%m %H:%M", Timestamp(2000, 3, 1, 12, 34)),
365+
("%Y-%m-%d %H:%M:%S", Timestamp(2000, 1, 3, 12, 34, 56)),
366+
("%Y-%d-%m %H:%M:%S", Timestamp(2000, 3, 1, 12, 34, 56)),
367+
("%Y-%m-%d %H:%M:%S.%f", Timestamp(2000, 1, 3, 12, 34, 56, 123456)),
368+
("%Y-%d-%m %H:%M:%S.%f", Timestamp(2000, 3, 1, 12, 34, 56, 123456)),
369+
(
370+
"%Y-%m-%d %H:%M:%S.%f%z",
371+
Timestamp(2000, 1, 3, 12, 34, 56, 123456, tz="UTC+01:00"),
372+
),
373+
(
374+
"%Y-%d-%m %H:%M:%S.%f%z",
375+
Timestamp(2000, 3, 1, 12, 34, 56, 123456, tz="UTC+01:00"),
376+
),
377+
],
378+
)
379+
def test_non_exact_doesnt_parse_whole_string(self, cache, format, expected):
380+
# https://github.com/pandas-dev/pandas/issues/50412
381+
# the formats alternate between ISO8601 and non-ISO8601 to check both paths
382+
result = to_datetime(
383+
"2000-01-03 12:34:56.123456+01:00", format=format, exact=False
384+
)
385+
assert result == expected
386+
356387
@pytest.mark.parametrize(
357388
"arg",
358389
[

0 commit comments

Comments
 (0)