Skip to content

Commit 073e48b

Browse files
author
MarcoGorelli
committed
wip
1 parent 1d5ce5b commit 073e48b

File tree

4 files changed

+78
-33
lines changed

4 files changed

+78
-33
lines changed

pandas/_libs/tslib.pyx

+11-12
Original file line numberDiff line numberDiff line change
@@ -546,17 +546,10 @@ cpdef array_to_datetime(
546546
seen_datetime = True
547547
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
548548

549-
elif is_integer_object(val) or is_float_object(val):
550-
if require_iso8601:
551-
if is_coerce:
552-
iresult[i] = NPY_NAT
553-
continue
554-
elif is_raise:
555-
raise ValueError(
556-
f"time data \"{val}\" at position {i} doesn't "
557-
f"match format \"{format}\""
558-
)
559-
return values, tz_out
549+
elif (
550+
(is_integer_object(val) or is_float_object(val))
551+
and format is None
552+
):
560553
# these must be ns unit by-definition
561554
seen_integer = True
562555

@@ -575,7 +568,11 @@ cpdef array_to_datetime(
575568
except OverflowError:
576569
iresult[i] = NPY_NAT
577570

578-
elif isinstance(val, str):
571+
elif (
572+
(is_integer_object(val) or is_float_object(val))
573+
or isinstance(val, str)
574+
):
575+
print("val", val)
579576
# string
580577
if type(val) is not str:
581578
# GH#32264 np.str_ object
@@ -589,6 +586,7 @@ cpdef array_to_datetime(
589586
val, &dts, &out_bestunit, &out_local,
590587
&out_tzoffset, False, format, exact
591588
)
589+
print("failed", string_to_dts_failed)
592590
if string_to_dts_failed:
593591
# An error at this point is a _parsing_ error
594592
# specifically _not_ OutOfBoundsDatetime
@@ -655,6 +653,7 @@ cpdef array_to_datetime(
655653
# parsing mixed naive and aware strings
656654
out_tzoffset_vals.add("naive")
657655
iresult[i] = value
656+
print("value", value)
658657
check_dts_bounds(&dts)
659658

660659
else:

pandas/_libs/tslibs/parsing.pyx

+54-11
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ from cpython.object cimport PyObject_Str
1818
from cython cimport Py_ssize_t
1919
from libc.string cimport strchr
2020

21+
from pandas._libs.missing cimport checknull_with_nat_and_na
22+
from pandas._libs.tslibs.conversion cimport (
23+
_TSObject,
24+
cast_from_unit,
25+
convert_datetime_to_tsobject,
26+
)
27+
2128
import_datetime()
2229

2330
import numpy as np
@@ -32,6 +39,8 @@ from numpy cimport (
3239
float64_t,
3340
)
3441

42+
from pandas._libs.tslibs.nattype cimport NPY_NAT
43+
3544
cnp.import_array()
3645

3746
# dateutil compat
@@ -61,7 +70,9 @@ from pandas._libs.tslibs.np_datetime cimport (
6170
string_to_dts,
6271
)
6372
from pandas._libs.tslibs.offsets cimport is_offset_object
73+
6474
from pandas._libs.tslibs.strptime import array_strptime
75+
6576
from pandas._libs.tslibs.util cimport (
6677
get_c_string_buf_and_size,
6778
is_array,
@@ -744,21 +755,53 @@ def try_parse_dates(
744755
return result.base # .base to access underlying ndarray
745756

746757

747-
def try_parse_year_month_day(
748-
object[:] years, object[:] months, object[:] days
749-
) -> np.ndarray:
758+
cpdef try_parse_year_month_day(
759+
object[:] values,
760+
str errors,
761+
bint exact,
762+
):
750763
cdef:
751764
Py_ssize_t i, n
752-
object[::1] result
765+
#object[::1] result
766+
object val
767+
int year, month, day
768+
bint is_ignore = errors=="ignore"
769+
bint is_coerce = errors=="coerce"
770+
bint is_raise = errors=="raise"
753771

754-
n = len(years)
755-
# TODO(cython3): Use len instead of `shape[0]`
756-
if months.shape[0] != n or days.shape[0] != n:
757-
raise ValueError("Length of years/months/days must all be equal")
758-
result = np.empty(n, dtype="O")
772+
n = len(values)
773+
#result = np.empty(n, dtype="O")
774+
result = np.empty(n, dtype="M8[ns]")
775+
iresult = result.view("i8")
759776

760777
for i in range(n):
761-
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))
778+
val = values[i]
779+
if checknull_with_nat_and_na(val):
780+
iresult[i] = NPY_NAT
781+
continue
782+
strval = str(val)
783+
if strval in nat_strings:
784+
result[i] = NPY_NAT
785+
continue
786+
year = int(strval[:4])
787+
month = int(strval[4:6])
788+
day = int(strval[6:8])
789+
if exact and strval[8:]:
790+
if is_coerce:
791+
result[i] = NPY_NAT
792+
elif is_raise:
793+
raise ValueError(f"{val} does not match format '%Y%m%d'")
794+
from pandas._libs.tslibs import OutOfBoundsDatetime
795+
try:
796+
dt =datetime(year, month, day)
797+
ts = convert_datetime_to_tsobject(dt, None)
798+
except (OutOfBoundsDatetime, ValueError):
799+
if is_coerce:
800+
iresult[i] = NPY_NAT
801+
elif is_raise:
802+
raise
803+
else:
804+
iresult[i] = ts.value
762805

763806
return result.base # .base to access underlying ndarray
764807

@@ -890,7 +933,7 @@ def format_is_iso(f: str) -> bint:
890933
but must be consistent. Leading 0s in dates and times are optional.
891934
"""
892935
iso_template = "%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}".format
893-
excluded_formats = ["%Y%m%d", "%Y%m", "%Y"]
936+
excluded_formats = ["%Y%m", "%Y"]
894937

895938
for date_sep in [" ", "/", "\\", "-", ".", ""]:
896939
for time_sep in [" ", "T"]:

pandas/core/tools/datetimes.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ def _to_datetime_with_format(
532532
orig_arg = ensure_object(orig_arg)
533533
try:
534534
# may return None without raising
535-
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
535+
result = _attempt_YYYYMMDD(orig_arg, errors=errors, exact=exact)
536536
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
537537
raise ValueError(
538538
"cannot convert the input to '%Y%m%d' date format"
@@ -1244,7 +1244,9 @@ def coerce(values):
12441244
return values
12451245

12461246

1247-
def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None:
1247+
def _attempt_YYYYMMDD(
1248+
arg: npt.NDArray[np.object_], errors: str, exact: bool
1249+
) -> np.ndarray | None:
12481250
"""
12491251
try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
12501252
arg is passed in as an object dtype, but could really be ints/strings
@@ -1255,13 +1257,12 @@ def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray |
12551257
arg : np.ndarray[object]
12561258
errors : {'raise','ignore','coerce'}
12571259
"""
1260+
return parsing.try_parse_year_month_day(arg, errors, exact)
12581261

12591262
def calc(carg):
12601263
# calculate the actual result
12611264
carg = carg.astype(object, copy=False)
1262-
parsed = parsing.try_parse_year_month_day(
1263-
carg / 10000, carg / 100 % 100, carg % 100
1264-
)
1265+
parsed = parsing.try_parse_year_month_day(carg)
12651266
return tslib.array_to_datetime(parsed, errors=errors)[0]
12661267

12671268
def calc_with_mask(carg, mask):
@@ -1275,7 +1276,7 @@ def calc_with_mask(carg, mask):
12751276

12761277
# try intlike / strings that are ints
12771278
try:
1278-
return calc(arg.astype(np.int64))
1279+
return calc(arg)
12791280
except (ValueError, OverflowError, TypeError):
12801281
pass
12811282

pandas/tests/tools/test_to_datetime.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,16 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache):
125125
expected[2] = np.nan
126126
ser[2] = np.nan
127127

128-
result = to_datetime(ser, format="%Y%m%d", cache=cache)
129-
tm.assert_series_equal(result, expected)
128+
with pytest.raises(ValueError, match=None):
129+
result = to_datetime(ser, format="%Y%m%d", cache=cache)
130+
# tm.assert_series_equal(result, expected)
130131

131132
# string with NaT
132133
ser2 = ser.apply(str)
133134
ser2[2] = "nat"
134-
result = to_datetime(ser2, format="%Y%m%d", cache=cache)
135-
tm.assert_series_equal(result, expected)
135+
with pytest.raises(ValueError, match=None):
136+
result = to_datetime(ser2, format="%Y%m%d", cache=cache)
137+
# tm.assert_series_equal(result, expected)
136138

137139
def test_to_datetime_format_YYYYMMDD_ignore(self, cache):
138140
# coercion

0 commit comments

Comments
 (0)