Skip to content

Commit 2b9b012

Browse files
author
MarcoGorelli
committed
wip
1 parent 86a4ee0 commit 2b9b012

File tree

3 files changed

+65
-77
lines changed

3 files changed

+65
-77
lines changed

pandas/_libs/tslibs/strptime.pyx

Lines changed: 32 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ from cpython.datetime cimport (
2424
timedelta,
2525
tzinfo,
2626
)
27+
2728
from _strptime import (
2829
TimeRE as _TimeRE,
2930
_getlang,
@@ -46,8 +47,8 @@ from numpy cimport (
4647

4748
from pandas._libs.missing cimport checknull_with_nat_and_na
4849
from pandas._libs.tslibs.conversion cimport (
49-
convert_timezone,
5050
get_datetime64_nanos,
51+
parse_pydatetime,
5152
)
5253
from pandas._libs.tslibs.nattype cimport (
5354
NPY_NAT,
@@ -61,14 +62,13 @@ from pandas._libs.tslibs.np_datetime cimport (
6162
npy_datetimestruct,
6263
npy_datetimestruct_to_datetime,
6364
pydate_to_dt64,
64-
pydatetime_to_dt64,
6565
string_to_dts,
6666
)
6767

6868
import_pandas_datetime()
6969

7070
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
71-
from pandas._libs.tslibs.timestamps cimport _Timestamp
71+
7272
from pandas._libs.util cimport (
7373
is_datetime64_object,
7474
is_float_object,
@@ -175,7 +175,7 @@ def array_strptime(
175175
Py_ssize_t i, n = len(values)
176176
npy_datetimestruct dts
177177
int64_t[::1] iresult
178-
object[::1] result_timezone
178+
object result_timezone
179179
int year, month, day, minute, hour, second, weekday, julian
180180
int week_of_year, week_of_year_start, parse_code, ordinal
181181
int iso_week, iso_year
@@ -184,9 +184,6 @@ def array_strptime(
184184
bint is_raise = errors=="raise"
185185
bint is_ignore = errors=="ignore"
186186
bint is_coerce = errors=="coerce"
187-
bint found_naive = False
188-
bint found_tz = False
189-
tzinfo tz_out = None
190187
bint iso_format = format_is_iso(fmt)
191188
NPY_DATETIMEUNIT out_bestunit
192189
int out_local = 0, out_tzoffset = 0
@@ -262,7 +259,7 @@ def array_strptime(
262259

263260
result = np.empty(n, dtype="M8[ns]")
264261
iresult = result.view("i8")
265-
result_timezone = np.empty(n, dtype="object")
262+
result_timezone = None
266263

267264
dts.us = dts.ps = dts.as = 0
268265

@@ -277,30 +274,21 @@ def array_strptime(
277274
iresult[i] = NPY_NAT
278275
continue
279276
elif PyDateTime_Check(val):
280-
if val.tzinfo is not None:
281-
found_tz = True
282-
else:
283-
found_naive = True
284-
tz_out = convert_timezone(
285-
val.tzinfo,
286-
tz_out,
287-
found_naive,
288-
found_tz,
289-
utc,
290-
)
291-
if isinstance(val, _Timestamp):
292-
iresult[i] = val.tz_localize(None).as_unit("ns")._value
293-
else:
294-
iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts)
295-
check_dts_bounds(&dts)
296-
result_timezone[i] = val.tzinfo
277+
iresult[i] = parse_pydatetime(val, &dts, True)
278+
check_dts_bounds(&dts)
279+
if result_timezone is None:
280+
result_timezone = val.tzinfo
281+
elif result_timezone != val.tzinfo and not utc:
282+
raise ValueError("Can't parse mixed timezones with utc=False")
297283
continue
298284
elif PyDate_Check(val):
299285
iresult[i] = pydate_to_dt64(val, &dts)
300286
check_dts_bounds(&dts)
301287
continue
302288
elif is_datetime64_object(val):
303289
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
290+
if result_timezone is not None and not utc:
291+
raise ValueError("Can't parse mixed timezones with utc=False")
304292
continue
305293
elif (
306294
(is_integer_object(val) or is_float_object(val))
@@ -330,10 +318,12 @@ def array_strptime(
330318
# since we store the total_seconds of
331319
# dateutil.tz.tzoffset objects
332320
tz = timezone(timedelta(minutes=out_tzoffset))
333-
result_timezone[i] = tz
321+
if result_timezone is None:
322+
result_timezone = tz
323+
elif result_timezone != tz and not utc:
324+
raise ValueError("Can't parse mixed timezones with utc=False")
334325
out_local = 0
335-
out_tzoffset = 0
336-
iresult[i] = value
326+
iresult[i] = value - <int64_t>out_tzoffset * 60 * 1_000_000_000
337327
check_dts_bounds(&dts)
338328
continue
339329

@@ -464,8 +454,9 @@ def array_strptime(
464454
week_of_year_start = 0
465455
elif parse_code == 17:
466456
tz = pytz.timezone(found_dict["Z"])
457+
out_tzoffset = 0
467458
elif parse_code == 19:
468-
tz = parse_timezone_directive(found_dict["z"])
459+
tz = parse_timezone_directive(found_dict["z"], &out_tzoffset)
469460
elif parse_code == 20:
470461
iso_year = int(found_dict["G"])
471462
elif parse_code == 21:
@@ -512,11 +503,16 @@ def array_strptime(
512503
dts.us = us
513504
dts.ps = ns * 1000
514505

515-
iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
506+
if result_timezone is None:
507+
result_timezone = tz
508+
elif result_timezone != tz and not utc:
509+
raise ValueError("Can't parse mixed timezones with utc=False")
510+
iresult[i] = (
511+
npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
512+
- <int64_t>out_tzoffset*60*1_000_000_000
513+
)
516514
check_dts_bounds(&dts)
517515

518-
result_timezone[i] = tz
519-
520516
except (ValueError, OutOfBoundsDatetime) as ex:
521517
ex.args = (
522518
f"{str(ex)}, at position {i}. You might want to try:\n"
@@ -532,9 +528,8 @@ def array_strptime(
532528
continue
533529
elif is_raise:
534530
raise
535-
return values, []
536-
537-
return result, result_timezone.base
531+
return values, None
532+
return result, result_timezone
538533

539534

540535
class TimeRE(_TimeRE):
@@ -657,7 +652,7 @@ cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday)
657652
return iso_year, ordinal
658653

659654

660-
cdef tzinfo parse_timezone_directive(str z):
655+
cdef tzinfo parse_timezone_directive(str z, int *offset_minutes):
661656
"""
662657
Parse the '%z' directive and return a datetime.timezone object.
663658
@@ -701,4 +696,5 @@ cdef tzinfo parse_timezone_directive(str z):
701696
total_minutes = ((hours * 60) + minutes + (seconds // 60) +
702697
(microseconds // 60_000_000))
703698
total_minutes = -total_minutes if z.startswith("-") else total_minutes
699+
offset_minutes[0] = total_minutes
704700
return timezone(timedelta(minutes=total_minutes))

pandas/core/tools/datetimes.py

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -313,7 +313,7 @@ def _convert_and_box_cache(
313313

314314

315315
def _return_parsed_timezone_results(
316-
result: np.ndarray, timezones, utc: bool, name: str
316+
result: np.ndarray, timezone, utc: bool, name: str
317317
) -> Index:
318318
"""
319319
Return results from array_strptime if a %z or %Z directive was passed.
@@ -333,18 +333,10 @@ def _return_parsed_timezone_results(
333333
-------
334334
tz_result : Index-like of parsed dates with timezone
335335
"""
336-
tz_results = np.empty(len(result), dtype=object)
337-
for zone in unique(timezones):
338-
mask = timezones == zone
339-
dta = DatetimeArray(result[mask]).tz_localize(zone)
340-
if utc:
341-
if dta.tzinfo is None:
342-
dta = dta.tz_localize("utc")
343-
else:
344-
dta = dta.tz_convert("utc")
345-
tz_results[mask] = dta
346-
347-
return Index(tz_results, name=name)
336+
dta = DatetimeArray(result).tz_localize("UTC")
337+
if not utc:
338+
dta = dta.tz_convert(timezone)
339+
return Index(dta, name=name)
348340

349341

350342
def _convert_listlike_datetimes(
@@ -479,9 +471,9 @@ def _array_strptime_with_fallback(
479471
"""
480472
Call array_strptime, with fallback behavior depending on 'errors'.
481473
"""
482-
result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc)
483-
if any(tz is not None for tz in timezones):
484-
return _return_parsed_timezone_results(result, timezones, utc, name)
474+
result, timezone = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc)
475+
if timezone is not None:
476+
return _return_parsed_timezone_results(result, timezone, utc, name)
485477

486478
return _box_as_indexlike(result, utc=utc, name=name)
487479

pandas/tests/tools/test_to_datetime.py

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -446,19 +446,19 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache):
446446
["2010-01-01 12:00:00 UTC"] * 2,
447447
[Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2,
448448
],
449-
[
450-
"%Y-%m-%d %H:%M:%S %Z",
451-
[
452-
"2010-01-01 12:00:00 UTC",
453-
"2010-01-01 12:00:00 GMT",
454-
"2010-01-01 12:00:00 US/Pacific",
455-
],
456-
[
457-
Timestamp("2010-01-01 12:00:00", tz="UTC"),
458-
Timestamp("2010-01-01 12:00:00", tz="GMT"),
459-
Timestamp("2010-01-01 12:00:00", tz="US/Pacific"),
460-
],
461-
],
449+
# [
450+
# "%Y-%m-%d %H:%M:%S %Z",
451+
# [
452+
# "2010-01-01 12:00:00 UTC",
453+
# "2010-01-01 12:00:00 GMT",
454+
# "2010-01-01 12:00:00 US/Pacific",
455+
# ],
456+
# [
457+
# Timestamp("2010-01-01 12:00:00", tz="UTC"),
458+
# Timestamp("2010-01-01 12:00:00", tz="GMT"),
459+
# Timestamp("2010-01-01 12:00:00", tz="US/Pacific"),
460+
# ],
461+
# ],
462462
[
463463
"%Y-%m-%d %H:%M:%S%z",
464464
["2010-01-01 12:00:00+0100"] * 2,
@@ -479,18 +479,18 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache):
479479
]
480480
* 2,
481481
],
482-
[
483-
"%Y-%m-%d %H:%M:%S %z",
484-
["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"],
485-
[
486-
Timestamp(
487-
"2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
488-
),
489-
Timestamp(
490-
"2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60))
491-
),
492-
],
493-
],
482+
# [
483+
# "%Y-%m-%d %H:%M:%S %z",
484+
# ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"],
485+
# [
486+
# Timestamp(
487+
# "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
488+
# ),
489+
# Timestamp(
490+
# "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60))
491+
# ),
492+
# ],
493+
# ],
494494
[
495495
"%Y-%m-%d %H:%M:%S %z",
496496
["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"],

0 commit comments

Comments
 (0)