Skip to content

wip #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 31 additions & 36 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ from numpy cimport (

from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.conversion cimport (
convert_timezone,
get_datetime64_nanos,
parse_pydatetime,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
Expand All @@ -61,14 +61,13 @@ from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct,
npy_datetimestruct_to_datetime,
pydate_to_dt64,
pydatetime_to_dt64,
string_to_dts,
)

import_pandas_datetime()

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.timestamps cimport _Timestamp

from pandas._libs.util cimport (
is_datetime64_object,
is_float_object,
Expand Down Expand Up @@ -175,7 +174,7 @@ def array_strptime(
Py_ssize_t i, n = len(values)
npy_datetimestruct dts
int64_t[::1] iresult
object[::1] result_timezone
object result_timezone
int year, month, day, minute, hour, second, weekday, julian
int week_of_year, week_of_year_start, parse_code, ordinal
int iso_week, iso_year
Expand All @@ -184,9 +183,6 @@ def array_strptime(
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
bint found_naive = False
bint found_tz = False
tzinfo tz_out = None
bint iso_format = format_is_iso(fmt)
NPY_DATETIMEUNIT out_bestunit
int out_local = 0, out_tzoffset = 0
Expand Down Expand Up @@ -262,7 +258,7 @@ def array_strptime(

result = np.empty(n, dtype="M8[ns]")
iresult = result.view("i8")
result_timezone = np.empty(n, dtype="object")
result_timezone = None

dts.us = dts.ps = dts.as = 0

Expand All @@ -277,30 +273,21 @@ def array_strptime(
iresult[i] = NPY_NAT
continue
elif PyDateTime_Check(val):
if val.tzinfo is not None:
found_tz = True
else:
found_naive = True
tz_out = convert_timezone(
val.tzinfo,
tz_out,
found_naive,
found_tz,
utc,
)
if isinstance(val, _Timestamp):
iresult[i] = val.tz_localize(None).as_unit("ns")._value
else:
iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts)
check_dts_bounds(&dts)
result_timezone[i] = val.tzinfo
iresult[i] = parse_pydatetime(val, &dts, True)
check_dts_bounds(&dts)
if result_timezone is None:
result_timezone = val.tzinfo
elif result_timezone != val.tzinfo and not utc:
raise ValueError("Can't parse mixed timezones with utc=False")
continue
elif PyDate_Check(val):
iresult[i] = pydate_to_dt64(val, &dts)
check_dts_bounds(&dts)
continue
elif is_datetime64_object(val):
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
if result_timezone is not None and not utc:
raise ValueError("Can't parse mixed timezones with utc=False")
continue
elif (
(is_integer_object(val) or is_float_object(val))
Expand Down Expand Up @@ -330,10 +317,12 @@ def array_strptime(
# since we store the total_seconds of
# dateutil.tz.tzoffset objects
tz = timezone(timedelta(minutes=out_tzoffset))
result_timezone[i] = tz
if result_timezone is None:
result_timezone = tz
elif result_timezone != tz and not utc:
raise ValueError("Can't parse mixed timezones with utc=False")
out_local = 0
out_tzoffset = 0
iresult[i] = value
iresult[i] = value - <int64_t>out_tzoffset * 60 * 1_000_000_000
check_dts_bounds(&dts)
continue

Expand Down Expand Up @@ -464,8 +453,9 @@ def array_strptime(
week_of_year_start = 0
elif parse_code == 17:
tz = pytz.timezone(found_dict["Z"])
out_tzoffset = 0
elif parse_code == 19:
tz = parse_timezone_directive(found_dict["z"])
tz = parse_timezone_directive(found_dict["z"], &out_tzoffset)
elif parse_code == 20:
iso_year = int(found_dict["G"])
elif parse_code == 21:
Expand Down Expand Up @@ -512,11 +502,16 @@ def array_strptime(
dts.us = us
dts.ps = ns * 1000

iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
if result_timezone is None:
result_timezone = tz
elif result_timezone != tz and not utc:
raise ValueError("Can't parse mixed timezones with utc=False")
iresult[i] = (
npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
- <int64_t>out_tzoffset*60*1_000_000_000
)
check_dts_bounds(&dts)

result_timezone[i] = tz

except (ValueError, OutOfBoundsDatetime) as ex:
ex.args = (
f"{str(ex)}, at position {i}. You might want to try:\n"
Expand All @@ -532,9 +527,8 @@ def array_strptime(
continue
elif is_raise:
raise
return values, []

return result, result_timezone.base
return values, None
return result, result_timezone


class TimeRE(_TimeRE):
Expand Down Expand Up @@ -657,7 +651,7 @@ cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday)
return iso_year, ordinal


cdef tzinfo parse_timezone_directive(str z):
cdef tzinfo parse_timezone_directive(str z, int *offset_minutes):
"""
Parse the '%z' directive and return a datetime.timezone object; the signed
UTC offset, in minutes, is also written to ``offset_minutes[0]``.

Expand Down Expand Up @@ -701,4 +695,5 @@ cdef tzinfo parse_timezone_directive(str z):
total_minutes = ((hours * 60) + minutes + (seconds // 60) +
(microseconds // 60_000_000))
total_minutes = -total_minutes if z.startswith("-") else total_minutes
offset_minutes[0] = total_minutes
return timezone(timedelta(minutes=total_minutes))
24 changes: 8 additions & 16 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def _convert_and_box_cache(


def _return_parsed_timezone_results(
result: np.ndarray, timezones, utc: bool, name: str
result: np.ndarray, timezone, utc: bool, name: str
) -> Index:
"""
Return results from array_strptime if a %z or %Z directive was passed.
Expand All @@ -333,18 +333,10 @@ def _return_parsed_timezone_results(
-------
tz_result : Index-like of parsed dates with timezone
"""
tz_results = np.empty(len(result), dtype=object)
for zone in unique(timezones):
mask = timezones == zone
dta = DatetimeArray(result[mask]).tz_localize(zone)
if utc:
if dta.tzinfo is None:
dta = dta.tz_localize("utc")
else:
dta = dta.tz_convert("utc")
tz_results[mask] = dta

return Index(tz_results, name=name)
dta = DatetimeArray(result).tz_localize("UTC")
if not utc:
dta = dta.tz_convert(timezone)
return Index(dta, name=name)


def _convert_listlike_datetimes(
Expand Down Expand Up @@ -479,9 +471,9 @@ def _array_strptime_with_fallback(
"""
Call array_strptime, with fallback behavior depending on 'errors'.
"""
result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc)
if any(tz is not None for tz in timezones):
return _return_parsed_timezone_results(result, timezones, utc, name)
result, timezone = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc)
if timezone is not None:
return _return_parsed_timezone_results(result, timezone, utc, name)

return _box_as_indexlike(result, utc=utc, name=name)

Expand Down
50 changes: 25 additions & 25 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,19 +446,19 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache):
["2010-01-01 12:00:00 UTC"] * 2,
[Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2,
],
[
"%Y-%m-%d %H:%M:%S %Z",
[
"2010-01-01 12:00:00 UTC",
"2010-01-01 12:00:00 GMT",
"2010-01-01 12:00:00 US/Pacific",
],
[
Timestamp("2010-01-01 12:00:00", tz="UTC"),
Timestamp("2010-01-01 12:00:00", tz="GMT"),
Timestamp("2010-01-01 12:00:00", tz="US/Pacific"),
],
],
# [
# "%Y-%m-%d %H:%M:%S %Z",
# [
# "2010-01-01 12:00:00 UTC",
# "2010-01-01 12:00:00 GMT",
# "2010-01-01 12:00:00 US/Pacific",
# ],
# [
# Timestamp("2010-01-01 12:00:00", tz="UTC"),
# Timestamp("2010-01-01 12:00:00", tz="GMT"),
# Timestamp("2010-01-01 12:00:00", tz="US/Pacific"),
# ],
# ],
[
"%Y-%m-%d %H:%M:%S%z",
["2010-01-01 12:00:00+0100"] * 2,
Expand All @@ -479,18 +479,18 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache):
]
* 2,
],
[
"%Y-%m-%d %H:%M:%S %z",
["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"],
[
Timestamp(
"2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
),
Timestamp(
"2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60))
),
],
],
# [
# "%Y-%m-%d %H:%M:%S %z",
# ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"],
# [
# Timestamp(
# "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
# ),
# Timestamp(
# "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60))
# ),
# ],
# ],
[
"%Y-%m-%d %H:%M:%S %z",
["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"],
Expand Down