Skip to content

ENH: to_datetime support iso week year (16607) #25541

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ including other versions of pandas.
Other Enhancements
^^^^^^^^^^^^^^^^^^

- Added support for ISO week year format ('%G-%V-%u') when parsing datetimes using :func:`to_datetime` (:issue:`16607`)
- Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`)
- :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`)
- :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`)
Expand Down
107 changes: 98 additions & 9 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ cdef dict _parse_code_table = {'y': 0,
'W': 16,
'Z': 17,
'p': 18, # an additional key, only with I
'z': 19}
'z': 19,
'G': 20,
'V': 21,
'u': 22}


def array_strptime(object[:] values, object fmt,
Expand All @@ -77,6 +80,7 @@ def array_strptime(object[:] values, object fmt,
object[:] result_timezone
int year, month, day, minute, hour, second, weekday, julian
int week_of_year, week_of_year_start, parse_code, ordinal
int iso_week, iso_year
int64_t us, ns
object val, group_key, ampm, found, timezone
dict found_key
Expand Down Expand Up @@ -169,13 +173,14 @@ def array_strptime(object[:] values, object fmt,
raise ValueError("time data %r does not match format "
"%r (search)" % (values[i], fmt))

iso_year = -1
year = 1900
month = day = 1
hour = minute = second = ns = us = 0
timezone = None
# Default to -1 to signify that values not known; not critical to have,
# though
week_of_year = -1
iso_week = week_of_year = -1
week_of_year_start = -1
# weekday and julian defaulted to -1 so as to signal need to calculate
# values
Expand Down Expand Up @@ -265,13 +270,44 @@ def array_strptime(object[:] values, object fmt,
timezone = pytz.timezone(found_dict['Z'])
elif parse_code == 19:
timezone = parse_timezone_directive(found_dict['z'])
elif parse_code == 20:
iso_year = int(found_dict['G'])
elif parse_code == 21:
iso_week = int(found_dict['V'])
elif parse_code == 22:
weekday = int(found_dict['u'])
weekday -= 1

# don't assume default values for ISO week/year
if iso_year != -1:
if iso_week == -1 or weekday == -1:
raise ValueError("ISO year directive '%G' must be used with "
"the ISO week directive '%V' and a weekday "
"directive '%A', '%a', '%w', or '%u'.")
if julian != -1:
raise ValueError("Day of the year directive '%j' is not "
"compatible with ISO year directive '%G'. "
"Use '%Y' instead.")
elif year != -1 and week_of_year == -1 and iso_week != -1:
if weekday == -1:
raise ValueError("ISO week directive '%V' must be used with "
"the ISO year directive '%G' and a weekday "
"directive '%A', '%a', '%w', or '%u'.")
else:
raise ValueError("ISO week directive '%V' is incompatible with"
" the year directive '%Y'. Use the ISO year "
"'%G' instead.")

# If we know the wk of the year and what day of that wk, we can figure
# out the Julian day of the year.
if julian == -1 and week_of_year != -1 and weekday != -1:
week_starts_Mon = True if week_of_year_start == 0 else False
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
week_starts_Mon)
if julian == -1 and weekday != -1:
if week_of_year != -1:
week_starts_Mon = week_of_year_start == 0
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
week_starts_Mon)
elif iso_year != -1 and iso_week != -1:
year, julian = _calc_julian_from_V(iso_year, iso_week,
weekday + 1)
# Cannot pre-calculate datetime_date() since can change in Julian
# calculation and thus could have different value for the day of the wk
# calculation.
Expand Down Expand Up @@ -511,14 +547,17 @@ class TimeRE(dict):
# The " \d" part of the regex is to make %c from ANSI C work
'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
'f': r"(?P<f>[0-9]{1,9})",
'G': r"(?P<G>\d\d\d\d)",
'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
'j': (r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|"
r"[1-9]\d|0[1-9]|[1-9])"),
'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
'M': r"(?P<M>[0-5]\d|\d)",
'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
'u': r"(?P<u>[1-7])",
'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
'w': r"(?P<w>[0-6])",
# W is set below by using 'U'
'y': r"(?P<y>\d\d)",
Expand Down Expand Up @@ -593,11 +632,27 @@ _CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
_regex_cache = {}


cdef _calc_julian_from_U_or_W(int year, int week_of_year,
int day_of_week, int week_starts_Mon):
cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
int day_of_week, int week_starts_Mon):
"""Calculate the Julian day based on the year, week of the year, and day of
the week, with week_start_day representing whether the week of the year
assumes the week starts on Sunday or Monday (6 or 0)."""
assumes the week starts on Sunday or Monday (6 or 0).

Parameters
----------
year : int
the year
week_of_year : int
week taken from format U or W
week_starts_Mon : int
represents whether the week of the year
assumes the week starts on Sunday or Monday (6 or 0)

Returns
-------
int
converted julian day
"""

cdef:
int first_weekday, week_0_length, days_to_week
Expand All @@ -620,6 +675,40 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,
return 1 + days_to_week + day_of_week


cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
    """
    Convert an ISO 8601 (year, week, weekday) triple to a calendar year and
    the day of that year.

    ISO weeks start on Monday and week 01 is the week that contains 4 Jan.
    ISO weekdays run from 1 (Monday) to 7 (Sunday).

    Parameters
    ----------
    iso_year : int
        ISO year, as parsed from the %G directive.
    iso_week : int
        ISO week number, as parsed from the %V directive.
    iso_weekday : int
        ISO weekday (1 = Monday ... 7 = Sunday), as parsed from %u.

    Returns
    -------
    (int, int)
        The Gregorian calendar year and the 1-based day of that year.
        The year equals ``iso_year`` unless the date falls before 1 Jan of
        ``iso_year``, in which case ``iso_year - 1`` is returned together
        with the day counted from 1 Jan of that previous year.

    Notes
    -----
    Only the "before 1 Jan" case is normalised here. A day number that runs
    past the end of ``iso_year`` is returned unchanged.
    """

    cdef:
        int jan4_offset, day_of_year

    # Offset derived from the weekday of 4 Jan, which always lies in ISO
    # week 01; subtracting it turns "week * 7 + weekday" into a day of year.
    jan4_offset = datetime_date(iso_year, 1, 4).isoweekday() + 3
    day_of_year = iso_week * 7 + iso_weekday - jan4_offset

    if day_of_year >= 1:
        return iso_year, day_of_year

    # A zero or negative day number means the date belongs to the previous
    # calendar year: add that year's length to count from its 1 Jan.
    day_of_year += (datetime_date(iso_year, 1, 1).toordinal()
                    - datetime_date(iso_year - 1, 1, 1).toordinal())
    return iso_year - 1, day_of_year


cdef parse_timezone_directive(object z):
"""
Parse the '%z' directive and return a pytz.FixedOffset
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
format : string, default None
strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
all the way up to nanoseconds.
See strftime documentation for more information on choices:
https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
exact : boolean, True by default

- If True, require an exact format match.
Expand Down
57 changes: 57 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,63 @@ def test_to_datetime_parse_timezone_keeps_name(self):


class TestToDatetime(object):
@pytest.mark.parametrize("s, _format, dt", [
    # ISO week 1 of 2015 runs from Mon 2014-12-29 to Sun 2015-01-04, so the
    # three cases cover the previous calendar year, 1 Jan and the week's end.
    ('2015-1-1', '%G-%V-%u', datetime(2014, 12, 29)),
    ('2015-1-4', '%G-%V-%u', datetime(2015, 1, 1)),
    ('2015-1-7', '%G-%V-%u', datetime(2015, 1, 4)),
])
def test_to_datetime_iso_week_year_format(self, s, _format, dt):
    """Parsing with ISO year/week/weekday directives gives the right date.

    See GH#16607.
    """
    parsed = to_datetime(s, format=_format)
    assert parsed == dt

# Each row is (expected error message, input string, format string).
@pytest.mark.parametrize("msg, s, _format", [
["ISO week directive '%V' must be used with the ISO year directive "
"'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 50",
"%Y %V"],
["ISO year directive '%G' must be used with the ISO week directive "
"'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 51",
"%G %V"],
["ISO year directive '%G' must be used with the ISO week directive "
"'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 "
"Monday", "%G %A"],
["ISO year directive '%G' must be used with the ISO week directive "
"'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 Mon",
"%G %a"],
["ISO year directive '%G' must be used with the ISO week directive "
"'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6",
"%G %w"],
["ISO year directive '%G' must be used with the ISO week directive "
"'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "1999 6",
"%G %u"],
["ISO year directive '%G' must be used with the ISO week directive "
"'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", "2051",
"%G"],
["Day of the year directive '%j' is not compatible with ISO year "
"directive '%G'. Use '%Y' instead.", "1999 51 6 256", "%G %V %u %j"],
["ISO week directive '%V' is incompatible with the year directive "
"'%Y'. Use the ISO year '%G' instead.", "1999 51 Sunday", "%Y %V %A"],
["ISO week directive '%V' is incompatible with the year directive "
"'%Y'. Use the ISO year '%G' instead.", "1999 51 Sun", "%Y %V %a"],
["ISO week directive '%V' is incompatible with the year directive "
"'%Y'. Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %w"],
["ISO week directive '%V' is incompatible with the year directive "
"'%Y'. Use the ISO year '%G' instead.", "1999 51 1", "%Y %V %u"],
["ISO week directive '%V' must be used with the ISO year directive "
"'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", "20", "%V"]
])
def test_error_iso_week_year(self, msg, s, _format):
# See GH#16607
# Checks that an invalid combination of ISO week/year directives raises
# a ValueError whose message matches ``msg``.
#
# As discussed on PR#25541, the weekday-name directives are locale
# specific while the test data is in English, so under an overridden
# locale a different error is raised. The guard below therefore only
# makes the assertion when the current locale is neither zh_CN nor it_IT.
# NOTE: under those two locales the test body is a no-op and the test
# is reported as passed rather than skipped.
if (locale.getlocale() != ('zh_CN', 'UTF-8') and
locale.getlocale() != ('it_IT', 'UTF-8')):
with pytest.raises(ValueError, match=msg):
to_datetime(s, format=_format)

@pytest.mark.parametrize('tz', [None, 'US/Central'])
def test_to_datetime_dtarr(self, tz):
# DatetimeArray
Expand Down