diff --git a/docs/intro_2.md b/docs/intro_2.md index d68ca1bd2..061c58120 100644 --- a/docs/intro_2.md +++ b/docs/intro_2.md @@ -66,6 +66,37 @@ python semantics is its treament of integers. For performance and memory reasons this won't be a problem, but if you attempt to place an integer larger than 64 bits into a `typed_python` container, you'll see the integer get cast down to 64 bits. +### Timestamp + +`typed_python` provides the Timestamp type that wraps useful datetime functionality around a +unix timestamp. + +For example, you can create a Timestamp from a unixtime with the following: + +``` +ts1 = Timestamp.make(1654615145) +ts2 = Timestamp(ts=1654615145) +``` + +You can also create Timestamps from datestrings. The parser supports ISO 8601 along with a variety +of non-ISO formats. E.g.: +``` + ts1 = Timestamp.parse("2022-01-05T10:11:12+00:15") + ts2 = Timestamp.parse("2022-01-05T10:11:12NYC") + ts3 = Timestamp.parse("January 1, 2022") + ts4 = Timestamp.parse("January/1/2022") + ts5 = Timestamp.parse("Jan-1-2022") +``` + +You can format Timestamps as strings using standard time format directives. E.g.: + +``` +timestamp = Timestamp.make(1654615145) +print(timestamp.format(utc_offset=144000)) # 2022-06-09T07:19:05 +print(timestamp.format(format="%Y-%m-%d")) # 2022-06-07 +``` + + ### Object In some cases, you may have types that need to hold regular python objects. For these cases, you may diff --git a/typed_python/lib/datetime/chrono.py b/typed_python/lib/datetime/chrono.py new file mode 100644 index 000000000..30c8d5e57 --- /dev/null +++ b/typed_python/lib/datetime/chrono.py @@ -0,0 +1,229 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typed_python import Class, Entrypoint, Final


class Chrono(Class, Final):
    '''
    Implements a number of useful algorithms for working with dates and times.

    The calendar arithmetic follows Howard Hinnant's date algorithms, adapted to
    Python's flooring ``//`` and ``%`` operators (the reference implementation is
    written for C++, where both operators truncate toward zero).
    '''
    @Entrypoint
    @staticmethod
    def days_from_civil(year: int = 0, month: int = 0, day: int = 0) -> int:
        '''
        Converts a year, month, day combination into a day count relative to the unix epoch.
        Parameters:
            year (int): The year
            month (int): The month. January: 1, February: 2, ....
            day (int): The day
        Returns:
            (int): The number of days since 1970-01-01 (negative for earlier dates)
        '''
        # Implements the days_from_civil algorithm described here:
        # https://howardhinnant.github.io/date_algorithms.html#days_from_civil

        # The algorithm counts years from March 1 so that a leap day is the last day
        # of its year; January and February therefore belong to the previous year.
        if month <= 2:
            year -= 1

        # '//' floors, so the era is correct for negative years without the
        # 'year - 399' adjustment the truncating C++ reference needs.
        era = year // 400

        # year of the era, 0..399
        yoe = year - era * 400

        # day of the (March based) year
        doy = (153 * (month - 3 if month > 2 else month + 9) + 2) // 5 + day - 1

        # day of the era
        doe = yoe * 365 + yoe // 4 - yoe // 100 + doy

        # number of days since epoch: 146097 days per 400 year era, and
        # 719468 days between 0000-03-01 and 1970-01-01
        return era * 146097 + doe - 719468

    @Entrypoint
    @staticmethod
    def date_to_seconds(year: int = 0, month: int = 0, day: int = 0) -> float:
        '''
        Creates a unix timestamp (midnight at the start of the day) from date values.
        Parameters:
            year (int): The year
            month (int): The month. January: 1, February: 2, ....
            day (int): The day
        Returns:
            seconds(float): The number of seconds since the unix epoch
        '''
        return Chrono.days_from_civil(year, month, day) * 86400

    @Entrypoint
    @staticmethod
    def time_to_seconds(hour: int = 0, minute: int = 0, second: float = 0) -> float:
        '''
        Converts an hour, minute, second combination into seconds
        Parameters:
            hour (int): The hour (0-23)
            minute (int): The minute
            second (float): The second
        Returns:
            (float) the number of seconds
        '''
        return (hour * 3600) + (minute * 60) + second

    @Entrypoint
    @staticmethod
    def weekday_difference(day1: int, day2: int) -> int:
        '''
        Gets the number of days to step forward from weekday day2 to reach weekday day1
        Parameters:
            day1 (int): The first day (0-6)
            day2 (int): The second day (0-6)

        Returns:
            (int) the difference between the two weekdays (0-6 for inputs in 0-6)
        '''
        diff = day1 - day2
        # a negative difference wraps around to the following week
        return diff if diff >= 0 and diff <= 6 else diff + 7

    @Entrypoint
    @staticmethod
    def weekday_from_days(days_from_epoch: int) -> int:
        '''
        Gets the weekday of a day given the number of days from the unix epoch
        Parameters:
            days_from_epoch (int): The number of days from the unix epoch (may be negative)

        Returns:
            (int) the weekday (0-6) where 0 => Sunday ... 6 => Saturday
        '''
        # 1970-01-01 (day 0) was a Thursday (4). '%' with a positive modulus always
        # yields 0..6, so a single expression also covers days before the epoch.
        return (days_from_epoch + 4) % 7

    @Entrypoint
    @staticmethod
    def get_nth_dow_of_month_ts(n: int, dow: int, month: int, year: int) -> int:
        '''
        Gets the timestamp for the nth day-of-week for the given month/year. E.g. get 2nd Sat in July 2022
        Parameters:
            n (int): nth day of week (1-4).
            dow (int): The day of the week (0-6) where 0 => Sunday ... 6 => Saturday
            month (int): the month (1-12)
            year (int): the year

        Returns:
            (int): The nth day of the month in unixtime
        Raises:
            ValueError: if n, dow or month is outside its documented range
        '''
        # Note: Some months will have a 5th dow. i.e. Some months may have 5 Saturdays, for e.g.
        # We, however, restrict this to the 4th dow for reliability and predictability
        if n < 1 or n > 4:
            raise ValueError('n should be 1-4:', n)
        if dow < 0 or dow > 6:
            raise ValueError('dow should be 0-6:', dow)
        if month < 1 or month > 12:
            raise ValueError('invalid month:', month)

        first_of_month = Chrono.days_from_civil(year, month, 1)
        first_weekday = Chrono.weekday_from_days(first_of_month)

        # days from the 1st to the first matching weekday, plus whole weeks for n
        offset = Chrono.weekday_difference(dow, first_weekday) + (n - 1) * 7

        # stay in integer arithmetic: the result is a whole number of days
        return (first_of_month + offset) * 86400

    @Entrypoint
    @staticmethod
    def year_from_ts(ts: float) -> int:
        '''
        Gets the year from a unix timestamp
        Parameters:
            ts (float): the unix timestamp
        Returns:
            (int): The year
        '''
        # Based on the civil_from_days algorithm described here:
        # https://howardhinnant.github.io/date_algorithms.html#civil_from_days

        # days since 0000-03-01
        z = int(ts // 86400 + 719468)

        # '//' floors, so no 'z - 146096' adjustment is needed for negative z
        era = z // 146097
        doe = z - era * 146097
        yoe = (doe - (doe // 1460) + (doe // 36524) - (doe // 146096)) // 365
        year = yoe + era * 400
        doy = doe - ((365 * yoe) + (yoe // 4) - (yoe // 100))
        mp = (5 * doy + 2) // 153
        m = mp + (3 if mp < 10 else -9)

        # January and February belong to the next civil year of the March based year
        if m <= 2:
            year += 1
        return year

    @Entrypoint
    @staticmethod
    def is_leap_year(year: int) -> bool:
        '''
        Tests if a year is a leap year.
        Parameters:
            year(int): The year
        Returns:
            True if the year is a leap year, False otherwise
        '''
        return (year % 4 == 0 and year % 100 != 0) or year % 400 == 0

    @Entrypoint
    @staticmethod
    def is_valid_time(hour: int, minute: int, second: float) -> bool:
        '''
        Tests if a hour, min, sec combination is a valid time.
        Parameters:
            hour(int): The hour
            minute(int): The minute
            second(float): The second
        Returns:
            True if the time is valid, False otherwise
        '''
        # '24' is valid alternative to '0' but only when min and sec are both 0
        if hour < 0 or hour > 24 or (hour == 24 and (minute != 0 or second != 0)):
            return False
        if minute < 0 or minute > 59 or second < 0 or second >= 60:
            return False
        return True

    @Entrypoint
    @staticmethod
    def is_valid_date(year: int, month: int, day: int) -> bool:
        '''
        Tests if a year, month, day combination is a valid date. Year is required.
        Month and day are optional. If day is present, month is required.
        A negative value marks a component as absent.
        Parameters:
            year (int): The year
            month (int): The month (January=1)
            day (int): The day of the month
        Returns:
            True if the date is valid, False otherwise
        '''
        if year < 0:
            return False

        if month < 0:
            # without a month, a day on its own is meaningless
            return day < 0

        if month < 1 or month > 12:
            return False

        if day < 0:
            return True
        if day < 1:
            return False

        if month == 2:
            max_day = 29 if Chrono.is_leap_year(year) else 28
        elif month == 4 or month == 6 or month == 9 or month == 11:
            max_day = 30
        else:
            max_day = 31
        return day <= max_day


# diff --git a/typed_python/lib/datetime/chrono_test.py b/typed_python/lib/datetime/chrono_test.py
# new file mode 100644
# index 000000000..83db4ec12
# --- /dev/null
# +++ b/typed_python/lib/datetime/chrono_test.py
# @@ -0,0 +1,90 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
from typed_python.lib.datetime.chrono import Chrono


class TestChrono(unittest.TestCase):
    '''Checks the leap year, date and time validators of Chrono.'''

    def test_is_leap_year_valid(self):
        # every fourth year from 2000 through 2048 is a leap year
        for year in range(2000, 2049, 4):
            assert Chrono.is_leap_year(year), year

    def test_is_leap_year_invalid(self):
        # century years not divisible by 400, plus a few ordinary years
        for year in (1700, 1800, 1900, 1997, 1999, 2100, 2022):
            assert not Chrono.is_leap_year(year), year

    def test_is_date_valid(self):
        valid_dates = (
            (1997, 1, 1),   # random date
            (2020, 2, 29),  # Feb 29 on leap year
        )

        for year, month, day in valid_dates:
            assert Chrono.is_valid_date(year, month, day), (year, month, day)

    def test_is_date_invalid(self):
        invalid_dates = (
            (1997, 0, 1),    # Month < 1
            (1997, 13, 1),   # Month > 12
            (1997, 1, 0),    # Day < 1
            (1997, 1, 32),   # Day > 31 in Jan
            (1997, 2, 29),   # Day > 28 in non-leap-year Feb
            (2100, 2, 29),   # Day > 28 in non-leap-year Feb
            (1997, 0, 25),   # Month < 1
            (2020, 2, 30),   # Day > 29 in Feb (leap year)
            (2020, 4, 31),   # Day > 30 in Apr
            (2020, 6, 31),   # Day > 30 in June
            (2020, 9, 31),   # Day > 30 in Sept
            (2020, 11, 31),  # Day > 30 in Nov
        )

        for year, month, day in invalid_dates:
            assert not Chrono.is_valid_date(year, month, day), (year, month, day)

    def test_is_time_valid(self):
        valid_times = (
            (0, 0, 0),     # 00:00:00
            (24, 0, 0),    # 24:00:00
            (1, 1, 1),     # random time
            (12, 59, 59),  # random time
        )

        for hour, minute, second in valid_times:
            assert Chrono.is_valid_time(hour, minute, second), (hour, minute, second)

    def test_is_time_invalid(self):
        invalid_times = (
            (24, 1, 0),  # minute and second must be 0 if hour is 24
            (25, 0, 0),  # hour greater than 24
            (-1, 0, 0),  # hour less than 0
            (1, 0, -1),  # second < 0
            (1, -1, 0),  # minute < 0
            (1, 0, 60),  # second > 59
            (1, 60, 0),  # minute > 59
        )

        for hour, minute, second in invalid_times:
            assert not Chrono.is_valid_time(hour, minute, second), (hour, minute, second)


# diff --git a/typed_python/lib/datetime/date_formatter.py b/typed_python/lib/datetime/date_formatter.py
# new file mode 100644
# index 000000000..b05f99958
# --- /dev/null
# +++ b/typed_python/lib/datetime/date_formatter.py
# @@ -0,0 +1,254 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typed_python import Class, Dict, Entrypoint, Final, ListOf
from typed_python.lib.datetime.chrono import Chrono

# int to string month mapping where 1 => January
INT_TO_MONTH_NAMES = Dict(int, str)({
    1: 'January',
    2: 'February',
    3: 'March',
    4: 'April',
    5: 'May',
    6: 'June',
    7: 'July',
    8: 'August',
    9: 'September',
    10: 'October',
    11: 'November',
    12: 'December',
})

# int to string abbreviated month mapping where 1 => Jan
INT_TO_MONTH_ABBR = Dict(int, str)({
    1: 'Jan',
    2: 'Feb',
    3: 'Mar',
    4: 'Apr',
    5: 'May',
    6: 'Jun',
    7: 'Jul',
    8: 'Aug',
    9: 'Sep',
    10: 'Oct',
    11: 'Nov',
    12: 'Dec',
})

# int to string day mapping where 0 => Sunday
INT_TO_DAY_NAMES = Dict(int, str)({
    0: 'Sunday',
    1: 'Monday',
    2: 'Tuesday',
    3: 'Wednesday',
    4: 'Thursday',
    5: 'Friday',
    6: 'Saturday'
})

# int to string abbreviated day mapping where 0 => Sun
INT_TO_DAY_ABBR = Dict(int, str)({
    0: 'Sun',
    1: 'Mon',
    2: 'Tue',
    3: 'Wed',
    4: 'Thu',
    5: 'Fri',
    6: 'Sat'
})


@Entrypoint
def convert_to_12h(hour: int):
    '''
    Converts an hour on the 24 hour clock to the 12 hour clock
    Parameters:
        hour (int): The hour (0-24)
    Returns:
        (int): The hour on the 12 hour clock (1-12); 0, 12 and 24 all map to 12
    '''
    if hour == 0 or hour == 12 or hour == 24:
        return 12
    return hour if hour < 12 else hour - 12


class DateFormatter(Class, Final):
    '''
    Formats unix timestamps as strings using strftime style directives.
    '''
    @Entrypoint
    @staticmethod
    def isoformat(ts: float, utc_offset: int = 0):
        '''
        Formats a unix timestamp as 'YYYY-MM-DDTHH:MM:SS'
        Parameters:
            ts (float): The unix timestamp
            utc_offset (int): The offset from UTC in seconds
        Returns:
            (str): The formatted date string
        '''
        return DateFormatter.format(ts=ts, format='%Y-%m-%dT%H:%M:%S', utc_offset=utc_offset)

    @Entrypoint
    @staticmethod
    def f2d(num: int) -> str:
        '''
        Converts a int to string and left pads it to 2 digits
        Parameters:
            num (int): The int to format
        Returns:
            (str): a 2 digit string representation of the int
        '''
        res = str(num)
        if len(res) == 1:
            return '0' + res
        return res

    @Entrypoint
    @staticmethod
    def f3d(num: int) -> str:
        '''
        Converts a int to string and left pads it to 3 digits
        Parameters:
            num (int): The int to format
        Returns:
            (str): a 3 digit string representation of the int
        '''
        res = str(num)
        width = len(res)
        if width == 2:
            return '0' + res
        elif width == 1:
            return '00' + res
        return res

    @Entrypoint
    @staticmethod
    def f4d(num: int) -> str:
        '''
        Converts a int to string and left pads it with zeroes to 4 digits
        Parameters:
            num (int): The int to format
        Returns:
            (str): a 4 digit string representation of the int
        '''
        res = str(num)
        width = len(res)
        if width == 3:
            return '0' + res
        elif width == 2:
            return '00' + res
        elif width == 1:
            return '000' + res
        return res

    @Entrypoint
    @staticmethod
    def format(ts: float = 0, utc_offset: int = 0, format: str = "%Y-%m-%d %H:%M:%S") -> str:
        '''
        Converts a Timestamp to a string in a given format
        Parameters:
            ts (float): The unix timestamp
            utc_offset (int): The offset from UTC in seconds
            format (str): A string specifying formatting directives. E.g. '%Y-%m-%d %H:%M:%S'
        Returns:
            date_str(str): A string representing the date in the specified format. E.g. "Mon January 2, 2021"
        Raises:
            ValueError: if the format string contains an unsupported directive
        '''
        # This bit of logic rightly belongs in the Chrono module. However, we gain some efficiency by inlining
        # here instead of paying the tuple creation cost - i.e to return (year, month, day, hour, etc)
        # especially considering that .format may be called in large loops/batches

        # Whole seconds in the requested offset. Floor rather than truncate so that
        # fractional timestamps before the epoch land on the preceding second.
        tsi = int((ts + utc_offset) // 1)
        days = tsi // 86400

        # civil_from_days (https://howardhinnant.github.io/date_algorithms.html#civil_from_days).
        # '//' floors, so the era needs no adjustment for negative day counts.
        z = days + 719468
        era = z // 146097
        doe = z - era * 146097
        yoe = (doe - (doe // 1460) + (doe // 36524) - (doe // 146096)) // 365
        y = yoe + era * 400
        doy = doe - ((365 * yoe) + (yoe // 4) - (yoe // 100))
        mp = (5 * doy + 2) // 153
        d = doy - (153 * mp + 2) // 5 + 1
        m = mp + (3 if mp < 10 else -9)
        if m <= 2:
            y += 1

        h = (tsi // 3600) % 24
        minute = (tsi // 60) % 60
        s = tsi % 60

        # short circuits for common formats
        if format == '%Y-%m-%d':
            return '-'.join(ListOf(str)([DateFormatter.f4d(y), DateFormatter.f2d(m), DateFormatter.f2d(d)]))

        if format == '%Y-%m-%d %H:%M:%S':
            return ''.join(ListOf(str)([
                DateFormatter.f4d(y),
                '-',
                DateFormatter.f2d(m),
                '-',
                DateFormatter.f2d(d),
                ' ',
                DateFormatter.f2d(h),
                ':',
                DateFormatter.f2d(minute),
                ':',
                DateFormatter.f2d(s)
            ]))

        # 1970-01-01 (day 0) was a Thursday (4); '%' keeps the result in 0..6
        # for days before the epoch too. 0 => Sunday ... 6 => Saturday
        weekday = (days + 4) % 7

        # doy above is based on a year starting on March 1.
        # Shift to January 1 based year by adding 60 days and wrapping
        doy += 60
        if doy > 365:
            doy = doy % 365

        # add extra day to doy if leap year and month is march or greater
        if m > 2 and Chrono.is_leap_year(y):
            doy += 1

        result = ListOf(str)()

        pos = 0
        strlen = len(format)

        while pos < strlen:
            if format[pos] == '%' and pos + 1 < strlen:
                directive = format[pos + 1]

                if directive == 'Y':
                    result.append(DateFormatter.f4d(y))
                elif directive == 'm':
                    result.append(DateFormatter.f2d(m))
                elif directive == 'd':
                    result.append(DateFormatter.f2d(d))
                elif directive == 'H':
                    result.append(DateFormatter.f2d(h))
                elif directive == 'M':
                    result.append(DateFormatter.f2d(minute))
                elif directive == 'S':
                    result.append(DateFormatter.f2d(s))
                elif directive == 'a':
                    result.append(INT_TO_DAY_ABBR[weekday])
                elif directive == 'A':
                    result.append(INT_TO_DAY_NAMES[weekday])
                elif directive == 'w':
                    result.append(str(weekday))
                elif directive == 'b':
                    result.append(INT_TO_MONTH_ABBR[m])
                elif directive == 'B':
                    result.append(INT_TO_MONTH_NAMES[m])
                elif directive == 'y':
                    result.append(DateFormatter.f2d(y % 100))
                elif directive == 'I':
                    result.append(DateFormatter.f2d(convert_to_12h(h)))
                elif directive == 'p':
                    result.append('AM' if h < 12 else 'PM')
                elif directive == 'Z':
                    result.append('UTC')  # timestamps don't store tz data, are pegged to UTC
                elif directive == 'z':
                    result.append('+0000')  # timestamps don't store tz data, are pegged to UTC
                elif directive == 'j':
                    result.append(DateFormatter.f3d(doy))  # day number of year
                elif directive == 'C':
                    result.append(DateFormatter.f2d(y // 100))  # century
                elif directive == '%':
                    result.append('%')
                elif directive == 'u':
                    result.append(str(7 if weekday == 0 else weekday))  # ISO weekday 1-7
                else:
                    raise ValueError('Unsupported formatting directive: ' + directive)
                # skip the directive character; the shared increment below skips the '%'
                pos += 1
            else:
                result.append(format[pos])
            pos += 1
        return ''.join(result)


# diff --git a/typed_python/lib/datetime/date_formatter_test.py b/typed_python/lib/datetime/date_formatter_test.py
# new file mode 100644
# index 000000000..5d78f8083
# --- /dev/null
# +++ b/typed_python/lib/datetime/date_formatter_test.py
# @@ -0,0 +1,257 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import time
from datetime import datetime, timedelta
from typed_python.lib.datetime.date_formatter import DateFormatter
import pytz

# strftime patterns used for the formats under test and for failure messages
_ISO = '%Y-%m-%dT%H:%M:%S'
_YMD = '%Y-%m-%d'

# increment applied per iteration for each supported step name
_STEP_DELTAS = {
    'days': timedelta(days=1),
    'hours': timedelta(hours=1),
    'minutes': timedelta(minutes=1),
    'seconds': timedelta(seconds=1),
}


def get_datetimes_in_range(start, end, step):
    '''
    Lists the datetimes from start, advancing one step at a time until end is reached.
    Parameters:
        start (datetime): The first datetime (always included)
        end (datetime): Stepping stops once the current datetime is no longer before end
        step (str): One of 'days', 'hours', 'minutes', 'seconds'
    Returns:
        (list): The generated datetimes
    '''
    moments = [start]
    current = start

    while current < end:
        delta = _STEP_DELTAS.get(step)
        if delta is None:
            raise ValueError('Unsupported step: ' + step)
        current += delta
        moments.append(current)
    return moments


def get_years_in_range(start, end):
    '''
    Lists January 1st, midnight UTC, for each year in [start, end).
    '''
    return [datetime(year, 1, 1, 0, 0, 0, 0, pytz.UTC) for year in range(start, end)]


def _utc(year, month, day, hour=0, minute=0):
    # timezone aware datetime on a whole minute
    return datetime(year, month, day, hour, minute, 0, 0, pytz.UTC)


def _daily():
    # every day from 2019-01-01 through 2020-01-31
    return get_datetimes_in_range(start=_utc(2019, 1, 1), end=_utc(2020, 1, 31), step='days')


def _on_leap_day(first, last, step):
    # moments on 2020-02-29 between the (hour, minute) pairs first and last
    return get_datetimes_in_range(start=_utc(2020, 2, 29, first[0], first[1]),
                                  end=_utc(2020, 2, 29, last[0], last[1]),
                                  step=step)


def _seconds():
    return _on_leap_day((13, 17), (13, 19), 'seconds')


def _minutes_of_day():
    return _on_leap_day((0, 17), (23, 59), 'minutes')


def _minutes_around_noon():
    return _on_leap_day((10, 17), (12, 19), 'minutes')


def _years():
    return get_years_in_range(1999, 2022)


def _assert_matches_strftime(moments, directive, label=_YMD):
    # DateFormatter.format must agree with datetime.strftime for every moment
    for moment in moments:
        assert DateFormatter.format(datetime.timestamp(moment), 0, directive) == moment.strftime(directive), \
            moment.strftime(label)


def _assert_constant(moments, directive, expected):
    # DateFormatter.format must produce the same fixed text for every moment
    for moment in moments:
        assert DateFormatter.format(datetime.timestamp(moment), 0, directive) == expected, moment.strftime(_YMD)


class TestDateFormatter(unittest.TestCase):
    '''Compares DateFormatter output with datetime.strftime, one directive at a time.'''

    def test_isoformat(self):
        for moment in _seconds():
            assert DateFormatter.isoformat(datetime.timestamp(moment), 0) == moment.strftime(_ISO), \
                moment.strftime(_ISO)

    def test_format_directives(self):
        _assert_matches_strftime(_seconds(), '%Y-%m-%dT%H:%M:%S', _ISO)

    def test_format_directive_a(self):
        _assert_matches_strftime(_daily(), '%a')

    def test_format_directive_A(self):
        _assert_matches_strftime(_daily(), '%A')

    def test_format_directive_w(self):
        _assert_matches_strftime(_daily(), '%w')

    def test_format_directive_d(self):
        _assert_matches_strftime(_daily(), '%d')

    def test_format_directive_b(self):
        _assert_matches_strftime(_daily(), '%b')

    def test_format_directive_B(self):
        _assert_matches_strftime(_daily(), '%B')

    def test_format_directive_m(self):
        _assert_matches_strftime(_daily(), '%m')

    def test_format_directive_y(self):
        _assert_matches_strftime(_years(), '%y')

    def test_format_directive_H(self):
        _assert_matches_strftime(_minutes_of_day(), '%H', _ISO)

    def test_format_directive_I(self):
        _assert_matches_strftime(_minutes_of_day(), '%I', _ISO)

        # the current local time, shifted by the local UTC offset, must agree as well
        now = time.time()
        local_now = datetime.fromtimestamp(now)
        assert local_now.strftime('%I') == DateFormatter.format(now, time.localtime().tm_gmtoff, '%I')

    def test_format_directive_p(self):
        _assert_matches_strftime(_minutes_of_day(), '%p', _ISO)

    def test_format_directive_M(self):
        _assert_matches_strftime(_minutes_around_noon(), '%M', _ISO)

    def test_format_directive_S(self):
        _assert_matches_strftime(_seconds(), '%S', _ISO)

    def test_format_directive_Z(self):
        _assert_constant(_daily(), '%Z', 'UTC')

    def test_format_directive_z(self):
        _assert_constant(_daily(), '%z', '+0000')

    def test_format_directive_C(self):
        _assert_matches_strftime(_years(), '%C', '%Y')

    def test_format_directive_Y(self):
        _assert_matches_strftime(_years(), '%Y')

    def test_format_directive_u(self):
        _assert_matches_strftime(_daily(), '%u')

    def test_format_directive_percent(self):
        _assert_matches_strftime(_daily(), '%%')

    def test_format_directive_doy(self):
        _assert_matches_strftime(_daily(), '%j')

    def test_format_string_Ymd(self):
        _assert_matches_strftime(_daily(), '%Y-%m-%d')

    def test_format_string_ymd(self):
        _assert_matches_strftime(_daily(), '%y-%m-%d')

    def test_format_string_abdY(self):
        _assert_matches_strftime(_daily(), '%a %b %d, %Y')

    def test_format_string_YmdHMS(self):
        _assert_matches_strftime(_minutes_around_noon(), '%Y-%m-%d %H:%M:%S', _ISO)

    def test_format_string_YmdTHMS(self):
        _assert_matches_strftime(_minutes_around_noon(), '%Y-%m-%dT%H:%M:%S', _ISO)


# diff --git a/typed_python/lib/datetime/date_parser.py b/typed_python/lib/datetime/date_parser.py
# new file mode 100644
# index 000000000..a6f288c7d
# --- /dev/null
# +++ b/typed_python/lib/datetime/date_parser.py
# @@ -0,0 +1,579 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typed_python import Class, Dict, Final
from typed_python import Entrypoint, ListOf
from typed_python.lib.datetime.timezone import Timezone
from typed_python.lib.datetime.chrono import Chrono

# Month names. Every parser in this module lower-cases its input before
# matching, so all spellings are stored in lower case.
JAN = 'jan'
FEB = 'feb'
MAR = 'mar'
APR = 'apr'
MAY = 'may'
JUN = 'jun'
JUL = 'jul'
AUG = 'aug'
SEP = 'sep'
OCT = 'oct'
NOV = 'nov'
DEC = 'dec'

JANUARY = 'january'
FEBRUARY = 'february'
MARCH = 'march'
APRIL = 'april'
JUNE = 'june'
JULY = 'july'
AUGUST = 'august'
SEPT = 'sept'
SEPTEMBER = 'september'
OCTOBER = 'october'
NOVEMBER = 'november'
DECEMBER = 'december'

# Single character / short tokens recognised by the tokenizer and parsers.
T = 't'
PLUS = '+'
COLON = ':'
# NOTE: despite its name this is the forward slash used in dates such as
# 1997/01/02. The name is kept because other modules may import it.
BACKSLASH = '/'
DASH = '-'
COMMA = ','
SPACE = ' '
DOT = '.'
AM = 'am'
PM = 'pm'

# Maps every supported (lower-case) month spelling to its 1-based month number.
MONTH_TO_INT = Dict(str, int)({
    JAN: 1, JANUARY: 1,
    FEB: 2, FEBRUARY: 2,
    MAR: 3, MARCH: 3,
    APR: 4, APRIL: 4,
    MAY: 5,
    JUN: 6, JUNE: 6,
    JUL: 7, JULY: 7,
    AUG: 8, AUGUST: 8,
    SEP: 9, SEPT: 9, SEPTEMBER: 9,
    OCT: 10, OCTOBER: 10,
    NOV: 11, NOVEMBER: 11,
    DEC: 12, DECEMBER: 12
})


@Entrypoint
def is_month(month_str: str) -> bool:
    '''
    Tests if string represents a valid month
    Parameters:
        month_str: The month string (case insensitive). Examples: 'Jan', 'January'
    Returns:
        True if the month string is valid, False otherwise
    '''
    # MONTH_TO_INT only holds lower-case keys, so normalise before the lookup;
    # otherwise the documented 'Jan' / 'January' examples would be rejected.
    return month_str.lower() in MONTH_TO_INT


@Entrypoint
def is4d(str: str) -> bool:
    '''
    Tests if a string is 4 digits.
    Parameters:
        str(string): The string to test. (The parameter shadows the builtin
            name; it is kept for backward compatibility with keyword callers.)
    Returns:
        True if the input is a 4 digit string
    '''
    return len(str) == 4 and str.isdigit()


@Entrypoint
def is2d(str: str) -> bool:
    '''
    Tests if a string is 2 digits.
    Parameters:
        str(string): The string to test. (The parameter shadows the builtin
            name; it is kept for backward compatibility with keyword callers.)
    Returns:
        True if the input is a 2 digit string
    '''
    return len(str) == 2 and str.isdigit()


class DateParser(Class, Final):
    '''
    Converts date/time strings to unix timestamps.

    parse() is the general entry point. It dispatches to parse_with_format()
    when a format string is given, otherwise it tries parse_iso_str() and
    falls back to parse_non_iso().
    '''

    @Entrypoint
    @staticmethod
    def parse(date_str: str, format: str = '') -> float:
        '''
        Parse a date string and return a unix timestamp
        Parameters:
            date_str (str): A string representing a date time. examples: 2022-01-03T02:45 or January 2, 1997 2:00pm
            format (str): An optional format string. E.g. '%Y-%m-%d'. If no format string is provided, the parser
                first tries the ISO 8601 parser and, if that raises ValueError, the non ISO 8601 parser
        Returns:
            (float) A unix timestamp
        Raises:
            ValueError: if the string cannot be parsed
        '''
        if format != '':
            return DateParser.parse_with_format(date_str, format)

        try:
            return DateParser.parse_iso_str(date_str)
        except ValueError:
            return DateParser.parse_non_iso(date_str)

    @Entrypoint
    @staticmethod
    def _read_digits(date_str: str, cursor: int, width: int, directive: str) -> int:
        '''
        Reads exactly `width` digits from date_str starting at `cursor`.
        Parameters:
            date_str (str): The string being parsed
            cursor (int): Index of the first character of the field
            width (int): The required number of digits
            directive (str): The directive letter, used only in the error message
        Returns:
            (int) The value of the field
        Raises:
            ValueError: if the field is shorter than `width` or holds a non digit
        '''
        field = date_str[cursor: cursor + width]
        if len(field) != width or not field.isdigit():
            raise ValueError('Bad value for %' + directive + ':', date_str)
        return int(field)

    @Entrypoint
    @staticmethod
    def parse_with_format(date_str: str, format: str) -> float:
        '''
        Parse a date string in the specified format and return a unix timestamp
        Parameters:
            date_str (str): A date string (matched case insensitively)
            format (str): A string containing format directives. E.g. '%Y-%m-%d'
                Supported directives are:
                    %Y (zero padded 4 digit year)
                    %m (zero padded 2 digit month, 01-12)
                    %d (zero padded 2 digit day, 01-31)
                    %H (zero padded 2 digit hour in 24 hour format)
                    %I (zero padded 2 digit hour in 12 hour format, 01-12 when combined with %p)
                    %M (zero padded 2 digit minute, 00-59)
                    %S (zero padded 2 digit second, 00-59)
                    %p (am or pm)
                    %b (3 character month abbreviation, jan-dec)
                    %B (month, january-december)
                    %Z (timezone abbreviation, e.g. EST, UTC, NYC)
                    %z (timezone offset, e.g. +0000, +00:00)
                Any other character in the format consumes exactly one character
                of date_str; the two characters are not compared.
        Returns:
            (float) A unix timestamp
        Raises:
            ValueError: on a malformed field, an unsupported directive, an invalid
                date or time, or an unrecognized timezone
        '''
        # -1 marks "not supplied" for the date fields. They are validated with
        # that marker in place and only afterwards defaulted to 1.
        # NOTE(review): this relies on Chrono.is_valid_date accepting -1 for an
        # absent month/day - confirm in Chrono.
        year, month, day, hour, minute, second = -1, -1, -1, 0, 0, 0
        tz_str = ''
        ampm = ''

        format_str_len = len(format)
        date_str = date_str.lower()
        date_str_cursor = format_cursor = 0

        while format_cursor < format_str_len:
            if format[format_cursor] != '%' or format_cursor + 1 >= format_str_len:
                # literal character (or a trailing '%'): skip one character on both sides
                format_cursor += 1
                date_str_cursor += 1
                continue

            directive = format[format_cursor + 1]
            format_cursor += 2

            if directive == 'Y':
                year = DateParser._read_digits(date_str, date_str_cursor, 4, directive)
                date_str_cursor += 4
            elif directive == 'm':
                month = DateParser._read_digits(date_str, date_str_cursor, 2, directive)
                date_str_cursor += 2
            elif directive == 'd':
                day = DateParser._read_digits(date_str, date_str_cursor, 2, directive)
                date_str_cursor += 2
            elif directive == 'H' or directive == 'I':
                hour = DateParser._read_digits(date_str, date_str_cursor, 2, directive)
                date_str_cursor += 2
            elif directive == 'M':
                minute = DateParser._read_digits(date_str, date_str_cursor, 2, directive)
                date_str_cursor += 2
            elif directive == 'S':
                second = DateParser._read_digits(date_str, date_str_cursor, 2, directive)
                date_str_cursor += 2
            elif directive == 'b':
                month_abbr = date_str[date_str_cursor: date_str_cursor + 3]
                if month_abbr in MONTH_TO_INT:
                    month = MONTH_TO_INT[month_abbr]
                    date_str_cursor += 3
                else:
                    raise ValueError('Bad value for %b:', date_str)
            elif directive == 'B':
                # Try the longest spelling first (september = 9 characters) down
                # to the shortest (may = 3) so 'june' is not read as 'jun'.
                width = 9
                matched = False
                while width >= 3 and not matched:
                    month_name = date_str[date_str_cursor: date_str_cursor + width]
                    if month_name in MONTH_TO_INT:
                        month = MONTH_TO_INT[month_name]
                        date_str_cursor += len(month_name)
                        matched = True
                    width -= 1
                if not matched:
                    raise ValueError('Bad value for %B:', date_str)
            elif directive == 'Z':
                # Longest abbreviation first: 5 and 4 characters are future
                # proofing, 3 covers e.g. EST/EDT/PST and 2 covers e.g. PT/ET/CT.
                width = 5
                matched = False
                while width >= 2 and not matched:
                    tz_name = date_str[date_str_cursor: date_str_cursor + width]
                    if Timezone.is_valid_tz_string(tz_name):
                        tz_str = tz_name
                        date_str_cursor += len(tz_name)
                        matched = True
                    width -= 1
                if not matched:
                    raise ValueError('Bad value for %Z:', date_str)
            elif directive == 'z':
                with_colon = date_str[date_str_cursor: date_str_cursor + 6]
                compact = date_str[date_str_cursor: date_str_cursor + 5]
                hours_only = date_str[date_str_cursor: date_str_cursor + 3]
                # [+|-]DD:DD, e.g. +05:30. The colon is dropped so the offset is
                # handed to Timezone in the [+|-]DDDD form used below.
                if (len(with_colon) == 6 and with_colon[3] == COLON
                        and Timezone.is_valid_tz_string(with_colon[:3] + with_colon[4:])):
                    tz_str = with_colon[:3] + with_colon[4:]
                    date_str_cursor += 6
                # [+|-]DDDD, e.g. +0000, +1200
                elif Timezone.is_valid_tz_string(compact):
                    tz_str = compact
                    date_str_cursor += 5
                # [+|-]DD
                elif Timezone.is_valid_tz_string(hours_only):
                    tz_str = hours_only
                    date_str_cursor += 3
                else:
                    raise ValueError('Bad value for %z:', date_str)
            elif directive == 'p':
                tok = date_str[date_str_cursor: date_str_cursor + 2]
                if tok != AM and tok != PM:
                    raise ValueError('Bad value for %p:', date_str)
                ampm = tok
                date_str_cursor += 2
            else:
                raise ValueError('Unsupported directive:', directive)

        if ampm != '':
            if hour > 12 or hour < 1:
                raise ValueError('AM/PM specified. hour must be between 1 and 12')
            if ampm == AM and hour == 12:
                # 12am is midnight
                hour = 0
            elif ampm == PM and hour != 12:
                # 12pm is noon and stays 12; 1pm-11pm become 13-23
                hour = hour + 12

        if not Chrono.is_valid_date(year, month, day):
            raise ValueError('Invalid date:', date_str)
        if not Chrono.is_valid_time(hour, minute, second):
            raise ValueError('Invalid time:', date_str)

        if month == -1:
            month = 1
        if day == -1:
            day = 1

        timestamp = Chrono.date_to_seconds(year, month, day) + Chrono.time_to_seconds(hour, minute, second)

        if Timezone.is_valid_tz_string(tz_str):
            timestamp += Timezone.tz_str_to_utc_offset(tz_str, timestamp)
        else:
            raise ValueError('Unrecognized timezone: ', tz_str)

        return timestamp

    @Entrypoint
    @staticmethod
    def parse_iso_str(date_str: str) -> float:
        '''
        Converts an ISO 8601 formated date string to a unix timestamp
        Parameters:
            date_str (str): An ISO 8601 formatted string. Spaces are treated like
                the 'T' date/time separator.
        Returns:
            unixtime(float): A unix timestamp
        Raises:
            ValueError: if the date, time or overall layout is not valid
        '''
        # '-' is kept as a token (only '/' and ':' are dropped) because after
        # the date segment it introduces a negative UTC offset.
        tokens = DateParser._get_tokens(time_str=date_str.lower().replace(' ', T), skip_chars='/:')

        # Process date segment
        date_tokens = ListOf(str)()
        cursor = 0
        while cursor < len(tokens):
            token = tokens[cursor]
            if token == T:
                cursor += 1
                break
            if token == PLUS:
                # start of a UTC offset: leave it for the timezone segment
                break
            if token != DASH:
                # inside the date segment '-' is only a field separator
                date_tokens.append(token)
            cursor += 1

        year = month = day = -1

        if len(date_tokens) == 1:
            compact_date = date_tokens[0]
            if len(compact_date) == 8:
                year, month, day = int(compact_date[:4]), int(compact_date[4:6]), int(compact_date[6:8])
            elif len(compact_date) == 6:
                year, month, day = int(compact_date[:4]), int(compact_date[4:6]), 1
            elif len(compact_date) == 4:
                year, month, day = int(compact_date[:4]), 1, 1
            else:
                raise ValueError('Invalid format: ', date_tokens)
        elif len(date_tokens) == 2 and is4d(date_tokens[0]):
            year, month, day = int(date_tokens[0]), int(date_tokens[1]), 1
        elif len(date_tokens) == 3 and is4d(date_tokens[0]):
            year, month, day = int(date_tokens[0]), int(date_tokens[1]), int(date_tokens[2])
        else:
            raise ValueError('Invalid format: ', date_tokens)

        if not Chrono.is_valid_date(year, month, day):
            raise ValueError('Invalid date_tokens: ', date_tokens)

        dt = Chrono.date_to_seconds(year, month, day)

        if cursor >= len(tokens):
            return dt

        # Process time segment
        time_tokens = ListOf(str)()
        while cursor < len(tokens):
            if tokens[cursor] == T:
                cursor += 1
                break
            elif tokens[cursor] == PLUS or tokens[cursor] == DASH or tokens[cursor].isalpha():
                # start of the timezone segment (offset or abbreviation)
                break
            else:
                time_tokens.append(tokens[cursor])
                cursor += 1

        dt += DateParser._parse_iso_time_tokens(time_tokens)
        if cursor >= len(tokens):
            return dt

        # Everything left is the timezone, e.g. ['+', '01', '15'] -> '+0115'
        return Timezone.ts_to_utc(dt, ''.join(tokens[cursor:]))

    @Entrypoint
    @staticmethod
    def _parse_iso_time_tokens(time_tokens: ListOf(str)):
        '''
        Converts the time tokens of an ISO 8601 string to seconds
        Parameters:
            time_tokens (ListOf(str)): Either one compact token (hh, hhmm, hhmmss or
                hhmmss.fff) or two/three tokens (hh mm [ss[.fff]]). An empty list
                means no time of day was given.
        Returns:
            The number of seconds represented by the tokens
        Raises:
            ValueError: if the tokens have an unsupported layout or describe an
                invalid time
        '''
        hour, minute, second = 0, 0, 0.0
        count = len(time_tokens)

        if count == 1:
            compact_time = time_tokens[0]
            size = len(compact_time)
            if size == 6:
                hour, minute, second = int(compact_time[:2]), int(compact_time[2:4]), float(compact_time[4:6])
            elif size == 4:
                hour, minute, second = int(compact_time[:2]), int(compact_time[2:4]), 0.0
            elif size == 2:
                hour, minute, second = int(compact_time[:2]), 0, 0.0
            elif size > 7 and compact_time[6] == DOT:
                # hhmmss.fff - compact form with fractional seconds
                hour, minute, second = int(compact_time[:2]), int(compact_time[2:4]), float(compact_time[4:])
            else:
                # do not silently treat an unrecognised layout as midnight
                raise ValueError('Invalid time: ', time_tokens)
        elif count == 2:
            hour, minute, second = int(time_tokens[0]), int(time_tokens[1]), 0.0
        elif count == 3:
            hour, minute, second = int(time_tokens[0]), int(time_tokens[1]), float(time_tokens[2])
        elif count > 3:
            raise ValueError('Invalid time: ', time_tokens)

        if not Chrono.is_valid_time(hour, minute, second):
            raise ValueError('Invalid time: ', time_tokens)

        return Chrono.time_to_seconds(hour, minute, second)

    @Entrypoint
    @staticmethod
    def _get_tokens(time_str: str, skip_chars: str = '') -> ListOf(str):
        '''
        Tokenises a string into components suitable for datetime processing
        Parameters:
            time_str (str): A string representing a date time. examples: 2022-01-03T02:45 or January 2, 1997 2:00pm
            skip_chars (str): Separators to leave out of the result. A run of whitespace is dropped
                when skip_chars contains a space; a run of other separator characters is dropped
                when that whole run occurs as a substring of skip_chars.
        Returns:
            (ListOf(str)) A list of tokens. E.g. ["1997", "/", "01", "/", "02"]
        '''
        tokens = ListOf(str)()
        strlen = len(time_str)
        keep_spaces = skip_chars.find(SPACE) == -1
        cursor = 0

        # Each pass consumes, in order, at most one run of letters, one run of
        # digits/dots, one run of whitespace and one run of other characters.
        # Every character belongs to one of these classes, so the cursor
        # always advances.
        while cursor < strlen:
            # greedily grab all alpha characters to make a string token
            start = cursor
            while cursor < strlen and time_str[cursor].isalpha():
                cursor += 1
            if cursor > start:
                tokens.append(time_str[start:cursor])

            # greedily grab digits or . to make a numeric token
            start = cursor
            while cursor < strlen and (time_str[cursor].isdigit() or time_str[cursor] == DOT):
                cursor += 1
            if cursor > start:
                tokens.append(time_str[start:cursor])

            # whitespace: a whole run collapses to a single SPACE token
            start = cursor
            while cursor < strlen and time_str[cursor].isspace():
                cursor += 1
            if cursor > start and keep_spaces:
                tokens.append(SPACE)

            # everything else is a separator (e.g. / or :)
            start = cursor
            while (cursor < strlen and not time_str[cursor].isspace()
                   and not time_str[cursor].isdigit() and not time_str[cursor].isalpha()):
                cursor += 1
            if cursor > start and skip_chars.find(time_str[start:cursor]) == -1:
                tokens.append(time_str[start:cursor])

        return tokens

    @Entrypoint
    @staticmethod
    def parse_non_iso_time(tokens) -> float:
        '''
        Converts a set of tokens representing a time to seconds
        Parameters:
            tokens: The time tokens as produced by _get_tokens, e.g. ['2', ':', '00', 'pm'].
                Supported layouts are HH:MM and HH:MM:SS, optionally followed by am or pm.
        Returns:
            (float): The seconds. 0 if there are no tokens.
        Raises:
            ValueError: if the layout is unsupported or the time is invalid
        '''
        h = m = s = 0

        if len(tokens) == 0:
            return 0

        # break into time and ampm parts
        sep_idx = -1
        for idx, token in enumerate(tokens):
            if token == AM or token == PM:
                sep_idx = idx
                break

        if sep_idx > -1:
            time_part, ampm_part = tokens[:sep_idx], tokens[sep_idx:]
        else:
            time_part = tokens
            ampm_part = ListOf(str)()

        if len(time_part) == 5 and time_part[1] == COLON and time_part[2].isdigit() and time_part[3] == COLON:
            # HH:MM:SS
            if time_part[0].isdigit() and time_part[4].isdigit():
                h, m, s = int(time_part[0]), int(time_part[2]), int(time_part[4])
            else:
                raise ValueError('Unsupported time format', tokens)
        elif len(time_part) == 3:
            # HH:MM
            if time_part[0].isdigit() and time_part[1] == COLON and time_part[2].isdigit():
                h, m, s = int(time_part[0]), int(time_part[2]), 0
            else:
                raise ValueError('Unsupported time format', tokens)
        else:
            raise ValueError('Unsupported time format', tokens)

        if len(ampm_part) > 0:
            if h > 12 or h < 1:
                raise ValueError('AM/PM specified. hour must be between 1 and 12')
            if ampm_part[0] == AM and h == 12:
                # 12am is midnight
                h = 0
            elif ampm_part[0] == PM and h != 12:
                # 12pm is noon and stays 12; 1pm-11pm become 13-23
                h = h + 12

        if not Chrono.is_valid_time(h, m, s):
            raise ValueError('Invalid time: ', h, m, s)
        return Chrono.time_to_seconds(h, m, s)

    @Entrypoint
    @staticmethod
    def parse_non_iso(date_str: str) -> float:
        '''
        Parse a date string that is not ISO 8601 and return a unix timestamp
        Parameters:
            date_str (str): A date string (matched case insensitively), optionally followed
                by a time. Supported date layouts:
                    DD/Month/YYYY, DD-Month-YYYY
                    Month/DD/YYYY, Month-DD-YYYY
                    YYYY/Month/DD, YYYY-Month-DD
                    Month D YYYY, D Month YYYY, YYYY Month DD
                Spaces and commas between the parts are ignored.
        Returns:
            (float) A unix timestamp
        Raises:
            ValueError: if the layout is unsupported or the date/time is invalid
        '''
        date_str = date_str.lower().replace('a.m.', AM).replace('p.m.', PM)
        tokens = DateParser._get_tokens(date_str, skip_chars=' ,')

        # if/elif block is long but it's simple and allows us to clearly define the formats we support
        # and add new formats as needed

        y = m = d = 0
        time_tokens = ListOf(str)()
        count = len(tokens)

        # 5+ tokens with 4 digit year as 5th token
        if count >= 5 and is4d(tokens[4]):
            same_separator = ((tokens[1] == BACKSLASH and tokens[3] == BACKSLASH) or
                              (tokens[1] == DASH and tokens[3] == DASH))

            # DD/Month/YYYY or DD-Month-YYYY
            if same_separator and is_month(tokens[2]) and tokens[0].isdigit():
                y, m, d, time_tokens = int(tokens[4]), MONTH_TO_INT[tokens[2]], int(tokens[0]), tokens[5:]

            # Month-DD-YYYY or Month/DD/YYYY
            elif same_separator and is_month(tokens[0]) and tokens[2].isdigit():
                y, m, d, time_tokens = int(tokens[4]), MONTH_TO_INT[tokens[0]], int(tokens[2]), tokens[5:]

            else:
                raise ValueError('Unsupported date format: ' + date_str)

        # 5+ tokens with 4 digit year as 1st token
        elif count >= 5 and is4d(tokens[0]) and is_month(tokens[2]) and tokens[4].isdigit():
            # YYYY/Month/DD or YYYY-Month-DD
            if (tokens[1] == BACKSLASH and tokens[3] == BACKSLASH) or (tokens[1] == DASH and tokens[3] == DASH):
                y, m, d, time_tokens = int(tokens[0]), MONTH_TO_INT[tokens[2]], int(tokens[4]), tokens[5:]
            else:
                raise ValueError('Unsupported date format: ' + date_str)

        # Month D YYYY
        elif count >= 3 and is_month(tokens[0]) and tokens[1].isdigit() and is4d(tokens[2]):
            y, m, d, time_tokens = int(tokens[2]), MONTH_TO_INT[tokens[0]], int(tokens[1]), tokens[3:]

        # D Month YYYY
        elif count >= 3 and is_month(tokens[1]) and tokens[0].isdigit() and is4d(tokens[2]):
            y, m, d, time_tokens = int(tokens[2]), MONTH_TO_INT[tokens[1]], int(tokens[0]), tokens[3:]

        # YYYY Month DD
        elif count >= 3 and is4d(tokens[0]) and is_month(tokens[1]) and tokens[2].isdigit():
            y, m, d, time_tokens = int(tokens[0]), MONTH_TO_INT[tokens[1]], int(tokens[2]), tokens[3:]

        else:
            raise ValueError('Unsupported date format: ' + date_str)

        if not Chrono.is_valid_date(y, m, d):
            raise ValueError('Invalid date: ' + date_str)

        return Chrono.date_to_seconds(y, m, d) + DateParser.parse_non_iso_time(time_tokens)


# ---------------------------------------------------------------------------
# The patch continues with the next file:
# diff --git a/typed_python/lib/datetime/date_parser_test.py b/typed_python/lib/datetime/date_parser_test.py
# new file mode 100644
# index 000000000..f1c4f5e78
# --- /dev/null
# +++ b/typed_python/lib/datetime/date_parser_test.py
# @@ -0,0 +1,721 @@
# Copyright 2017-2020 typed_python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import time +from typed_python.lib.datetime.date_parser import DateParser +from typed_python.compiler.runtime import PrintNewFunctionVisitor +from typed_python import Entrypoint, ListOf + +import pytest +from datetime import datetime, timedelta +import pytz + + +def get_years_in_range(start, end): + dates = [] + for i in range(start, end): + dates.append(datetime(i, 1, 1, 0, 0, 0, 0, pytz.UTC)) + + return dates + + +def get_months_in_year(year): + dates = [] + for i in range(1, 13): + dates.append(datetime(year, i, 1, 0, 0, 0, 0, pytz.UTC)) + + return dates + + +def get_datetimes_in_range(start, end, step): + d = start + dates = [start] + + while d < end: + if step == 'days': + d += timedelta(days=1) + elif step == 'hours': + d += timedelta(hours=1) + elif step == 'minutes': + d += timedelta(minutes=1) + elif step == 'seconds': + d += timedelta(seconds=1) + else: + raise ValueError('Unsupported step: ' + step) + dates.append(d) + return dates + + +def parseISODatetimes(strings: ListOf(str)): + res = ListOf(float)() + for string in strings: + res.append(DateParser.parse_iso_str(string)) + return res + + +@Entrypoint +def parseDatetimes(strings: ListOf(str)): + res = ListOf(datetime)() + for string in strings: + res.append(datetime.strptime(string, '%Y-%m-%dT%H:%M:%S')) + return res + + +@Entrypoint +def parseFormatDatetimes(strings: ListOf(str)): + res = ListOf(float)() + for string in strings: + res.append(DateParser.parse_with_format(string, '%Y-%m-%dT%H:%M:%S')) + return res + + +@Entrypoint +def make_list_of_iso_datestrings(n): + res 
= ListOf(str)() + for i in range(n): + dt = datetime.fromtimestamp(i) + res.append(dt.isoformat()) + return res + + +class TestDateParser(unittest.TestCase): + + # ------------------------------------------------------- + # This set of tests exercises the parse() entrypoint + # ------------------------------------------------------- + + def test_parse(self): + # test parse() without a format string + seconds = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + formats = [ + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%d %H:%M:%S', + '%Y/%m/%dT%H:%M:%S', + '%Y/%m/%d %H:%M:%S', + '%Y%m%dT%H:%M:%S', + '%Y%m%d %H:%M:%S', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%dT%H:%M:%S+00', + '%Y-%m-%dT%H:%M:%S+00:00', + '%b %d %Y %H:%M:%S', # e.g Jan 1 1997 13:50 + '%B %d %Y %H:%M:%S', # e.g January 1 1997 13:50 + '%b %d, %Y %H:%M:%S', # e.g Jan 1, 1997 13:50 + '%B %d, %Y %H:%M:%S', # e.g January 1, 1997 13:50 + '%b%d, %Y %H:%M:%S', # e.g Jan1, 1997 13:50 + '%B%d, %Y %H:%M:%S', # e.g January1, 1997 13:50 + '%b %d,%Y %H:%M:%S', # e.g Jan1, 1997 13:50 + '%B %d,%Y %H:%M:%S', # e.g January1, 1997 13:50 + '%d %b %Y %H:%M:%S', # e.g 1 Jan 1997 13:50 + '%d %B %Y %H:%M:%S', # e.g 1January 1997 13:50 + '%d%b%Y %H:%M:%S', # e.g 1Jan1997 13:50 + '%d%B%Y %H:%M:%S', # e.g 1January1997 13:50 + '%d/%b/%Y %H:%M:%S', # e.g 1/Jan/1997 13:50 + '%d/%B/%Y %H:%M:%S', # e.g 1/January/1997 13:50 + '%d-%b-%Y %H:%M:%S', # e.g 1-Jan-1997 13:50 + '%d-%B-%Y %H:%M:%S', # e.g 1-January-1997 13:50 + '%Y %b %d %H:%M:%S', # e.g 1997 Jan 1 13:50 + '%Y %B %d %H:%M:%S', # e.g 1997 January 1 13:50 + '%Y/%b/%d %H:%M:%S', # e.g 1997/Jan/1 13:50 + '%Y/%B/%d %H:%M:%S', # e.g 1997/January/1 13:50 + '%Y-%b-%d %H:%M:%S', # e.g 1997-Jan-1 13:50 + '%Y-%B-%d %H:%M:%S', # e.g 1997-January-1 13:50 + '%b-%d-%Y %H:%M:%S', # e.g Jan-1-1997 13:50 + '%B-%d-%Y %H:%M:%S', # e.g January-1-1997 13:50 + '%b/%d/%Y %H:%M:%S', # e.g Jan/1/1997 
13:50 + '%B/%d/%Y %H:%M:%S', # e.g January/1/1997 13:50 + ] + + for format in formats: + for second in seconds: + assert DateParser.parse(second.strftime(format)) == datetime.timestamp(second), second.strftime(format) + + def test_empty_string(self): + with pytest.raises(ValueError): + DateParser.parse('') + + def test_fails_on_random_text(self): + with pytest.raises(ValueError): + DateParser.parse('scuse me while i kiss the sky') + + def test_fails_with_extra_text(self): + with pytest.raises(ValueError): + DateParser.parse('1997-01-01 and some more text') + + def test_parse_invalid_year(self): + days = [ + 'a997', # not 4 digit number + '97', + ] + for day in days: + with pytest.raises(ValueError): + DateParser.parse(day) + + def test_parse_ambiguous_date(self): + days = [ + '22-12-13', + '12-12-12' + ] + for day in days: + with pytest.raises(ValueError): + DateParser.parse(day) + + def test_parse_valid_year(self): + years = [ + '1997', + '2020', + '9999', + '0000' + ] + for year in years: + DateParser.parse(year) + + # ------------------------------------------------------- + # This set of tests exercises the parse_iso method + # ------------------------------------------------------- + + def test_parse_invalid_date(self): + days = [ + '1997-01-00', # day < 1 + '1997-01-32', # day > 31 + '1997-04-31', # day > 30 in Apr + '1997-06-31', # day > 30 in June + '1997-09-31', # day > 30 in Sep + '1997-11-31', # day > 30 in Nov + '1997-02-29', # day > 28 for non-leap year Feb + '2020-02-30', # day > 30 for leap year Feb + '2020-02-ab', # day is not digit + '1900-02-29', # year is multiple of 4, but not leap year so no 29 + ] + + for day in days: + with pytest.raises(ValueError): + DateParser.parse_iso_str(day) + + def test_parse_iso_invalid_month(self): + days = [ + '1997-00', + '1997-13', + '1997-ab' + ] + for day in days: + with pytest.raises(ValueError): + DateParser.parse_iso_str(day) + + def test_parse_iso_yyyy(self): + years = get_years_in_range(1942, 1970) + 
get_years_in_range(2001, 2022) + for year in years: + assert DateParser.parse_iso_str(year.strftime('%Y')) == datetime.timestamp(year), year.strftime('%Y') + + def test_parse_iso_yyyymm(self): + months = get_months_in_year(1999) + get_months_in_year(2020) + formats = [ + '%Y-%m', + '%Y/%m', + '%Y%m', + ] + for format in formats: + for month in months: + assert DateParser.parse_iso_str(month.strftime(format)) == datetime.timestamp(month), month.strftime(format) + + def test_parse_iso_yyyymmdd(self): + # all days in non leap year and leap year + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + formats = [ + '%Y-%m-%d', + '%Y/%m/%d', + '%Y%m%d', + + ] + for format in formats: + for day in days: + assert DateParser.parse_iso_str(day.strftime(format)) == datetime.timestamp(day), day.strftime(format) + + def test_parse_iso_yyyymmddhh(self): + # all hours in feb 2020 + hours = get_datetimes_in_range(start=datetime(2020, 2, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 3, 2, 0, 0, 0, 0, pytz.UTC), + step='hours') + formats = [ + '%Y-%m-%dT%H', + '%Y-%m-%dT%HZ', + '%Y-%m-%d %H', + '%Y/%m/%dT%H', + '%Y/%m/%d %H', + '%Y%m%dT%H', + '%Y%m%d %H', + '%Y-%m-%dT%HZ', + '%Y-%m-%dT%H+00', + '%Y-%m-%dT%H+00:00' + ] + + for format in formats: + for hour in hours: + assert DateParser.parse_iso_str(hour.strftime(format)) == datetime.timestamp(hour), hour.strftime(format) + + def test_parse_iso_yyyymmddhhmm(self): + minutes = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 0, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 15, 0, 0, 0, pytz.UTC), + step='minutes') + formats = [ + '%Y-%m-%dT%H:%M', + '%Y-%m-%dT%H:%MZ', + '%Y-%m-%d %H:%M', + '%Y/%m/%dT%H:%M', + '%Y/%m/%d %H:%M', + '%Y%m%dT%H:%M', + '%Y%m%d %H:%M', + '%Y-%m-%dT%H:%MZ', + '%Y-%m-%dT%H:%M+00', + '%Y-%m-%dT%H:%M+00:00' + ] + + for format in formats: + for minute in minutes: + assert 
DateParser.parse_iso_str(minute.strftime(format)) == datetime.timestamp(minute), minute.strftime(format) + + def test_parse_iso_yyyymmddhhmmss(self): + seconds = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + formats = [ + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%d %H:%M:%S', + '%Y/%m/%dT%H:%M:%S', + '%Y/%m/%d %H:%M:%S', + '%Y%m%dT%H:%M:%S', + '%Y%m%d %H:%M:%S', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%dT%H:%M:%S+00', + '%Y-%m-%dT%H:%M:%S+00:00' + ] + + for format in formats: + for second in seconds: + assert DateParser.parse_iso_str(second.strftime(format)) == datetime.timestamp(second), second.strftime(format) + + def test_parse_iso_yyyymmddhhmmsssss(self): + seconds = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + formats = [ + '%Y-%m-%dT%H:%M:%S.123', + '%Y-%m-%dT%H:%M:%S.123Z', + '%Y-%m-%d %H:%M:%S.123', + '%Y/%m/%dT%H:%M:%S.123', + '%Y/%m/%d %H:%M:%S.123', + '%Y%m%dT%H:%M:%S.123', + '%Y%m%d %H:%M:%S.123', + '%Y-%m-%dT%H:%M:%S.123Z', + '%Y-%m-%dT%H:%M:%S.123+00', + '%Y-%m-%dT%H:%M:%S.123+00:00' + ] + + for format in formats: + for second in seconds: + assert DateParser.parse_iso_str(second.strftime(format)) == datetime.timestamp(second) + .123, second.strftime(format) + + def test_parse_iso_with_tz_offset(self): + hours = get_datetimes_in_range(start=datetime(2020, 2, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 3, 2, 0, 0, 0, 0, pytz.UTC), + step='hours') + + tz_offset = 4500 + + formats = [ + '%Y-%m-%dT%H:%M+01:15', + '%Y-%m-%d %H:%M+01:15', + ] + + for format in formats: + for hour in hours: + assert DateParser.parse_iso_str(hour.strftime(format)) == datetime.timestamp(hour) + tz_offset, hour.strftime(format) + + # ------------------------------------------------------- + # This group of tests exercise the parse_non_iso method + # 
------------------------------------------------------- + + def test_parse_non_iso_valid_month_string(self): + months = [ + 'Jan', 'January', + 'Feb', 'February', + 'Mar', 'March', + 'Apr', 'April', + 'May', + 'Jun', 'June', + 'Jul', 'July', + 'Aug', 'August', + 'Sep', 'Sept', 'September', + 'Oct', 'October', + 'Nov', 'November', + 'Dec', 'December' + ] + + for month in months: + DateParser.parse_non_iso(month + ' 1, 1997') + + def test_parse_non_iso_invalid_month_string(self): + months = [ + 'not a month', + 'Jane', + 'Movember', + '&*&' + ] + + for month in months: + with pytest.raises(ValueError): + DateParser.parse_non_iso(month + ' 1, 1997') + + def test_parse_non_iso_with_whitespace(self): + hours = get_datetimes_in_range(start=datetime(2020, 2, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 3, 2, 0, 0, 0, 0, pytz.UTC), + step='hours') + formats = [ + ' %B-%d-%Y %H:%M', + '%B-%d-%Y %H:%M ', + ' %B-%d-%Y %H:%M ', + ] + + for format in formats: + for hour in hours: + assert DateParser.parse(hour.strftime(format)) == datetime.timestamp(hour), hour.strftime(format) + + def test_parse_non_iso_dates(self): + # all days in non leap year and leap year + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + supported_formats = [ + '%b %d %Y', # e.g Jan 1 1997 + '%B %d %Y', # e.g January 1 1997 + '%b %d, %Y', # e.g Jan 1, 1997 + '%B %d, %Y', # e.g January 1, 1997 + '%b%d, %Y', # e.g Jan1, 1997 + '%B%d, %Y', # e.g January1, 1997 + '%b %d,%Y', # e.g Jan1, 1997 + '%B %d,%Y', # e.g January1, 1997 + '%d %b %Y', # e.g 1 Jan 1997 + '%d %B %Y', # e.g 1January 1997 + '%d%b%Y', # e.g 1Jan1997 + '%d%B%Y', # e.g 1January1997 + '%d/%b/%Y', # e.g 1/Jan/1997 + '%d/%B/%Y', # e.g 1/January/1997 + '%d-%b-%Y', # e.g 1-Jan-1997 + '%d-%B-%Y', # e.g 1-January-1997 + '%Y %b %d', # e.g 1997 Jan 1 + '%Y %B %d', # e.g 1997 January 1 + '%Y/%b/%d', # e.g 1997/Jan/1 + '%Y/%B/%d', # e.g 1997/January/1 + 
'%Y-%b-%d', # e.g 1997-Jan-1 + '%Y-%B-%d', # e.g 1997-January-1 + '%b-%d-%Y', # e.g Jan-1-1997 + '%B-%d-%Y', # e.g January-1-1997 + '%b/%d/%Y', # e.g Jan/1/1997 + '%B/%d/%Y', # e.g January/1/1997 + ] + for format in supported_formats: + for day in days: + assert DateParser.parse_non_iso(day.strftime(format)) == datetime.timestamp(day), day.strftime(format) + + def test_parse_non_iso_yyyymmddhhmm(self): + minutes = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 0, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 15, 0, 0, 0, pytz.UTC), + step='minutes') + supported_formats = [ + '%b %d %Y %H:%M', # e.g Jan 1 1997 13:50 + '%B %d %Y %H:%M', # e.g January 1 1997 13:50 + '%b %d, %Y %H:%M', # e.g Jan 1, 1997 13:50 + '%B %d, %Y %H:%M', # e.g January 1, 1997 13:50 + '%b%d, %Y %H:%M', # e.g Jan1, 1997 13:50 + '%B%d, %Y %H:%M', # e.g January1, 1997 13:50 + '%b %d,%Y %H:%M', # e.g Jan1, 1997 13:50 + '%B %d,%Y %H:%M', # e.g January1, 1997 13:50 + '%d %b %Y %H:%M', # e.g 1 Jan 1997 13:50 + '%d %B %Y %H:%M', # e.g 1January 1997 13:50 + '%d%b%Y %H:%M', # e.g 1Jan1997 13:50 + '%d%B%Y %H:%M', # e.g 1January1997 13:50 + '%d/%b/%Y %H:%M', # e.g 1/Jan/1997 13:50 + '%d/%B/%Y %H:%M', # e.g 1/January/1997 13:50 + '%d-%b-%Y %H:%M', # e.g 1-Jan-1997 13:50 + '%d-%B-%Y %H:%M', # e.g 1-January-1997 13:50 + '%Y %b %d %H:%M', # e.g 1997 Jan 1 13:50 + '%Y %B %d %H:%M', # e.g 1997 January 1 13:50 + '%Y/%b/%d %H:%M', # e.g 1997/Jan/1 13:50 + '%Y/%B/%d %H:%M', # e.g 1997/January/1 13:50 + '%Y-%b-%d %H:%M', # e.g 1997-Jan-1 13:50 + '%Y-%B-%d %H:%M', # e.g 1997-January-1 13:50 + '%b-%d-%Y %H:%M', # e.g Jan-1-1997 13:50 + '%B-%d-%Y %H:%M', # e.g January-1-1997 13:50 + '%b/%d/%Y %H:%M', # e.g Jan/1/1997 13:50 + '%B/%d/%Y %H:%M', # e.g January/1/1997 13:50 + ] + for format in supported_formats: + for minute in minutes: + assert DateParser.parse_non_iso(minute.strftime(format)) == datetime.timestamp(minute), minute.strftime(format) + + def test_parse_non_iso_yyyymmddhhmmss(self): + seconds = 
get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + supported_formats = [ + '%b %d %Y %H:%M:%S', # e.g Jan 1 1997 13:50 + '%B %d %Y %H:%M:%S', # e.g January 1 1997 13:50 + '%b %d, %Y %H:%M:%S', # e.g Jan 1, 1997 13:50 + '%B %d, %Y %H:%M:%S', # e.g January 1, 1997 13:50 + '%b%d, %Y %H:%M:%S', # e.g Jan1, 1997 13:50 + '%B%d, %Y %H:%M:%S', # e.g January1, 1997 13:50 + '%b %d,%Y %H:%M:%S', # e.g Jan1, 1997 13:50 + '%B %d,%Y %H:%M:%S', # e.g January1, 1997 13:50 + '%d %b %Y %H:%M:%S', # e.g 1 Jan 1997 13:50 + '%d %B %Y %H:%M:%S', # e.g 1January 1997 13:50 + '%d%b%Y %H:%M:%S', # e.g 1Jan1997 13:50 + '%d%B%Y %H:%M:%S', # e.g 1January1997 13:50 + '%d/%b/%Y %H:%M:%S', # e.g 1/Jan/1997 13:50 + '%d/%B/%Y %H:%M:%S', # e.g 1/January/1997 13:50 + '%d-%b-%Y %H:%M:%S', # e.g 1-Jan-1997 13:50 + '%d-%B-%Y %H:%M:%S', # e.g 1-January-1997 13:50 + '%Y %b %d %H:%M:%S', # e.g 1997 Jan 1 13:50 + '%Y %B %d %H:%M:%S', # e.g 1997 January 1 13:50 + '%Y/%b/%d %H:%M:%S', # e.g 1997/Jan/1 13:50 + '%Y/%B/%d %H:%M:%S', # e.g 1997/January/1 13:50 + '%Y-%b-%d %H:%M:%S', # e.g 1997-Jan-1 13:50 + '%Y-%B-%d %H:%M:%S', # e.g 1997-January-1 13:50 + '%b-%d-%Y %H:%M:%S', # e.g Jan-1-1997 13:50 + '%B-%d-%Y %H:%M:%S', # e.g January-1-1997 13:50 + '%b/%d/%Y %H:%M:%S', # e.g Jan/1/1997 13:50 + '%B/%d/%Y %H:%M:%S', # e.g January/1/1997 13:50 + ] + for format in supported_formats: + for second in seconds: + assert DateParser.parse_non_iso(second.strftime(format)) == datetime.timestamp(second), second.strftime(format) + + def test_parse_non_iso_pm_indicator(self): + times = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 0, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 23, 59, 0, 0, pytz.UTC), + step='minutes') + supported_formats = [ + '%B/%d/%Y %I:%MPM', + '%B/%d/%Y %I:%Mpm', + '%B/%d/%Y %I:%M:%SPM', + '%B/%d/%Y %I:%M:%Spm', + ] + + for format in supported_formats: + for t in times: + assert 
DateParser.parse_non_iso(t.strftime(format)) == datetime.timestamp(t), t.strftime(format) + + def test_parse_non_iso_am_indicator(self): + times = get_datetimes_in_range(start=datetime(2020, 2, 29, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 11, 59, 0, 0, pytz.UTC), + step='minutes') + supported_formats = [ + '%B/%d/%Y %I:%MAM', + '%B/%d/%Y %I:%Mam', + '%B/%d/%Y %I:%M:%SAM', + '%B/%d/%Y %I:%M:%Sam', + ] + + for format in supported_formats: + for t in times: + assert DateParser.parse_non_iso(t.strftime(format)) == datetime.timestamp(t), t.strftime(format) + + def test_parse_non_iso_invalid_day(self): + with pytest.raises(ValueError): + DateParser.parse_non_iso('1997 Jan 32') + with pytest.raises(ValueError): + DateParser.parse_non_iso('1997 Jan 0') + + def test_parse_non_iso_invalid_month(self): + with pytest.raises(ValueError): + DateParser.parse_non_iso('Janeary 01 1997') + + def test_nyc_tz(self): + # edt: Oct 21, 2022 + assert 1666355040 == DateParser.parse('2022-10-21t08:24:00NYC') + assert 1666355040 == DateParser.parse('2022-10-21t08:24:00EDT') + + # est: Dec 21, 2022 + assert 1671629040 == DateParser.parse('2022-12-21t08:24:00NYC') + assert 1671629040 == DateParser.parse('2022-12-21t08:24:00EST') + + def test_compare_parse_iso_perf(self): + runs = 100000 + date_strings = make_list_of_iso_datestrings(runs) + + with PrintNewFunctionVisitor(): + DateParser.parse_iso_str('1997') + + start = time.time() + parseISODatetimes(date_strings) + tsTime = time.time() - start + + start = time.time() + parseDatetimes(date_strings) + dtTime = time.time() - start + + if dtTime > tsTime: + speedup = dtTime / tsTime + compare = 'x faster' + else: + speedup = tsTime / dtTime + compare = 'x slower' + + print('Datetime.parse (' + str(tsTime) + ') is ' + + str("{:.2f}".format(speedup)) + compare + ' than datetime.strptime (' + str(dtTime) + ')') + # assert speedup > 7 and speedup < 8 + + # ------------------------------------------------------- + # This group of tests 
exercise the parse_format method + # ------------------------------------------------------- + + def test_parse_format_yyyy(self): + years = get_years_in_range(1942, 1970) + get_years_in_range(2001, 2022) + for year in years: + assert DateParser.parse_with_format(year.strftime('%Y'), '%Y') == datetime.timestamp(year), year.strftime('%Y') + + def test_parse_format_yyyymm(self): + months = get_months_in_year(1999) + get_months_in_year(2020) + formats = [ + '%Y-%m', + '%Y/%m', + '%Y%m', + ] + for format in formats: + for month in months: + assert DateParser.parse_with_format(month.strftime(format), format) == datetime.timestamp(month), month.strftime(format) + + def test_parse_format_yyyymmdd(self): + # all days in non leap year and leap year + days = get_datetimes_in_range(start=datetime(2019, 1, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 1, 31, 0, 0, 0, 0, pytz.UTC), + step='days') + formats = [ + '%Y-%m-%d', + '%Y/%m/%d', + '%Y%m%d', + + ] + for format in formats: + for day in days: + assert DateParser.parse_with_format(day.strftime(format), format) == datetime.timestamp(day), day.strftime(format) + + def test_parse_format_yyyymmddhh(self): + # all hours in feb 2020 + hours = get_datetimes_in_range(start=datetime(2020, 2, 1, 0, 0, 0, 0, pytz.UTC), + end=datetime(2020, 3, 2, 0, 0, 0, 0, pytz.UTC), + step='hours') + formats = [ + '%Y-%m-%dT%H', + '%Y-%m-%dT%HZ', + '%Y-%m-%d %H', + '%Y/%m/%dT%H', + '%Y/%m/%d %H', + '%Y%m%dT%H', + '%Y%m%d %H', + '%Y-%m-%dT%HZ', + '%Y-%m-%dT%H+00', + '%Y-%m-%dT%H+00:00' + ] + + for format in formats: + for hour in hours: + assert DateParser.parse_with_format(hour.strftime(format), format) == datetime.timestamp(hour), hour.strftime(format) + + def test_parse_format_yyyymmddhhmm(self): + minutes = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 0, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 15, 0, 0, 0, pytz.UTC), + step='minutes') + supported_formats = [ + '%b %d %Y %H:%M', # e.g Jan 1 1997 13:50 + '%B %d %Y %H:%M', # e.g 
January 1 1997 13:50 + '%b %d, %Y %H:%M', # e.g Jan 1, 1997 13:50 + '%B %d, %Y %H:%M', # e.g January 1, 1997 13:50 + '%b%d, %Y %H:%M', # e.g Jan1, 1997 13:50 + '%B%d, %Y %H:%M', # e.g January1, 1997 13:50 + '%b %d,%Y %H:%M', # e.g Jan1, 1997 13:50 + '%B %d,%Y %H:%M', # e.g January1, 1997 13:50 + '%d %b %Y %H:%M', # e.g 1 Jan 1997 13:50 + '%d %B %Y %H:%M', # e.g 1January 1997 13:50 + '%d%b%Y %H:%M', # e.g 1Jan1997 13:50 + '%d%B%Y %H:%M', # e.g 1January1997 13:50 + '%d/%b/%Y %H:%M', # e.g 1/Jan/1997 13:50 + '%d/%B/%Y %H:%M', # e.g 1/January/1997 13:50 + '%d-%b-%Y %H:%M', # e.g 1-Jan-1997 13:50 + '%d-%B-%Y %H:%M', # e.g 1-January-1997 13:50 + '%Y %b %d %H:%M', # e.g 1997 Jan 1 13:50 + '%Y %B %d %H:%M', # e.g 1997 January 1 13:50 + '%Y/%b/%d %H:%M', # e.g 1997/Jan/1 13:50 + '%Y/%B/%d %H:%M', # e.g 1997/January/1 13:50 + '%Y-%b-%d %H:%M', # e.g 1997-Jan-1 13:50 + '%Y-%B-%d %H:%M', # e.g 1997-January-1 13:50 + '%b-%d-%Y %H:%M', # e.g Jan-1-1997 13:50 + '%B-%d-%Y %H:%M', # e.g January-1-1997 13:50 + '%b/%d/%Y %H:%M', # e.g Jan/1/1997 13:50 + '%B/%d/%Y %H:%M', # e.g January/1/1997 13:50 + ] + for format in supported_formats: + for minute in minutes: + assert DateParser.parse_with_format(minute.strftime(format), format) == datetime.timestamp(minute), minute.strftime(format) + + def test_parse_format_yyyymmddhhmmss(self): + seconds = get_datetimes_in_range(start=datetime(2020, 2, 29, 13, 17, 0, 0, pytz.UTC), + end=datetime(2020, 2, 29, 13, 19, 0, 0, pytz.UTC), + step='seconds') + formats = [ + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%d %H:%M:%S', + '%Y/%m/%dT%H:%M:%S', + '%Y/%m/%d %H:%M:%S', + '%Y%m%dT%H:%M:%S', + '%Y%m%d %H:%M:%S', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%dT%H:%M:%S+00', + '%Y-%m-%dT%H:%M:%S+00:00' + ] + + for format in formats: + for second in seconds: + assert DateParser.parse_with_format(second.strftime(format), format) == datetime.timestamp(second), second.strftime(format) + + def test_compare_parse_format_perf(self): + runs = 100000 + 
date_strings = make_list_of_iso_datestrings(runs) + + with PrintNewFunctionVisitor(): + DateParser.parse_with_format('1997', '%Y') + + start = time.time() + parseFormatDatetimes(date_strings) + tsTime = time.time() - start + + start = time.time() + parseDatetimes(date_strings) + dtTime = time.time() - start + + if dtTime > tsTime: + speedup = dtTime / tsTime + compare = 'x faster' + else: + speedup = tsTime / dtTime + compare = 'x slower' + + print('Datetime.parse (' + str(tsTime) + ') is ' + + str("{:.2f}".format(speedup)) + compare + ' than datetime.strptime (' + str(dtTime) + ')') diff --git a/typed_python/lib/datetime/timezone.py b/typed_python/lib/datetime/timezone.py new file mode 100644 index 000000000..5760db546 --- /dev/null +++ b/typed_python/lib/datetime/timezone.py @@ -0,0 +1,215 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typed_python import Class, Dict, Entrypoint, Member, Final +from typed_python.lib.datetime.chrono import Chrono + + +class Offset(Class): + @Entrypoint + def get_offset(self, _: int) -> int: + raise NotImplementedError + + +class FixedOffset(Offset, Final): + offset = Member(int) + + @Entrypoint + def __init__(self, offset: int): + self.offset = offset + + @Entrypoint + def get_offset(self, _: int) -> int: + return self.offset + +# Would like to give RelativeOffset* better names but... +# 1) RelativeOffset_BS_BM_CA_MX_US.... 
is clunky +# 2) can't easily name start/end of dst, because other regions have same start dates but different times +# +# For nowe we'll just add new 'regions' as necessary. E.g RelativeOffset3: Cuba, RelativeOffset3: EU + lots +# more europe + + +class RelativeOffset1(Offset, Final): + """ + Timezones where the offset from UTC changes with DST beginning on the second Sunday in March at 2:00 + and ending on the first Sunday in November at 2:00. + + Covers: Bahamas, Bermuda, parts of Canada, parts of Greenland, Haiti, parts of Mexico, Saint Pierre and + Miquelon, Turks and Caicos Islands, parts of the United States + """ + + st_offset = Member(int) + dt_offset = Member(int) + + @Entrypoint + def __init__(self, st_offset: int, dt_offset: int): + self.st_offset = st_offset + self.dt_offset = dt_offset + + @Entrypoint + def get_offset(self, ts: int) -> int: + return self.dt_offset if self.is_dst(ts) else self.st_offset + + @Entrypoint + def is_dst(self, ts: float) -> bool: + year = Chrono.year_from_ts(ts) + + # 2:00am second Sunday march + ds_start = Chrono.get_nth_dow_of_month_ts(2, 0, 3, year) + 7200 + + # 2:00 am first sunday in november + ds_end = Chrono.get_nth_dow_of_month_ts(1, 0, 11, year) + 7200 + + return ts >= ds_start and ts <= ds_end + + +class Timezone(Class, Final): + AT = RelativeOffset1(st_offset=21600, dt_offset=18000) + + ADT = FixedOffset(offset=18000) + AST = FixedOffset(offset=21600) + + CT = RelativeOffset1(st_offset=21600, dt_offset=18000) + CDT = FixedOffset(offset=18000) + CST = FixedOffset(offset=21600) + + ET = RelativeOffset1(st_offset=18000, dt_offset=14400) + EDT = FixedOffset(offset=14400) + EST = FixedOffset(offset=18000) + + MT = RelativeOffset1(st_offset=25200, dt_offset=21600) + MDT = FixedOffset(offset=21600) + MST = FixedOffset(offset=25200) + + PT = RelativeOffset1(st_offset=28800, dt_offset=25200) + PDT = FixedOffset(offset=25200) + PST = FixedOffset(offset=28800) + + UTC = FixedOffset(offset=0) + + TZ_STR_TO_OFFSET = Dict(str, 
Offset)({ + '': UTC, + '+0000': UTC, + 'cdt': CDT, + 'cst': CST, + 'ct': CT, + 'edt': EDT, + 'est': EST, + 'et': ET, + 'gmt': UTC, + 'mdt': MDT, + 'mst': MST, + 'mt': MT, + 'nyc': ET, + 'pdt': PDT, + 'pst': PST, + 'pt': PT, + 'utc': UTC, + 'z': UTC, + }) + + @Entrypoint + @staticmethod + def tz_str_to_utc_offset(tz_str: str, unixtime: int) -> int: + ''' + Get utc offset by timezone abbreviation + Parameters: + tz_abbr(string): a timezone indicator. examples: 'ET', 'EST', 'NYC' + Returns: + (int): The utc offset in seconds + ''' + return Timezone.TZ_STR_TO_OFFSET[tz_str.lower()].get_offset(unixtime) + + @Entrypoint + @staticmethod + def ts_to_utc(ts: float, tz_str: str = '') -> float: + ''' + Converts a timestamp to its equivalent in UTC + Parameters: + ts (float): A unix timetamp + tz_str(str): A timezone abbreviation (e.g. est, edt, nyc) or an ISO 8601 timezone offset (e.g. +0000 or -0101) + Returns: + (int): The utc offset in seconds + ''' + tz_str = tz_str.lower() + + if tz_str in Timezone.TZ_STR_TO_OFFSET: + return ts + Timezone.TZ_STR_TO_OFFSET[tz_str].get_offset(ts) + + return ts + Timezone.parse_tz_offset(tz_str) + + @Entrypoint + @staticmethod + def is_valid_tz_string(tz_str: str): + ''' + Tests if a string represents a supported tz + ''' + return tz_str.lower() in Timezone.TZ_STR_TO_OFFSET + + @Entrypoint + @staticmethod + def is_valid_tz_offset(hour: int, min: int, second: float = 0.0) -> bool: + ''' + Tests if an hour,min combination is a valid offset from UTC + Parameters: + hour(int): The hour + min(int): The minute + Returns: + True if the inputs are in the range UTC-12:00 to UTC+14 + ''' + if hour > 14 or hour < -12: + return False + + if (hour == 14 or hour == -12) and min > 0: + return False + + if min < 0 or min + second >= 60: + return False + + return True + + @Entrypoint + @staticmethod + def parse_tz_offset(offset: str) -> int: + ''' + Converts a set of tokens representing a timezone offset to seconds. 
+ Parameters: + tokens (ListOf(str)): A set of string tokens representing a timezone. E.g. ['Z'] or ['+', '02', ':', '23'] + Returns: + (int): The offset in seconds + ''' + + if offset[0] != '+' and offset[0] != '-': + raise ValueError("tz offset must begin with '+' or '-'", offset) + + sign = offset[0] + value = offset[1:] + + hour, min, second = 0, 0, 0 + + # [+|-]HH or [+|-]HHMM + if len(value) == 2: + hour, min, second = int(value), 0, 0.0 + elif len(value) == 4: + hour, min, second = int(value[:2]), int(value[2:4]), 0.0 + elif len(value) >= 6: + hour, min, second = int(value[:2]), int(value[2:4]), float(value[6:]) + + hour = hour * -1 if sign == '-' else hour + + if Timezone.is_valid_tz_offset(hour, min, second): + return hour * 3600 + (min * 60 if hour > 0 else min * -60) + (second if hour > 0 else second * -1) + else: + raise ValueError('Invalid tz offset: ') diff --git a/typed_python/lib/datetime/timezone_test.py b/typed_python/lib/datetime/timezone_test.py new file mode 100644 index 000000000..26dcad901 --- /dev/null +++ b/typed_python/lib/datetime/timezone_test.py @@ -0,0 +1,112 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from typed_python.lib.datetime.timezone import Timezone + +ST_TIME = 1645812452 # feb 25, 2022 - Standard time +DS_TIME = 1661447252 # aug 25, 2022 - DST time + + +class TestTimezone(unittest.TestCase): + + def test_is_tz_offset_valid(self): + tz_offsets = [ + (-12, 0, 0), # eastmost + (14, 0, 0), # westmost + (10, 4, 4), # random + ] + + for tz_offset in tz_offsets: + assert Timezone.is_valid_tz_offset(tz_offset[0], tz_offset[1]), tz_offset + + def test_is_tz_offset_invalid(self): + tz_offsets = [ + (-13, 0, 0), # out of range + (-12, 1, 0), # out of range + (14, 1, 0), # out of range + (10, 60, 4), # min > 59 + (10, -1, 4), # min < 0 + ] + + for tz_offset in tz_offsets: + assert not Timezone.is_valid_tz_offset(tz_offset[0], tz_offset[1]), tz_offset + + def test_tz_abbr_ct(self): + assert Timezone.tz_str_to_utc_offset('ct', ST_TIME) == 21600 + assert Timezone.tz_str_to_utc_offset('ct', DS_TIME) == 18000 + + def test_tz_abbr_cdt(self): + assert Timezone.tz_str_to_utc_offset('cdt', ST_TIME) == 18000 + assert Timezone.tz_str_to_utc_offset('cdt', DS_TIME) == 18000 + + def test_tz_abbr_cst(self): + assert Timezone.tz_str_to_utc_offset('cst', ST_TIME) == 21600 + assert Timezone.tz_str_to_utc_offset('cst', DS_TIME) == 21600 + + def test_tz_abbr_est(self): + assert Timezone.tz_str_to_utc_offset('est', ST_TIME) == 18000 + assert Timezone.tz_str_to_utc_offset('est', DS_TIME) == 18000 + + def test_tz_abbr_edt(self): + assert Timezone.tz_str_to_utc_offset('edt', ST_TIME) == 14400 + assert Timezone.tz_str_to_utc_offset('edt', DS_TIME) == 14400 + + def test_tz_abbr_et(self): + assert Timezone.tz_str_to_utc_offset('et', ST_TIME) == 18000 + assert Timezone.tz_str_to_utc_offset('et', DS_TIME) == 14400 + + def test_tz_abbr_mt(self): + assert Timezone.tz_str_to_utc_offset('mt', ST_TIME) == 25200 + assert Timezone.tz_str_to_utc_offset('mt', DS_TIME) == 21600 + + def test_tz_abbr_mdt(self): + assert Timezone.tz_str_to_utc_offset('mdt', ST_TIME) == 21600 + assert 
Timezone.tz_str_to_utc_offset('mdt', DS_TIME) == 21600 + + def test_tz_abbr_mst(self): + assert Timezone.tz_str_to_utc_offset('mst', ST_TIME) == 25200 + assert Timezone.tz_str_to_utc_offset('mst', DS_TIME) == 25200 + + def test_tz_abbr_pt(self): + assert Timezone.tz_str_to_utc_offset('pt', ST_TIME) == 28800 + assert Timezone.tz_str_to_utc_offset('pt', DS_TIME) == 25200 + + def test_tz_abbr_pdt(self): + assert Timezone.tz_str_to_utc_offset('pdt', ST_TIME) == 25200 + assert Timezone.tz_str_to_utc_offset('pdt', DS_TIME) == 25200 + + def test_tz_abbr_pst(self): + assert Timezone.tz_str_to_utc_offset('pst', ST_TIME) == 28800 + assert Timezone.tz_str_to_utc_offset('pst', DS_TIME) == 28800 + + def test_tz_abbr_nyc(self): + assert Timezone.tz_str_to_utc_offset('nyc', ST_TIME) == 18000 + assert Timezone.tz_str_to_utc_offset('nyc', DS_TIME) == 14400 + assert (Timezone.tz_str_to_utc_offset('nyc', ST_TIME) == + Timezone.tz_str_to_utc_offset('et', ST_TIME)) + assert (Timezone.tz_str_to_utc_offset('nyc', DS_TIME) == + Timezone.tz_str_to_utc_offset('et', DS_TIME)) + + def test_tz_abbr_utc(self): + assert Timezone.tz_str_to_utc_offset('utc', ST_TIME) == 0 + assert Timezone.tz_str_to_utc_offset('utc', DS_TIME) == 0 + + def test_tz_abbr_gmt(self): + assert Timezone.tz_str_to_utc_offset('gmt', ST_TIME) == 0 + assert Timezone.tz_str_to_utc_offset('gmt', DS_TIME) == 0 + + def test_tz_abbr_z(self): + assert Timezone.tz_str_to_utc_offset('z', ST_TIME) == 0 + assert Timezone.tz_str_to_utc_offset('z', DS_TIME) == 0 diff --git a/typed_python/lib/foo.py b/typed_python/lib/foo.py new file mode 100644 index 000000000..2ee05a2c5 --- /dev/null +++ b/typed_python/lib/foo.py @@ -0,0 +1,3 @@ +a = "1234567" + +print(a[3:40]) diff --git a/typed_python/lib/timestamp.py b/typed_python/lib/timestamp.py new file mode 100644 index 000000000..bfb5fc33f --- /dev/null +++ b/typed_python/lib/timestamp.py @@ -0,0 +1,127 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typed_python.compiler.runtime import Entrypoint +from typed_python import Class, Final, Member, Held +from typed_python.lib.datetime.date_parser import DateParser +from typed_python.lib.datetime.date_formatter import DateFormatter +from typed_python.lib.datetime.chrono import Chrono + + +@Held +class Timestamp(Class, Final): + """A wrapper around a unix timestamp that adds functionality for parsing and string formatting""" + ts = Member(float) + + @Entrypoint + def __int__(self): + return int(self.ts) + + @Entrypoint + def __float__(self): + return self.ts + + @Entrypoint + def __str__(self): + return self.format() + + @Entrypoint + def __eq__(self, other) -> bool: + return self.ts == other.ts + + @Entrypoint + def __ne__(self, other) -> bool: + return self.ts != other.ts + + @Entrypoint + def __ge__(self, other) -> bool: + return self.ts >= other.ts + + @Entrypoint + def __gt__(self, other) -> bool: + return self.ts > other.ts + + @Entrypoint + def __lt__(self, other) -> bool: + return self.ts < other.ts + + @Entrypoint + def __le__(self, other) -> bool: + return self.ts <= other.ts + + @Entrypoint + def __add__(self, other): + return Timestamp(ts=self.ts + other.ts) + + @Entrypoint + def __sub__(self, other): + return Timestamp(ts=self.ts - other.ts) + + @Entrypoint + @staticmethod + def make(ts: float): + ''' + Creates a Timestamp from a float + Parameters: + ts: a float + Returns: + timestamp (Timestamp): A Timestamp + ''' + return Timestamp(ts=ts) 
+ + @Entrypoint + def __init__(self, ts: float): + self.ts = ts + + @Entrypoint + @staticmethod + def parse(date_str: str): + ''' + Creates a Timestamp from date strings. + Parameters: + date_str (str): A date string. E.g 2022-07-30 17:56:46 + Returns: + timestamp (Timestamp): A Timestamp + ''' + return Timestamp(ts=DateParser.parse(date_str)) + + @Entrypoint + def format(self, utc_offset: int = 0, format: str = "%Y-%m-%d %H:%M:%S") -> str: + ''' + Converts a Timestamp to a string in a given format + Parameters: + utc_offset (int): The offset from UTC in seconds + format (str): A string specifying formatting directives. E.g. '%Y-%m-%dT%H:%M:%S' + Returns: + date_str(str): A string representing the date in the specified format. E.g. "Mon January 2, 2021" + ''' + return DateFormatter.format(self.ts, utc_offset, format) + + @Entrypoint + @staticmethod + def from_date(year=0, month=0, day=0, hour=0, minute=0, second=0): + ''' + Creates a Timestamp from date values. + Parameters: + year (int): The year + month (int): The month. January: 1, February: 2, .... + day (int): The day + hour (int): The hour (0-23) + minute (int): The minute + second (float): The second. + Returns: + timestamp (Timestamp): A Timestamp + ''' + return Timestamp(ts=Chrono.date_to_seconds(year, month, day) + Chrono.time_to_seconds(hour, minute, second)) diff --git a/typed_python/lib/timestamp_test.py b/typed_python/lib/timestamp_test.py new file mode 100644 index 000000000..c3b6ae43a --- /dev/null +++ b/typed_python/lib/timestamp_test.py @@ -0,0 +1,340 @@ +# Copyright 2017-2020 typed_python Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +import unittest + +from typed_python.compiler.runtime import Entrypoint, PrintNewFunctionVisitor + +from typed_python.lib.timestamp import Timestamp +from datetime import datetime, timezone +from typed_python import ListOf + + +class Timer: + def __enter__(self): + self.t0 = time.time() + return self + + def __exit__(self, *args): + self.duration = time.time() - self.t0 + print(self.duration) + + def getDuration(self): + return self.duration + + +@Entrypoint +def make_list_of_iso_datestrings(n): + res = ListOf(str)() + for i in range(n): + dt = datetime.fromtimestamp(i) + res.append(dt.isoformat()) + return res + + +@Entrypoint +def make_list_of_datetimes(n): + res = ListOf(datetime)() + for i in range(n): + dt = datetime.fromtimestamp(i) + res.append(dt) + return res + + +@Entrypoint +def listOfTimestamps(N): + res = ListOf(Timestamp)() + for unixtime in range(N): + res.append(Timestamp.make(unixtime)) + + return res + + +@Entrypoint +def listOfDatetimes(N): + res = ListOf(datetime)() + for unixtime in range(N): + res.append(datetime.fromtimestamp(unixtime)) + + return res + + +@Entrypoint +def parseTimestamps(strings: ListOf(str)): + res = ListOf(Timestamp)() + for string in strings: + res.append(Timestamp.parse(string)) + return res + + +@Entrypoint +def parseDatetimes(strings: ListOf(str)): + res = ListOf(datetime)() + for string in strings: + res.append(datetime.strptime(string, '%Y-%m-%dT%H:%M:%S')) + return res + + +@Entrypoint +def formatTimestamps(timestamps: ListOf(Timestamp)): + res = ListOf(str)() + for timestamp in timestamps: + 
res.append(timestamp.format(format='%Y-%m-%d')) + + return res + + +@Entrypoint +def formatDatetimes(datetimes: ListOf(datetime)): + res = ListOf(str)() + for dt in datetimes: + res.append(dt.strftime('%Y-%m-%d')) + return res + + +class TestTimestamp(unittest.TestCase): + + def test_demo_usage(self): + + # create timestamp from unixtime + Timestamp.make(time.time()) + Timestamp.make(ts=time.time()) + + # create timestamp from ints representing a date (year, month, day, hour, min, sec) + # can omit smaller granularities in order + Timestamp.from_date(2022, 10, 22, 6, 39, 7) + Timestamp.from_date(2022, 10, 22, 6) + Timestamp.from_date(2022, 10) + + # create timestamp from iso 8601 date string + Timestamp.parse('2022-10-22T06:39') + + # create timestamp from iso8601ish string (space instead of T separator) + Timestamp.parse('2022-10-22 06:39') + + # create timestamp from non iso date string + Timestamp.parse('Oct 22, 2022 06:39') + + # with relative tz (offset changes with dst) + Timestamp.parse('2022-10-22T06:39NYC') + + # with relative tz (offset changes with dst) + Timestamp.parse('2022-10-22T06:39ET') + + # with fixed offset tz + Timestamp.parse('2022-10-22T06:39UTC') + + # with fixed offset tz + Timestamp.parse('2022-10-22T06:39EST') + + # with fixed offset tz + Timestamp.parse('2022-10-22T06:39EDT') + + # get date string from timestamp as YYYY-MM-DD + ts = Timestamp.make(time.time()) + ts.format(format='%Y-%m-%d') + + def test_eq(self): + # The following commented block of code sometimes unexpectedly errors with something like + # AssertionError: assert Held(Timestamp)(ts=2,) == Held(Timestamp)(ts=2,) + # + # ts1 = Timestamp.make(2) + # ts2 = Timestamp.make(2) + # assert ts1 == ts2 + # + # Added an otherwise unnecessary Entrypointed inner() function as a workaround + # + # See: https://github.com/APrioriInvestments/typed_python/issues/404 for bug details + @Entrypoint + def inner(): + assert Timestamp.make(2) == Timestamp.make(2) + inner() + + def 
test_ge(self): + # The following commented block of unexpectedly errors with something like + # AssertionError: assert Held(Timestamp)(ts=1.6694e+09,) >= Held(Timestamp)(ts=1.6694e+09,) + # + # Added an otherwise unnecessary Entrypointed inner() function as a workaround + # + # See: https://github.com/APrioriInvestments/typed_python/issues/404 for bug details + @Entrypoint + def inner(): + unixtime = time.time() + ts1 = Timestamp.make(unixtime) + ts2 = Timestamp.make(unixtime) + ts3 = Timestamp.make(unixtime - 1) + assert ts1 >= ts2 + assert ts1 >= ts3 + inner() + + def test_gt(self): + unixtime = time.time() + ts1 = Timestamp.make(unixtime) + ts2 = Timestamp.make(unixtime - 1) + assert ts1 > ts2 + + def test_le(self): + # See comments in test_ge above + @Entrypoint + def inner(): + unixtime = time.time() + ts1 = Timestamp.make(unixtime) + ts2 = Timestamp.make(unixtime) + ts3 = Timestamp.make(unixtime + 1) + assert ts1 <= ts2 + assert ts1 <= ts3 + inner() + + def test_lt(self): + unixtime = time.time() + ts1 = Timestamp.make(unixtime) + ts2 = Timestamp.make(unixtime + 1) + + assert ts1 < ts2 + + def test_ne(self): + unixtime = time.time() + ts1 = Timestamp.make(unixtime) + ts2 = Timestamp.make(unixtime + 1) + ts3 = Timestamp.make(unixtime - 1) + + assert ts1 != ts2 + assert ts1 != ts3 + + def test_add(self): + unixtime = time.time() + ts1 = Timestamp.make(unixtime) + ts2 = Timestamp.make(5) + ts3 = ts1 + ts2 + assert ts3.ts == unixtime + 5 + + def test_sub(self): + unixtime = time.time() + ts1 = Timestamp.make(unixtime) + ts2 = Timestamp.make(5) + ts3 = ts1 - ts2 + assert ts3.ts == unixtime - 5 + + def test_format_default(self): + # Just a superficial test. 
format proxies to DateFormatter.format + # which has more extensive testing + unixtime = time.time() + timestamp = Timestamp.make(unixtime + time.localtime().tm_gmtoff) + dt = datetime.fromtimestamp(unixtime) + assert dt.isoformat(timespec='seconds').replace('T', ' ') == timestamp.format() + + def test_format(self): + # Just a superficial test. format proxies to DateFormatter.format + # which has more extensive testing + unixtime = time.time() + timestamp = Timestamp.make(unixtime + time.localtime().tm_gmtoff) + dt = datetime.fromtimestamp(unixtime) + assert dt.isoformat(timespec='seconds') == timestamp.format(format="%Y-%m-%dT%H:%M:%S") + + def test_from_date(self): + unixtime = time.time() + dt_tuple = datetime.fromtimestamp(unixtime, tz=timezone.utc).timetuple() + + timestamp = Timestamp.from_date(year=dt_tuple.tm_year, + month=dt_tuple.tm_mon, + day=dt_tuple.tm_mday, + hour=dt_tuple.tm_hour, + minute=dt_tuple.tm_min, + second=dt_tuple.tm_sec) + assert int(unixtime) == int(timestamp) + + def test_parse(self): + unixtime = time.time() + timestamp = Timestamp.make(unixtime) + dt = datetime.fromtimestamp(unixtime, tz=timezone.utc).timetuple() + date_str = f"{dt.tm_year}-{dt.tm_mon:02d}-{dt.tm_mday:02d} {dt.tm_hour:02d}:{dt.tm_min:02d}:{dt.tm_sec:02d}" + parsed_timestamp = Timestamp.parse(date_str) + + assert ((int(timestamp) == int(parsed_timestamp))) + + def test_compare_timestamp_datetime_from_unixtime(self): + runs = 10000000 + + Timestamp.make(1) + + start = time.time() + listOfTimestamps(runs) + tsTime = time.time() - start + + start = time.time() + listOfDatetimes(runs) + dtTime = time.time() - start + + speedup = dtTime / tsTime + + print('Timestamp.make (' + str(tsTime) + ') is ' + str(speedup) + + ' faster than datetime.fromtimestamp (' + str(dtTime) + ')') + + # assert speedup > 30 and speedup < 40, speedup + + def test_compare_timestamp_datetime_from_string(self): + runs = 100000 + date_strings = make_list_of_iso_datestrings(runs) + + with 
PrintNewFunctionVisitor(): + Timestamp.parse('1997') + + start = time.time() + parseTimestamps(date_strings) + tsTime = time.time() - start + + start = time.time() + parseDatetimes(date_strings) + dtTime = time.time() - start + + if dtTime > tsTime: + speedup = dtTime / tsTime + compare = 'x faster' + else: + speedup = tsTime / dtTime + compare = 'x slower' + + print('Timestamp.parse (' + str(tsTime) + ') is ' + + str("{:.2f}".format(speedup)) + compare + ' than datetime.strptime (' + str(dtTime) + ')') + # assert speedup > 7 and speedup < 8 + + def test_compare_timestamp_datetime_format(self): + runs = 1000000 + timestamps = listOfTimestamps(runs) + datetimes = listOfDatetimes(runs) + + with PrintNewFunctionVisitor(): + ts = Timestamp.make(2) + ts.format(format='%Y-%m-%d') + + start = time.time() + formatTimestamps(timestamps) + tsTime = time.time() - start + + start = time.time() + formatDatetimes(datetimes) + dtTime = time.time() - start + + if dtTime > tsTime: + speedup = dtTime / tsTime + compare = 'x faster' + else: + speedup = tsTime / dtTime + compare = 'x slower' + + print('Timestamp.format (' + str(tsTime) + ') is ' + + str("{:.2f}".format(speedup)) + compare + ' than datetime.strformat (' + str(dtTime) + ')') + + assert dtTime > tsTime and (speedup > 2 and speedup <= 4)