From 69336bc8c569987c67224c00652b5807731d0632 Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Wed, 2 Oct 2024 10:54:00 +0200 Subject: [PATCH] Fix bugs in neo4j.time.DateTime handling * Fix `DateTime` +/- `Duration` computation being wildly off by considering the days of the `DateTime` since UNIX epoch twice. * Fix `Result.to_df` not correctly converting `DateTime`s with `tzinfo` if `parse_dates=True` is passed in. * Fix `DateTime.__ne__` (inequality operator) always falling back to `object.__ne__` (comparison by `id`). --- src/neo4j/_async/work/result.py | 20 +- src/neo4j/_sync/work/result.py | 20 +- src/neo4j/time/__init__.py | 11 +- tests/unit/async_/work/test_result.py | 43 +++- tests/unit/common/time/test_datetime.py | 257 ++++++++++++++++++++++-- tests/unit/sync/work/test_result.py | 43 +++- 6 files changed, 355 insertions(+), 39 deletions(-) diff --git a/src/neo4j/_async/work/result.py b/src/neo4j/_async/work/result.py index 9dfb11ae..19c57295 100644 --- a/src/neo4j/_async/work/result.py +++ b/src/neo4j/_async/work/result.py @@ -892,6 +892,7 @@ async def to_df( consumed. """ import pandas as pd # type: ignore[import] + import pytz if not expand: df = pd.DataFrame(await self.values(), columns=self._keys) @@ -931,14 +932,19 @@ async def to_df( ).all() ) ] + + def datetime_to_timestamp(x): + if not x: + return pd.NaT + tzinfo = getattr(x, "tzinfo", None) + if tzinfo is None: + return pd.Timestamp(x.iso_format()) + return pd.Timestamp( + x.astimezone(pytz.UTC).iso_format() + ).astimezone(tzinfo) + df[dt_columns] = df[dt_columns].apply( - lambda col: col.map( - lambda x: pd.Timestamp(x.iso_format()).replace( - tzinfo=getattr(x, "tzinfo", None) - ) - if x - else pd.NaT - ) + lambda col: col.map(datetime_to_timestamp) ) return df diff --git a/src/neo4j/_sync/work/result.py b/src/neo4j/_sync/work/result.py index 33380694..3223015b 100644 --- a/src/neo4j/_sync/work/result.py +++ b/src/neo4j/_sync/work/result.py @@ -892,6 +892,7 @@ def to_df( consumed. """ import pandas as pd # type: ignore[import] + import pytz if not expand: df = pd.DataFrame(self.values(), columns=self._keys) @@ -931,14 +932,19 @@ def to_df( ).all() ) ] + + def datetime_to_timestamp(x): + if not x: + return pd.NaT + tzinfo = getattr(x, "tzinfo", None) + if tzinfo is None: + return pd.Timestamp(x.iso_format()) + return pd.Timestamp( + x.astimezone(pytz.UTC).iso_format() + ).astimezone(tzinfo) + df[dt_columns] = df[dt_columns].apply( - lambda col: col.map( - lambda x: pd.Timestamp(x.iso_format()).replace( - tzinfo=getattr(x, "tzinfo", None) - ) - if x - else pd.NaT - ) + lambda col: col.map(datetime_to_timestamp) ) return df diff --git a/src/neo4j/time/__init__.py b/src/neo4j/time/__init__.py index 132bf192..38987343 100644 --- a/src/neo4j/time/__init__.py +++ b/src/neo4j/time/__init__.py @@ -2560,7 +2560,7 @@ def __ne__(self, other: object) -> bool: Accepts :class:`.DateTime` and :class:`datetime.datetime`. """ - if not isinstance(other, (Date, date)): + if not isinstance(other, (DateTime, datetime)): return NotImplemented return not self.__eq__(other) @@ -2641,7 +2641,9 @@ def __gt__( # type: ignore[override] def __add__(self, other: timedelta | Duration) -> DateTime: """Add a :class:`datetime.timedelta`.""" if isinstance(other, Duration): - t = self.to_clock_time() + ClockTime( + if other == (0, 0, 0, 0): + return self + t = self.time().to_clock_time() + ClockTime( other.seconds, other.nanoseconds ) days, seconds = symmetric_divmod(t.seconds, 86400) @@ -2651,8 +2653,11 @@ def __add__(self, other: timedelta | Duration) -> DateTime: time_ = Time.from_ticks(seconds * NANO_SECONDS + t.nanoseconds) return self.combine(date_, time_).replace(tzinfo=self.tzinfo) if isinstance(other, timedelta): + if other.total_seconds() == 0: + return self t = self.to_clock_time() + ClockTime( - 86400 * other.days + other.seconds, other.microseconds * 1000 + 86400 * other.days + other.seconds, + other.microseconds * 1000, ) days, seconds = symmetric_divmod(t.seconds, 86400) date_ = Date.from_ordinal(days + 1) diff --git a/tests/unit/async_/work/test_result.py b/tests/unit/async_/work/test_result.py index de71dd48..1291d95e 100644 --- a/tests/unit/async_/work/test_result.py +++ b/tests/unit/async_/work/test_result.py @@ -16,6 +16,7 @@ from __future__ import annotations +import datetime import logging import typing as t import uuid @@ -1082,6 +1083,18 @@ async def test_to_df_expand( assert df.equals(expected_df) +DTS_AROUND_SWEDISH_DST_CHANGE: tuple[datetime.datetime, ...] = ( + datetime.datetime(2024, 3, 31, 0, 30, 0), + datetime.datetime(2024, 3, 31, 1, 30, 0), + datetime.datetime(2024, 3, 31, 2, 30, 0), + datetime.datetime(2024, 3, 31, 3, 30, 0), + datetime.datetime(2024, 10, 27, 0, 30, 0), + datetime.datetime(2024, 10, 27, 1, 30, 0), + datetime.datetime(2024, 10, 27, 2, 30, 0), + datetime.datetime(2024, 10, 27, 3, 30, 0), +) + + @pytest.mark.parametrize( ("keys", "values", "expected_df"), ( @@ -1209,7 +1222,7 @@ async def test_to_df_expand( columns=["mixed"], ), ), - # Column with only None (should not be transfomred to NaT) + # Column with only None (should not be transformed to NaT) ( ["all_none"], [ @@ -1252,6 +1265,34 @@ async def test_to_df_expand( columns=["all_none", "mixed", "n"], ), ), + # Difficult timezones + *( + ( + ["dt_tz"], + [ + [ + pytz.UTC.localize( + neo4j_time.DateTime.from_native(dt) + ).astimezone(pytz.timezone("Europe/Stockholm")) + + neo4j_time.Duration(nanoseconds=add_ns) + ] + for dt in DTS_AROUND_SWEDISH_DST_CHANGE + ], + pd.DataFrame( + [ + [ + pytz.UTC.localize(pd.Timestamp(dt)).astimezone( + pytz.timezone("Europe/Stockholm") + ) + + pd.Timedelta(add_ns, unit="ns") + ] + for dt in DTS_AROUND_SWEDISH_DST_CHANGE + ], + columns=["dt_tz"], + ), + ) + for add_ns in (0, 1) + ), ), ) @pytest.mark.parametrize("expand", [True, False]) diff --git a/tests/unit/common/time/test_datetime.py b/tests/unit/common/time/test_datetime.py index 3dc6bf9e..ebc1deb1 100644 --- a/tests/unit/common/time/test_datetime.py +++ b/tests/unit/common/time/test_datetime.py @@ -20,6 +20,7 @@ import itertools import operator import pickle +import typing as t from datetime import ( datetime, timedelta, @@ -46,10 +47,15 @@ from neo4j.time._clock_implementations import ClockTime +if t.TYPE_CHECKING: + from pytz import BaseTzInfo + + timezone_us_eastern = timezone("US/Eastern") timezone_london = timezone("Europe/London") timezone_berlin = timezone("Europe/Berlin") timezone_utc = timezone("UTC") +timezone_utc_p2 = FixedOffset(120) def make_reduce_datetimes(): @@ -263,6 +269,235 @@ def test_subtract_native_datetime_2(self) -> None: t = dt1 - dt2 assert t == timedelta(days=65, hours=23, seconds=17.914390409) + @pytest.mark.parametrize( + ("dt_early", "delta", "dt_late"), + ( + ( + DateTime(2024, 3, 31, 0, 30, 0), + Duration(nanoseconds=1), + DateTime(2024, 3, 31, 0, 30, 0, 1), + ), + ( + DateTime(2024, 3, 31, 0, 30, 0), + Duration(hours=24), + DateTime(2024, 4, 1, 0, 30, 0), + ), + ( + DateTime(2024, 3, 31, 0, 30, 0), + timedelta(microseconds=1), + DateTime(2024, 3, 31, 0, 30, 0, 1000), + ), + ( + DateTime(2024, 3, 31, 0, 30, 0), + timedelta(hours=24), + DateTime(2024, 4, 1, 0, 30, 0), + ), + ), + ) + @pytest.mark.parametrize( + "tz", + (None, timezone_utc, timezone_utc_p2, timezone_berlin), + ) + def test_add_duration( + self, + dt_early: DateTime | datetime, + delta: Duration | timedelta, + dt_late: DateTime | datetime, + tz: BaseTzInfo | None, + ) -> None: + if tz is not None: + dt_early = timezone_utc.localize(dt_early).astimezone(tz) + dt_late = timezone_utc.localize(dt_late).astimezone(tz) + assert dt_early + delta == dt_late + + @pytest.mark.parametrize( + ("datetime_cls", "delta_cls"), + ( + (datetime, timedelta), # baseline (what Python's datetime does) + (DateTime, Duration), + (DateTime, timedelta), + ), + ) + def test_transition_to_summertime( + self, + datetime_cls: type[DateTime] | type[datetime], + delta_cls: type[Duration] | type[timedelta], + ) -> None: + dt = datetime_cls(2022, 3, 27, 1, 30) + dt = timezone_berlin.localize(dt) + assert dt.utcoffset() == timedelta(hours=1) + assert isinstance(dt, datetime_cls) + time = dt.time() + assert (time.hour, time.minute) == (1, 30) + + dt += delta_cls(hours=1) + + # The native datetime object treats timedelta addition as wall time + # addition. This is imo silly, but what Python decided to do. So want + # our implementation to match that. See also: + # https://stackoverflow.com/questions/76583100/is-pytz-deprecated-now-or-in-the-future-in-python + assert dt.utcoffset() == timedelta(hours=1) + assert isinstance(dt, datetime_cls) + time = dt.time() + assert (time.hour, time.minute) == (2, 30) + + @pytest.mark.parametrize( + ("datetime_cls", "delta_cls"), + ( + (datetime, timedelta), # baseline (what Python's datetime does) + (DateTime, Duration), + (DateTime, timedelta), + ), + ) + def test_transition_from_summertime( + self, + datetime_cls: type[DateTime] | type[datetime], + delta_cls: type[Duration] | type[timedelta], + ) -> None: + dt = datetime_cls(2022, 10, 30, 2, 30) + dt = timezone_berlin.localize(dt, is_dst=True) + assert dt.utcoffset() == timedelta(hours=2) + assert isinstance(dt, datetime_cls) + time = dt.time() + assert (time.hour, time.minute) == (2, 30) + + dt += delta_cls(hours=1) + + # The native datetime object treats timedelta addition as wall time + # addition. This is imo silly, but what Python decided to do. So want + # our implementation to match that. See also: + # https://stackoverflow.com/questions/76583100/is-pytz-deprecated-now-or-in-the-future-in-python + assert dt.utcoffset() == timedelta(hours=2) + assert isinstance(dt, datetime_cls) + time = dt.time() + assert (time.hour, time.minute) == (3, 30) + + @pytest.mark.parametrize( + ("dt1", "dt2"), + ( + ( + DateTime(2018, 4, 27, 23, 0, 17, 914390409), + DateTime(2018, 4, 27, 23, 0, 17, 914390409), + ), + ( + utc.localize(DateTime(2018, 4, 27, 23, 0, 17, 914390409)), + utc.localize(DateTime(2018, 4, 27, 23, 0, 17, 914390409)), + ), + ( + utc.localize(DateTime(2018, 4, 27, 23, 0, 17, 914390409)), + utc.localize( + DateTime(2018, 4, 27, 23, 0, 17, 914390409) + ).astimezone(timezone_berlin), + ), + ), + ) + @pytest.mark.parametrize("native", (True, False)) + def test_eq( + self, + dt1: DateTime | datetime, + dt2: DateTime | datetime, + native: bool, + ) -> None: + assert isinstance(dt1, DateTime) + assert isinstance(dt2, DateTime) + if native: + dt1 = dt1.replace(nanosecond=dt1.nanosecond // 1000 * 1000) + dt2 = dt2.to_native() + assert dt1 == dt2 + assert dt2 == dt1 + # explicitly test that `not !=` is `==` (different code paths) + assert not dt1 != dt2 # noqa + assert not dt2 != dt1 # noqa + + @pytest.mark.parametrize( + ("dt1", "dt2", "native"), + ( + # nanosecond difference + ( + DateTime(2018, 4, 27, 23, 0, 17, 914390408), + DateTime(2018, 4, 27, 23, 0, 17, 914390409), + False, + ), + *( + ( + dt1, + DateTime(2018, 4, 27, 23, 0, 17, 914390409), + native, + ) + for dt1 in ( + DateTime(2018, 4, 27, 23, 0, 17, 914391409), + DateTime(2018, 4, 27, 23, 0, 18, 914390409), + DateTime(2018, 4, 27, 23, 1, 17, 914390409), + DateTime(2018, 4, 27, 22, 0, 17, 914390409), + DateTime(2018, 4, 26, 23, 0, 17, 914390409), + DateTime(2018, 5, 27, 23, 0, 17, 914390409), + DateTime(2019, 4, 27, 23, 0, 17, 914390409), + ) + for native in (True, False) + ), + *( + ( + # type ignore: + # https://github.com/python/typeshed/issues/12715 + tz1.localize(dt1, is_dst=None), # type: ignore[arg-type] + tz2.localize( + DateTime(2018, 4, 27, 23, 0, 17, 914390409), + is_dst=None, # type: ignore[arg-type] + ), + native, + ) + for dt1 in ( + DateTime(2018, 4, 27, 23, 0, 17, 914391409), + DateTime(2018, 4, 27, 23, 0, 18, 914390409), + DateTime(2018, 4, 27, 23, 1, 17, 914390409), + DateTime(2018, 4, 27, 22, 0, 17, 914390409), + DateTime(2018, 4, 26, 23, 0, 17, 914390409), + DateTime(2018, 5, 27, 23, 0, 17, 914390409), + DateTime(2019, 4, 27, 23, 0, 17, 914390409), + ) + for native in (True, False) + for tz1, tz2 in itertools.combinations_with_replacement( + (timezone_utc, timezone_utc_p2, timezone_berlin), 2 + ) + ), + ), + ) + def test_ne( + self, + dt1: DateTime | datetime, + dt2: DateTime | datetime, + native: bool, + ) -> None: + assert isinstance(dt1, DateTime) + assert isinstance(dt2, DateTime) + if native: + dt2 = dt2.to_native() + assert dt1 != dt2 + assert dt2 != dt1 + # explicitly test that `not ==` is `!=` (different code paths) + assert not dt1 == dt2 # noqa + assert not dt2 == dt1 # noqa + + @pytest.mark.parametrize( + "other", + ( + object(), + 1, + DateTime(2018, 4, 27, 23, 0, 17, 914391409).to_clock_time(), + ( + DateTime(2018, 4, 27, 23, 0, 17, 914391409) + - DateTime(1970, 1, 1) + ), + ), + ) + def test_ne_object(self, other: object) -> None: + dt = DateTime(2018, 4, 27, 23, 0, 17, 914391409) + assert dt != other + assert other != dt + # explicitly test that `not ==` is `!=` (different code paths) + assert not dt == other # noqa + assert not other == dt # noqa + def test_normalization(self) -> None: ndt1 = timezone_us_eastern.normalize( DateTime(2018, 4, 27, 23, 0, 17, tzinfo=timezone_us_eastern) @@ -523,26 +758,6 @@ def test_from_native_case_2() -> None: assert dt.tzinfo == FixedOffset(0) -@pytest.mark.parametrize("datetime_cls", (DateTime, datetime)) -def test_transition_to_summertime(datetime_cls) -> None: - dt = datetime_cls(2022, 3, 27, 1, 30) - dt = timezone_berlin.localize(dt) - assert dt.utcoffset() == timedelta(hours=1) - assert isinstance(dt, datetime_cls) - time = dt.time() - assert (time.hour, time.minute) == (1, 30) - - dt += timedelta(hours=1) - - # The native datetime object just bluntly carries over the timezone. You'd - # have to manually convert to UTC, do the calculation, and then convert - # back. Not pretty, but we should make sure our implementation does - assert dt.utcoffset() == timedelta(hours=1) - assert isinstance(dt, datetime_cls) - time = dt.time() - assert (time.hour, time.minute) == (2, 30) - - @pytest.mark.parametrize("datetime_cls", (DateTime, datetime)) @pytest.mark.parametrize( "utc_impl", @@ -747,6 +962,8 @@ def test_equality(dt1, dt2) -> None: def test_inequality(dt1, dt2) -> None: assert dt1 != dt2 assert dt2 != dt1 + assert dt1 != dt2 + assert dt2 != dt1 @pytest.mark.parametrize( diff --git a/tests/unit/sync/work/test_result.py b/tests/unit/sync/work/test_result.py index 32a71930..623d5014 100644 --- a/tests/unit/sync/work/test_result.py +++ b/tests/unit/sync/work/test_result.py @@ -16,6 +16,7 @@ from __future__ import annotations +import datetime import logging import typing as t import uuid @@ -1082,6 +1083,18 @@ def test_to_df_expand( assert df.equals(expected_df) +DTS_AROUND_SWEDISH_DST_CHANGE: tuple[datetime.datetime, ...] = ( + datetime.datetime(2024, 3, 31, 0, 30, 0), + datetime.datetime(2024, 3, 31, 1, 30, 0), + datetime.datetime(2024, 3, 31, 2, 30, 0), + datetime.datetime(2024, 3, 31, 3, 30, 0), + datetime.datetime(2024, 10, 27, 0, 30, 0), + datetime.datetime(2024, 10, 27, 1, 30, 0), + datetime.datetime(2024, 10, 27, 2, 30, 0), + datetime.datetime(2024, 10, 27, 3, 30, 0), +) + + @pytest.mark.parametrize( ("keys", "values", "expected_df"), ( @@ -1209,7 +1222,7 @@ def test_to_df_expand( columns=["mixed"], ), ), - # Column with only None (should not be transfomred to NaT) + # Column with only None (should not be transformed to NaT) ( ["all_none"], [ @@ -1252,6 +1265,34 @@ def test_to_df_expand( columns=["all_none", "mixed", "n"], ), ), + # Difficult timezones + *( + ( + ["dt_tz"], + [ + [ + pytz.UTC.localize( + neo4j_time.DateTime.from_native(dt) + ).astimezone(pytz.timezone("Europe/Stockholm")) + + neo4j_time.Duration(nanoseconds=add_ns) + ] + for dt in DTS_AROUND_SWEDISH_DST_CHANGE + ], + pd.DataFrame( + [ + [ + pytz.UTC.localize(pd.Timestamp(dt)).astimezone( + pytz.timezone("Europe/Stockholm") + ) + + pd.Timedelta(add_ns, unit="ns") + ] + for dt in DTS_AROUND_SWEDISH_DST_CHANGE + ], + columns=["dt_tz"], + ), + ) + for add_ns in (0, 1) + ), ), ) @pytest.mark.parametrize("expand", [True, False])