From d32a3862589663bec2481912d0618477312676fd Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 19 Dec 2022 11:52:39 -0800 Subject: [PATCH 1/6] BUG: Timestamp.replace handle out-of-pydatetime range --- pandas/_libs/tslibs/timestamps.pyx | 18 +++++++++++++++++- .../tests/scalar/timestamp/test_unary_ops.py | 13 +++++++++++++ pandas/tests/tseries/offsets/test_year.py | 12 ++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7593be7bf77f3..303814c1b3b4a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -8,6 +8,7 @@ shadows the python class, where we do any heavy lifting. """ import warnings + cimport cython import numpy as np @@ -80,6 +81,7 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, + check_dts_bounds, cmp_dtstructs, cmp_scalar, convert_reso, @@ -2097,6 +2099,7 @@ default 'raise' object k, v datetime ts_input tzinfo_type tzobj + _TSObject ts # set to naive if needed tzobj = self.tzinfo @@ -2142,7 +2145,20 @@ default 'raise' tzobj = tzinfo # reconstruct & check bounds - if tzobj is not None and treat_tz_as_pytz(tzobj): + if tzobj is None: + # We can avoid going through pydatetime paths, which is robust + # to datetimes outside of pydatetime range. + ts = _TSObject() + check_dts_bounds(&dts, self._creso) + ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) + ts.dts = dts + ts.creso = self._creso + ts.fold = fold + return create_timestamp_from_ts( + ts.value, dts, tzobj, fold, reso=self._creso + ) + + elif tzobj is not None and treat_tz_as_pytz(tzobj): # replacing across a DST boundary may induce a new tzinfo object # see GH#18319 ts_input = tzobj.localize(datetime(dts.year, dts.month, dts.day, diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 1c1f3acc8331f..d87ad7d8966ff 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -13,6 +13,7 @@ from pandas._libs import lib from pandas._libs.tslibs import ( NaT, + OutOfBoundsDatetime, Timedelta, Timestamp, conversion, @@ -356,6 +357,18 @@ def checker(res, ts, nanos): # -------------------------------------------------------------- # Timestamp.replace + def test_replace_out_of_pydatetime_bounds(self): + ts = Timestamp("2016-01-01") + + msg = "Out of bounds nanosecond timestamp: 99999-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + ts.replace(year=99_999) + + ts = ts.as_unit("ms") + result = ts.replace(year=99_999) + assert result.year == 99_999 + assert result.value == Timestamp(np.datetime64("99999-01-01", "ms")).value + def test_replace_non_nano(self): ts = Timestamp._from_value_and_reso( 91514880000000000, NpyDatetimeUnit.NPY_FR_us.value, None diff --git a/pandas/tests/tseries/offsets/test_year.py b/pandas/tests/tseries/offsets/test_year.py index daa5171af2452..2a65ba6dba466 100644 --- a/pandas/tests/tseries/offsets/test_year.py +++ b/pandas/tests/tseries/offsets/test_year.py @@ -7,8 +7,10 @@ from datetime import datetime +import numpy as np import pytest +from pandas import Timestamp from pandas.tests.tseries.offsets.common import ( assert_is_on_offset, assert_offset_equal, @@ -317,3 +319,13 @@ def test_offset(self, case): def test_is_on_offset(self, case): offset, dt, expected = case assert_is_on_offset(offset, dt, expected) + + +def test_add_out_of_pydatetime_range(): + # don't raise in Timestamp.replace + ts = Timestamp(np.datetime64("-20000-12-31")) + off = YearEnd() + + result = ts + off + expected = Timestamp(np.datetime64("-19999-12-31")) + assert result == expected From 15f3aa315978dc70542a1e25802dadba9cb7568d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Feb 2023 10:55:05 -0800 Subject: [PATCH 2/6] update test --- pandas/tests/scalar/timestamp/test_unary_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index b16ab0bc5d5e4..3c89d3e79729e 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -365,7 +365,7 @@ def checker(res, ts, nanos): # Timestamp.replace def test_replace_out_of_pydatetime_bounds(self): - ts = Timestamp("2016-01-01") + ts = Timestamp("2016-01-01").as_unit("ns") msg = "Out of bounds nanosecond timestamp: 99999-01-01 00:00:00" with pytest.raises(OutOfBoundsDatetime, match=msg): From db8bed860a15e1d647f6dab12610a4bf66278404 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 8 Feb 2023 17:13:14 -0800 Subject: [PATCH 3/6] fix repr for out-of-pydatetime bounds --- pandas/_libs/tslibs/timestamps.pyx | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 49cb7febf9c8e..2db927d8a79c3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1023,19 +1023,20 @@ cdef class _Timestamp(ABCTimestamp): stamp = self._repr_base zone = None - try: - stamp += self.strftime("%z") - except ValueError: - year2000 = self.replace(year=2000) - stamp += year2000.strftime("%z") + if self.tzinfo is not None: + try: + stamp += self.strftime("%z") + except ValueError: + year2000 = self.replace(year=2000) + stamp += year2000.strftime("%z") - if self.tzinfo: - zone = get_timezone(self.tzinfo) - try: - stamp += zone.strftime(" %%Z") - except AttributeError: - # e.g. tzlocal has no `strftime` - pass + if self.tzinfo: + zone = get_timezone(self.tzinfo) + try: + stamp += zone.strftime(" %%Z") + except AttributeError: + # e.g. tzlocal has no `strftime` + pass tz = f", tz='{zone}'" if zone is not None else "" From cbc5cd49e40246e77f4c0338dda901845034d974 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Feb 2023 08:26:19 -0800 Subject: [PATCH 4/6] update test --- pandas/tests/scalar/timestamp/test_unary_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 2a303864c4de8..0646edb01f5ce 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -374,7 +374,7 @@ def test_replace_out_of_pydatetime_bounds(self): ts = ts.as_unit("ms") result = ts.replace(year=99_999) assert result.year == 99_999 - assert result.value == Timestamp(np.datetime64("99999-01-01", "ms")).value + assert result._value == Timestamp(np.datetime64("99999-01-01", "ms"))._value def test_replace_non_nano(self): ts = Timestamp._from_value_and_reso( From 4e0735b469d0537e6ee610d45df62633bdc9b79e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Feb 2023 16:02:21 -0800 Subject: [PATCH 5/6] xfail npdev --- pandas/tests/scalar/timestamp/test_unary_ops.py | 1 + pandas/tests/tseries/offsets/test_year.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 0646edb01f5ce..be24fd7da8591 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -365,6 +365,7 @@ def checker(res, ts, nanos): # Timestamp.replace def test_replace_out_of_pydatetime_bounds(self): + # GH#50348 ts = Timestamp("2016-01-01").as_unit("ns") msg = "Out of bounds nanosecond timestamp: 99999-01-01 00:00:00" diff --git a/pandas/tests/tseries/offsets/test_year.py b/pandas/tests/tseries/offsets/test_year.py index 2a65ba6dba466..480c875c36e04 100644 --- a/pandas/tests/tseries/offsets/test_year.py +++ b/pandas/tests/tseries/offsets/test_year.py @@ -10,6 +10,8 @@ import numpy as np import pytest +from pandas.compat import is_numpy_dev + from pandas import Timestamp from pandas.tests.tseries.offsets.common import ( assert_is_on_offset, @@ -321,8 +323,9 @@ def test_is_on_offset(self, case): assert_is_on_offset(offset, dt, expected) +@pytest.mark.xfail(is_numpy_dev, reason="result year is 1973, unclear why") def test_add_out_of_pydatetime_range(): - # don't raise in Timestamp.replace + # GH#50348 don't raise in Timestamp.replace ts = Timestamp(np.datetime64("-20000-12-31")) off = YearEnd() From cd9bfedaba2c6dc5c21d4c16b54b70e30152d835 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Feb 2023 09:33:03 -0800 Subject: [PATCH 6/6] Update pandas/_libs/tslibs/timestamps.pyx Co-authored-by: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> --- pandas/_libs/tslibs/timestamps.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 05ae10625d50b..9e9dab155a5cf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1041,8 +1041,7 @@ cdef class _Timestamp(ABCTimestamp): year2000 = self.replace(year=2000) stamp += year2000.strftime("%z") - if self.tzinfo: - zone = get_timezone(self.tzinfo) + zone = get_timezone(self.tzinfo) try: stamp += zone.strftime(" %%Z") except AttributeError: