From 27c442a0c235e22748ea9873d039d86b9f1afe97 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 16 Nov 2022 16:21:25 -0800 Subject: [PATCH 01/10] ENH: infer Timestamp(iso8601string) resolution --- doc/source/whatsnew/v2.0.0.rst | 3 +- pandas/_libs/tslibs/conversion.pyx | 52 ++++++++++++------- pandas/_libs/tslibs/timestamps.pyx | 17 ++++-- pandas/core/arrays/datetimes.py | 11 +++- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/computation/pytables.py | 7 +-- pandas/core/resample.py | 2 +- pandas/tests/arithmetic/test_datetime64.py | 4 +- pandas/tests/arrays/test_timedeltas.py | 2 +- .../indexes/datetimes/methods/test_astype.py | 2 +- .../indexes/datetimes/test_constructors.py | 2 +- pandas/tests/io/json/test_pandas.py | 4 +- .../tests/scalar/timedelta/test_arithmetic.py | 5 +- .../tests/scalar/timestamp/test_arithmetic.py | 10 ++-- .../scalar/timestamp/test_constructors.py | 48 +++++++++++------ .../tests/scalar/timestamp/test_timestamp.py | 13 +++-- .../tests/scalar/timestamp/test_timezones.py | 8 +-- .../tests/scalar/timestamp/test_unary_ops.py | 8 ++- pandas/tests/series/methods/test_asof.py | 2 +- pandas/tests/test_errors.py | 4 +- pandas/tests/tslibs/test_timezones.py | 2 +- 21 files changed, 138 insertions(+), 70 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f2bed9cc9f782..cad26784accf7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -331,7 +331,8 @@ Other API changes - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`) - Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; 
timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) -- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) +- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) +- Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`??`) - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). 
(:issue:`49111`) - When creating a :class:`Series` with a object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`) - :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 923dfa3c54d26..2c813a94c4973 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -211,6 +211,7 @@ cdef class _TSObject: cdef void ensure_reso(self, NPY_DATETIMEUNIT creso): if self.creso != creso: self.value = convert_reso(self.value, self.creso, creso, False) + self.creso = creso cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, @@ -383,7 +384,8 @@ cdef _TSObject convert_datetime_to_tsobject( cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, - int tzoffset, tzinfo tz=None): + int tzoffset, tzinfo tz=None, + NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Convert a datetimestruct `dts`, along with initial timezone offset `tzoffset` to a _TSObject (with timezone object `tz` - optional). @@ -394,6 +396,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, tzoffset: int tz : tzinfo or None timezone for the timezone-aware output. 
+ reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -405,16 +408,19 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, datetime dt Py_ssize_t pos - value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) + value = npy_datetimestruct_to_datetime(reso, &dts) obj.dts = dts obj.tzinfo = pytz.FixedOffset(tzoffset) - obj.value = tz_localize_to_utc_single(value, obj.tzinfo) + obj.value = tz_localize_to_utc_single( + value, obj.tzinfo, ambiguous=None, nonexistent=None, creso=reso + ) + obj.creso = reso if tz is None: - check_overflows(obj, NPY_FR_ns) + check_overflows(obj, reso) return obj cdef: - Localizer info = Localizer(tz, NPY_FR_ns) + Localizer info = Localizer(tz, reso) # Infer fold from offset-adjusted obj.value # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute @@ -432,6 +438,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, obj.dts.us, obj.tzinfo, fold=obj.fold) obj = convert_datetime_to_tsobject( dt, tz, nanos=obj.dts.ps // 1000) + obj.ensure_reso(reso) # TODO: more performant to get reso right up front? 
return obj @@ -468,7 +475,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, int out_local = 0, out_tzoffset = 0, string_to_dts_failed datetime dt int64_t ival - NPY_DATETIMEUNIT out_bestunit + NPY_DATETIMEUNIT out_bestunit, reso if len(ts) == 0 or ts in nat_strings: ts = NaT @@ -491,19 +498,26 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, &out_tzoffset, False ) if not string_to_dts_failed: + reso = get_supported_reso(out_bestunit) try: - check_dts_bounds(&dts, NPY_FR_ns) + check_dts_bounds(&dts, reso) if out_local == 1: - return _create_tsobject_tz_using_offset(dts, - out_tzoffset, tz) + return _create_tsobject_tz_using_offset( + dts, out_tzoffset, tz, reso + ) else: - ival = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) + ival = npy_datetimestruct_to_datetime(reso, &dts) if tz is not None: # shift for _localize_tso - ival = tz_localize_to_utc_single(ival, tz, - ambiguous="raise") - - return convert_to_tsobject(ival, tz, None, False, False) + ival = tz_localize_to_utc_single( + ival, tz, ambiguous="raise", nonexistent=None, creso=reso + ) + obj = _TSObject() + obj.dts = dts + obj.value = ival + obj.creso = reso + maybe_localize_tso(obj, tz, obj.creso) + return obj except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil @@ -516,10 +530,12 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, pass try: - dt = parse_datetime_string(ts, dayfirst=dayfirst, - yearfirst=yearfirst) - except (ValueError, OverflowError): - raise ValueError("could not convert string to Timestamp") + # TODO: use the one that returns reso + dt = parse_datetime_string( + ts, dayfirst=dayfirst, yearfirst=yearfirst + ) + except (ValueError, OverflowError) as err: + raise ValueError("could not convert string to Timestamp") from err return convert_datetime_to_tsobject(dt, tz) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 
b0208f9ca3296..121127ac23c4d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -448,6 +448,7 @@ cdef class _Timestamp(ABCTimestamp): # cython semantics, args have been switched and this is __radd__ # TODO(cython3): remove this it moved to __radd__ return other.__add__(self) + return NotImplemented def __radd__(self, other): @@ -991,7 +992,8 @@ cdef class _Timestamp(ABCTimestamp): return self value = convert_reso(self.value, self._creso, reso, round_ok=round_ok) - return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo) + ts = type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo) + return ts def as_unit(self, str unit, bint round_ok=True): """ @@ -1555,8 +1557,17 @@ class Timestamp(_Timestamp): cdef: int64_t nanos - to_offset(freq).nanos # raises on non-fixed freq - nanos = delta_to_nanoseconds(to_offset(freq), self._creso) + freq = to_offset(freq) + freq.nanos # raises on non-fixed freq + nanos = delta_to_nanoseconds(freq, self._creso) + if nanos == 0: + if freq.nanos == 0: + raise ValueError("Division by zero in rounding") + + # e.g. 
self.unit == "s" and sub-second freq + return self + + # TODO: problem if nanos==0 if self.tz is not None: value = self.tz_localize(None).value diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d0a932ec378b9..6527b165df5d8 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -395,9 +395,13 @@ def _generate_range( # type: ignore[override] if start is not None: start = Timestamp(start) + if start is not NaT: + start = start.as_unit("ns") if end is not None: end = Timestamp(end) + if end is not NaT: + end = end.as_unit("ns") if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") @@ -488,10 +492,13 @@ def _unbox_scalar(self, value) -> np.datetime64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") self._check_compatible_with(value) - return value.asm8 + if value is NaT: + return np.datetime64(value.value, "ns") + else: + return value.as_unit(self.unit).asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: - return Timestamp(value, tz=self.tz) + return Timestamp(value, tz=self.tz) # .as_unit(self._unit) def _check_compatible_with(self, other) -> None: if other is NaT: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index fe7cade1711d0..8a20c8a2991b4 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -301,7 +301,7 @@ def _unbox_scalar(self, value) -> np.timedelta64: return value.as_unit(self.unit).asm8 def _scalar_from_string(self, value) -> Timedelta | NaTType: - return Timedelta(value) + return Timedelta(value) # .as_unit(self._unit) def _check_compatible_with(self, other) -> None: # we don't have anything to validate. 
diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 93928d8bf6b83..446f830d4100b 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -215,15 +215,16 @@ def stringify(value): if isinstance(v, (int, float)): v = stringify(v) v = ensure_decoded(v) - v = Timestamp(v) + v = Timestamp(v).as_unit("ns") if v.tz is not None: v = v.tz_convert("UTC") return TermValue(v, v.value, kind) elif kind in ("timedelta64", "timedelta"): if isinstance(v, str): - v = Timedelta(v).value + v = Timedelta(v) else: - v = Timedelta(v, unit="s").value + v = Timedelta(v, unit="s") + v = v.as_unit("ns").value return TermValue(int(v), v, kind) elif meta == "category": metadata = extract_array(self.metadata, extract_numpy=True) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f0ffd694ff953..910b39a3d7b54 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2074,7 +2074,7 @@ def _adjust_dates_anchored( elif origin == "start": origin_nanos = first.value elif isinstance(origin, Timestamp): - origin_nanos = origin.value + origin_nanos = origin.as_unit("ns").value elif origin in ["end", "end_day"]: origin_last = last if origin == "end" else last.ceil("D") sub_freq_times = (origin_last.value - first.value) // freq.nanos diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index bad5335ad2d58..e9685a5a096eb 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1682,7 +1682,7 @@ def test_datetimeindex_sub_timestamp_overflow(self): dtimax = pd.to_datetime(["2021-12-28 17:19", Timestamp.max]) dtimin = pd.to_datetime(["2021-12-28 17:19", Timestamp.min]) - tsneg = Timestamp("1950-01-01") + tsneg = Timestamp("1950-01-01").as_unit("ns") ts_neg_variants = [ tsneg, tsneg.to_pydatetime(), @@ -1690,7 +1690,7 @@ def test_datetimeindex_sub_timestamp_overflow(self): 
tsneg.to_datetime64().astype("datetime64[D]"), ] - tspos = Timestamp("1980-01-01") + tspos = Timestamp("1980-01-01").as_unit("ns") ts_pos_variants = [ tspos, tspos.to_pydatetime(), diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 2fd7ccc9cf338..a6639a0388642 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -102,7 +102,7 @@ def test_add_pdnat(self, tda): # TODO: 2022-07-11 this is the only test that gets to DTA.tz_convert # or tz_localize with non-nano; implement tests specific to that. def test_add_datetimelike_scalar(self, tda, tz_naive_fixture): - ts = pd.Timestamp("2016-01-01", tz=tz_naive_fixture) + ts = pd.Timestamp("2016-01-01", tz=tz_naive_fixture).as_unit("ns") expected = tda.as_unit("ns") + ts res = tda + ts diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index ccbfd9217373b..512f5c5439631 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -276,7 +276,7 @@ def _check_rng(rng): ) def test_integer_index_astype_datetime(self, tz, dtype): # GH 20997, 20964, 24559 - val = [Timestamp("2018-01-01", tz=tz).value] + val = [Timestamp("2018-01-01", tz=tz).as_unit("ns").value] result = Index(val, name="idx").astype(dtype) expected = DatetimeIndex(["2018-01-01"], tz=tz, name="idx") tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 4aaa2b694102d..e31a00410d2e4 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -796,7 +796,7 @@ def test_constructor_timestamp_near_dst(self): ) def test_constructor_with_int_tz(self, klass, box, tz, dtype): # GH 20997, 20964 - ts = Timestamp("2018-01-01", tz=tz) + ts = Timestamp("2018-01-01", 
tz=tz).as_unit("ns") result = klass(box([ts.value]), dtype=dtype) expected = klass([ts]) assert result == expected diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2f3fc4d0fcba8..f210413c20c3e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -970,7 +970,9 @@ def test_mixed_timedelta_datetime(self): ts = Timestamp("20130101") frame = DataFrame({"a": [td, ts]}, dtype=object) - expected = DataFrame({"a": [pd.Timedelta(td).as_unit("ns").value, ts.value]}) + expected = DataFrame( + {"a": [pd.Timedelta(td).as_unit("ns").value, ts.as_unit("ns").value]} + ) result = read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) tm.assert_frame_equal(result, expected, check_index_type=False) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 72ee89a4b5108..bd1eaba1b49c4 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -99,13 +99,14 @@ def test_td_add_datetimelike_scalar(self, op): assert result is NaT def test_td_add_timestamp_overflow(self): + ts = Timestamp("1700-01-01").as_unit("ns") msg = "Cannot cast 259987 from D to 'ns' without overflow." 
with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") + ts + Timedelta(13 * 19999, unit="D") msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timestamp("1700-01-01") + timedelta(days=13 * 19999) + ts + timedelta(days=13 * 19999) @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_td(self, op): diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 17fee1ff3f949..31fadfe47db13 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -38,7 +38,7 @@ def test_overflow_offset_raises(self): # xref https://github.com/statsmodels/statsmodels/issues/3374 # ends up multiplying really large numbers which overflow - stamp = Timestamp("2017-01-13 00:00:00") + stamp = Timestamp("2017-01-13 00:00:00").as_unit("ns") offset_overflow = 20169940 * offsets.Day(1) msg = ( "the add operation between " @@ -59,7 +59,7 @@ def test_overflow_offset_raises(self): # xref https://github.com/pandas-dev/pandas/issues/14080 # used to crash, so check for proper overflow exception - stamp = Timestamp("2000/1/1") + stamp = Timestamp("2000/1/1").as_unit("ns") offset_overflow = to_offset("D") * 100**5 lmsg3 = ( @@ -77,8 +77,8 @@ def test_overflow_offset_raises(self): def test_overflow_timestamp_raises(self): # https://github.com/pandas-dev/pandas/issues/31774 msg = "Result is too large" - a = Timestamp("2101-01-01 00:00:00") - b = Timestamp("1688-01-01 00:00:00") + a = Timestamp("2101-01-01 00:00:00").as_unit("ns") + b = Timestamp("1688-01-01 00:00:00").as_unit("ns") with pytest.raises(OutOfBoundsDatetime, match=msg): a - b @@ -235,7 +235,7 @@ def test_add_int_with_freq(self, ts, other): @pytest.mark.parametrize("shape", [(6,), (2, 3)]) def test_addsub_m8ndarray(self, shape): # GH#33296 - ts = Timestamp("2020-04-04 15:45") 
+ ts = Timestamp("2020-04-04 15:45").as_unit("ns") other = np.arange(6).astype("m8[h]").reshape(shape) result = ts + other diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 9c3fa0f64153a..dc5ee8b1660cf 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -100,21 +100,21 @@ def test_constructor(self): for date_str, date, expected in tests: for result in [Timestamp(date_str), Timestamp(date)]: # only with timestring - assert result.value == expected + assert result.as_unit("ns").value == expected # re-creation shouldn't affect to internal value result = Timestamp(result) - assert result.value == expected + assert result.as_unit("ns").value == expected # with timezone for tz, offset in timezones: for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: expected_tz = expected - offset * 3600 * 1_000_000_000 - assert result.value == expected_tz + assert result.as_unit("ns").value == expected_tz # should preserve tz result = Timestamp(result) - assert result.value == expected_tz + assert result.as_unit("ns").value == expected_tz # should convert to UTC if tz is not None: @@ -122,7 +122,7 @@ def test_constructor(self): else: result = Timestamp(result, tz="UTC") expected_utc = expected - offset * 3600 * 1_000_000_000 - assert result.value == expected_utc + assert result.as_unit("ns").value == expected_utc def test_constructor_with_stringoffset(self): # GH 7833 @@ -154,26 +154,26 @@ def test_constructor_with_stringoffset(self): for date_str, expected in tests: for result in [Timestamp(date_str)]: # only with timestring - assert result.value == expected + assert result.as_unit("ns").value == expected # re-creation shouldn't affect to internal value result = Timestamp(result) - assert result.value == expected + assert result.as_unit("ns").value == expected # with timezone for tz, offset in timezones: result = 
Timestamp(date_str, tz=tz) expected_tz = expected - assert result.value == expected_tz + assert result.as_unit("ns").value == expected_tz # should preserve tz result = Timestamp(result) - assert result.value == expected_tz + assert result.as_unit("ns").value == expected_tz # should convert to UTC result = Timestamp(result).tz_convert("UTC") expected_utc = expected - assert result.value == expected_utc + assert result.as_unit("ns").value == expected_utc # This should be 2013-11-01 05:00 in UTC # converted to Chicago tz @@ -468,11 +468,17 @@ def test_out_of_bounds_value(self): Timestamp(too_high) def test_out_of_bounds_string(self): - msg = "Out of bounds" + msg = "Cannot cast .* to unit='ns' without overflow" with pytest.raises(ValueError, match=msg): - Timestamp("1676-01-01") + Timestamp("1676-01-01").as_unit("ns") with pytest.raises(ValueError, match=msg): - Timestamp("2263-01-01") + Timestamp("2263-01-01").as_unit("ns") + + ts = Timestamp("2263-01-01") + assert ts.unit == "s" + + ts = Timestamp("1676-01-01") + assert ts.unit == "s" def test_barely_out_of_bounds(self): # GH#19529 @@ -517,9 +523,21 @@ def test_bounds_with_different_units(self): @pytest.mark.parametrize("arg", ["001-01-01", "0001-01-01"]) def test_out_of_bounds_string_consistency(self, arg): # GH 15829 - msg = "Out of bounds" + msg = "|".join( + [ + "Cannot cast 1-01-01 00:00:00 to unit='ns' without overflow", + "Out of bounds nanosecond timestamp: 1-01-01 00:00:00", + ] + ) with pytest.raises(OutOfBoundsDatetime, match=msg): - Timestamp(arg) + Timestamp(arg).as_unit("ns") + + if arg == "0001-01-01": + # only the 4-digit year goes through ISO path which gets second reso + # instead of ns reso + ts = Timestamp(arg) + assert ts.unit == "s" + assert ts.year == ts.month == ts.day == 1 def test_min_valid(self): # Ensure that Timestamp.min is a valid Timestamp diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index f5b9a35a53a24..ef1bcae035d8f 
100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -204,13 +204,17 @@ def test_woy_boundary(self): def test_resolution(self): # GH#21336, GH#21365 - dt = Timestamp("2100-01-01 00:00:00") + dt = Timestamp("2100-01-01 00:00:00.000000000") assert dt.resolution == Timedelta(nanoseconds=1) # Check that the attribute is available on the class, mirroring # the stdlib datetime behavior assert Timestamp.resolution == Timedelta(nanoseconds=1) + assert dt.as_unit("us").resolution == Timedelta(microseconds=1) + assert dt.as_unit("ms").resolution == Timedelta(milliseconds=1) + assert dt.as_unit("s").resolution == Timedelta(seconds=1) + class TestTimestamp: def test_tz(self): @@ -376,7 +380,7 @@ def test_roundtrip(self): # test value to string and back conversions # further test accessors - base = Timestamp("20140101 00:00:00") + base = Timestamp("20140101 00:00:00").as_unit("ns") result = Timestamp(base.value + Timedelta("5ms").value) assert result == Timestamp(f"{base}.005000") @@ -518,7 +522,7 @@ def test_compare_hour13(self): class TestTimestampConversion: def test_conversion(self): # GH#9255 - ts = Timestamp("2000-01-01") + ts = Timestamp("2000-01-01").as_unit("ns") result = ts.to_pydatetime() expected = datetime(2000, 1, 1) @@ -999,7 +1003,8 @@ def test_timestamp_class_min_max_resolution(): class TestAsUnit: def test_as_unit(self): - ts = Timestamp("1970-01-01") + ts = Timestamp("1970-01-01").as_unit("ns") + assert ts.unit == "ns" assert ts.as_unit("ns") is ts diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 912b7d9232abe..953b44b78667b 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -83,7 +83,7 @@ def test_tz_localize_ambiguous(self): ts_dst = ts.tz_localize("US/Eastern", ambiguous=True) ts_no_dst = ts.tz_localize("US/Eastern", ambiguous=False) - assert (ts_no_dst.value 
- ts_dst.value) / 1e9 == 3600 + assert (ts_no_dst.value - ts_dst.value) == 3600 msg = "Cannot infer offset with only one time" with pytest.raises(ValueError, match=msg): ts.tz_localize("US/Eastern", ambiguous="infer") @@ -167,7 +167,7 @@ def test_tz_localize_ambiguous_compat(self): result_pytz = naive.tz_localize(pytz_zone, ambiguous=0) result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=0) assert result_pytz.value == result_dateutil.value - assert result_pytz.value == 1382835600000000000 + assert result_pytz.value == 1382835600 # fixed ambiguous behavior # see gh-14621, GH#45087 @@ -179,7 +179,7 @@ def test_tz_localize_ambiguous_compat(self): result_pytz = naive.tz_localize(pytz_zone, ambiguous=1) result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=1) assert result_pytz.value == result_dateutil.value - assert result_pytz.value == 1382832000000000000 + assert result_pytz.value == 1382832000 # see gh-14621 assert str(result_pytz) == str(result_dateutil) @@ -410,7 +410,7 @@ def test_timestamp_constructor_near_dst_boundary(self): Timestamp("2017-03-26 02:00", tz="Europe/Paris") result = Timestamp("2017-03-26 02:00:00+0100", tz="Europe/Paris") - naive = Timestamp(result.value) + naive = Timestamp(result.as_unit("ns").value) expected = naive.tz_localize("UTC").tz_convert("Europe/Paris") assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 1c1f3acc8331f..33db48ff8face 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -256,8 +256,14 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): def test_round_int64(self, timestamp, freq): # check that all rounding modes are accurate to int64 precision # see GH#22591 - dt = Timestamp(timestamp) + dt = Timestamp(timestamp).as_unit("ns") unit = to_offset(freq).nanos + # if dt._unit == "us": + # unit //= 1000 + # elif dt._unit == "ms": + # 
unit //= 1_000_000 + # elif dt._unit == "s": + # unit //= 1_000_000_000 # test floor result = dt.floor(freq) diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 4381aa3f34f8d..22a0a102af2d1 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -17,7 +17,7 @@ class TestSeriesAsof: def test_asof_nanosecond_index_access(self): - ts = Timestamp("20130101").value + ts = Timestamp("20130101").as_unit("ns").value dti = DatetimeIndex([ts + 50 + i for i in range(100)]) ser = Series(np.random.randn(100), index=dti) diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index 5dffee587adcb..aeddc08e4b888 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -62,9 +62,9 @@ def test_exception_importable(exc): def test_catch_oob(): from pandas import errors - msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00" + msg = "Cannot cast 1500-01-01 00:00:00 to unit='ns' without overflow" with pytest.raises(errors.OutOfBoundsDatetime, match=msg): - pd.Timestamp("15000101") + pd.Timestamp("15000101").as_unit("ns") @pytest.mark.parametrize( diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index aa10ab15f4744..d3b36d8028e31 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -52,7 +52,7 @@ def test_tzlocal_offset(): ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()) offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) - offset = offset.total_seconds() * 1000000000 + offset = offset.total_seconds() assert ts.value + offset == Timestamp("2011-01-01").value From d559ffd6f6c1c9961874fcca0c16951f0e143a32 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 21 Nov 2022 19:19:04 -0800 Subject: [PATCH 02/10] xfail windows --- pandas/tests/tseries/offsets/test_common.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git
a/pandas/tests/tseries/offsets/test_common.py b/pandas/tests/tseries/offsets/test_common.py index ca5feceabb411..ae18e11338280 100644 --- a/pandas/tests/tseries/offsets/test_common.py +++ b/pandas/tests/tseries/offsets/test_common.py @@ -7,7 +7,10 @@ OutOfBoundsDatetime, Timestamp, ) -from pandas.compat import IS64 +from pandas.compat import ( + IS64, + is_platform_windows, +) from pandas.tseries.offsets import ( FY5253, @@ -133,6 +136,7 @@ def test_apply_out_of_range(request, tz_naive_fixture, _offset): t = Timestamp("20080101", tz=tz) result = t + offset assert isinstance(result, datetime) + assert t.tzinfo is not None if isinstance(tz, tzlocal) and not IS64 and _offset is not DateOffset: # If we hit OutOfBoundsDatetime on non-64 bit machines @@ -140,6 +144,14 @@ def test_apply_out_of_range(request, tz_naive_fixture, _offset): request.node.add_marker( pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038") ) + elif ( + isinstance(tz, tzlocal) + and is_platform_windows() + and _offset in (QuarterEnd, BQuarterBegin, BQuarterEnd) + ): + request.node.add_marker( + pytest.mark.xfail(reason="After GH#49737 t.tzinfo is None on CI") + ) assert str(t.tzinfo) == str(result.tzinfo) except OutOfBoundsDatetime: From e88f475d7711d047470bb4bc0f74337348ae0f4b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Nov 2022 07:17:25 -0800 Subject: [PATCH 03/10] fix assertion --- pandas/tests/tseries/offsets/test_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tseries/offsets/test_common.py b/pandas/tests/tseries/offsets/test_common.py index ae18e11338280..1b90b94d8a9da 100644 --- a/pandas/tests/tseries/offsets/test_common.py +++ b/pandas/tests/tseries/offsets/test_common.py @@ -136,7 +136,8 @@ def test_apply_out_of_range(request, tz_naive_fixture, _offset): t = Timestamp("20080101", tz=tz) result = t + offset assert isinstance(result, datetime) - assert t.tzinfo is not None + if tz is not None: + assert t.tzinfo is not None if 
isinstance(tz, tzlocal) and not IS64 and _offset is not DateOffset: # If we hit OutOfBoundsDatetime on non-64 bit machines From cc9178d449d7fcc29239399f95038d6142f42fe1 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Nov 2022 11:27:57 -0800 Subject: [PATCH 04/10] post-merge fixup --- pandas/core/arrays/datetimes.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b1b5c53a056c8..c76ffc3459e4e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -398,13 +398,9 @@ def _generate_range( # type: ignore[override] if start is not None: start = Timestamp(start) - if start is not NaT: - start = start.as_unit("ns") if end is not None: end = Timestamp(end) - if end is not NaT: - end = end.as_unit("ns") if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") From dd58c3fa5756d761a6b774cd1d371ec1567ca520 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Dec 2022 07:41:54 -0800 Subject: [PATCH 05/10] Update doc/source/whatsnew/v2.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 59575b80bc022..098a59a1fa391 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -337,7 +337,7 @@ Other API changes - Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` 
constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) -- Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`??`) +- Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49737`) - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). 
(:issue:`49111`) - Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`) - When creating a :class:`Series` with a object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`) From 9bc302dfcbf0815ca6861a2b3c44c050297bb184 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 7 Dec 2022 07:43:44 -0800 Subject: [PATCH 06/10] CLN: remove commented-out --- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/tests/scalar/timestamp/test_unary_ops.py | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 898e4b6112e20..0162f54bf5225 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -517,7 +517,7 @@ def _unbox_scalar(self, value) -> np.datetime64: return value.as_unit(self.unit).asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: - return Timestamp(value, tz=self.tz) # .as_unit(self._unit) + return Timestamp(value, tz=self.tz) def _check_compatible_with(self, other) -> None: if other is NaT: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d5136bc19f900..334e5437c2f70 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -316,7 +316,7 @@ def _unbox_scalar(self, value) -> np.timedelta64: return value.as_unit(self.unit).asm8 def _scalar_from_string(self, value) -> Timedelta | NaTType: - return Timedelta(value) # .as_unit(self._unit) + return Timedelta(value) def _check_compatible_with(self, other) -> None: # we don't have anything to validate. 
diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 33db48ff8face..7f41229237c1f 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -258,12 +258,6 @@ def test_round_int64(self, timestamp, freq): # see GH#22591 dt = Timestamp(timestamp).as_unit("ns") unit = to_offset(freq).nanos - # if dt._unit == "us": - # unit //= 1000 - # elif dt._unit == "ms": - # unit //= 1_000_000 - # elif dt._unit == "s": - # unit //= 1_000_000_000 # test floor result = dt.floor(freq) From 954c71c325adabeef4ae92e67cf98b42eb02eb77 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 14 Dec 2022 13:12:20 -0800 Subject: [PATCH 07/10] update test --- pandas/tests/scalar/timestamp/test_timezones.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 15e47594e8413..e2df4d23bd858 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -103,7 +103,7 @@ def test_tz_localize_ambiguous(self): ts_dst = ts.tz_localize("US/Eastern", ambiguous=True) ts_no_dst = ts.tz_localize("US/Eastern", ambiguous=False) - assert (ts_no_dst.value - ts_dst.value) / 1e9 == 3600 + assert ts_no_dst.value - ts_dst.value == 3600 msg = re.escape( "'ambiguous' parameter must be one of: " "True, False, 'NaT', 'raise' (default)" From d744044cf986c861e0142090b2f57e8a0fc52d90 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 18 Dec 2022 13:03:56 -0800 Subject: [PATCH 08/10] test for unit on parsing iso8601 with offset --- pandas/tests/scalar/timestamp/test_constructors.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 334da067225b1..604429e7c8d78 100644 --- 
a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -24,6 +24,20 @@ class TestTimestampConstructors: + def test_constructor_from_iso8601_str_with_offset_reso(self): + # GH#49737 + ts = Timestamp("2016-01-01 04:05:06-01:00") + assert ts.unit == "s" + + ts = Timestamp("2016-01-01 04:05:06.000-01:00") + assert ts.unit == "ms" + + ts = Timestamp("2016-01-01 04:05:06.000000-01:00") + assert ts.unit == "us" + + ts = Timestamp("2016-01-01 04:05:06.000000001-01:00") + assert ts.unit == "ns" + def test_constructor_from_date_second_reso(self): # GH#49034 constructing from a pydate object gets lowest supported # reso, i.e. seconds From f490db50d48e0d7466990b2cadaf405325da0695 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 20 Dec 2022 13:17:19 -0800 Subject: [PATCH 09/10] test for round --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/tests/scalar/timestamp/test_unary_ops.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7021ed4114d29..8223d56e3e857 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -795,6 +795,7 @@ Datetimelike - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing empty string and non-ISO8601 format was passed. Now, empty strings will be parsed as :class:`NaT`, for compatibility with how is done for ISO8601 formats (:issue:`50251`) - Bug in :class:`Timestamp` was showing ``UserWarning``, which was not actionable by users, when parsing non-ISO8601 delimited date strings (:issue:`50232`) - Bug in :func:`to_datetime` was showing misleading ``ValueError`` when parsing dates with format containing ISO week directive and ISO weekday directive (:issue:`50308`) +- Bug in :meth:`Timestamp.round` when the ``freq`` argument has zero-duration (e.g. 
"0ns") returning incorrect results instead of raising (:issue:`49737`) - Timedelta diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 7f41229237c1f..f87922336b714 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -29,6 +29,13 @@ class TestTimestampUnaryOps: # -------------------------------------------------------------- + def test_round_divison_by_zero_raises(self): + ts = Timestamp("2016-01-01") + + msg = "Division by zero in rounding" + with pytest.raises(ValueError, match=msg): + ts.round("0ns") + # Timestamp.round @pytest.mark.parametrize( "timestamp, freq, expected", From d7f8696b512a1b3834d16592971020418e3518a5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Dec 2022 16:02:59 -0800 Subject: [PATCH 10/10] fix offset tests --- pandas/_libs/tslibs/offsets.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 470d1e89e5b88..3ff50be966fa0 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -162,7 +162,11 @@ def apply_wraps(func): result = func(self, other) - result = (<_Timestamp>Timestamp(result))._as_creso(other._creso) + result2 = Timestamp(result).as_unit(other.unit) + if result == result2: + # i.e. the conversion is non-lossy, not the case for e.g. + # test_milliseconds_combination + result = result2 if self._adjust_dst: result = result.tz_localize(tz)