diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ddf5fffb1d80b..85b7b8c846f1a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -227,6 +227,7 @@ Other Enhancements - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) +- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`22647`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). 
- :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f9c604cd76472..7d86ea58dd85a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -27,11 +27,10 @@ from np_datetime import OutOfBoundsDatetime from util cimport (is_string_object, is_datetime64_object, - is_integer_object, is_float_object, is_array) + is_integer_object, is_float_object) from timedeltas cimport cast_from_unit from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, - treat_tz_as_dateutil, treat_tz_as_pytz, get_utcoffset, get_dst_info, get_timezone, maybe_get_tz, tz_compare) from parsing import parse_datetime_string @@ -850,8 +849,9 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, int64_t[:] deltas, idx_shifted ndarray ambiguous_array Py_ssize_t i, idx, pos, ntrans, n = len(vals) + Py_ssize_t delta_idx_offset, delta_idx int64_t *tdata - int64_t v, left, right, val, v_left, v_right + int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins ndarray[int64_t] result, result_a, result_b, dst_hours npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False @@ -1005,9 +1005,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if shift: # Shift the nonexistent time forward to the closest existing # time - remaining_minutes = val % HOURS_NS - new_local = val + (HOURS_NS - remaining_minutes) - delta_idx = trans.searchsorted(new_local, side='right') - 1 + remaining_mins = val % HOURS_NS + new_local = val + (HOURS_NS - remaining_mins) + delta_idx = trans.searchsorted(new_local, side='right') + # Need to subtract 1 from the delta_idx if the UTC offset of + # the target tz is greater than 0 + delta_idx_offset = int(deltas[0] > 0) + delta_idx = delta_idx - delta_idx_offset result[i] = new_local - deltas[delta_idx] 
elif fill_nonexist: result[i] = NPY_NAT diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 0eec84ecf8285..e45eb34bcafc1 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -484,6 +484,17 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. versionadded:: 0.24.0 Raises ------ @@ -503,6 +514,17 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. versionadded:: 0.24.0 Raises ------ @@ -522,6 +544,17 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. 
versionadded:: 0.24.0 Raises ------ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 08b0c5472549e..8fb4242ce2cc2 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -721,7 +721,7 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) - def _round(self, freq, mode, ambiguous='raise'): + def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): if self.tz is not None: value = self.tz_localize(None).value else: @@ -733,10 +733,12 @@ class Timestamp(_Timestamp): r = round_nsint64(value, mode, freq)[0] result = Timestamp(r, unit='ns') if self.tz is not None: - result = result.tz_localize(self.tz, ambiguous=ambiguous) + result = result.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) return result - def round(self, freq, ambiguous='raise'): + def round(self, freq, ambiguous='raise', nonexistent='raise'): """ Round the Timestamp to the specified resolution @@ -754,14 +756,27 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. 
versionadded:: 0.24.0 Raises ------ ValueError if the freq cannot be converted """ - return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous) + return self._round( + freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent + ) - def floor(self, freq, ambiguous='raise'): + def floor(self, freq, ambiguous='raise', nonexistent='raise'): """ return a new Timestamp floored to this resolution @@ -775,14 +790,25 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. versionadded:: 0.24.0 Raises ------ ValueError if the freq cannot be converted """ - return self._round(freq, RoundTo.MINUS_INFTY, ambiguous) + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) - def ceil(self, freq, ambiguous='raise'): + def ceil(self, freq, ambiguous='raise', nonexistent='raise'): """ return a new Timestamp ceiled to this resolution @@ -796,12 +822,23 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. 
versionadded:: 0.24.0 Raises ------ ValueError if the freq cannot be converted """ - return self._round(freq, RoundTo.PLUS_INFTY, ambiguous) + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) @property def tz(self): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 14325f42ff0d8..326564cca9753 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -113,6 +113,17 @@ class TimelikeOps(object): Only relevant for DatetimeIndex .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. versionadded:: 0.24.0 Returns ------- @@ -182,7 +193,7 @@ class TimelikeOps(object): """ ) - def _round(self, freq, mode, ambiguous): + def _round(self, freq, mode, ambiguous, nonexistent): # round the local times values = _ensure_datetimelike_to_i8(self) result = round_nsint64(values, mode, freq) @@ -194,20 +205,22 @@ def _round(self, freq, mode, ambiguous): if 'tz' in attribs: attribs['tz'] = None return self._ensure_localized( - self._shallow_copy(result, **attribs), ambiguous + self._shallow_copy(result, **attribs), ambiguous, nonexistent ) @Appender((_round_doc + _round_example).format(op="round")) - def round(self, freq, ambiguous='raise'): - return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous) + def round(self, freq, ambiguous='raise', nonexistent='raise'): + return self._round( + freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent + ) @Appender((_round_doc + _floor_example).format(op="floor")) - def floor(self, freq, ambiguous='raise'): - return self._round(freq, RoundTo.MINUS_INFTY, ambiguous) + def floor(self, freq, 
ambiguous='raise', nonexistent='raise'): + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) @Appender((_round_doc + _ceil_example).format(op="ceil")) - def ceil(self, freq, ambiguous='raise'): - return self._round(freq, RoundTo.PLUS_INFTY, ambiguous) + def ceil(self, freq, ambiguous='raise', nonexistent='raise'): + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): @@ -278,7 +291,8 @@ def _evaluate_compare(self, other, op): except TypeError: return result - def _ensure_localized(self, arg, ambiguous='raise', from_utc=False): + def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', + from_utc=False): """ ensure that we are re-localized @@ -289,6 +303,7 @@ def _ensure_localized(self, arg, ambiguous='raise', from_utc=False): ---------- arg : DatetimeIndex / i8 ndarray ambiguous : str, bool, or bool-ndarray, default 'raise' + nonexistent : str, default 'raise' from_utc : bool, default False If True, localize the i8 ndarray to UTC first before converting to the appropriate tz. If False, localize directly to the tz. 
@@ -305,7 +320,9 @@ def _ensure_localized(self, arg, ambiguous='raise', from_utc=False): if from_utc: arg = arg.tz_localize('UTC').tz_convert(self.tz) else: - arg = arg.tz_localize(self.tz, ambiguous=ambiguous) + arg = arg.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) return arg def _box_values_as_index(self): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index b6c783dc07aec..0c477a021df4d 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -134,8 +134,8 @@ def test_floor(self): assert result == expected @pytest.mark.parametrize('method', ['ceil', 'round', 'floor']) - def test_round_dst_border(self, method): - # GH 18946 round near DST + def test_round_dst_border_ambiguous(self, method): + # GH 18946 round near "fall back" DST ts = Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert( 'Europe/Madrid' ) @@ -155,6 +155,24 @@ def test_round_dst_border(self, method): with pytest.raises(pytz.AmbiguousTimeError): getattr(ts, method)('H', ambiguous='raise') + @pytest.mark.parametrize('method, ts_str, freq', [ + ['ceil', '2018-03-11 01:59:00-0600', '5min'], + ['round', '2018-03-11 01:59:00-0600', '5min'], + ['floor', '2018-03-11 03:01:00-0500', '2H']]) + def test_round_dst_border_nonexistent(self, method, ts_str, freq): + # GH 23324 round near "spring forward" DST + ts = Timestamp(ts_str, tz='America/Chicago') + result = getattr(ts, method)(freq, nonexistent='shift') + expected = Timestamp('2018-03-11 03:00:00', tz='America/Chicago') + assert result == expected + + result = getattr(ts, method)(freq, nonexistent='NaT') + assert result is NaT + + with pytest.raises(pytz.NonExistentTimeError, + message='2018-03-11 02:00:00'): + getattr(ts, method)(freq, nonexistent='raise') + @pytest.mark.parametrize('timestamp', [ '2018-01-01 0:0:0.124999360', '2018-01-01 0:0:0.125000367', diff --git 
a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 1fd95c4205b0e..2f6efc112819c 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -253,7 +253,7 @@ def test_dt_round_tz(self): @pytest.mark.parametrize('method', ['ceil', 'round', 'floor']) def test_dt_round_tz_ambiguous(self, method): - # GH 18946 round near DST + # GH 18946 round near "fall back" DST df1 = pd.DataFrame([ pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True), pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True), @@ -282,6 +282,27 @@ def test_dt_round_tz_ambiguous(self, method): with pytest.raises(pytz.AmbiguousTimeError): getattr(df1.date.dt, method)('H', ambiguous='raise') + @pytest.mark.parametrize('method, ts_str, freq', [ + ['ceil', '2018-03-11 01:59:00-0600', '5min'], + ['round', '2018-03-11 01:59:00-0600', '5min'], + ['floor', '2018-03-11 03:01:00-0500', '2H']]) + def test_dt_round_tz_nonexistent(self, method, ts_str, freq): + # GH 23324 round near "spring forward" DST + s = Series([pd.Timestamp(ts_str, tz='America/Chicago')]) + result = getattr(s.dt, method)(freq, nonexistent='shift') + expected = Series( + [pd.Timestamp('2018-03-11 03:00:00', tz='America/Chicago')] + ) + tm.assert_series_equal(result, expected) + + result = getattr(s.dt, method)(freq, nonexistent='NaT') + expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) + tm.assert_series_equal(result, expected) + + with pytest.raises(pytz.NonExistentTimeError, + message='2018-03-11 02:00:00'): + getattr(s.dt, method)(freq, nonexistent='raise') + def test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(['20171111', '20181212']).repeat(2)