diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 11de4e60f202d..a9aa49ae5ee66 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -23,6 +23,7 @@ "to_offset", "Tick", "BaseOffset", + "DayDST", "tz_compare", ] @@ -41,6 +42,7 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.offsets import ( BaseOffset, + DayDST, Tick, to_offset, ) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd index 215c3f849281f..f27e2e0a8b928 100644 --- a/pandas/_libs/tslibs/offsets.pxd +++ b/pandas/_libs/tslibs/offsets.pxd @@ -1,7 +1,7 @@ from numpy cimport int64_t -cpdef to_offset(object obj) +cpdef to_offset(object obj, bint tzaware=*) cdef bint is_offset_object(object obj) cdef bint is_tick_object(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 39582a94dbdf9..d0492073b93c8 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1009,6 +1009,41 @@ def delta_to_tick(delta: timedelta) -> Tick: return Nano(nanos) +cdef class DayDST(SingleConstructorOffset): + _adjust_dst = True + _attributes = tuple(["n", "normalize"]) + rule_code = "D" # used by parse_time_string + + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n) + if normalize: + # GH#21427 + raise ValueError( + "Tick offset with `normalize=True` are not allowed." 
+ ) + + def is_on_offset(self, dt) -> bool: + return True + + @apply_wraps + def _apply(self, other): + return other + Timedelta(days=self.n) + + @apply_index_wraps + def apply_index(self, dti): + return self._apply_array(dti) + + @apply_array_wraps + def _apply_array(self, dtarr): + return dtarr + Timedelta(days=self.n) + + @cache_readonly + def freqstr(self) -> str: + if self.n != 1: + return str(self.n) + "DayDST" + return "DayDST" + + # -------------------------------------------------------------------- cdef class RelativeDeltaOffset(BaseOffset): @@ -3569,7 +3604,7 @@ def _get_offset(name: str) -> BaseOffset: return _offset_map[name] -cpdef to_offset(freq): +cpdef to_offset(freq, bint tzaware=False): """ Return DateOffset object from string or tuple representation or datetime.timedelta object. @@ -3577,6 +3612,8 @@ cpdef to_offset(freq): Parameters ---------- freq : str, tuple, datetime.timedelta, DateOffset or None + tzaware : bool, default False + If we have a string "D", whether to interpret that as DayDST. 
Returns ------- @@ -3629,6 +3666,14 @@ cpdef to_offset(freq): delta = None stride_sign = None + if freq.endswith("DayDST"): + head = freq[:-6] + if len(head): + n = int(head) + else: + n = 1 + return DayDST(n) + try: split = opattern.split(freq) if split[-1] != "" and not split[-1].isspace(): @@ -3673,6 +3718,8 @@ cpdef to_offset(freq): if delta is None: raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) + if type(delta) is Day and tzaware: + return DayDST(delta.n) return delta diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 67696f9740ea1..cf6abfedf9eb4 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -113,7 +113,11 @@ from pandas._libs.tslibs.offsets cimport ( to_offset, ) -from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG +from pandas._libs.tslibs.offsets import ( + INVALID_FREQ_ERR_MSG, + Day, + DayDST, +) cdef: enum: @@ -1629,6 +1633,8 @@ cdef class _Period(PeriodMixin): freq = dtype.date_offset freq = to_offset(freq) + if isinstance(freq, DayDST): + freq = Day(freq.n) if freq.n <= 0: raise ValueError("Frequency must be positive, because it " diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index be39ccd444865..f86630b4fc595 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1350,7 +1350,7 @@ class Timedelta(_Timedelta): ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset - unit = to_offset(freq).nanos + unit = to_offset(freq, tzaware=False).nanos arr = np.array([self.value], dtype="i8") result = round_nsint64(arr, mode, unit)[0] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 33da9ca858a4c..ddba0a6e37130 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -26,6 +26,7 @@ ) from pandas._libs.tslibs import ( BaseOffset, + DayDST, IncompatibleFrequency, NaT, NaTType, @@ -1081,8 +1082,9 @@ def 
_add_timedeltalike_scalar(self, other): new_values = new_values.view(self._ndarray.dtype) new_freq = None - if isinstance(self.freq, Tick) or is_period_dtype(self.dtype): + if isinstance(self.freq, (Tick, DayDST)) or is_period_dtype(self.dtype): # adding a scalar preserves freq + # TODO: make sure this is accurate for DayDST new_freq = self.freq # error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked" diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7bd3403abd5cc..1acd8576aae54 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -79,6 +79,7 @@ from pandas.tseries.offsets import ( BDay, Day, + DayDST, Tick, ) @@ -365,7 +366,18 @@ def _from_sequence_not_strict( ambiguous=ambiguous, ) - freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) + try: + freq, freq_infer = dtl.validate_inferred_freq( + freq, inferred_freq, freq_infer + ) + except ValueError: + if isinstance(freq, Tick) and isinstance(inferred_freq, DayDST): + # It is possible that both could be valid, so we'll + # go through _validate_frequency below + inferred_freq = None + freq_infer = False + else: + raise if explicit_none: freq = None @@ -432,10 +444,13 @@ def _generate_range( end, end_tz, end, freq, tz, ambiguous, nonexistent ) if freq is not None: + # FIXME: don't do this # We break Day arithmetic (fixed 24 hour) here and opt for # Day to mean calendar day (23/24/25 hour).
Therefore, strip # tz info from start and day to avoid DST arithmetic - if isinstance(freq, Day): + if isinstance(freq, (Day, DayDST)): + if tz is not None: + freq = DayDST(freq.n) if start is not None: start = start.tz_localize(None) if end is not None: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 01018c7263f32..2d4fe3d72acc2 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -29,6 +29,8 @@ from pandas._libs.tslibs.dtypes import FreqGroup from pandas._libs.tslibs.fields import isleapyear_arr from pandas._libs.tslibs.offsets import ( + Day, + DayDST, Tick, delta_to_tick, ) @@ -1108,6 +1110,8 @@ def dt64arr_to_periodarr(data, freq, tz=None): elif isinstance(data, (ABCIndex, ABCSeries)): data = data._values + if isinstance(freq, DayDST): + freq = Day(freq.n) freq = Period._maybe_convert_freq(freq) base = freq._period_dtype_code return c_dt64arr_to_periodarr(data.view("i8"), base, tz), freq diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e283509206344..2793c8af08960 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1012,7 +1012,7 @@ def date_range( DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', '2018-01-05 00:00:00+09:00'], - dtype='datetime64[ns, Asia/Tokyo]', freq='D') + dtype='datetime64[ns, Asia/Tokyo]', freq='DayDST') `closed` controls whether to include `start` and `end` that are on the boundary. The default includes boundary points on either end. 
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 01a8982c5fe16..fc329c53b2d9f 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -41,6 +41,7 @@ def test_setitem_invalidates_datetime_index_freq(self): # `freq` attribute on the underlying DatetimeIndex dti = date_range("20130101", periods=3, tz="US/Eastern") + orig_freq = dti.freq ts = dti[1] df = DataFrame({"B": dti}) @@ -50,7 +51,7 @@ def test_setitem_invalidates_datetime_index_freq(self): assert df["B"]._values.freq is None # check that the DatetimeIndex was not altered in place - assert dti.freq == "D" + assert dti.freq is orig_freq assert dti[1] == ts def test_cast_internals(self, float_frame): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 9db6567ca1b56..7ec64e464aa5d 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -68,7 +68,7 @@ def test_getitem(self): result = idx[4::-1] expected = DatetimeIndex( ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"], - freq="-1D", + freq=-1 * idx.freq, tz=idx.tz, name="idx", ) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index ae4ed04f8adac..3814321513bac 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -377,7 +377,10 @@ def test_setops_preserve_freq(self, tz): result = rng[:50].intersection(rng[25:75]) assert result.name == rng.name - assert result.freqstr == "D" + if tz is None: + assert result.freqstr == "D" + else: + assert result.freqstr == "DayDST" assert result.tz == rng.tz nofreq = DatetimeIndex(list(rng[25:75]), name="other") diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 34e8e2ac3e84a..643f7f02a750b 100644 --- 
a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -874,7 +874,7 @@ def test_resample_origin_epoch_with_tz_day_vs_24h(): result_1 = ts_1.resample("D", origin="epoch").mean() result_2 = ts_1.resample("24H", origin="epoch").mean() - tm.assert_series_equal(result_1, result_2) + tm.assert_series_equal(result_1, result_2, check_freq=False) # check that we have the same behavior with epoch even if we are not timezone aware ts_no_tz = ts_1.tz_localize(None) @@ -897,7 +897,7 @@ def test_resample_origin_with_day_freq_on_dst(): # GH 31809 tz = "America/Chicago" - def _create_series(values, timestamps, freq="D"): + def _create_series(values, timestamps, freq="DayDST"): return Series( values, index=DatetimeIndex( @@ -1484,7 +1484,7 @@ def test_resample_dst_anchor(): dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz="US/Eastern") df = DataFrame([5], index=dti) - dti = DatetimeIndex(df.index.normalize(), freq="D") + dti = DatetimeIndex(df.index.normalize(), freq="DayDST") expected = DataFrame([5], index=dti) tm.assert_frame_equal(df.resample(rule="D").sum(), expected) df.resample(rule="MS").sum() @@ -1618,7 +1618,11 @@ def test_downsample_dst_at_midnight(): dti = date_range("2018-11-03", periods=3).tz_localize( "America/Havana", ambiguous=True ) - dti = DatetimeIndex(dti, freq="D") + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst"): + # Check that we are requiring ambiguous be passed explicitly + dti = DatetimeIndex(dti, freq="D") + dti = DatetimeIndex(dti, freq="DayDST", ambiguous=True) + expected = DataFrame([7.5, 28.0, 44.5], index=dti) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 70d37f83c7f0c..dc129e1459517 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -281,7 +281,7 @@ def test_resample_with_pytz(self): expected = Series( 2.0, 
index=pd.DatetimeIndex( - ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D" + ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="DayDST" ), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 48a3ebd25c239..ba58ad5bbae89 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -164,7 +164,10 @@ def test_dt_namespace_accessor_datetime64tz(self): tz_result = result.dt.tz assert str(tz_result) == "CET" freq_result = ser.dt.freq - assert freq_result == DatetimeIndex(ser.values, freq="infer").freq + assert ( + freq_result + == DatetimeIndex(ser._values._with_freq(None), freq="infer").freq + ) def test_dt_namespace_accessor_timedelta(self): # GH#7207, GH#11128 diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 5f96078ba70b1..0fb75253978e5 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -323,11 +323,12 @@ def test_setitem_invalidates_datetime_index_freq(self): # `freq` attribute on the underlying DatetimeIndex dti = date_range("20130101", periods=3, tz="US/Eastern") + orig_freq = dti.freq ts = dti[1] ser = Series(dti) assert ser._values is not dti assert ser._values._data.base is not dti._data._data.base - assert dti.freq == "D" + assert dti.freq is orig_freq ser.iloc[1] = NaT assert ser._values.freq is None @@ -335,7 +336,7 @@ def test_setitem_invalidates_datetime_index_freq(self): assert ser._values is not dti assert ser._values._data.base is not dti._data._data.base assert dti[1] == ts - assert dti.freq == "D" + assert dti.freq is orig_freq def test_dt64tz_setitem_does_not_mutate_dti(self): # GH#21907, GH#24096 diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index cbbe29fb6cf9a..2e716977f79dc 
100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -267,6 +267,10 @@ def test_infer_freq_index(freq, expected): def test_infer_freq_tz(tz_naive_fixture, expected, dates): # see gh-7310 tz = tz_naive_fixture + + if expected == "D" and tz is not None: + expected = "DayDST" + idx = DatetimeIndex(dates, tz=tz) assert idx.inferred_freq == expected @@ -380,6 +384,24 @@ def test_infer_freq_business_hour(data, expected): assert idx.inferred_freq == expected +def test_infer_freq_across_dst_not_daily(): + # GH#37295 + dti = date_range( + start=Timestamp("2019-03-26 00:00:00-0400", tz="Canada/Eastern"), + end=Timestamp("2020-10-17 00:00:00-0400", tz="Canada/Eastern"), + freq="D", + ) + assert dti.freq == "DayDST" + + diff = dti - dti.shift() + assert not diff.is_unique + + assert dti.inferred_freq == "DayDST" + + dti2 = DatetimeIndex(dti._with_freq(None), freq="infer") + assert dti2.freq == "DayDST" + + def test_not_monotonic(): rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"]) rng = rng[::-1] diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 134ba79e7773d..9568de90c7f19 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -50,6 +50,7 @@ CustomBusinessMonthBegin, CustomBusinessMonthEnd, DateOffset, + DayDST, Easter, FY5253Quarter, LastWeekOfMonth, @@ -69,6 +70,7 @@ class TestCommon(Base): # used for .apply and .rollforward expecteds = { "Day": Timestamp("2011-01-02 09:00:00"), + "DayDST": Timestamp("2011-01-02 09:00:00"), "DateOffset": Timestamp("2011-01-02 09:00:00"), "BusinessDay": Timestamp("2011-01-03 09:00:00"), "CustomBusinessDay": Timestamp("2011-01-03 09:00:00"), @@ -161,13 +163,13 @@ def test_offset_freqstr(self, offset_types): offset = self._get_offset(offset_types) freqstr = offset.freqstr - if freqstr not in ("", "", "LWOM-SAT"): + if freqstr not in ("DayDST", "", 
"", "LWOM-SAT"): code = _get_offset(freqstr) assert offset.rule_code == code def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False): - if normalize and issubclass(offset, Tick): + if normalize and issubclass(offset, (Tick, DayDST)): # normalize=True disallowed for Tick subclasses GH#21427 return @@ -270,6 +272,7 @@ def test_rollforward(self, offset_types): # result will not be changed if the target is on the offset no_changes = [ "Day", + "DayDST", "MonthBegin", "SemiMonthBegin", "YearBegin", @@ -348,6 +351,7 @@ def test_rollback(self, offset_types): # result will not be changed if the target is on the offset for n in [ "Day", + "DayDST", "MonthBegin", "SemiMonthBegin", "YearBegin", @@ -369,6 +373,7 @@ def test_rollback(self, offset_types): normalized = { "Day": Timestamp("2010-12-31 00:00:00"), + "DayDST": Timestamp("2010-12-31 00:00:00"), "DateOffset": Timestamp("2010-12-31 00:00:00"), "MonthBegin": Timestamp("2010-12-01 00:00:00"), "SemiMonthBegin": Timestamp("2010-12-15 00:00:00"), @@ -400,7 +405,7 @@ def test_is_on_offset(self, offset_types): assert offset_s.is_on_offset(dt) # when normalize=True, is_on_offset checks time is 00:00:00 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, DayDST)): # normalize=True disallowed for Tick subclasses GH#21427 return offset_n = self._get_offset(offset_types, normalize=True) @@ -432,7 +437,7 @@ def test_add(self, offset_types, tz_naive_fixture): assert result == expected_localize # normalize=True, disallowed for Tick subclasses GH#21427 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, DayDST)): return offset_s = self._get_offset(offset_types, normalize=True) expected = Timestamp(expected.date()) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index d7abb19530837..98c61c919a9c0 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -36,6 +36,7 @@ def test_namespace(): 
"IncompatibleFrequency", "Resolution", "Tick", + "DayDST", "Timedelta", "dt64arr_to_periodarr", "Timestamp", diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 415af96a29aa3..eb77190c451c0 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -23,6 +23,7 @@ from pandas._libs.tslibs.offsets import ( # noqa:F401 DateOffset, Day, + DayDST, _get_offset, to_offset, ) @@ -342,7 +343,23 @@ def _infer_daily_rule(self) -> str | None: return _maybe_add_count(monthly_rule, self.mdiffs[0]) if self.is_unique: - return self._get_daily_rule() + days = self.deltas[0] / _ONE_DAY + if days % 7 == 0: + # Weekly + wd = int_to_weekday[self.rep_stamp.weekday()] + alias = f"W-{wd}" + return _maybe_add_count(alias, days / 7) + + if getattr(self.index, "tz", None) is not None: + return _maybe_add_count("DayDST", days) + + if not self.is_unique_asi8: + # TODO: default to DayDST or Day? + return _maybe_add_count("DayDST", days) + + if self.is_unique_asi8: + days = self.deltas_asi8[0] / _ONE_DAY + return _maybe_add_count("D", days) if self._is_business_daily(): return "B" diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index cee99d23f8d90..0b76028de0cac 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -21,6 +21,7 @@ CustomBusinessMonthEnd, DateOffset, Day, + DayDST, Easter, FY5253Quarter, Hour, @@ -45,6 +46,7 @@ __all__ = [ "Day", + "DayDST", "BusinessDay", "BDay", "CustomBusinessDay",