From 24fa858811d0e422f4bc81d82a8a2484f65c8b2f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 12 Nov 2020 11:37:23 -0800 Subject: [PATCH 1/7] BUG: freq inference for tz-aware DatetimeIndex --- .../tseries/frequencies/test_inference.py | 26 +++++++++++++++++++ pandas/tseries/frequencies.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index 95edd038dab9b..64e14107864d1 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -367,6 +367,32 @@ def test_infer_freq_business_hour(data, expected): assert idx.inferred_freq == expected +def test_infer_freq_across_dst_not_daily(): + # GH#37295 + dti = date_range( + start=Timestamp("2019-03-26 00:00:00-0400", tz="Canada/Eastern"), + end=Timestamp("2020-10-17 00:00:00-0400", tz="Canada/Eastern"), + freq="D", + ) + diff = dti - dti.shift() + assert not diff.is_unique + + assert dti.inferred_freq is None + + dti2 = DatetimeIndex(dti._with_freq(None), freq="infer") + assert dti2.freq is None + + # Comment in DatetimeArray._generate_range says that we knowingly + # assign a maybe-incorrect freq in pd.date_range: + # + # We break Day arithmetic (fixed 24 hour) here and opt for + # Day to mean calendar day (23/24/25 hour). Therefore, strip + # tz info from start and day to avoid DST arithmetic + # + # As long as that is used, the following assertion will fail + # assert dti.freq is None + + def test_not_monotonic(): rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"]) rng = rng[::-1] diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 8ef6dac2862db..5e412564a68ce 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -320,7 +320,7 @@ def _infer_daily_rule(self) -> Optional[str]: if monthly_rule: return _maybe_add_count(monthly_rule, self.mdiffs[0]) - if self.is_unique: + if self.is_unique_asi8: days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly From 0de9228ddd579792ebb332ded16f7014f0867420 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 9 Jun 2021 11:39:02 -0700 Subject: [PATCH 2/7] merge so i can merge, even if not passing --- pandas/tests/resample/test_datetime_index.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index c23a22448fbb0..8514a7cfde561 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1597,7 +1597,11 @@ def test_downsample_dst_at_midnight(): dti = date_range("2018-11-03", periods=3).tz_localize( "America/Havana", ambiguous=True ) - dti = DatetimeIndex(dti, freq="D") + with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst"): + # Check that we are requiring ambiguous be passed explicitly + dti = DatetimeIndex(dti, freq="D") + dti = DatetimeIndex(dti, freq="D", ambiguous=True) + expected = DataFrame([7.5, 28.0, 44.5], index=dti) tm.assert_frame_equal(result, expected) From 7c5ae283138141a1e803e21c547bb71d970ab6f8 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 8 Nov 2021 18:45:13 -0800 Subject: [PATCH 3/7] Implement DayDST --- pandas/_libs/tslibs/__init__.py | 2 + pandas/_libs/tslibs/offsets.pxd | 2 +- pandas/_libs/tslibs/offsets.pyx | 49 +++++++- pandas/_libs/tslibs/period.pyx | 8 +- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/core/arrays/datetimelike.py | 4 +- pandas/core/arrays/datetimes.py | 19 ++- pandas/core/arrays/period.py | 4 + pandas/tests/frame/test_block_internals.py | 3 +- .../tests/indexes/datetimes/test_indexing.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 5 +- pandas/tests/resample/test_datetime_index.py | 8 +- pandas/tests/resample/test_period_index.py | 2 +- .../series/accessors/test_dt_accessor.py | 112 ++++++++++-------- pandas/tests/series/indexing/test_setitem.py | 5 +- .../tseries/frequencies/test_inference.py | 20 ++-- pandas/tests/tseries/offsets/test_offsets.py | 13 +- pandas/tests/tslibs/test_api.py | 1 + pandas/tseries/frequencies.py | 19 ++- pandas/tseries/offsets.py | 2 + 20 files changed, 196 insertions(+), 86 deletions(-) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index e38ed9a20e55b..f4bd3255eb084 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -24,6 +24,7 @@ "to_offset", "Tick", "BaseOffset", + "DayDST", "tz_compare", ] @@ -43,6 +44,7 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.offsets import ( BaseOffset, + DayDST, Tick, to_offset, ) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd index 215c3f849281f..f27e2e0a8b928 100644 --- a/pandas/_libs/tslibs/offsets.pxd +++ b/pandas/_libs/tslibs/offsets.pxd @@ -1,7 +1,7 @@ from numpy cimport int64_t -cpdef to_offset(object obj) +cpdef to_offset(object obj, bint tzaware=*) cdef bint is_offset_object(object obj) cdef bint is_tick_object(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 00d02e096c976..2edf4052ec7e4 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -985,6 +985,41 @@ def delta_to_tick(delta: timedelta) -> Tick: return Nano(nanos) +cdef class DayDST(SingleConstructorOffset): + _adjust_dst = True + _attributes = tuple(["n", "normalize"]) + rule_code = "D" # used by parse_time_string + + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n) + if normalize: + # GH#21427 + raise ValueError( + "Tick offset with `normalize=True` are not allowed." + ) + + def is_on_offset(self, dt) -> bool: + return True + + @apply_wraps + def apply(self, other): + return other + Timedelta(days=self.n) + + @apply_index_wraps + def apply_index(self, dti): + return self._apply_array(dti) + + @apply_array_wraps + def _apply_array(self, dtarr): + return dtarr + Timedelta(days=self.n) + + @cache_readonly + def freqstr(self) -> str: + if self.n != 1: + return str(self.n) + "DayDST" + return "DayDST" + + # -------------------------------------------------------------------- cdef class RelativeDeltaOffset(BaseOffset): @@ -3543,7 +3578,7 @@ def _get_offset(name: str) -> BaseOffset: return _offset_map[name] -cpdef to_offset(freq): +cpdef to_offset(freq, bint tzaware=False): """ Return DateOffset object from string or tuple representation or datetime.timedelta object. @@ -3551,6 +3586,8 @@ cpdef to_offset(freq): Parameters ---------- freq : str, tuple, datetime.timedelta, DateOffset or None + tzaware : bool, default False + If we have a string "D", whether to interpret that as DayDST. Returns ------- @@ -3603,6 +3640,14 @@ cpdef to_offset(freq): delta = None stride_sign = None + if freq.endswith("DayDST"): + head = freq[:-6] + if len(head): + n = int(head) + else: + n = 1 + return DayDST(n) + try: split = opattern.split(freq) if split[-1] != "" and not split[-1].isspace(): @@ -3647,6 +3692,8 @@ cpdef to_offset(freq): if delta is None: raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) + if type(delta) is Day and tzaware: + return DayDST(delta.n) return delta diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f594e0a8bdafd..cf0dfed59a10f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -113,7 +113,11 @@ from pandas._libs.tslibs.offsets cimport ( to_offset, ) -from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG +from pandas._libs.tslibs.offsets import ( + INVALID_FREQ_ERR_MSG, + Day, + DayDST, +) cdef: enum: @@ -1626,6 +1630,8 @@ cdef class _Period(PeriodMixin): freq = dtype.date_offset freq = to_offset(freq) + if isinstance(freq, DayDST): + freq = Day(freq.n) if freq.n <= 0: raise ValueError("Frequency must be positive, because it " diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 43f9be3fef5ee..2a6f1019eaf87 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1346,7 +1346,7 @@ class Timedelta(_Timedelta): ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset - unit = to_offset(freq).nanos + unit = to_offset(freq, tzaware=False).nanos arr = np.array([self.value], dtype="i8") result = round_nsint64(arr, mode, unit)[0] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f8aa1656c8c30..a5309c7a8a7da 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -26,6 +26,7 @@ ) from pandas._libs.tslibs import ( BaseOffset, + DayDST, IncompatibleFrequency, NaT, NaTType, @@ -1110,8 +1111,9 @@ def _add_timedeltalike_scalar(self, other): new_values = new_values.view(self._ndarray.dtype) new_freq = None - if isinstance(self.freq, Tick) or is_period_dtype(self.dtype): + if isinstance(self.freq, (Tick, DayDST)) or is_period_dtype(self.dtype): # adding a scalar preserves freq + # TODO: sure this is accurate for DayDST new_freq = self.freq # error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked" diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4fecbe4be9681..665508e8ccd6a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -80,6 +80,7 @@ from pandas.tseries.offsets import ( BDay, Day, + DayDST, Tick, ) @@ -366,7 +367,18 @@ def _from_sequence_not_strict( ambiguous=ambiguous, ) - freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) + try: + freq, freq_infer = dtl.validate_inferred_freq( + freq, inferred_freq, freq_infer + ) + except ValueError as err: + if isinstance(freq, Tick) and isinstance(inferred_freq, DayDST): + # It is possible that both could be valid, so we'll + # go through _validate_frequency below + inferred_freq = None + freq_infer = False + else: + raise if explicit_none: freq = None @@ -433,10 +445,13 @@ def _generate_range( end, end_tz, end, freq, tz, ambiguous, nonexistent ) if freq is not None: + # FIXME: dont do this # We break Day arithmetic (fixed 24 hour) here and opt for # Day to mean calendar day (23/24/25 hour). Therefore, strip # tz info from start and day to avoid DST arithmetic - if isinstance(freq, Day): + if isinstance(freq, (Day, DayDST)): + if tz is not None: + freq = DayDST(freq.n) if start is not None: start = start.tz_localize(None) if end is not None: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 2f36b72229225..ba1fe7559ab6d 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -28,6 +28,8 @@ from pandas._libs.tslibs.dtypes import FreqGroup from pandas._libs.tslibs.fields import isleapyear_arr from pandas._libs.tslibs.offsets import ( + Day, + DayDST, Tick, delta_to_tick, ) @@ -1092,6 +1094,8 @@ def dt64arr_to_periodarr(data, freq, tz=None): elif isinstance(data, (ABCIndex, ABCSeries)): data = data._values + if isinstance(freq, DayDST): + freq = Day(freq.n) freq = Period._maybe_convert_freq(freq) base = freq._period_dtype_code return c_dt64arr_to_periodarr(data.view("i8"), base, tz), freq diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 34854be29ad1f..9c711f31f49c5 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -42,6 +42,7 @@ def test_setitem_invalidates_datetime_index_freq(self): # `freq` attribute on the underlying DatetimeIndex dti = date_range("20130101", periods=3, tz="US/Eastern") + orig_freq = dti.freq ts = dti[1] df = DataFrame({"B": dti}) @@ -51,7 +52,7 @@ def test_setitem_invalidates_datetime_index_freq(self): assert df["B"]._values.freq is None # check that the DatetimeIndex was not altered in place - assert dti.freq == "D" + assert dti.freq is orig_freq assert dti[1] == ts def test_cast_internals(self, float_frame): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index c3152b77d39df..0b2cb00505e63 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -81,7 +81,7 @@ def test_getitem(self): result = idx[4::-1] expected = DatetimeIndex( ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"], - freq="-1D", + freq=-1 * idx.freq, tz=idx.tz, name="idx", ) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 3201b038c624b..3bec7df9feffe 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -377,7 +377,10 @@ def test_setops_preserve_freq(self, tz): result = rng[:50].intersection(rng[25:75]) assert result.name == rng.name - assert result.freqstr == "D" + if tz is None: + assert result.freqstr == "D" + else: + assert result.freqstr == "DayDST" assert result.tz == rng.tz nofreq = DatetimeIndex(list(rng[25:75]), name="other") diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 1c52a6d332f65..56a15ca046e88 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -874,7 +874,7 @@ def test_resample_origin_epoch_with_tz_day_vs_24h(): result_1 = ts_1.resample("D", origin="epoch").mean() result_2 = ts_1.resample("24H", origin="epoch").mean() - tm.assert_series_equal(result_1, result_2) + tm.assert_series_equal(result_1, result_2, check_freq=False) # check that we have the same behavior with epoch even if we are not timezone aware ts_no_tz = ts_1.tz_localize(None) @@ -897,7 +897,7 @@ def test_resample_origin_with_day_freq_on_dst(): # GH 31809 tz = "America/Chicago" - def _create_series(values, timestamps, freq="D"): + def _create_series(values, timestamps, freq="DayDST"): return Series( values, index=DatetimeIndex( @@ -1484,7 +1484,7 @@ def test_resample_dst_anchor(): dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz="US/Eastern") df = DataFrame([5], index=dti) - dti = DatetimeIndex(df.index.normalize(), freq="D") + dti = DatetimeIndex(df.index.normalize(), freq="DayDST") expected = DataFrame([5], index=dti) tm.assert_frame_equal(df.resample(rule="D").sum(), expected) df.resample(rule="MS").sum() @@ -1621,7 +1621,7 @@ def test_downsample_dst_at_midnight(): with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst"): # Check that we are requiring ambiguous be passed explicitly dti = DatetimeIndex(dti, freq="D") - dti = DatetimeIndex(dti, freq="D", ambiguous=True) + dti = DatetimeIndex(dti, freq="DayDST", ambiguous=True) expected = DataFrame([7.5, 28.0, 44.5], index=dti) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 70d37f83c7f0c..dc129e1459517 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -281,7 +281,7 @@ def test_resample_with_pytz(self): expected = Series( 2.0, index=pd.DatetimeIndex( - ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D" + ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="DayDST" ), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index eb7e1d4268605..fcb4bfdb54f00 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -98,62 +98,68 @@ def compare(s, name): Series(date_range("20130101", periods=5, freq="s"), name="xxx"), Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"), ] - for s in cases: + for ser in cases: + assert ser.dt.tz is None + for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: - compare(s, prop) + compare(ser, prop) for prop in ok_for_dt_methods: - getattr(s.dt, prop) + getattr(ser.dt, prop) - result = s.dt.to_pydatetime() + result = ser.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object - result = s.dt.tz_localize("US/Eastern") - exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern") - expected = Series(exp_values, index=s.index, name="xxx") + result = ser.dt.tz_localize("US/Eastern") + exp_values = DatetimeIndex(ser.values).tz_localize("US/Eastern") + expected = Series(exp_values, index=ser.index, name="xxx") tm.assert_series_equal(result, expected) + assert ser.dt.tz is None tz_result = result.dt.tz assert str(tz_result) == "US/Eastern" - freq_result = s.dt.freq - assert freq_result == DatetimeIndex(s.values, freq="infer").freq + freq_result = ser.dt.freq + assert freq_result == DatetimeIndex(ser.values, freq="infer").freq # let's localize, then convert - result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") + result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") exp_values = ( - DatetimeIndex(s.values).tz_localize("UTC").tz_convert("US/Eastern") + DatetimeIndex(ser.values).tz_localize("UTC").tz_convert("US/Eastern") ) - expected = Series(exp_values, index=s.index, name="xxx") + expected = Series(exp_values, index=ser.index, name="xxx") tm.assert_series_equal(result, expected) # datetimeindex with tz - s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") + ser = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: - compare(s, prop) + compare(ser, prop) for prop in ok_for_dt_methods: - getattr(s.dt, prop) + getattr(ser.dt, prop) - result = s.dt.to_pydatetime() + result = ser.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object - result = s.dt.tz_convert("CET") - expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx") + result = ser.dt.tz_convert("CET") + expected = Series(ser._values.tz_convert("CET"), index=ser.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "CET" - freq_result = s.dt.freq - assert freq_result == DatetimeIndex(s.values, freq="infer").freq + freq_result = ser.dt.freq + assert ( + freq_result + == DatetimeIndex(ser._values._with_freq(None), freq="infer").freq + ) # timedelta index cases = [ @@ -166,83 +172,85 @@ def compare(s, name): name="xxx", ), ] - for s in cases: + for ser in cases: for prop in ok_for_td: # we test freq below if prop != "freq": - compare(s, prop) + compare(ser, prop) for prop in ok_for_td_methods: - getattr(s.dt, prop) + getattr(ser.dt, prop) - result = s.dt.components + result = ser.dt.components assert isinstance(result, DataFrame) - tm.assert_index_equal(result.index, s.index) + tm.assert_index_equal(result.index, ser.index) - result = s.dt.to_pytimedelta() + result = ser.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object - result = s.dt.total_seconds() + result = ser.dt.total_seconds() assert isinstance(result, Series) assert result.dtype == "float64" - freq_result = s.dt.freq - assert freq_result == TimedeltaIndex(s.values, freq="infer").freq + freq_result = ser.dt.freq + assert freq_result == TimedeltaIndex(ser.values, freq="infer").freq # both index = date_range("20130101", periods=3, freq="D") - s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx") + ser = Series( + date_range("20140204", periods=3, freq="s"), index=index, name="xxx" + ) exp = Series( np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx" ) - tm.assert_series_equal(s.dt.year, exp) + tm.assert_series_equal(ser.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") - tm.assert_series_equal(s.dt.month, exp) + tm.assert_series_equal(ser.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") - tm.assert_series_equal(s.dt.second, exp) + tm.assert_series_equal(ser.dt.second, exp) - exp = Series([s[0]] * 3, index=index, name="xxx") - tm.assert_series_equal(s.dt.normalize(), exp) + exp = Series([ser[0]] * 3, index=index, name="xxx") + tm.assert_series_equal(ser.dt.normalize(), exp) # periodindex cases = [Series(period_range("20130101", periods=5, freq="D"), name="xxx")] - for s in cases: + for ser in cases: for prop in ok_for_period: # we test freq below if prop != "freq": - compare(s, prop) + compare(ser, prop) for prop in ok_for_period_methods: - getattr(s.dt, prop) + getattr(ser.dt, prop) - freq_result = s.dt.freq - assert freq_result == PeriodIndex(s.values).freq + freq_result = ser.dt.freq + assert freq_result == PeriodIndex(ser.values).freq # test limited display api def get_dir(s): - results = [r for r in s.dt.__dir__() if not r.startswith("_")] + results = [r for r in ser.dt.__dir__() if not r.startswith("_")] return sorted(set(results)) - s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") - results = get_dir(s) + ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx") + results = get_dir(ser) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) - s = Series( + ser = Series( period_range("20130101", periods=5, freq="D", name="xxx").astype(object) ) - results = get_dir(s) + results = get_dir(ser) tm.assert_almost_equal( results, sorted(set(ok_for_period + ok_for_period_methods)) ) # 11295 # ambiguous time error on the conversions - s = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") - s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") - results = get_dir(s) + ser = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") + ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") + results = get_dir(ser) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) exp_values = date_range( "2015-01-01", "2016-01-01", freq="T", tz="UTC" @@ -250,18 +258,18 @@ def get_dir(s): # freq not preserved by tz_localize above exp_values = exp_values._with_freq(None) expected = Series(exp_values, name="xxx") - tm.assert_series_equal(s, expected) + tm.assert_series_equal(ser, expected) # no setting allowed - s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") + ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): - s.dt.hour = 5 + ser.dt.hour = 5 # trying to set a copy msg = "modifications to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): with pytest.raises(com.SettingWithCopyError, match=msg): - s.dt.hour[0] = 5 + ser.dt.hour[0] = 5 @pytest.mark.parametrize( "method, dates", diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index ea754127b98e9..6b484ca43daa2 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -323,11 +323,12 @@ def test_setitem_invalidates_datetime_index_freq(self): # `freq` attribute on the underlying DatetimeIndex dti = date_range("20130101", periods=3, tz="US/Eastern") + orig_freq = dti.freq ts = dti[1] ser = Series(dti) assert ser._values is not dti assert ser._values._data.base is not dti._data._data.base - assert dti.freq == "D" + assert dti.freq is orig_freq ser.iloc[1] = NaT assert ser._values.freq is None @@ -335,7 +336,7 @@ def test_setitem_invalidates_datetime_index_freq(self): assert ser._values is not dti assert ser._values._data.base is not dti._data._data.base assert dti[1] == ts - assert dti.freq == "D" + assert dti.freq is orig_freq def test_dt64tz_setitem_does_not_mutate_dti(self): # GH#21907, GH#24096 diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index 621ffd31ad570..2e716977f79dc 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -267,6 +267,10 @@ def test_infer_freq_index(freq, expected): def test_infer_freq_tz(tz_naive_fixture, expected, dates): # see gh-7310 tz = tz_naive_fixture + + if expected == "D" and tz is not None: + expected = "DayDST" + idx = DatetimeIndex(dates, tz=tz) assert idx.inferred_freq == expected @@ -387,23 +391,15 @@ def test_infer_freq_across_dst_not_daily(): end=Timestamp("2020-10-17 00:00:00-0400", tz="Canada/Eastern"), freq="D", ) + assert dti.freq == "DayDST" + diff = dti - dti.shift() assert not diff.is_unique - assert dti.inferred_freq is None + assert dti.inferred_freq == "DayDST" dti2 = DatetimeIndex(dti._with_freq(None), freq="infer") - assert dti2.freq is None - - # Comment in DatetimeArray._generate_range says that we knowingly - # assign a maybe-incorrect freq in pd.date_range: - # - # We break Day arithmetic (fixed 24 hour) here and opt for - # Day to mean calendar day (23/24/25 hour). Therefore, strip - # tz info from start and day to avoid DST arithmetic - # - # As long as that is used, the following assertion will fail - # assert dti.freq is None + assert dti2.freq == "DayDST" def test_not_monotonic(): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 0c79c0b64f4cd..8207c6bfd5f0f 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -50,6 +50,7 @@ CustomBusinessMonthBegin, CustomBusinessMonthEnd, DateOffset, + DayDST, Easter, FY5253Quarter, LastWeekOfMonth, @@ -69,6 +70,7 @@ class TestCommon(Base): # used for .apply and .rollforward expecteds = { "Day": Timestamp("2011-01-02 09:00:00"), + "DayDST": Timestamp("2011-01-02 09:00:00"), "DateOffset": Timestamp("2011-01-02 09:00:00"), "BusinessDay": Timestamp("2011-01-03 09:00:00"), "CustomBusinessDay": Timestamp("2011-01-03 09:00:00"), @@ -161,13 +163,13 @@ def test_offset_freqstr(self, offset_types): offset = self._get_offset(offset_types) freqstr = offset.freqstr - if freqstr not in ("", "", "LWOM-SAT"): + if freqstr not in ("DayDST", "", "", "LWOM-SAT"): code = _get_offset(freqstr) assert offset.rule_code == code def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False): - if normalize and issubclass(offset, Tick): + if normalize and issubclass(offset, (Tick, DayDST)): # normalize=True disallowed for Tick subclasses GH#21427 return @@ -258,6 +260,7 @@ def test_rollforward(self, offset_types): # result will not be changed if the target is on the offset no_changes = [ "Day", + "DayDST", "MonthBegin", "SemiMonthBegin", "YearBegin", @@ -336,6 +339,7 @@ def test_rollback(self, offset_types): # result will not be changed if the target is on the offset for n in [ "Day", + "DayDST", "MonthBegin", "SemiMonthBegin", "YearBegin", @@ -357,6 +361,7 @@ def test_rollback(self, offset_types): normalized = { "Day": Timestamp("2010-12-31 00:00:00"), + "DayDST": Timestamp("2010-12-31 00:00:00"), "DateOffset": Timestamp("2010-12-31 00:00:00"), "MonthBegin": Timestamp("2010-12-01 00:00:00"), "SemiMonthBegin": Timestamp("2010-12-15 00:00:00"), @@ -388,7 +393,7 @@ def test_is_on_offset(self, offset_types): assert offset_s.is_on_offset(dt) # when normalize=True, is_on_offset checks time is 00:00:00 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, DayDST)): # normalize=True disallowed for Tick subclasses GH#21427 return offset_n = self._get_offset(offset_types, normalize=True) @@ -420,7 +425,7 @@ def test_add(self, offset_types, tz_naive_fixture): assert result == expected_localize # normalize=True, disallowed for Tick subclasses GH#21427 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, DayDST)): return offset_s = self._get_offset(offset_types, normalize=True) expected = Timestamp(expected.date()) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 4ded555ed8f73..0cc0bb224a43f 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -37,6 +37,7 @@ def test_namespace(): "IncompatibleFrequency", "Resolution", "Tick", + "DayDST", "Timedelta", "dt64arr_to_periodarr", "Timestamp", diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 2d9078af95279..bde6811134791 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -23,6 +23,7 @@ from pandas._libs.tslibs.offsets import ( # noqa:F401 DateOffset, Day, + DayDST, _get_offset, to_offset, ) @@ -340,8 +341,24 @@ def _infer_daily_rule(self) -> str | None: if monthly_rule: return _maybe_add_count(monthly_rule, self.mdiffs[0]) + if self.is_unique: + days = self.deltas[0] / _ONE_DAY + if days % 7 == 0: + # Weekly + wd = int_to_weekday[self.rep_stamp.weekday()] + alias = f"W-{wd}" + return _maybe_add_count(alias, days / 7) + + if getattr(self.index, "tz", None) is not None: + return _maybe_add_count("DayDST", days) + + if not self.is_unique_asi8: + # TODO: default to DayDST or Day? + return _maybe_add_count("DayDST", days) + if self.is_unique_asi8: - return self._get_daily_rule() + days = self.deltas_asi8[0] / _ONE_DAY + return _maybe_add_count("D", days) if self._is_business_daily(): return "B" diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index cee99d23f8d90..0b76028de0cac 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -21,6 +21,7 @@ CustomBusinessMonthEnd, DateOffset, Day, + DayDST, Easter, FY5253Quarter, Hour, @@ -45,6 +46,7 @@ __all__ = [ "Day", + "DayDST", "BusinessDay", "BDay", "CustomBusinessDay", From e743545c4239c339e32039fe4c2ddf8abfcd69fc Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 8 Nov 2021 20:10:56 -0800 Subject: [PATCH 4/7] interpret D depending on tz --- pandas/core/arrays/datetimes.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 665508e8ccd6a..6540b9472c14a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -371,7 +371,7 @@ def _from_sequence_not_strict( freq, freq_infer = dtl.validate_inferred_freq( freq, inferred_freq, freq_infer ) - except ValueError as err: + except ValueError: if isinstance(freq, Tick) and isinstance(inferred_freq, DayDST): # It is possible that both could be valid, so we'll # go through _validate_frequency below @@ -419,7 +419,6 @@ def _generate_range( "Of the four parameters: start, end, periods, " "and freq, exactly three must be specified" ) - freq = to_offset(freq) if start is not None: start = Timestamp(start) @@ -434,6 +433,11 @@ def _generate_range( start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz = _infer_tz_from_endpoints(start, end, tz) + # If freq is a str, then we will interpret "D" as Day if we are tznaive + # and as DayDST if tzaware + aware = tz is not None + freq = to_offset(freq, tzaware=aware) + if tz is not None: # Localize the start and end arguments start_tz = None if start is None else start.tz @@ -445,22 +449,25 @@ def _generate_range( end, end_tz, end, freq, tz, ambiguous, nonexistent ) if freq is not None: - # FIXME: dont do this - # We break Day arithmetic (fixed 24 hour) here and opt for - # Day to mean calendar day (23/24/25 hour). Therefore, strip - # tz info from start and day to avoid DST arithmetic - if isinstance(freq, (Day, DayDST)): - if tz is not None: - freq = DayDST(freq.n) + + freq_tmp = freq + if isinstance(freq, DayDST): + # Much more performant to use generate_regular_range and + # then tz_localize than to go through generate_range + freq_tmp = Day(freq.n) + + if isinstance(freq_tmp, Day): if start is not None: start = start.tz_localize(None) if end is not None: end = end.tz_localize(None) - if isinstance(freq, Tick): - values = generate_regular_range(start, end, periods, freq) + if isinstance(freq_tmp, Tick): + values = generate_regular_range(start, end, periods, freq_tmp) else: - xdr = generate_range(start=start, end=end, periods=periods, offset=freq) + xdr = generate_range( + start=start, end=end, periods=periods, offset=freq_tmp + ) values = np.array([x.value for x in xdr], dtype=np.int64) _tz = start.tz if start is not None else end.tz From 8cd2dd8781a75edda00abeaf01d3007214bae5d1 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Nov 2021 13:23:05 -0800 Subject: [PATCH 5/7] revert last --- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/core/arrays/datetimes.py | 31 ++++++++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 112f6c21768ef..426859e7a5d51 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1025,7 +1025,7 @@ cdef class DayDST(SingleConstructorOffset): return True @apply_wraps - def apply(self, other): + def _apply(self, other): return other + Timedelta(days=self.n) @apply_index_wraps diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a0aaf2638666d..099ae812695bf 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -370,7 +370,7 @@ def _from_sequence_not_strict( freq, freq_infer = dtl.validate_inferred_freq( freq, inferred_freq, freq_infer ) - except ValueError: + except ValueError as err: if isinstance(freq, Tick) and isinstance(inferred_freq, DayDST): # It is possible that both could be valid, so we'll # go through _validate_frequency below @@ -418,6 +418,7 @@ def _generate_range( "Of the four parameters: start, end, periods, " "and freq, exactly three must be specified" ) + freq = to_offset(freq) if start is not None: start = Timestamp(start) @@ -432,11 +433,6 @@ def _generate_range( start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz = _infer_tz_from_endpoints(start, end, tz) - # If freq is a str, then we will interpret "D" as Day if we are tznaive - # and as DayDST if tzaware - aware = tz is not None - freq = to_offset(freq, tzaware=aware) - if tz is not None: # Localize the start and end arguments start_tz = None if start is None else start.tz @@ -448,25 +444,22 @@ def _generate_range( end, end_tz, end, freq, tz, ambiguous, nonexistent ) if freq is not None: - - freq_tmp = freq - if isinstance(freq, DayDST): - # Much more performant to use generate_regular_range and - # then tz_localize than to go through generate_range - freq_tmp = Day(freq.n) - - if isinstance(freq_tmp, Day): + # FIXME: dont do this + # We break Day arithmetic (fixed 24 hour) here and opt for + # Day to mean calendar day (23/24/25 hour). Therefore, strip + # tz info from start and day to avoid DST arithmetic + if isinstance(freq, (Day, DayDST)): + if tz is not None: + freq = DayDST(freq.n) if start is not None: start = start.tz_localize(None) if end is not None: end = end.tz_localize(None) - if isinstance(freq_tmp, Tick): - values = generate_regular_range(start, end, periods, freq_tmp) + if isinstance(freq, Tick): + values = generate_regular_range(start, end, periods, freq) else: - xdr = generate_range( - start=start, end=end, periods=periods, offset=freq_tmp - ) + xdr = generate_range(start=start, end=end, periods=periods, offset=freq) values = np.array([x.value for x in xdr], dtype=np.int64) _tz = start.tz if start is not None else end.tz From bb5483580fdd3ab66c6a3f8a7e07ffcf863469c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Nov 2021 18:53:00 -0800 Subject: [PATCH 6/7] fix doctest --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e283509206344..2793c8af08960 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1012,7 +1012,7 @@ def date_range( DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', '2018-01-05 00:00:00+09:00'], - dtype='datetime64[ns, Asia/Tokyo]', freq='D') + dtype='datetime64[ns, Asia/Tokyo]', freq='DayDST') `closed` controls whether to include `start` and `end` that are on the boundary. The default includes boundary points on either end. From a95ea877902346dc9462ef6f5b9b3b664cdef917 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 29 Nov 2021 11:34:15 -0800 Subject: [PATCH 7/7] lint fixup --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4e288c30fb788..1acd8576aae54 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -370,7 +370,7 @@ def _from_sequence_not_strict( freq, freq_infer = dtl.validate_inferred_freq( freq, inferred_freq, freq_infer ) - except ValueError as err: + except ValueError: if isinstance(freq, Tick) and isinstance(inferred_freq, DayDST): # It is possible that both could be valid, so we'll # go through _validate_frequency below