Skip to content

Commit 6367bd2

Browse files
authored
BUG: tz_localize needs to invalidate freq (#33553)
1 parent 91dcc3a commit 6367bd2

File tree

4 files changed

+37
-4
lines changed

4 files changed

+37
-4
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ Datetimelike
457457
- Bug where :meth:`PeriodIndex` raised when passed a :class:`Series` of strings (:issue:`26109`)
458458
- Bug in :class:`Timestamp` arithmetic when adding or subtracting a ``np.ndarray`` with ``timedelta64`` dtype (:issue:`33296`)
459459
- Bug in :meth:`DatetimeIndex.to_period` not inferring the frequency when called with no arguments (:issue:`33358`)
460-
460+
- Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original freq is no longer valid (:issue:`30511`)
461461

462462
Timedelta
463463
^^^^^^^^^

pandas/core/arrays/datetimes.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -886,15 +886,15 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
886886
DatetimeIndex(['2018-03-01 09:00:00-05:00',
887887
'2018-03-02 09:00:00-05:00',
888888
'2018-03-03 09:00:00-05:00'],
889-
dtype='datetime64[ns, US/Eastern]', freq='D')
889+
dtype='datetime64[ns, US/Eastern]', freq=None)
890890
891891
With ``tz=None``, we can remove the time zone information
892892
while keeping the local time (not converted to UTC):
893893
894894
>>> tz_aware.tz_localize(None)
895895
DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
896896
'2018-03-03 09:00:00'],
897-
dtype='datetime64[ns]', freq='D')
897+
dtype='datetime64[ns]', freq=None)
898898
899899
Be careful with DST changes. When there is sequential data, pandas can
900900
infer the DST time:
@@ -973,7 +973,16 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
973973
)
974974
new_dates = new_dates.view(DT64NS_DTYPE)
975975
dtype = tz_to_dtype(tz)
976-
return self._simple_new(new_dates, dtype=dtype, freq=self.freq)
976+
977+
freq = None
978+
if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])):
979+
# we can preserve freq
980+
# TODO: Also for fixed-offsets
981+
freq = self.freq
982+
elif tz is None and self.tz is None:
983+
# no-op
984+
freq = self.freq
985+
return self._simple_new(new_dates, dtype=dtype, freq=freq)
977986

978987
# ----------------------------------------------------------------
979988
# Conversion Methods - Vectorized analogues of Timestamp methods

pandas/tests/indexes/datetimes/test_timezones.py

+23
Original file line numberDiff line numberDiff line change
@@ -1161,3 +1161,26 @@ def test_iteration_preserves_nanoseconds(self, tz):
11611161
)
11621162
for i, ts in enumerate(index):
11631163
assert ts == index[i]
1164+
1165+
1166+
def test_tz_localize_invalidates_freq():
1167+
# we only preserve freq in unambiguous cases
1168+
1169+
# if localized to US/Eastern, this crosses a DST transition
1170+
dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="H")
1171+
assert dti.freq == "H"
1172+
1173+
result = dti.tz_localize(None) # no-op
1174+
assert result.freq == "H"
1175+
1176+
result = dti.tz_localize("UTC") # unambiguous freq preservation
1177+
assert result.freq == "H"
1178+
1179+
result = dti.tz_localize("US/Eastern", nonexistent="shift_forward")
1180+
assert result.freq is None
1181+
assert result.inferred_freq is None # i.e. we are not _too_ strict here
1182+
1183+
# Case where we _can_ keep freq because we're length==1
1184+
dti2 = dti[:1]
1185+
result = dti2.tz_localize("US/Eastern")
1186+
assert result.freq == "H"

pandas/tests/series/test_arithmetic.py

+1
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ def test_ser_cmp_result_names(self, names, op):
378378

379379
# datetime64tz dtype
380380
dti = dti.tz_localize("US/Central")
381+
dti._set_freq("infer") # freq not preserved by tz_localize
381382
ser = Series(dti).rename(names[1])
382383
result = op(ser, dti)
383384
assert result.name == names[2]

0 commit comments

Comments
 (0)