Skip to content

Commit c01d1bc

Browse files
committed
BUG: tz-awareness for DatetimeIndex.normalize #2338
1 parent 930c140 commit c01d1bc

File tree

4 files changed: 96 additions (+96) and 20 deletions (-20)

pandas/src/datetime.pyx

Lines changed: 49 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1433,31 +1433,66 @@ cdef inline int m8_weekday(int64_t val):
14331433
cdef int64_t DAY_NS = 86400000000000LL
14341434

14351435

1436-
def date_normalize(ndarray[int64_t] stamps):
1436+
def date_normalize(ndarray[int64_t] stamps, tz=None):
14371437
cdef:
14381438
Py_ssize_t i, n = len(stamps)
1439-
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
14401439
pandas_datetimestruct dts
1440+
_TSObject tso
1441+
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
14411442

1442-
for i in range(n):
1443-
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
1444-
dts.hour = 0
1445-
dts.min = 0
1446-
dts.sec = 0
1447-
dts.us = 0
1448-
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
1443+
if tz is not None:
1444+
for i in range(n):
1445+
tso = convert_to_tsobject(stamps[i], tz)
1446+
dts = tso.dts
1447+
dts.hour = 0
1448+
dts.min = 0
1449+
dts.sec = 0
1450+
dts.us = 0
1451+
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
1452+
else:
1453+
for i in range(n):
1454+
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
1455+
dts.hour = 0
1456+
dts.min = 0
1457+
dts.sec = 0
1458+
dts.us = 0
1459+
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
14491460

14501461
return result
14511462

1452-
def dates_normalized(ndarray[int64_t] stamps):
1463+
1464+
def dates_normalized(ndarray[int64_t] stamps, tz=None):
14531465
cdef:
14541466
Py_ssize_t i, n = len(stamps)
14551467
pandas_datetimestruct dts
14561468

1457-
for i in range(n):
1458-
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
1459-
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
1460-
return False
1469+
if tz is None or _is_utc(tz):
1470+
for i in range(n):
1471+
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
1472+
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
1473+
return False
1474+
elif _is_tzlocal(tz):
1475+
for i in range(n):
1476+
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
1477+
if (dts.min + dts.sec + dts.us) > 0:
1478+
return False
1479+
dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
1480+
dts.sec, dts.us, tz)
1481+
dt = dt + tz.utcoffset(dt)
1482+
if dt.hour > 0:
1483+
return False
1484+
else:
1485+
trans = _get_transitions(tz)
1486+
deltas = _get_deltas(tz)
1487+
for i in range(n):
1488+
# Adjust datetime64 timestamp, recompute datetimestruct
1489+
pos = trans.searchsorted(stamps[i]) - 1
1490+
inf = tz._transition_info[pos]
1491+
1492+
pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos],
1493+
PANDAS_FR_ns, &dts)
1494+
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
1495+
return False
14611496

14621497
return True
14631498

pandas/tseries/index.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -470,7 +470,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
470470

471471
def _mpl_repr(self):
472472
# how to represent ourselves to matplotlib
473-
return lib.ints_to_pydatetime(self.asi8)
473+
return lib.ints_to_pydatetime(self.asi8, self.tz)
474474

475475
def __repr__(self):
476476
from pandas.core.format import _format_datetime64
@@ -1175,8 +1175,9 @@ def normalize(self):
11751175
-------
11761176
normalized : DatetimeIndex
11771177
"""
1178-
new_values = lib.date_normalize(self.asi8)
1179-
return DatetimeIndex(new_values, freq='infer', name=self.name)
1178+
new_values = lib.date_normalize(self.asi8, self.tz)
1179+
return DatetimeIndex(new_values, freq='infer', name=self.name,
1180+
tz=self.tz)
11801181

11811182
def __iter__(self):
11821183
return iter(self._get_object_index())
@@ -1218,7 +1219,7 @@ def is_normalized(self):
12181219
"""
12191220
Returns True if all of the dates are at midnight ("no time")
12201221
"""
1221-
return lib.dates_normalized(self.asi8)
1222+
return lib.dates_normalized(self.asi8, self.tz)
12221223

12231224
def equals(self, other):
12241225
"""
@@ -1568,5 +1569,3 @@ def _in_range(start, end, rng_start, rng_end):
15681569
def _time_to_micros(time):
15691570
seconds = time.hour * 60 * 60 + 60 * time.minute + time.second
15701571
return 1000000 * seconds + time.microsecond
1571-
1572-

pandas/tseries/tests/test_timezones.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -725,6 +725,40 @@ def test_datetimeindex_tz(self):
725725
rng2 = DatetimeIndex(data=rng, tz='US/Eastern')
726726
self.assert_(rng.equals(rng2))
727727

728+
def test_normalize_tz(self):
729+
rng = date_range('1/1/2000 9:30', periods=10, freq='D',
730+
tz='US/Eastern')
731+
732+
result = rng.normalize()
733+
expected = date_range('1/1/2000', periods=10, freq='D',
734+
tz='US/Eastern')
735+
self.assert_(result.equals(expected))
736+
737+
self.assert_(result.is_normalized)
738+
self.assert_(not rng.is_normalized)
739+
740+
rng = date_range('1/1/2000 9:30', periods=10, freq='D',
741+
tz='UTC')
742+
743+
result = rng.normalize()
744+
expected = date_range('1/1/2000', periods=10, freq='D',
745+
tz='UTC')
746+
self.assert_(result.equals(expected))
747+
748+
self.assert_(result.is_normalized)
749+
self.assert_(not rng.is_normalized)
750+
751+
from dateutil.tz import tzlocal
752+
rng = date_range('1/1/2000 9:30', periods=10, freq='D',
753+
tz=tzlocal())
754+
result = rng.normalize()
755+
expected = date_range('1/1/2000', periods=10, freq='D',
756+
tz=tzlocal())
757+
self.assert_(result.equals(expected))
758+
759+
self.assert_(result.is_normalized)
760+
self.assert_(not rng.is_normalized)
761+
728762
if __name__ == '__main__':
729763
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
730764
exit=False)

vb_suite/timeseries.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,3 +167,11 @@ def date_range(start=None, end=None, periods=None, freq=None):
167167
period_setitem = \
168168
Benchmark("df['col'] = rng", setup,
169169
start_date=datetime(2012, 8, 1))
170+
171+
setup = common_setup + """
172+
rng = date_range('1/1/2000 9:30', periods=100000, freq='S', tz='US/Eastern')
173+
"""
174+
175+
datetimeindex_normalize = \
176+
Benchmark('rng.normalize()', setup,
177+
start_date=datetime(2012, 9, 1))

0 commit comments

Comments
 (0)