Skip to content

Commit 5541fd7

Browse files
dancsi authored and jreback committed
ENH: Add an errors flag to tz_localize
closes #13057 Author: dancsi <[email protected]> Author: Daniel Siladji <[email protected]> Closes #13058 from dancsi/master and squashes the following commits: d4caf8b [Daniel Siladji] ENH: Add an errors flag to tz_localize
1 parent c089110 commit 5541fd7

File tree

5 files changed

+76
-9
lines changed

5 files changed

+76
-9
lines changed

doc/source/whatsnew/v0.18.2.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@ New features
2424

2525

2626

27+
.. _whatsnew_0182.enhancements.other:
2728

29+
Other enhancements
30+
^^^^^^^^^^^^^^^^^^
2831

29-
30-
32+
- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so nonexistent timestamps can optionally be coerced to ``NaT``. The default behaviour remains to raise a ``NonExistentTimeError`` (:issue:`13057`)
3133

3234

3335

pandas/tseries/index.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -1812,7 +1812,7 @@ def tz_convert(self, tz):
18121812

18131813
@deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous',
18141814
mapping={True: 'infer', False: 'raise'})
1815-
def tz_localize(self, tz, ambiguous='raise'):
1815+
def tz_localize(self, tz, ambiguous='raise', errors='raise'):
18161816
"""
18171817
Localize tz-naive DatetimeIndex to given time zone (using
18181818
pytz/dateutil), or remove timezone from tz-aware DatetimeIndex
@@ -1832,6 +1832,15 @@ def tz_localize(self, tz, ambiguous='raise'):
18321832
- 'NaT' will return NaT where there are ambiguous times
18331833
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
18341834
times
1835+
errors : 'raise', 'coerce', default 'raise'
1836+
- 'raise' will raise a NonExistentTimeError if a timestamp is not
1837+
valid in the specified timezone (e.g. due to a transition from
1838+
or to DST time)
1839+
- 'coerce' will return NaT if the timestamp can not be converted
1840+
into the specified timezone
1841+
1842+
.. versionadded:: 0.18.2
1843+
18351844
infer_dst : boolean, default False (DEPRECATED)
18361845
Attempt to infer fall dst-transition hours based on order
18371846
@@ -1854,7 +1863,8 @@ def tz_localize(self, tz, ambiguous='raise'):
18541863
# Convert to UTC
18551864

18561865
new_dates = tslib.tz_localize_to_utc(self.asi8, tz,
1857-
ambiguous=ambiguous)
1866+
ambiguous=ambiguous,
1867+
errors=errors)
18581868
new_dates = new_dates.view(_NS_DTYPE)
18591869
return self._shallow_copy(new_dates, tz=tz)
18601870

pandas/tseries/tests/test_timezones.py

+17
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,23 @@ def test_ambiguous_nat(self):
533533
di_test = DatetimeIndex(times, tz='US/Eastern')
534534
self.assert_numpy_array_equal(di_test, localized)
535535

536+
def test_nonexistent_raise_coerce(self):
537+
# See issue 13057
538+
from pytz.exceptions import NonExistentTimeError
539+
times = ['2015-03-08 01:00', '2015-03-08 02:00', '2015-03-08 03:00']
540+
index = DatetimeIndex(times)
541+
tz = 'US/Eastern'
542+
self.assertRaises(NonExistentTimeError,
543+
index.tz_localize, tz=tz)
544+
self.assertRaises(NonExistentTimeError,
545+
index.tz_localize, tz=tz, errors='raise')
546+
result = index.tz_localize(tz=tz, errors='coerce')
547+
test_times = ['2015-03-08 01:00-05:00', 'NaT',
548+
'2015-03-08 03:00-04:00']
549+
expected = DatetimeIndex(test_times)\
550+
.tz_localize('UTC').tz_convert('US/Eastern')
551+
tm.assert_index_equal(result, expected)
552+
536553
# test utility methods
537554
def test_infer_tz(self):
538555
eastern = self.tz('US/Eastern')

pandas/tseries/tests/test_tslib.py

+23
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,29 @@ def test_tz_localize_ambiguous(self):
319319
'tz_localize to localize'):
320320
Timestamp('2011-01-01').tz_convert('Asia/Tokyo')
321321

322+
def test_tz_localize_nonexistent(self):
323+
# See issue 13057
324+
from pytz.exceptions import NonExistentTimeError
325+
times = ['2015-03-08 02:00', '2015-03-08 02:30',
326+
'2015-03-29 02:00', '2015-03-29 02:30']
327+
timezones = ['US/Eastern', 'US/Pacific',
328+
'Europe/Paris', 'Europe/Belgrade']
329+
for t, tz in zip(times, timezones):
330+
ts = Timestamp(t)
331+
self.assertRaises(NonExistentTimeError, ts.tz_localize,
332+
tz)
333+
self.assertRaises(NonExistentTimeError, ts.tz_localize,
334+
tz, errors='raise')
335+
self.assertIs(ts.tz_localize(tz, errors='coerce'),
336+
pd.NaT)
337+
338+
def test_tz_localize_errors_ambiguous(self):
339+
# See issue 13057
340+
from pytz.exceptions import AmbiguousTimeError
341+
ts = pd.Timestamp('2015-11-1 01:00')
342+
self.assertRaises(AmbiguousTimeError,
343+
ts.tz_localize, 'US/Pacific', errors='coerce')
344+
322345
def test_tz_localize_roundtrip(self):
323346
for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']:
324347
for t in ['2014-02-01 09:00', '2014-07-08 09:00',

pandas/tslib.pyx

+20-5
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ class Timestamp(_Timestamp):
460460
def is_year_end(self):
461461
return self._get_start_end_field('is_year_end')
462462

463-
def tz_localize(self, tz, ambiguous='raise'):
463+
def tz_localize(self, tz, ambiguous='raise', errors='raise'):
464464
"""
465465
Convert naive Timestamp to local time zone, or remove
466466
timezone from tz-aware Timestamp.
@@ -475,6 +475,14 @@ class Timestamp(_Timestamp):
475475
that this flag is only applicable for ambiguous fall dst dates)
476476
- 'NaT' will return NaT for an ambiguous time
477477
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
478+
errors : 'raise', 'coerce', default 'raise'
479+
- 'raise' will raise a NonExistentTimeError if a timestamp is not
480+
valid in the specified timezone (e.g. due to a transition from
481+
or to DST time)
482+
- 'coerce' will return NaT if the timestamp can not be converted
483+
into the specified timezone
484+
485+
.. versionadded:: 0.18.2
478486
479487
Returns
480488
-------
@@ -494,7 +502,7 @@ class Timestamp(_Timestamp):
494502
if not isinstance(ambiguous, basestring):
495503
ambiguous = [ambiguous]
496504
value = tz_localize_to_utc(np.array([self.value],dtype='i8'), tz,
497-
ambiguous=ambiguous)[0]
505+
ambiguous=ambiguous, errors=errors)[0]
498506
return Timestamp(value, tz=tz)
499507
else:
500508
if tz is None:
@@ -3943,7 +3951,8 @@ cpdef ndarray _unbox_utcoffsets(object transinfo):
39433951

39443952
@cython.boundscheck(False)
39453953
@cython.wraparound(False)
3946-
def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None):
3954+
def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
3955+
object errors='raise'):
39473956
"""
39483957
Localize tzinfo-naive DateRange to given time zone (using pytz). If
39493958
there are ambiguities in the values, raise AmbiguousTimeError.
@@ -3960,9 +3969,12 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None):
39603969
ndarray[int64_t] result, result_a, result_b, dst_hours
39613970
pandas_datetimestruct dts
39623971
bint infer_dst = False, is_dst = False, fill = False
3972+
bint is_coerce = errors == 'coerce', is_raise = errors == 'raise'
39633973

39643974
# Vectorized version of DstTzInfo.localize
39653975

3976+
assert is_coerce or is_raise
3977+
39663978
if not have_pytz:
39673979
raise Exception("Could not find pytz module")
39683980

@@ -4092,8 +4104,11 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None):
40924104
elif right != NPY_NAT:
40934105
result[i] = right
40944106
else:
4095-
stamp = Timestamp(vals[i])
4096-
raise pytz.NonExistentTimeError(stamp)
4107+
if is_coerce:
4108+
result[i] = NPY_NAT
4109+
else:
4110+
stamp = Timestamp(vals[i])
4111+
raise pytz.NonExistentTimeError(stamp)
40974112

40984113
return result
40994114

0 commit comments

Comments
 (0)