Skip to content

Commit 30d91c7

Browse files
committed
TST: add pandas/tseries/tests/test_timedeltas.py
API: add full timedelta parsing and conversion to np.timedelta64[ns]
1 parent d8280c1 commit 30d91c7

File tree

4 files changed

+197
-22
lines changed

4 files changed

+197
-22
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ API Changes
191191
- provide automatic dtype conversions on _reduce operations (:issue:`3371`)
192192
- exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`)
193193
- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)
194+
- moved timedelta support to ``pandas/tseries/timedeltas.py``; added timedelta string parsing
194195

195196
Internal Refactoring
196197
~~~~~~~~~~~~~~~~~~~~
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# pylint: disable-msg=E1101,W0612
2+
3+
from datetime import datetime, timedelta
4+
import nose
5+
import unittest
6+
7+
import numpy as np
8+
import pandas as pd
9+
10+
from pandas import (Index, Series, DataFrame, isnull, notnull,
11+
bdate_range, date_range, _np_version_under1p7)
12+
import pandas.core.common as com
13+
from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long
14+
from pandas import compat
15+
from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct
16+
from pandas.util.testing import (assert_series_equal,
17+
assert_frame_equal,
18+
assert_almost_equal,
19+
ensure_clean)
20+
import pandas.util.testing as tm
21+
22+
def _skip_if_numpy_not_friendly():
    """Skip the calling test (via ``nose.SkipTest``) on numpy older than 1.7."""
    if not _np_version_under1p7:
        return
    # numpy < 1.7 is not friendly to the timedelta handling under test
    raise nose.SkipTest("numpy < 1.7")
26+
27+
class TestTimedeltas(unittest.TestCase):
    """
    Exercise ``_coerce_scalar_to_timedelta_type`` (imported as ``ct``) on
    integer, ``datetime.timedelta`` and string input.

    Every test is skipped on numpy < 1.7.
    """
    _multiprocess_can_split_ = True

    @staticmethod
    def _to_ns(value):
        """Cast a ``np.timedelta64`` scalar to nanosecond resolution."""
        return value.astype('m8[ns]')

    def _check_conversions(self, cases):
        """Assert ``ct(text) == expected`` for every ``(text, expected)`` pair."""
        for text, expected in cases:
            result = ct(text)
            self.assertEqual(
                result, expected,
                "ct(%r) returned %r, expected %r" % (text, result, expected))

    def test_numeric_conversions(self):
        _skip_if_numpy_not_friendly()

        # ns not converted properly: a bare 10 ns currently comes back as 0
        self.assertEqual(ct(0), np.timedelta64(0, 'ns'))
        self.assertEqual(ct(10), np.timedelta64(0, 'ns'))
        self.assertEqual(ct(10, unit='ns'),
                         np.timedelta64(0, 'ns').astype('m8[ns]'))

        # (unit passed to ct, numpy unit code of the expected value)
        for unit, np_unit in [('us', 'us'), ('ms', 'ms'),
                              ('s', 's'), ('d', 'D')]:
            self.assertEqual(ct(10, unit=unit),
                             self._to_ns(np.timedelta64(10, np_unit)))

    def test_timedelta_conversions(self):
        _skip_if_numpy_not_friendly()

        self.assertEqual(ct(timedelta(seconds=1)),
                         self._to_ns(np.timedelta64(1, 's')))
        self.assertEqual(ct(timedelta(microseconds=1)),
                         self._to_ns(np.timedelta64(1, 'us')))
        self.assertEqual(ct(timedelta(days=1)),
                         self._to_ns(np.timedelta64(1, 'D')))

    def test_short_format_converters(self):
        _skip_if_numpy_not_friendly()

        def ns(count):
            return np.timedelta64(count, 'ns')

        def days(count):
            return self._to_ns(np.timedelta64(count, 'D'))

        self._check_conversions([
            # ns not converted properly: values below 1000 ns come back as 0
            ('10', ns(0)),
            ('10ns', ns(0)),
            ('100', ns(0)),
            ('100ns', ns(0)),

            ('1000', ns(1000)),
            ('1000ns', ns(1000)),
            ('1000NS', ns(1000)),

            ('10us', ns(10000)),
            ('100us', ns(100000)),
            ('1000us', ns(1000000)),
            ('1000Us', ns(1000000)),
            ('1000uS', ns(1000000)),

            ('1ms', ns(1000000)),
            ('10ms', ns(10000000)),
            ('100ms', ns(100000000)),
            ('1000ms', ns(1000000000)),

            ('-1s', -ns(1000000000)),
            ('1s', ns(1000000000)),
            ('10s', ns(10000000000)),
            ('100s', ns(100000000000)),
            ('1000s', ns(1000000000000)),

            ('1d', days(1)),
            ('-1d', -days(1)),
            ('1D', days(1)),
            ('10D', days(10)),
            ('100D', days(100)),
            ('1000D', days(1000)),
            ('10000D', days(10000)),

            # space
            (' 10000D ', days(10000)),
            (' - 10000D ', -days(10000)),
        ])

        # invalid
        self.assertRaises(ValueError, ct, '1foo')
        self.assertRaises(ValueError, ct, 'foo')

    def test_full_format_converters(self):
        _skip_if_numpy_not_friendly()

        conv = self._to_ns
        d1 = np.timedelta64(1, 'D')
        six_hours_one_sec = 6 * 3600 + 1

        self._check_conversions([
            ('1days', conv(d1)),
            ('1days,', conv(d1)),
            ('- 1days,', -conv(d1)),

            ('00:00:01', conv(np.timedelta64(1, 's'))),
            ('06:00:01', conv(np.timedelta64(six_hours_one_sec, 's'))),
            ('06:00:01.0', conv(np.timedelta64(six_hours_one_sec, 's'))),
            ('06:00:01.01',
             conv(np.timedelta64(1000 * six_hours_one_sec + 10, 'ms'))),

            ('- 1days, 00:00:01', -conv(d1 + np.timedelta64(1, 's'))),
            ('1days, 06:00:01',
             conv(d1 + np.timedelta64(six_hours_one_sec, 's'))),
            ('1days, 06:00:01.01',
             conv(d1 + np.timedelta64(1000 * six_hours_one_sec + 10, 'ms'))),
        ])

        # invalid: a bare "00" is not a complete HH:MM:SS time
        self.assertRaises(ValueError, ct, '- 1days, 00')
124+
125+
126+
if __name__ == '__main__':
    # run this module's tests through nose when executed as a script
    nose.runmodule(
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
        exit=False,
    )

pandas/tseries/timedeltas.py

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,29 @@
88
import numpy as np
99
import pandas.tslib as tslib
1010
from pandas import compat, _np_version_under1p7
11-
from pandas.core.common import ABCSeries, is_integer, _values_from_object
11+
from pandas.core.common import (ABCSeries, is_integer,
12+
_values_from_object, is_list_like)
1213

13-
timedelta_search = re.compile(
14-
"^(?P<value>-?\d*\.?\d*)(?P<unit>D|s|ms|us|ns)?$")
14+
repr_timedelta = tslib.repr_timedelta64
15+
repr_timedelta64 = tslib.repr_timedelta64
16+
17+
_short_search = re.compile(
18+
"^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE)
19+
_full_search = re.compile(
20+
"^\s*(?P<neg>-?)\s*(?P<days>\d+)?\s*(days|d)?,?\s*(?P<time>\d{2}:\d{2}:\d{2})?(?P<frac>\.\d+)?\s*$",re.IGNORECASE)
1521

1622
def _coerce_scalar_to_timedelta_type(r, unit='ns'):
1723
# kludgy here until we have a timedelta scalar
1824
# handle the numpy < 1.7 case
1925

2026
if isinstance(r, compat.string_types):
21-
m = timedelta_search.search(r)
22-
if m:
23-
r = float(m.groupdict()['value'])
24-
u = m.groupdict().get('unit')
25-
if u is not None:
26-
unit = u
27-
else:
28-
raise ValueError("cannot convert timedelta scalar value!")
29-
30-
r = tslib.cast_from_unit(unit, r)
31-
r = timedelta(microseconds=int(r)/1000)
27+
converter = _get_string_converter(r, unit=unit)
28+
r = converter()
29+
r = timedelta(microseconds=r/1000.0)
3230

3331
if is_integer(r):
34-
r = tslib.cast_from_unit(unit, r)
35-
r = timedelta(microseconds=int(r)/1000)
32+
r = tslib.cast_from_unit(r, unit)
33+
r = timedelta(microseconds=r/1000.0)
3634

3735
if _np_version_under1p7:
3836
if not isinstance(r, timedelta):
@@ -49,6 +47,54 @@ def _coerce_scalar_to_timedelta_type(r, unit='ns'):
4947
raise AssertionError("Invalid type for timedelta scalar: %s" % type(r))
5048
return r.astype('timedelta64[ns]')
5149

50+
def _get_string_converter(r, unit='ns'):
    """
    Return a converter callable for the timedelta string ``r``.

    The short format (``_short_search``) is tried first, then the full
    format (``_full_search``).

    Parameters
    ----------
    r : string
        The timedelta string to parse.
    unit : string, default 'ns'
        Unit applied to a short-format value that carries no unit suffix.
        Not used by the full format, which is always evaluated in seconds.

    Returns
    -------
    convert : callable
        ``convert()`` returns ``tslib.cast_from_unit(value, unit)`` for the
        string given here; ``convert(other)`` re-parses ``other`` with the
        same format instead.

    Raises
    ------
    ValueError
        If ``r`` matches neither format, or matches the full format without
        capturing a day count, a time or a fraction (e.g. ``","`` or
        ``"days"``). The returned converter raises ValueError as well when
        it is handed an override string that does not fit its format.
    """

    def _search_full(text):
        # Every component of the full pattern is optional, so a match that
        # captured no days, time or fraction carries no duration at all;
        # treat it as "no match" rather than silently returning zero.
        found = _full_search.search(text)
        if found is None:
            return None
        parts = found.groupdict()
        if not (parts['days'] or parts['time'] or parts['frac']):
            return None
        return found

    m = _short_search.search(r)
    if m:
        def convert(r=None, unit=unit, m=m):
            if r is not None:
                m = _short_search.search(r)
                if m is None:
                    raise ValueError(
                        "cannot convert timedelta string: %r" % (r,))

            gd = m.groupdict()

            # float() raises ValueError when no digits were captured
            r = float(gd['value'])
            u = gd.get('unit')
            if u is not None:
                # the pattern is case-insensitive; normalize before casting
                unit = u.lower()
            if gd['neg']:
                r *= -1
            return tslib.cast_from_unit(r, unit)
        return convert

    m = _search_full(r)
    if m:
        # ``unit`` is kept in the signature to mirror the short-format
        # converter; the value is always computed in seconds
        def convert(r=None, unit=None, m=m):
            if r is not None:
                m = _search_full(r)
                if m is None:
                    raise ValueError(
                        "cannot convert timedelta string: %r" % (r,))

            gd = m.groupdict()

            # convert to seconds
            value = float(gd['days'] or 0) * 86400

            time = gd['time']
            if time:
                (hh, mm, ss) = time.split(':')
                value += float(hh) * 3600 + float(mm) * 60 + float(ss)

            frac = gd['frac']
            if frac:
                # frac includes the leading '.', e.g. ".01"
                value += float(frac)

            if gd['neg']:
                value *= -1
            return tslib.cast_from_unit(value, 's')
        return convert

    # no converter
    raise ValueError("cannot create timedelta string converter")
97+
5298
def _possibly_cast_to_timedelta(value, coerce=True):
5399
""" try to cast to timedelta64, if already a timedeltalike, then make
54100
sure that we are [ns] (as numpy 1.6.2 is very buggy in this regards,

pandas/tslib.pyx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -665,14 +665,14 @@ cdef convert_to_tsobject(object ts, object tz, object unit):
665665
if ts == NPY_NAT:
666666
obj.value = NPY_NAT
667667
else:
668-
ts = ts * cast_from_unit(unit,None)
668+
ts = ts * cast_from_unit(None,unit)
669669
obj.value = ts
670670
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
671671
elif util.is_float_object(ts):
672672
if ts != ts or ts == NPY_NAT:
673673
obj.value = NPY_NAT
674674
else:
675-
ts = cast_from_unit(unit,ts)
675+
ts = cast_from_unit(ts,unit)
676676
obj.value = ts
677677
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
678678
elif util.is_string_object(ts):
@@ -852,7 +852,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
852852
pandas_datetimestruct dts
853853
bint utc_convert = bool(utc)
854854
_TSObject _ts
855-
int64_t m = cast_from_unit(unit,None)
855+
int64_t m = cast_from_unit(None,unit)
856856

857857
try:
858858
result = np.empty(n, dtype='M8[ns]')
@@ -892,7 +892,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
892892
if val != val or val == iNaT:
893893
iresult[i] = iNaT
894894
else:
895-
iresult[i] = cast_from_unit(unit,val)
895+
iresult[i] = cast_from_unit(val,unit)
896896
else:
897897
try:
898898
if len(val) == 0:
@@ -1276,10 +1276,10 @@ cdef inline _get_datetime64_nanos(object val):
12761276
else:
12771277
return ival
12781278

1279-
cpdef inline int64_t cast_from_unit(object unit, object ts) except -1:
1279+
cpdef inline int64_t cast_from_unit(object ts, object unit) except -1:
12801280
""" return a casting of the unit represented to nanoseconds
12811281
round the fractional part of a float to our precision, p """
1282-
if unit == 'D':
1282+
if unit == 'D' or unit == 'd':
12831283
m = 1000000000L * 86400
12841284
p = 6
12851285
elif unit == 's':

0 commit comments

Comments
 (0)