Skip to content

Commit ef2cfb1

Browse files
committed
ENH: add top-level to_timedelta to convert string/integer based to timedeltas
1 parent 30d91c7 commit ef2cfb1

File tree

8 files changed

+168
-16
lines changed

8 files changed

+168
-16
lines changed

doc/source/io.rst

+4
Original file line numberDiff line numberDiff line change
@@ -2017,6 +2017,10 @@ Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type.
20172017
specified in the format: ``<float>(<unit>)``, where float may be signed (and fractional), and unit can be
20182018
``D,s,ms,us,ns`` for the timedelta. Here's an example:
20192019
2020+
.. warning::
2021+
2022+
This requires ``numpy >= 1.7``
2023+
20202024
.. ipython:: python
20212025
20222026
from datetime import timedelta

doc/source/release.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,8 @@ API Changes
191191
- provide automatic dtype conversions on _reduce operations (:issue:`3371`)
192192
- exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`)
193193
- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)
194-
- moved timedeltas support to pandas.tseries.timedeltas.py; add timedeltas string parsing
194+
- moved timedeltas support to pandas.tseries.timedeltas.py; add timedeltas string parsing,
195+
add top-level ``to_timedelta`` function
195196

196197
Internal Refactoring
197198
~~~~~~~~~~~~~~~~~~~~

doc/source/timeseries.rst

+20
Original file line numberDiff line numberDiff line change
@@ -1211,6 +1211,26 @@ Time Deltas & Conversions
12111211

12121212
.. versionadded:: 0.13
12131213

1214+
**string/integer conversion**
1215+
1216+
Using the top-level ``to_timedelta``, you can convert a scalar or array from the standard
1217+
timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` in ``nanoseconds``).
1218+
It can also convert a list-like or ``Series`` of such values, returning a ``Series`` of dtype ``timedelta64[ns]``.
1219+
1220+
.. warning::
1221+
1222+
This requires ``numpy >= 1.7``
1223+
1224+
.. ipython:: python
1225+
1226+
to_timedelta('1 days 06:05:01.00003')
1227+
to_timedelta('15.5us')
1228+
to_timedelta(['1 days 06:05:01.00003','15.5us','nan'])
1229+
to_timedelta(np.arange(5),unit='s')
1230+
to_timedelta(np.arange(5),unit='d')
1231+
1232+
**frequency conversion**
1233+
12141234
Timedeltas can be converted to other 'frequencies' by dividing by another timedelta.
12151235
These operations yield ``float64`` dtyped Series.
12161236

doc/source/v0.13.0.txt

+15
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,21 @@ Enhancements
208208

209209
- ``timedelta64[ns]`` operations
210210

211+
- Using the new top-level ``to_timedelta``, you can convert a scalar or array from the standard
212+
timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` in ``nanoseconds``).
213+
214+
.. warning::
215+
216+
This requires ``numpy >= 1.7``
217+
218+
.. ipython:: python
219+
220+
to_timedelta('1 days 06:05:01.00003')
221+
to_timedelta('15.5us')
222+
to_timedelta(['1 days 06:05:01.00003','15.5us','nan'])
223+
to_timedelta(np.arange(5),unit='s')
224+
to_timedelta(np.arange(5),unit='d')
225+
211226
- A Series of dtype ``timedelta64[ns]`` can now be divided by another
212227
``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
213228
is frequency conversion. See :ref:`here<timeseries.timedeltas_convert>` for the docs.

pandas/tseries/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@
77
from pandas.tseries.frequencies import infer_freq
88
from pandas.tseries.period import Period, PeriodIndex, period_range, pnow
99
from pandas.tseries.resample import TimeGrouper
10+
from pandas.tseries.timedeltas import to_timedelta
1011
from pandas.lib import NaT
1112
import pandas.tseries.offsets as offsets

pandas/tseries/tests/test_timedeltas.py

+49-9
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
bdate_range, date_range, _np_version_under1p7)
1212
import pandas.core.common as com
1313
from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long
14-
from pandas import compat
14+
from pandas import compat, to_timedelta, tslib
1515
from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct
1616
from pandas.util.testing import (assert_series_equal,
1717
assert_frame_equal,
@@ -33,10 +33,9 @@ def setUp(self):
3333
def test_numeric_conversions(self):
3434
_skip_if_numpy_not_friendly()
3535

36-
# ns not converted properly
3736
self.assert_(ct(0) == np.timedelta64(0,'ns'))
38-
self.assert_(ct(10) == np.timedelta64(0,'ns'))
39-
self.assert_(ct(10,unit='ns') == np.timedelta64(0,'ns').astype('m8[ns]'))
37+
self.assert_(ct(10) == np.timedelta64(10,'ns'))
38+
self.assert_(ct(10,unit='ns') == np.timedelta64(10,'ns').astype('m8[ns]'))
4039

4140
self.assert_(ct(10,unit='us') == np.timedelta64(10,'us').astype('m8[ns]'))
4241
self.assert_(ct(10,unit='ms') == np.timedelta64(10,'ms').astype('m8[ns]'))
@@ -56,11 +55,10 @@ def test_short_format_converters(self):
5655
def conv(v):
5756
return v.astype('m8[ns]')
5857

59-
# ns not converted properly
60-
self.assert_(ct('10') == np.timedelta64(0,'ns'))
61-
self.assert_(ct('10ns') == np.timedelta64(0,'ns'))
62-
self.assert_(ct('100') == np.timedelta64(0,'ns'))
63-
self.assert_(ct('100ns') == np.timedelta64(0,'ns'))
58+
self.assert_(ct('10') == np.timedelta64(10,'ns'))
59+
self.assert_(ct('10ns') == np.timedelta64(10,'ns'))
60+
self.assert_(ct('100') == np.timedelta64(100,'ns'))
61+
self.assert_(ct('100ns') == np.timedelta64(100,'ns'))
6462

6563
self.assert_(ct('1000') == np.timedelta64(1000,'ns'))
6664
self.assert_(ct('1000ns') == np.timedelta64(1000,'ns'))
@@ -122,6 +120,48 @@ def conv(v):
122120
# invalid
123121
self.assertRaises(ValueError, ct, '- 1days, 00')
124122

123+
def test_nat_converters(self):
124+
_skip_if_numpy_not_friendly()
125+
126+
self.assert_(to_timedelta('nat') == tslib.iNaT)
127+
self.assert_(to_timedelta('nan') == tslib.iNaT)
128+
129+
def test_to_timedelta(self):
130+
_skip_if_numpy_not_friendly()
131+
132+
def conv(v):
133+
return v.astype('m8[ns]')
134+
d1 = np.timedelta64(1,'D')
135+
136+
self.assert_(to_timedelta('1 days 06:05:01.00003') == conv(d1+np.timedelta64(6*3600+5*60+1,'s')+np.timedelta64(30,'us')))
137+
self.assert_(to_timedelta('15.5us') == conv(np.timedelta64(15500,'ns')))
138+
139+
# empty string
140+
result = to_timedelta('')
141+
self.assert_(result == tslib.iNaT)
142+
143+
result = to_timedelta(['', ''])
144+
self.assert_(isnull(result).all())
145+
146+
# pass thru
147+
result = to_timedelta(np.array([np.timedelta64(1,'s')]))
148+
expected = np.array([np.timedelta64(1,'s')])
149+
tm.assert_almost_equal(result,expected)
150+
151+
# ints
152+
result = np.timedelta64(0,'ns')
153+
expected = to_timedelta(0)
154+
self.assert_(result == expected)
155+
156+
# Series
157+
expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
158+
result = to_timedelta(Series(['1d','1days 00:00:01']))
159+
tm.assert_series_equal(result, expected)
160+
161+
# with units
162+
result = Series([ np.timedelta64(0,'ns'), np.timedelta64(10,'s').astype('m8[ns]') ],dtype='m8[ns]')
163+
expected = to_timedelta([0,10],unit='s')
164+
tm.assert_series_equal(result, expected)
125165

126166
if __name__ == '__main__':
127167
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

pandas/tseries/timedeltas.py

+74-5
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,86 @@
88
import numpy as np
99
import pandas.tslib as tslib
1010
from pandas import compat, _np_version_under1p7
11-
from pandas.core.common import (ABCSeries, is_integer,
11+
from pandas.core.common import (ABCSeries, is_integer, is_timedelta64_dtype,
1212
_values_from_object, is_list_like)
1313

1414
repr_timedelta = tslib.repr_timedelta64
1515
repr_timedelta64 = tslib.repr_timedelta64
1616

17+
def to_timedelta(arg, box=True, unit='ns'):
18+
"""
19+
Convert argument to timedelta
20+
21+
Parameters
22+
----------
23+
arg : string, integer, timedelta, or list-like / Series of these (with possible NAs)
24+
box : boolean, default True
25+
If True returns a Series of the results, if False returns ndarray of values
26+
unit : string, default 'ns'; the unit of the arg (D,s,ms,us,ns) when the arg is an integer/float number
27+
28+
Returns
29+
-------
30+
ret : timedelta64/arrays of timedelta64 if parsing succeeded
31+
"""
32+
if _np_version_under1p7:
33+
raise ValueError("to_timedelta is not support for numpy < 1.7")
34+
35+
def _convert_listlike(arg, box):
36+
37+
if isinstance(arg, (list,tuple)):
38+
arg = np.array(arg, dtype='O')
39+
40+
if is_timedelta64_dtype(arg):
41+
if box:
42+
from pandas import Series
43+
return Series(arg,dtype='m8[ns]')
44+
return arg
45+
46+
value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ])
47+
if box:
48+
from pandas import Series
49+
value = Series(value,dtype='m8[ns]')
50+
return value
51+
52+
if arg is None:
53+
return arg
54+
elif isinstance(arg, ABCSeries):
55+
from pandas import Series
56+
values = _convert_listlike(arg.values, box=False)
57+
return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
58+
elif is_list_like(arg):
59+
return _convert_listlike(arg, box=box)
60+
61+
return _convert_listlike([ arg ], box=False)[0]
62+
1763
_short_search = re.compile(
1864
"^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE)
1965
_full_search = re.compile(
2066
"^\s*(?P<neg>-?)\s*(?P<days>\d+)?\s*(days|d)?,?\s*(?P<time>\d{2}:\d{2}:\d{2})?(?P<frac>\.\d+)?\s*$",re.IGNORECASE)
67+
_nat_search = re.compile(
68+
"^\s*(nat|nan)\s*$",re.IGNORECASE)
69+
_whitespace = re.compile('^\s*$')
2170

2271
def _coerce_scalar_to_timedelta_type(r, unit='ns'):
2372
# kludgy here until we have a timedelta scalar
2473
# handle the numpy < 1.7 case
2574

75+
def conv(v):
76+
if _np_version_under1p7:
77+
return timedelta(microseconds=v/1000.0)
78+
return np.timedelta64(v)
79+
2680
if isinstance(r, compat.string_types):
2781
converter = _get_string_converter(r, unit=unit)
2882
r = converter()
29-
r = timedelta(microseconds=r/1000.0)
30-
31-
if is_integer(r):
83+
r = conv(r)
84+
elif r == tslib.iNaT:
85+
return r
86+
elif isinstance(r, np.timedelta64):
87+
r = r.astype("m8[{0}]".format(unit.lower()))
88+
elif is_integer(r):
3289
r = tslib.cast_from_unit(r, unit)
33-
r = timedelta(microseconds=r/1000.0)
90+
r = conv(r)
3491

3592
if _np_version_under1p7:
3693
if not isinstance(r, timedelta):
@@ -50,6 +107,12 @@ def _coerce_scalar_to_timedelta_type(r, unit='ns'):
50107
def _get_string_converter(r, unit='ns'):
51108
""" return a string converter for r to process the timedelta format """
52109

110+
# treat as a nan
111+
if _whitespace.search(r):
112+
def convert(r=None, unit=None):
113+
return tslib.iNaT
114+
return convert
115+
53116
m = _short_search.search(r)
54117
if m:
55118
def convert(r=None, unit=unit, m=m):
@@ -92,6 +155,12 @@ def convert(r=None, unit=None, m=m):
92155
return tslib.cast_from_unit(value, 's')
93156
return convert
94157

158+
m = _nat_search.search(r)
159+
if m:
160+
def convert(r=None, unit=None, m=m):
161+
return tslib.iNaT
162+
return convert
163+
95164
# no converter
96165
raise ValueError("cannot create timedelta string converter")
97166

pandas/tslib.pyx

+3-1
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,9 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1:
13031303
# to avoid precision issues from float -> int
13041304
base = <int64_t> ts
13051305
frac = ts-base
1306-
return <int64_t> (base*m) + <int64_t> (round(frac,p)*m)
1306+
if p:
1307+
frac = round(frac,p)
1308+
return <int64_t> (base*m) + <int64_t> (frac*m)
13071309

13081310
def cast_to_nanoseconds(ndarray arr):
13091311
cdef:

0 commit comments

Comments
 (0)