diff --git a/doc/source/io.rst b/doc/source/io.rst index 1d3980e216587..da611c0375789 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2009,6 +2009,26 @@ space. These are in terms of the total number of rows in a table. Term('minor_axis', '=', ['A','B']) ], start=0, stop=10) +**Using timedelta64[ns]** + +.. versionadded:: 0.13 + +Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be +specified in the format: ``<float>(<unit>)``, where float may be signed (and fractional), and unit can be +``D,s,ms,us,ns`` for the timedelta. Here's an example: + +.. warning:: + + This requires ``numpy >= 1.7`` + +.. ipython:: python + + from datetime import timedelta + dftd = DataFrame(dict(A = Timestamp('20130101'), B = [ Timestamp('20130101') + timedelta(days=i,seconds=10) for i in range(10) ])) + dftd['C'] = dftd['A']-dftd['B'] + dftd + store.append('dftd',dftd,data_columns=True) + store.select('dftd',Term("C","<","-3.5D")) Indexing ~~~~~~~~ diff --git a/doc/source/release.rst b/doc/source/release.rst index 087d2880511d2..75194f6877a6e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -156,6 +156,7 @@ API Changes - a column multi-index will be recreated properly (:issue:`4710`); raise on trying to use a multi-index with data_columns on the same axis - ``select_as_coordinates`` will now return an ``Int64Index`` of the resultant selection set + - support ``timedelta64[ns]`` as a serialization type (:issue:`3577`) - ``JSON`` - added ``date_unit`` parameter to specify resolution of timestamps. Options @@ -190,6 +191,8 @@ API Changes - provide automatic dtype conversions on _reduce operations (:issue:`3371`) - exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`) - default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. 
Fair warning in 0.12 (:issue:`3604`) + - moved timedeltas support to pandas.tseries.timedeltas.py; add timedeltas string parsing, + add top-level ``to_timedelta`` function Internal Refactoring ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 11f4ac9f487c2..5dbf1ce77bad8 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1211,6 +1211,26 @@ Time Deltas & Conversions .. versionadded:: 0.13 +**string/integer conversion** + +Using the top-level ``to_timedelta``, you can convert a scalar or array from the standard +timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` in ``nanoseconds``). +It can also construct Series. + +.. warning:: + + This requires ``numpy >= 1.7`` + +.. ipython:: python + + to_timedelta('1 days 06:05:01.00003') + to_timedelta('15.5us') + to_timedelta(['1 days 06:05:01.00003','15.5us','nan']) + to_timedelta(np.arange(5),unit='s') + to_timedelta(np.arange(5),unit='d') + +**frequency conversion** + Timedeltas can be converted to other 'frequencies' by dividing by another timedelta. These operations yield ``float64`` dtyped Series. diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index caf218747bdfb..f0a23b46373e9 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -80,7 +80,7 @@ API changes See :ref:`here` for an example. - allow a passed locations array or mask as a ``where`` condition (:issue:`4467`). See :ref:`here` for an example. - + - support ``timedelta64[ns]`` as a serialization type (:issue:`3577`) - the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)`` or ``table(t)`` the same defaults as prior < 0.13.0 remain, e.g. 
``put`` implies 'fixed` or 'f' (Fixed) format and ``append`` imples 'table' or 't' (Table) format @@ -208,6 +208,21 @@ Enhancements - ``timedelta64[ns]`` operations + - Using the new top-level ``to_timedelta``, you can convert a scalar or array from the standard + timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` in ``nanoseconds``). + + .. warning:: + + This requires ``numpy >= 1.7`` + + .. ipython:: python + + to_timedelta('1 days 06:05:01.00003') + to_timedelta('15.5us') + to_timedelta(['1 days 06:05:01.00003','15.5us','nan']) + to_timedelta(np.arange(5),unit='s') + to_timedelta(np.arange(5),unit='d') + - A Series of dtype ``timedelta64[ns]`` can now be divided by another ``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This is frequency conversion. See :ref:`here` for the docs. diff --git a/pandas/__init__.py b/pandas/__init__.py index a0edb397c28c1..03681d3fa5a3f 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -18,6 +18,19 @@ from datetime import datetime import numpy as np +# XXX: HACK for NumPy 1.5.1 to suppress warnings +try: + np.seterr(all='ignore') + # np.set_printoptions(suppress=True) +except Exception: # pragma: no cover + pass + +# numpy versioning +from distutils.version import LooseVersion +_np_version = np.version.short_version +_np_version_under1p6 = LooseVersion(_np_version) < '1.6' +_np_version_under1p7 = LooseVersion(_np_version) < '1.7' + from pandas.version import version as __version__ from pandas.info import __doc__ diff --git a/pandas/core/common.py b/pandas/core/common.py index ba7c6cc511933..b58bd92a4fd1f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -11,7 +11,6 @@ import pandas.algos as algos import pandas.lib as lib import pandas.tslib as tslib -from distutils.version import LooseVersion from pandas import compat from pandas.compat import StringIO, BytesIO, range, long, u, zip, map from datetime import timedelta @@ -19,15 +18,6 @@ from pandas.core.config 
import get_option from pandas.core import array as pa - -# XXX: HACK for NumPy 1.5.1 to suppress warnings -try: - np.seterr(all='ignore') - # np.set_printoptions(suppress=True) -except Exception: # pragma: no cover - pass - - class PandasError(Exception): pass @@ -35,11 +25,6 @@ class PandasError(Exception): class AmbiguousIndexError(PandasError, KeyError): pass -# versioning -_np_version = np.version.short_version -_np_version_under1p6 = LooseVersion(_np_version) < '1.6' -_np_version_under1p7 = LooseVersion(_np_version) < '1.7' - _POSSIBLY_CAST_DTYPES = set([np.dtype(t) for t in ['M8[ns]', 'm8[ns]', 'O', 'int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64']]) @@ -704,34 +689,13 @@ def diff(arr, n, axis=0): return out_arr - -def _coerce_scalar_to_timedelta_type(r): - # kludgy here until we have a timedelta scalar - # handle the numpy < 1.7 case - - if is_integer(r): - r = timedelta(microseconds=r/1000) - - if _np_version_under1p7: - if not isinstance(r, timedelta): - raise AssertionError("Invalid type for timedelta scalar: %s" % type(r)) - if compat.PY3: - # convert to microseconds in timedelta64 - r = np.timedelta64(int(r.total_seconds()*1e9 + r.microseconds*1000)) - else: - return r - - if isinstance(r, timedelta): - r = np.timedelta64(r) - elif not isinstance(r, np.timedelta64): - raise AssertionError("Invalid type for timedelta scalar: %s" % type(r)) - return r.astype('timedelta64[ns]') - def _coerce_to_dtypes(result, dtypes): """ given a dtypes and a result set, coerce the result elements to the dtypes """ if len(result) != len(dtypes): raise AssertionError("_coerce_to_dtypes requires equal len arrays") + from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type + def conv(r,dtype): try: if isnull(r): @@ -1324,68 +1288,6 @@ def _possibly_convert_platform(values): return values - -def _possibly_cast_to_timedelta(value, coerce=True): - """ try to cast to timedelta64, if already a timedeltalike, then make - sure that we are [ns] 
(as numpy 1.6.2 is very buggy in this regards, - don't force the conversion unless coerce is True - - if coerce='compat' force a compatibilty coercerion (to timedeltas) if needeed - """ - - # coercion compatability - if coerce == 'compat' and _np_version_under1p7: - - def convert(td, dtype): - - # we have an array with a non-object dtype - if hasattr(td,'item'): - td = td.astype(np.int64).item() - if td == tslib.iNaT: - return td - if dtype == 'm8[us]': - td *= 1000 - return td - - if td == tslib.compat_NaT: - return tslib.iNaT - - # convert td value to a nanosecond value - d = td.days - s = td.seconds - us = td.microseconds - - if dtype == 'object' or dtype == 'm8[ns]': - td = 1000*us + (s + d * 24 * 3600) * 10 ** 9 - else: - raise ValueError("invalid conversion of dtype in np < 1.7 [%s]" % dtype) - - return td - - # < 1.7 coercion - if not is_list_like(value): - value = np.array([ value ]) - - dtype = value.dtype - return np.array([ convert(v,dtype) for v in value ], dtype='m8[ns]') - - # deal with numpy not being able to handle certain timedelta operations - if isinstance(value, (ABCSeries, np.ndarray)) and value.dtype.kind == 'm': - if value.dtype != 'timedelta64[ns]': - value = value.astype('timedelta64[ns]') - return value - - # we don't have a timedelta, but we want to try to convert to one (but - # don't force it) - if coerce: - new_value = tslib.array_to_timedelta64( - _values_from_object(value).astype(object), coerce=False) - if new_value.dtype == 'i8': - value = np.array(new_value, dtype='timedelta64[ns]') - - return value - - def _possibly_cast_to_datetime(value, dtype, coerce=False): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ @@ -1423,6 +1325,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False): from pandas.tseries.tools import to_datetime value = to_datetime(value, coerce=coerce).values elif is_timedelta64: + from pandas.tseries.timedeltas import _possibly_cast_to_timedelta value = 
_possibly_cast_to_timedelta(value) except: pass @@ -1448,6 +1351,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False): except: pass elif inferred_type in ['timedelta', 'timedelta64']: + from pandas.tseries.timedeltas import _possibly_cast_to_timedelta value = _possibly_cast_to_timedelta(value) return value diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 212e2bad563b6..b9ffe788d183d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -13,7 +13,7 @@ from pandas.tseries.index import DatetimeIndex from pandas.core.internals import BlockManager import pandas.core.common as com -from pandas import compat +from pandas import compat, _np_version_under1p7 from pandas.compat import map, zip, lrange from pandas.core.common import (isnull, notnull, is_list_like, _values_from_object, @@ -1908,7 +1908,7 @@ def abs(self): obj = np.abs(self) # suprimo numpy 1.6 hacking - if com._np_version_under1p7: + if _np_version_under1p7: if self.ndim == 1: if obj.dtype == 'm8[us]': obj = obj.astype('m8[ns]') diff --git a/pandas/core/series.py b/pandas/core/series.py index 4516fcfbaee8e..8d6591c3acd60 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -19,6 +19,7 @@ _asarray_tuplesafe, is_integer_dtype, _NS_DTYPE, _TD_DTYPE, _infer_dtype_from_scalar, is_list_like, _values_from_object, + _possibly_cast_to_datetime, _possibly_castable, _possibly_convert_platform, ABCSparseArray) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index, _handle_legacy_indexes) @@ -32,6 +33,7 @@ from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex, Period from pandas.tseries.offsets import DateOffset +from pandas.tseries.timedeltas import _possibly_cast_to_timedelta from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.compat import zip, lzip, u, OrderedDict @@ -142,7 +144,7 @@ def _convert_to_array(self, values, name=None): values = values.to_series() 
elif inferred_type in ('timedelta', 'timedelta64'): # have a timedelta, convert to to ns here - values = com._possibly_cast_to_timedelta(values, coerce=coerce) + values = _possibly_cast_to_timedelta(values, coerce=coerce) elif inferred_type == 'integer': # py3 compat where dtype is 'm' but is an integer if values.dtype.kind == 'm': @@ -160,7 +162,7 @@ def _convert_to_array(self, values, name=None): raise TypeError("cannot use a non-absolute DateOffset in " "datetime/timedelta operations [{0}]".format( ','.join([ com.pprint_thing(v) for v in values[mask] ]))) - values = com._possibly_cast_to_timedelta(os, coerce=coerce) + values = _possibly_cast_to_timedelta(os, coerce=coerce) else: raise TypeError("incompatible type [{0}] for a datetime/timedelta operation".format(pa.array(values).dtype)) @@ -3215,11 +3217,11 @@ def _try_cast(arr, take_fast_path): # perf shortcut as this is the most common case if take_fast_path: - if com._possibly_castable(arr) and not copy and dtype is None: + if _possibly_castable(arr) and not copy and dtype is None: return arr try: - arr = com._possibly_cast_to_datetime(arr, dtype) + arr = _possibly_cast_to_datetime(arr, dtype) subarr = pa.array(arr, dtype=dtype, copy=copy) except (ValueError, TypeError): if dtype is not None and raise_cast_failure: @@ -3266,9 +3268,9 @@ def _try_cast(arr, take_fast_path): subarr = lib.maybe_convert_objects(subarr) else: - subarr = com._possibly_convert_platform(data) + subarr = _possibly_convert_platform(data) - subarr = com._possibly_cast_to_datetime(subarr, dtype) + subarr = _possibly_cast_to_datetime(subarr, dtype) else: subarr = _try_cast(data, False) @@ -3285,7 +3287,7 @@ def _try_cast(arr, take_fast_path): dtype, value = _infer_dtype_from_scalar(value) else: # need to possibly convert the value here - value = com._possibly_cast_to_datetime(value, dtype) + value = _possibly_cast_to_datetime(value, dtype) subarr = pa.empty(len(index), dtype=dtype) subarr.fill(value) diff --git a/pandas/io/pytables.py 
b/pandas/io/pytables.py index 6759e07ed7935..9b6a230f6a551 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -17,7 +17,7 @@ import numpy as np import pandas from pandas import (Series, TimeSeries, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, Timestamp) + MultiIndex, Int64Index, Timestamp, _np_version_under1p7) from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex @@ -29,6 +29,7 @@ from pandas.core.internals import BlockManager, make_block from pandas.core.reshape import block2d_to_blocknd, factor_indexer from pandas.core.index import _ensure_index +from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type import pandas.core.common as com from pandas.tools.merge import concat from pandas import compat @@ -1527,6 +1528,8 @@ def set_kind(self): self.kind = 'integer' elif dtype.startswith(u('date')): self.kind = 'datetime' + elif dtype.startswith(u('timedelta')): + self.kind = 'timedelta' elif dtype.startswith(u('bool')): self.kind = 'bool' else: @@ -1547,6 +1550,11 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=No if inferred_type == 'datetime64': self.set_atom_datetime64(block) + elif dtype == 'timedelta64[ns]': + if _np_version_under1p7: + raise TypeError( + "timdelta64 is not supported under under numpy < 1.7") + self.set_atom_timedelta64(block) elif inferred_type == 'date': raise TypeError( "[date] is not implemented as a table column") @@ -1667,6 +1675,16 @@ def set_atom_datetime64(self, block, values=None): values = block.values.view('i8') self.set_data(values, 'datetime64') + def get_atom_timedelta64(self, block): + return _tables().Int64Col(shape=block.shape[0]) + + def set_atom_timedelta64(self, block, values=None): + self.kind = 'timedelta64' + self.typ = self.get_atom_timedelta64(block) + if values is None: + values = block.values.view('i8') + 
self.set_data(values, 'timedelta64') + @property def shape(self): return getattr(self.data, 'shape', None) @@ -1719,6 +1737,8 @@ def convert(self, values, nan_rep, encoding): else: self.data = np.asarray(self.data, dtype='M8[ns]') + elif dtype == u('timedelta64'): + self.data = np.asarray(self.data, dtype='m8[ns]') elif dtype == u('date'): self.data = np.array( [date.fromtimestamp(v) for v in self.data], dtype=object) @@ -1767,6 +1787,9 @@ def get_atom_data(self, block): def get_atom_datetime64(self, block): return _tables().Int64Col() + def get_atom_timedelta64(self, block): + return _tables().Int64Col() + class GenericDataIndexableCol(DataIndexableCol): @@ -2007,6 +2030,11 @@ def read_array(self, key): if dtype == u('datetime64'): ret = np.array(ret, dtype='M8[ns]') + elif dtype == u('timedelta64'): + if _np_version_under1p7: + raise TypeError( + "timedelta64 is not supported under under numpy < 1.7") + ret = np.array(ret, dtype='m8[ns]') if transposed: return ret.T @@ -2214,6 +2242,9 @@ def write_array(self, key, value, items=None): elif value.dtype.type == np.datetime64: self._handle.createArray(self.group, key, value.view('i8')) getattr(self.group, key)._v_attrs.value_type = 'datetime64' + elif value.dtype.type == np.timedelta64: + self._handle.createArray(self.group, key, value.view('i8')) + getattr(self.group, key)._v_attrs.value_type = 'timedelta64' else: if empty_array: self.write_array_empty(key, value) @@ -4000,7 +4031,9 @@ def eval(self): """ set the numexpr expression for this term """ if not self.is_valid: - raise ValueError("query term is not valid [%s]" % str(self)) + raise ValueError("query term is not valid [{0}]\n" + " all queries terms must include a reference to\n" + " either an axis (e.g. 
index or column), or a data_columns\n".format(str(self))) # convert values if we are in the table if self.is_in_table: @@ -4060,6 +4093,9 @@ def stringify(value): if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v, v.value, kind) + elif kind == u('timedelta64') or kind == u('timedelta'): + v = _coerce_scalar_to_timedelta_type(v,unit='s').item() + return TermValue(int(v), v, kind) elif (isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u('date')): v = time.mktime(v.timetuple()) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 7e5c3f9fff061..3f4ce72198215 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -22,7 +22,8 @@ assert_frame_equal, assert_series_equal) from pandas import concat, Timestamp -from pandas import compat +from pandas import compat, _np_version_under1p7 +from pandas.core import common as com from numpy.testing.decorators import slow @@ -1732,7 +1733,7 @@ def test_unimplemented_dtypes_table_columns(self): # this fails because we have a date in the object block...... 
self.assertRaises(TypeError, store.append, 'df_unimplemented', df) - def test_table_append_with_timezones(self): + def test_append_with_timezones(self): from datetime import timedelta @@ -1798,6 +1799,51 @@ def compare(a,b): result = store.select('df') assert_frame_equal(result,df) + def test_append_with_timedelta(self): + if _np_version_under1p7: + raise nose.SkipTest("requires numpy >= 1.7") + + # GH 3577 + # append timedelta + + from datetime import timedelta + df = DataFrame(dict(A = Timestamp('20130101'), B = [ Timestamp('20130101') + timedelta(days=i,seconds=10) for i in range(10) ])) + df['C'] = df['A']-df['B'] + df.ix[3:5,'C'] = np.nan + + with ensure_clean(self.path) as store: + + # table + _maybe_remove(store, 'df') + store.append('df',df,data_columns=True) + result = store.select('df') + assert_frame_equal(result,df) + + result = store.select('df',Term("C<100000")) + assert_frame_equal(result,df) + + result = store.select('df',Term("C","<",-3*86400)) + assert_frame_equal(result,df.iloc[3:]) + + result = store.select('df',Term("C","<",'-3D')) + assert_frame_equal(result,df.iloc[3:]) + + # a bit hacky here as we don't really deal with the NaT properly + + result = store.select('df',Term("C","<",'-500000s')) + result = result.dropna(subset=['C']) + assert_frame_equal(result,df.iloc[6:]) + + result = store.select('df',Term("C","<",'-3.5D')) + result = result.iloc[1:] + assert_frame_equal(result,df.iloc[4:]) + + # fixed + _maybe_remove(store, 'df2') + store.put('df2',df) + result = store.select('df2') + assert_frame_equal(result,df) + def test_remove(self): with ensure_clean(self.path) as store: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 723810a19d140..c5af0b0d4d5c8 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3248,9 +3248,10 @@ def test_operators_timedelta64(self): mixed['F'] = Timestamp('20130101') # results in an object array + from pandas.tseries.timedeltas import 
_coerce_scalar_to_timedelta_type result = mixed.min() - expected = Series([com._coerce_scalar_to_timedelta_type(timedelta(seconds=5*60+5)), - com._coerce_scalar_to_timedelta_type(timedelta(days=-1)), + expected = Series([_coerce_scalar_to_timedelta_type(timedelta(seconds=5*60+5)), + _coerce_scalar_to_timedelta_type(timedelta(days=-1)), 'foo', 1, 1.0, diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 1f008354756bc..7a993cbcf07f4 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import (Index, Series, DataFrame, isnull, notnull, - bdate_range, date_range) + bdate_range, date_range, _np_version_under1p7) from pandas.core.index import MultiIndex from pandas.tseries.index import Timestamp, DatetimeIndex import pandas.core.config as cf @@ -2188,7 +2188,7 @@ def test_timedeltas_with_DateOffset(self): [Timestamp('20130101 9:06:00.005'), Timestamp('20130101 9:07:00.005')]) assert_series_equal(result, expected) - if not com._np_version_under1p7: + if not _np_version_under1p7: # operate with np.timedelta64 correctly result = s + np.timedelta64(1, 's') @@ -2292,7 +2292,7 @@ def test_timedelta64_operations_with_integers(self): self.assertRaises(TypeError, sop, s2.values) def test_timedelta64_conversions(self): - if com._np_version_under1p7: + if _np_version_under1p7: raise nose.SkipTest("cannot use 2 argument form of timedelta64 conversions with numpy < 1.7") startdate = Series(date_range('2013-01-01', '2013-01-03')) @@ -2317,7 +2317,7 @@ def test_timedelta64_equal_timedelta_supported_ops(self): 'm': 60 * 1000000, 's': 1000000, 'us': 1} def timedelta64(*args): - if com._np_version_under1p7: + if _np_version_under1p7: coeffs = np.array(args) terms = np.array([npy16_mappings[interval] for interval in intervals]) @@ -2426,7 +2426,7 @@ def test_timedelta64_functions(self): assert_series_equal(result, expected) def test_timedelta_fillna(self): - if com._np_version_under1p7: + if 
_np_version_under1p7: raise nose.SkipTest("timedelta broken in np 1.6.1") #GH 3371 @@ -2498,12 +2498,12 @@ def test_datetime64_fillna(self): assert_series_equal(result,expected) def test_sub_of_datetime_from_TimeSeries(self): - from pandas.core import common as com + from pandas.tseries.timedeltas import _possibly_cast_to_timedelta from datetime import datetime a = Timestamp(datetime(1993, 0o1, 0o7, 13, 30, 00)) b = datetime(1993, 6, 22, 13, 30) a = Series([a]) - result = com._possibly_cast_to_timedelta(np.abs(a - b)) + result = _possibly_cast_to_timedelta(np.abs(a - b)) self.assert_(result.dtype == 'timedelta64[ns]') def test_datetime64_with_index(self): diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index ead5a17c4fab1..c2cc3723802fc 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -7,5 +7,6 @@ from pandas.tseries.frequencies import infer_freq from pandas.tseries.period import Period, PeriodIndex, period_range, pnow from pandas.tseries.resample import TimeGrouper +from pandas.tseries.timedeltas import to_timedelta from pandas.lib import NaT import pandas.tseries.offsets as offsets diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index e91cad62e7dce..1572ca481d8a4 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -7,8 +7,7 @@ import numpy as np from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE, - is_list_like,_possibly_cast_to_timedelta, - _values_from_object, _maybe_box) + is_list_like,_values_from_object, _maybe_box) from pandas.core.index import Index, Int64Index import pandas.compat as compat from pandas.compat import u diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py new file mode 100644 index 0000000000000..551507039112b --- /dev/null +++ b/pandas/tseries/tests/test_timedeltas.py @@ -0,0 +1,168 @@ +# pylint: disable-msg=E1101,W0612 + +from datetime import datetime, timedelta +import nose +import unittest + +import numpy as np +import 
pandas as pd + +from pandas import (Index, Series, DataFrame, isnull, notnull, + bdate_range, date_range, _np_version_under1p7) +import pandas.core.common as com +from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long +from pandas import compat, to_timedelta, tslib +from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct +from pandas.util.testing import (assert_series_equal, + assert_frame_equal, + assert_almost_equal, + ensure_clean) +import pandas.util.testing as tm + +def _skip_if_numpy_not_friendly(): + # not friendly for < 1.7 + if _np_version_under1p7: + raise nose.SkipTest("numpy < 1.7") + +class TestTimedeltas(unittest.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + pass + + def test_numeric_conversions(self): + _skip_if_numpy_not_friendly() + + self.assert_(ct(0) == np.timedelta64(0,'ns')) + self.assert_(ct(10) == np.timedelta64(10,'ns')) + self.assert_(ct(10,unit='ns') == np.timedelta64(10,'ns').astype('m8[ns]')) + + self.assert_(ct(10,unit='us') == np.timedelta64(10,'us').astype('m8[ns]')) + self.assert_(ct(10,unit='ms') == np.timedelta64(10,'ms').astype('m8[ns]')) + self.assert_(ct(10,unit='s') == np.timedelta64(10,'s').astype('m8[ns]')) + self.assert_(ct(10,unit='d') == np.timedelta64(10,'D').astype('m8[ns]')) + + def test_timedelta_conversions(self): + _skip_if_numpy_not_friendly() + + self.assert_(ct(timedelta(seconds=1)) == np.timedelta64(1,'s').astype('m8[ns]')) + self.assert_(ct(timedelta(microseconds=1)) == np.timedelta64(1,'us').astype('m8[ns]')) + self.assert_(ct(timedelta(days=1)) == np.timedelta64(1,'D').astype('m8[ns]')) + + def test_short_format_converters(self): + _skip_if_numpy_not_friendly() + + def conv(v): + return v.astype('m8[ns]') + + self.assert_(ct('10') == np.timedelta64(10,'ns')) + self.assert_(ct('10ns') == np.timedelta64(10,'ns')) + self.assert_(ct('100') == np.timedelta64(100,'ns')) + self.assert_(ct('100ns') == np.timedelta64(100,'ns')) + + 
self.assert_(ct('1000') == np.timedelta64(1000,'ns')) + self.assert_(ct('1000ns') == np.timedelta64(1000,'ns')) + self.assert_(ct('1000NS') == np.timedelta64(1000,'ns')) + + self.assert_(ct('10us') == np.timedelta64(10000,'ns')) + self.assert_(ct('100us') == np.timedelta64(100000,'ns')) + self.assert_(ct('1000us') == np.timedelta64(1000000,'ns')) + self.assert_(ct('1000Us') == np.timedelta64(1000000,'ns')) + self.assert_(ct('1000uS') == np.timedelta64(1000000,'ns')) + + self.assert_(ct('1ms') == np.timedelta64(1000000,'ns')) + self.assert_(ct('10ms') == np.timedelta64(10000000,'ns')) + self.assert_(ct('100ms') == np.timedelta64(100000000,'ns')) + self.assert_(ct('1000ms') == np.timedelta64(1000000000,'ns')) + + self.assert_(ct('-1s') == -np.timedelta64(1000000000,'ns')) + self.assert_(ct('1s') == np.timedelta64(1000000000,'ns')) + self.assert_(ct('10s') == np.timedelta64(10000000000,'ns')) + self.assert_(ct('100s') == np.timedelta64(100000000000,'ns')) + self.assert_(ct('1000s') == np.timedelta64(1000000000000,'ns')) + + self.assert_(ct('1d') == conv(np.timedelta64(1,'D'))) + self.assert_(ct('-1d') == -conv(np.timedelta64(1,'D'))) + self.assert_(ct('1D') == conv(np.timedelta64(1,'D'))) + self.assert_(ct('10D') == conv(np.timedelta64(10,'D'))) + self.assert_(ct('100D') == conv(np.timedelta64(100,'D'))) + self.assert_(ct('1000D') == conv(np.timedelta64(1000,'D'))) + self.assert_(ct('10000D') == conv(np.timedelta64(10000,'D'))) + + # space + self.assert_(ct(' 10000D ') == conv(np.timedelta64(10000,'D'))) + self.assert_(ct(' - 10000D ') == -conv(np.timedelta64(10000,'D'))) + + # invalid + self.assertRaises(ValueError, ct, '1foo') + self.assertRaises(ValueError, ct, 'foo') + + def test_full_format_converters(self): + _skip_if_numpy_not_friendly() + + def conv(v): + return v.astype('m8[ns]') + d1 = np.timedelta64(1,'D') + + self.assert_(ct('1days') == conv(d1)) + self.assert_(ct('1days,') == conv(d1)) + self.assert_(ct('- 1days,') == -conv(d1)) + + 
self.assert_(ct('00:00:01') == conv(np.timedelta64(1,'s'))) + self.assert_(ct('06:00:01') == conv(np.timedelta64(6*3600+1,'s'))) + self.assert_(ct('06:00:01.0') == conv(np.timedelta64(6*3600+1,'s'))) + self.assert_(ct('06:00:01.01') == conv(np.timedelta64(1000*(6*3600+1)+10,'ms'))) + + self.assert_(ct('- 1days, 00:00:01') == -conv(d1+np.timedelta64(1,'s'))) + self.assert_(ct('1days, 06:00:01') == conv(d1+np.timedelta64(6*3600+1,'s'))) + self.assert_(ct('1days, 06:00:01.01') == conv(d1+np.timedelta64(1000*(6*3600+1)+10,'ms'))) + + # invalid + self.assertRaises(ValueError, ct, '- 1days, 00') + + def test_nat_converters(self): + _skip_if_numpy_not_friendly() + + self.assert_(to_timedelta('nat') == tslib.iNaT) + self.assert_(to_timedelta('nan') == tslib.iNaT) + + def test_to_timedelta(self): + _skip_if_numpy_not_friendly() + + def conv(v): + return v.astype('m8[ns]') + d1 = np.timedelta64(1,'D') + + self.assert_(to_timedelta('1 days 06:05:01.00003') == conv(d1+np.timedelta64(6*3600+5*60+1,'s')+np.timedelta64(30,'us'))) + self.assert_(to_timedelta('15.5us') == conv(np.timedelta64(15500,'ns'))) + + # empty string + result = to_timedelta('') + self.assert_(result == tslib.iNaT) + + result = to_timedelta(['', '']) + self.assert_(isnull(result).all()) + + # pass thru + result = to_timedelta(np.array([np.timedelta64(1,'s')])) + expected = np.array([np.timedelta64(1,'s')]) + tm.assert_almost_equal(result,expected) + + # ints + result = np.timedelta64(0,'ns') + expected = to_timedelta(0) + self.assert_(result == expected) + + # Series + expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) + result = to_timedelta(Series(['1d','1days 00:00:01'])) + tm.assert_series_equal(result, expected) + + # with units + result = Series([ np.timedelta64(0,'ns'), np.timedelta64(10,'s').astype('m8[ns]') ],dtype='m8[ns]') + expected = to_timedelta([0,10],unit='s') + tm.assert_series_equal(result, expected) + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', 
'-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py new file mode 100644 index 0000000000000..4d8633546e017 --- /dev/null +++ b/pandas/tseries/timedeltas.py @@ -0,0 +1,226 @@ +""" +timedelta support tools +""" + +import re +from datetime import timedelta + +import numpy as np +import pandas.tslib as tslib +from pandas import compat, _np_version_under1p7 +from pandas.core.common import (ABCSeries, is_integer, is_timedelta64_dtype, + _values_from_object, is_list_like) + +repr_timedelta = tslib.repr_timedelta64 +repr_timedelta64 = tslib.repr_timedelta64 + +def to_timedelta(arg, box=True, unit='ns'): + """ + Convert argument to timedelta + + Parameters + ---------- + arg : string, timedelta, array of strings (with possible NAs) + box : boolean, default True + If True returns a Series of the results, if False returns ndarray of values + unit : unit of the arg (D,s,ms,us,ns) denote the unit, which is an integer/float number + + Returns + ------- + ret : timedelta64/arrays of timedelta64 if parsing succeeded + """ + if _np_version_under1p7: + raise ValueError("to_timedelta is not support for numpy < 1.7") + + def _convert_listlike(arg, box): + + if isinstance(arg, (list,tuple)): + arg = np.array(arg, dtype='O') + + if is_timedelta64_dtype(arg): + if box: + from pandas import Series + return Series(arg,dtype='m8[ns]') + return arg + + value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ]) + if box: + from pandas import Series + value = Series(value,dtype='m8[ns]') + return value + + if arg is None: + return arg + elif isinstance(arg, ABCSeries): + from pandas import Series + values = _convert_listlike(arg.values, box=False) + return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]') + elif is_list_like(arg): + return _convert_listlike(arg, box=box) + + return _convert_listlike([ arg ], box=False)[0] + +_short_search = re.compile( + 
"^\s*(?P-?)\s*(?P\d*\.?\d*)\s*(?Pd|s|ms|us|ns)?\s*$",re.IGNORECASE) +_full_search = re.compile( + "^\s*(?P-?)\s*(?P\d+)?\s*(days|d)?,?\s*(?P