From 06af5a67ff3119208d63a41ab46eba36d407cd00 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Nov 2017 19:34:36 -0700 Subject: [PATCH 01/11] move implementation of Timedelta to tslibs.timedeltas --- pandas/_libs/tslib.pxd | 2 +- pandas/_libs/tslib.pyx | 479 +-------------------------- pandas/_libs/tslibs/timedeltas.pxd | 7 +- pandas/_libs/tslibs/timedeltas.pyx | 502 ++++++++++++++++++++++++++++- 4 files changed, 507 insertions(+), 483 deletions(-) diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index 443b3867eb2b5..a2477197faece 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -1,8 +1,8 @@ from numpy cimport ndarray, int64_t from tslibs.conversion cimport convert_to_tsobject +from tslibs.timedeltas cimport convert_to_timedelta64 -cpdef convert_to_timedelta64(object, object) cdef bint _check_all_nulls(obj) cdef _to_i8(object val) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5a4af4550f589..5ed7cb415df19 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -72,8 +72,6 @@ from .tslibs.parsing import parse_datetime_string cimport cython -from pandas.compat import iteritems - import warnings import pytz @@ -86,7 +84,9 @@ import_array() cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT -from tslibs.timedeltas cimport parse_timedelta_string, cast_from_unit +from tslibs.timedeltas cimport cast_from_unit, _delta_to_nanoseconds +from tslibs.timedeltas import (Timedelta, convert_to_timedelta64, + _delta_to_nanoseconds, array_to_timedelta64) from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, @@ -1790,366 +1790,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return oresult -from tslibs.timedeltas cimport _Timedelta as __Timedelta - -# Similar to Timestamp/datetime, this is a construction requirement for -# timedeltas that we need to do object instantiation in python. This will -# serve as a C extension type that shadows the Python class, where we do any -# heavy lifting. -cdef class _Timedelta(__Timedelta): - - def __hash__(_Timedelta self): - if self._has_ns(): - return hash(self.value) - else: - return timedelta.__hash__(self) - - def __richcmp__(_Timedelta self, object other, int op): - cdef: - _Timedelta ots - int ndim - - if isinstance(other, _Timedelta): - ots = other - elif PyDelta_Check(other): - ots = Timedelta(other) - else: - ndim = getattr(other, _NDIM_STRING, -1) - - if ndim != -1: - if ndim == 0: - if is_timedelta64_object(other): - other = Timedelta(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - - # only allow ==, != ops - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, - type(other).__name__)) - if util.is_array(other): - return PyObject_RichCompare(np.array([self]), other, op) - return PyObject_RichCompare(other, self, reverse_ops[op]) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) - - return cmp_scalar(self.value, ots.value, op) - - -def _binary_op_method_timedeltalike(op, name): - # define a binary operation that only works if the other argument is - # timedelta like or an array of timedeltalike - def f(self, other): - # an offset - if hasattr(other, 'delta') and not isinstance(other, Timedelta): - return op(self, other.delta) - - # a datetimelike - if (isinstance(other, (datetime, np.datetime64)) - and not (isinstance(other, Timestamp) or other is NaT)): - return op(self, Timestamp(other)) - - # nd-array like - if hasattr(other, 'dtype'): - if other.dtype.kind not in ['m', 'M']: - # raise rathering than letting numpy return wrong answer - return NotImplemented - return op(self.to_timedelta64(), other) - - if not _validate_ops_compat(other): - return NotImplemented - - if other is NaT: - return NaT - - try: - other = Timedelta(other) - except ValueError: - # failed to parse as timedelta - return NotImplemented - - return Timedelta(op(self.value, other.value), unit='ns') - - f.__name__ = name - return f - - -def _op_unary_method(func, name): - - def f(self): - return Timedelta(func(self.value), unit='ns') - f.__name__ = name - return f - - -cdef bint _validate_ops_compat(other): - # return True if we are compat with operating - if _checknull_with_nat(other): - return True - elif PyDelta_Check(other) or is_timedelta64_object(other): - return True - elif util.is_string_object(other): - return True - elif hasattr(other, 'delta'): - return True - return False - - -# Python front end to C extension type _Timedelta -# This serves as the box for timedelta64 - - -class Timedelta(_Timedelta): - """ - Represents a duration, the difference between two dates or times. - - Timedelta is the pandas equivalent of python's ``datetime.timedelta`` - and is interchangable with it in most cases. - - Parameters - ---------- - value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, [D,h,m,s,ms,us,ns] - Denote the unit of the input, if input is an integer. Default 'ns'. - days, seconds, microseconds, - milliseconds, minutes, hours, weeks : numeric, optional - Values for construction in compat with datetime.timedelta. - np ints and floats will be coereced to python ints and floats. - - Notes - ----- - The ``.value`` attribute is always in ns. - - """ - - def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base - - if value is _no_input: - if not len(kwargs): - raise ValueError("cannot construct a Timedelta without a " - "value/unit or descriptive keywords " - "(days,seconds....)") - - def _to_py_int_float(v): - if is_integer_object(v): - return int(v) - elif is_float_object(v): - return float(v) - raise TypeError("Invalid type {0}. Must be int or " - "float.".format(type(v))) - - kwargs = dict([(k, _to_py_int_float(v)) - for k, v in iteritems(kwargs)]) - - try: - nano = kwargs.pop('nanoseconds', 0) - value = convert_to_timedelta64( - timedelta(**kwargs), 'ns') + nano - except TypeError as e: - raise ValueError("cannot construct a Timedelta from the " - "passed arguments, allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]") - - if isinstance(value, Timedelta): - value = value.value - elif is_string_object(value): - value = np.timedelta64(parse_timedelta_string(value)) - elif PyDelta_Check(value): - value = convert_to_timedelta64(value, 'ns') - elif is_timedelta64_object(value): - if unit is not None: - value = value.astype('timedelta64[{0}]'.format(unit)) - value = value.astype('timedelta64[ns]') - elif hasattr(value, 'delta'): - value = np.timedelta64(_delta_to_nanoseconds(value.delta), 'ns') - elif is_integer_object(value) or is_float_object(value): - # unit=None is de-facto 'ns' - value = convert_to_timedelta64(value, unit) - elif _checknull_with_nat(value): - return NaT - else: - raise ValueError("Value must be Timedelta, string, integer, " - "float, timedelta or convertible") - - if is_timedelta64_object(value): - value = value.view('i8') - - # nat - if value == NPY_NAT: - return NaT - - # make timedelta happy - td_base = _Timedelta.__new__(cls, microseconds=int(value) / 1000) - td_base.value = value - td_base.is_populated = 0 - return td_base - - def _round(self, freq, rounder): - - cdef int64_t result, unit - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - result = unit * rounder(self.value / float(unit)) - return Timedelta(result, unit='ns') - - def round(self, freq): - """ - Round the Timedelta to the specified resolution - - Returns - ------- - a new Timedelta rounded to the given resolution of `freq` - - Parameters - ---------- - freq : a freq string indicating the rounding resolution - - Raises - ------ - ValueError if the freq cannot be converted - """ - return self._round(freq, np.round) - - def floor(self, freq): - """ - return a new Timedelta floored to this resolution - - Parameters - ---------- - freq : a freq string indicating the flooring resolution - """ - return self._round(freq, np.floor) - - def ceil(self, freq): - """ - return a new Timedelta ceiled to this resolution - - Parameters - ---------- - freq : a freq string indicating the ceiling resolution - """ - return self._round(freq, np.ceil) - - def __setstate__(self, state): - (value) = state - self.value = value - - def __reduce__(self): - object_state = self.value, - return (Timedelta, object_state) - - __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') - __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') - __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') - __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') - - def __mul__(self, other): - - # nd-array like - if hasattr(other, 'dtype'): - return other * self.to_timedelta64() - - if other is NaT: - return NaT - - # only integers and floats allowed - if not (is_integer_object(other) or is_float_object(other)): - return NotImplemented - - return Timedelta(other * self.value, unit='ns') - - __rmul__ = __mul__ - - def __truediv__(self, other): - - if hasattr(other, 'dtype'): - return self.to_timedelta64() / other - - # integers or floats - if is_integer_object(other) or is_float_object(other): - return Timedelta(self.value /other, unit='ns') - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return np.nan - return self.value /float(other.value) - - def __rtruediv__(self, other): - if hasattr(other, 'dtype'): - return other / self.to_timedelta64() - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return NaT - return float(other.value) / self.value - - if not PY3: - __div__ = __truediv__ - __rdiv__ = __rtruediv__ - - def __floordiv__(self, other): - - if hasattr(other, 'dtype'): - - # work with i8 - other = other.astype('m8[ns]').astype('i8') - - return self.value // other - - # integers only - if is_integer_object(other): - return Timedelta(self.value // other, unit='ns') - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return np.nan - return self.value // other.value - - def __rfloordiv__(self, other): - if hasattr(other, 'dtype'): - - # work with i8 - other = other.astype('m8[ns]').astype('i8') - return other // self.value - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return NaT - return other.value // self.value - - __inv__ = _op_unary_method(lambda x: -x, '__inv__') - __neg__ = _op_unary_method(lambda x: -x, '__neg__') - __pos__ = _op_unary_method(lambda x: x, '__pos__') - __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') - - -# resolution in ns -Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) -Timedelta.max = Timedelta(np.iinfo(np.int64).max) - cdef PyTypeObject* td_type = Timedelta @@ -2157,122 +1797,9 @@ cdef inline bint is_timedelta(object o): return Py_TYPE(o) == td_type # isinstance(o, Timedelta) -cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): - """ - Convert an ndarray to an array of timedeltas. If errors == 'coerce', - coerce non-convertible objects to NaT. Otherwise, raise. - """ - - cdef: - Py_ssize_t i, n - ndarray[int64_t] iresult - - if errors not in ('ignore', 'raise', 'coerce'): - raise ValueError("errors must be one of 'ignore', " - "'raise', or 'coerce'}") - - n = values.shape[0] - result = np.empty(n, dtype='m8[ns]') - iresult = result.view('i8') - - # Usually, we have all strings. If so, we hit the fast path. - # If this path fails, we try conversion a different way, and - # this is where all of the error handling will take place. - try: - for i in range(n): - result[i] = parse_timedelta_string(values[i]) - except: - for i in range(n): - try: - result[i] = convert_to_timedelta64(values[i], unit) - except ValueError: - if errors == 'coerce': - result[i] = NPY_NAT - else: - raise - - return iresult - - -cpdef convert_to_timedelta64(object ts, object unit): - """ - Convert an incoming object to a timedelta64 if possible - - Handle these types of objects: - - timedelta/Timedelta - - timedelta64 - - an offset - - np.int64 (with unit providing a possible modifier) - - None/NaT - - Return an ns based int64 - - # kludgy here until we have a timedelta scalar - # handle the numpy < 1.7 case - """ - if _checknull_with_nat(ts): - return np.timedelta64(NPY_NAT) - elif isinstance(ts, Timedelta): - # already in the proper format - ts = np.timedelta64(ts.value) - elif is_datetime64_object(ts): - # only accept a NaT here - if ts.astype('int64') == NPY_NAT: - return np.timedelta64(NPY_NAT) - elif is_timedelta64_object(ts): - ts = ts.astype("m8[{0}]".format(unit.lower())) - elif is_integer_object(ts): - if ts == NPY_NAT: - return np.timedelta64(NPY_NAT) - else: - if util.is_array(ts): - ts = ts.astype('int64').item() - if unit in ['Y', 'M', 'W']: - ts = np.timedelta64(ts, unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) - elif is_float_object(ts): - if util.is_array(ts): - ts = ts.astype('int64').item() - if unit in ['Y', 'M', 'W']: - ts = np.timedelta64(int(ts), unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) - elif is_string_object(ts): - ts = np.timedelta64(parse_timedelta_string(ts)) - elif hasattr(ts, 'delta'): - ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') - - if PyDelta_Check(ts): - ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') - elif not is_timedelta64_object(ts): - raise ValueError("Invalid type for timedelta " - "scalar: %s" % type(ts)) - return ts.astype('timedelta64[ns]') - - # ---------------------------------------------------------------------- # Conversion routines -cpdef int64_t _delta_to_nanoseconds(delta) except? -1: - if util.is_array(delta): - return delta.astype('m8[ns]').astype('int64') - if hasattr(delta, 'nanos'): - return delta.nanos - if hasattr(delta, 'delta'): - delta = delta.delta - if is_timedelta64_object(delta): - return delta.astype("timedelta64[ns]").item() - if is_integer_object(delta): - return delta - - return (delta.days * 24 * 60 * 60 * 1000000 + - delta.seconds * 1000000 + - delta.microseconds) * 1000 - - def cast_to_nanoseconds(ndarray arr): cdef: Py_ssize_t i, n = arr.size diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 4dfd3f3e9eca5..ed23bd81292d6 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,13 +3,17 @@ from cpython.datetime cimport timedelta -from numpy cimport int64_t +from numpy cimport int64_t, ndarray # Exposed for tslib, not intended for outside use. cdef parse_timedelta_string(object ts) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 +cpdef int64_t _delta_to_nanoseconds(delta) except? -1 +cpdef convert_to_timedelta64(object ts, object unit) +cpdef array_to_timedelta64(ndarray[object] values, unit=*, errors=*) +# TODO: This may no longer need to be exposed cdef class _Timedelta(timedelta): cdef readonly: int64_t value # nanoseconds @@ -19,3 +23,4 @@ cdef class _Timedelta(timedelta): cpdef timedelta to_pytimedelta(_Timedelta self) cpdef bint _has_ns(self) + diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2f177868a6947..a2dbd2bd08e3b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -5,22 +5,31 @@ import collections import sys cdef bint PY3 = (sys.version_info[0] >= 3) -from cpython cimport PyUnicode_Check +from cython cimport Py_ssize_t + +from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as np -from numpy cimport int64_t +from numpy cimport int64_t, ndarray np.import_array() from cpython.datetime cimport (datetime, timedelta, - PyDelta_Check, PyDateTime_IMPORT) + PyDateTime_CheckExact, + PyDateTime_Check, PyDelta_Check, + PyDateTime_IMPORT) PyDateTime_IMPORT cimport util -from util cimport is_timedelta64_object +from util cimport (is_timedelta64_object, is_datetime64_object, + is_integer_object, is_float_object, + is_string_object) + +from np_datetime cimport cmp_scalar, reverse_ops -from nattype import nat_strings +from nattype import nat_strings, NaT +from nattype cimport _checknull_with_nat # ---------------------------------------------------------------------- # Constants @@ -29,6 +38,8 @@ cdef int64_t NPY_NAT = util.get_nat() cdef int64_t DAY_NS = 86400000000000LL +cdef str _NDIM_STRING = "ndim" + # components named tuple Components = collections.namedtuple('Components', [ 'days', 'hours', 'minutes', 'seconds', @@ -66,8 +77,122 @@ cdef dict timedelta_abbrevs = { 'D': 'd', 'nanos': 'ns', 'nanosecond': 'ns'} +_no_input = object() + # ---------------------------------------------------------------------- +cpdef int64_t _delta_to_nanoseconds(delta) except? -1: + if util.is_array(delta): + return delta.astype('m8[ns]').astype('int64') + if hasattr(delta, 'nanos'): + return delta.nanos + if hasattr(delta, 'delta'): + delta = delta.delta + if is_timedelta64_object(delta): + return delta.astype("timedelta64[ns]").item() + if is_integer_object(delta): + return delta + + return (delta.days * 24 * 60 * 60 * 1000000 + + delta.seconds * 1000000 + + delta.microseconds) * 1000 + + +cpdef convert_to_timedelta64(object ts, object unit): + """ + Convert an incoming object to a timedelta64 if possible + + Handle these types of objects: + - timedelta/Timedelta + - timedelta64 + - an offset + - np.int64 (with unit providing a possible modifier) + - None/NaT + + Return an ns based int64 + + # kludgy here until we have a timedelta scalar + # handle the numpy < 1.7 case + """ + if _checknull_with_nat(ts): + return np.timedelta64(NPY_NAT) + elif isinstance(ts, Timedelta): + # already in the proper format + ts = np.timedelta64(ts.value) + elif is_datetime64_object(ts): + # only accept a NaT here + if ts.astype('int64') == NPY_NAT: + return np.timedelta64(NPY_NAT) + elif is_timedelta64_object(ts): + ts = ts.astype("m8[{0}]".format(unit.lower())) + elif is_integer_object(ts): + if ts == NPY_NAT: + return np.timedelta64(NPY_NAT) + else: + if util.is_array(ts): + ts = ts.astype('int64').item() + if unit in ['Y', 'M', 'W']: + ts = np.timedelta64(ts, unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts) + elif is_float_object(ts): + if util.is_array(ts): + ts = ts.astype('int64').item() + if unit in ['Y', 'M', 'W']: + ts = np.timedelta64(int(ts), unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts) + elif is_string_object(ts): + ts = np.timedelta64(parse_timedelta_string(ts)) + elif hasattr(ts, 'delta'): + ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') + + if PyDelta_Check(ts): + ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') + elif not is_timedelta64_object(ts): + raise ValueError("Invalid type for timedelta " + "scalar: %s" % type(ts)) + return ts.astype('timedelta64[ns]') + + +cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): + """ + Convert an ndarray to an array of timedeltas. If errors == 'coerce', + coerce non-convertible objects to NaT. Otherwise, raise. + """ + + cdef: + Py_ssize_t i, n + ndarray[int64_t] iresult + + if errors not in ('ignore', 'raise', 'coerce'): + raise ValueError("errors must be one of 'ignore', " + "'raise', or 'coerce'}") + + n = values.shape[0] + result = np.empty(n, dtype='m8[ns]') + iresult = result.view('i8') + + # Usually, we have all strings. If so, we hit the fast path. + # If this path fails, we try conversion a different way, and + # this is where all of the error handling will take place. + try: + for i in range(n): + result[i] = parse_timedelta_string(values[i]) + except: + for i in range(n): + try: + result[i] = convert_to_timedelta64(values[i], unit) + except ValueError: + if errors == 'coerce': + result[i] = NPY_NAT + else: + raise + + return iresult + cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: """ return a casting of the unit represented to nanoseconds @@ -315,9 +440,110 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): n = ''.join(number) + '.' + ''.join(frac) return cast_from_unit(float(n), unit) + +# ---------------------------------------------------------------------- +# Timedelta ops utilities + +cdef bint _validate_ops_compat(other): + # return True if we are compat with operating + if _checknull_with_nat(other): + return True + elif PyDelta_Check(other) or is_timedelta64_object(other): + return True + elif is_string_object(other): + return True + elif hasattr(other, 'delta'): + return True + return False + + +def _op_unary_method(func, name): + def f(self): + return Timedelta(func(self.value), unit='ns') + f.__name__ = name + return f + + +def _binary_op_method_timedeltalike(op, name): + # define a binary operation that only works if the other argument is + # timedelta like or an array of timedeltalike + def f(self, other): + if hasattr(other, 'delta') and not PyDelta_Check(other): + # offsets.Tick + return op(self, other.delta) + + elif other is NaT: + return NaT + + elif is_datetime64_object(other): + from ..tslib import Timestamp + return op(self, Timestamp(other)) + # We are implicitly requiring the canonical behavior to be + # defined by Timestamp methods. + + elif PyDateTime_CheckExact(other): + # a datetimelike, but specifically not a Timestamp + # (Timestamp case will be deferred to after + # `_validate_ops_compat` returns False below) + from ..tslib import Timestamp + return op(self, Timestamp(other)) + + elif hasattr(other, 'dtype'): + # nd-array like + if other.dtype.kind not in ['m', 'M']: + # raise rathering than letting numpy return wrong answer + return NotImplemented + return op(self.to_timedelta64(), other) + + elif not _validate_ops_compat(other): + return NotImplemented + + try: + other = Timedelta(other) + except ValueError: + # failed to parse as timedelta + return NotImplemented + + return Timedelta(op(self.value, other.value), unit='ns') + + f.__name__ = name + return f + + # ---------------------------------------------------------------------- # Timedelta Construction +cdef _to_py_int_float(v): + # Note: This used to be defined inside _timedelta_value_kwargs + # (and Timedelta.__new__ before that), but cython + # will not allow dynamically-defined functions nested that way. + if is_integer_object(v): + return int(v) + elif is_float_object(v): + return float(v) + raise TypeError("Invalid type {0}. Must be int or " + "float.".format(type(v))) + + +cdef _timedelta_value_kwargs(dict kwargs): + # Helper for Timedelta.__new__ + if not len(kwargs): + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords (days,seconds....)") + + kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} + + nano = kwargs.pop('nanoseconds', 0) + try: + value = convert_to_timedelta64(timedelta(**kwargs), 'ns') + nano + except TypeError as e: + raise ValueError("cannot construct a Timedelta from the " + "passed arguments, allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]") + return value + + # Similar to Timestamp/datetime, this is a construction requirement for # timedeltas that we need to do object instantiation in python. This will # serve as a C extension type that shadows the Python class, where we do any @@ -332,6 +558,51 @@ cdef class _Timedelta(timedelta): # higher than np.ndarray and np.matrix __array_priority__ = 100 + def __hash__(_Timedelta self): + if self._has_ns(): + return hash(self.value) + else: + return timedelta.__hash__(self) + + def __richcmp__(_Timedelta self, object other, int op): + cdef: + _Timedelta ots + int ndim + + if isinstance(other, _Timedelta): + ots = other + elif PyDelta_Check(other): + ots = Timedelta(other) + else: + ndim = getattr(other, _NDIM_STRING, -1) + + if ndim != -1: + if ndim == 0: + if is_timedelta64_object(other): + other = Timedelta(other) + else: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + + # only allow ==, != ops + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, + type(other).__name__)) + if util.is_array(other): + return PyObject_RichCompare(np.array([self]), other, op) + return PyObject_RichCompare(other, self, reverse_ops[op]) + else: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, type(other).__name__)) + + return cmp_scalar(self.value, ots.value, op) + cpdef bint _has_ns(self): return self.value % 1000 != 0 @@ -621,3 +892,224 @@ cdef class _Timedelta(timedelta): tpl = 'P{td.days}DT{td.hours}H{td.minutes}M{seconds}S'.format( td=components, seconds=seconds) return tpl + + +# Python front end to C extension type _Timedelta +# This serves as the box for timedelta64 + +class Timedelta(_Timedelta): + """ + Represents a duration, the difference between two dates or times. + + Timedelta is the pandas equivalent of python's ``datetime.timedelta`` + and is interchangable with it in most cases. + + Parameters + ---------- + value : Timedelta, timedelta, np.timedelta64, string, or integer + unit : string, [D,h,m,s,ms,us,ns] + Denote the unit of the input, if input is an integer. Default 'ns'. + days, seconds, microseconds, + milliseconds, minutes, hours, weeks : numeric, optional + Values for construction in compat with datetime.timedelta. + np ints and floats will be coereced to python ints and floats. + + Notes + ----- + The ``.value`` attribute is always in ns. + + """ + def __new__(cls, object value=_no_input, unit=None, **kwargs): + cdef _Timedelta td_base + + if value is _no_input: + value = _timedelta_value_kwargs(kwargs) + + if isinstance(value, Timedelta): + value = value.value + elif util.is_string_object(value): + value = np.timedelta64(parse_timedelta_string(value)) + elif PyDelta_Check(value): + value = convert_to_timedelta64(value, 'ns') + elif is_timedelta64_object(value): + if unit is not None: + value = value.astype('timedelta64[{0}]'.format(unit)) + value = value.astype('timedelta64[ns]') + elif hasattr(value, 'delta'): + value = np.timedelta64(_delta_to_nanoseconds(value.delta), 'ns') + elif is_integer_object(value) or util.is_float_object(value): + # unit=None is de-facto 'ns' + value = convert_to_timedelta64(value, unit) + elif _checknull_with_nat(value): + return NaT + else: + raise ValueError( + "Value must be Timedelta, string, integer, " + "float, timedelta or convertible") + + if is_timedelta64_object(value): + value = value.view('i8') + + # nat + if value == NPY_NAT: + return NaT + + # make timedelta happy + td_base = _Timedelta.__new__(cls, microseconds=int(value) / 1000) + td_base.value = value + td_base.is_populated = 0 + return td_base + + def __setstate__(self, state): + (value) = state + self.value = value + + def __reduce__(self): + object_state = self.value, + return (Timedelta, object_state) + + def _round(self, freq, rounder): + cdef: + int64_t result, unit + + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + result = unit * rounder(self.value / float(unit)) + return Timedelta(result, unit='ns') + + def round(self, freq): + """ + Round the Timedelta to the specified resolution + + Returns + ------- + a new Timedelta rounded to the given resolution of `freq` + + Parameters + ---------- + freq : a freq string indicating the rounding resolution + + Raises + ------ + ValueError if the freq cannot be converted + """ + return self._round(freq, np.round) + + def floor(self, freq): + """ + return a new Timedelta floored to this resolution + + Parameters + ---------- + freq : a freq string indicating the flooring resolution + """ + return self._round(freq, np.floor) + + def ceil(self, freq): + """ + return a new Timedelta ceiled to this resolution + + Parameters + ---------- + freq : a freq string indicating the ceiling resolution + """ + return self._round(freq, np.ceil) + + # ---------------------------------------------------------------- + # Arithmetic Methods + # TODO: Can some of these be defined in the cython class? + + __inv__ = _op_unary_method(lambda x: -x, '__inv__') + __neg__ = _op_unary_method(lambda x: -x, '__neg__') + __pos__ = _op_unary_method(lambda x: x, '__pos__') + __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + + __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') + __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') + __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + + def __mul__(self, other): + if hasattr(other, 'dtype'): + # ndarray-like + return other * self.to_timedelta64() + + elif other is NaT: + return NaT + + elif not (is_integer_object(other) or is_float_object(other)): + # only integers and floats allowed + return NotImplemented + + return Timedelta(other * self.value, unit='ns') + + __rmul__ = __mul__ + + def __truediv__(self, other): + if hasattr(other, 'dtype'): + return self.to_timedelta64() / other + + elif is_integer_object(other) or is_float_object(other): + # integers or floats + return Timedelta(self.value / other, unit='ns') + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value / float(other.value) + + def __rtruediv__(self, other): + if hasattr(other, 'dtype'): + return other / self.to_timedelta64() + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return float(other.value) / self.value + + if not PY3: + __div__ = __truediv__ + __rdiv__ = __rtruediv__ + + def __floordiv__(self, other): + if hasattr(other, 'dtype'): + # work with i8 + other = other.astype('m8[ns]').astype('i8') + return self.value // other + + elif is_integer_object(other): + # integers only + return Timedelta(self.value // other, unit='ns') + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value // other.value + + def __rfloordiv__(self, other): + if hasattr(other, 'dtype'): + # work with i8 + other = other.astype('m8[ns]').astype('i8') + return other // self.value + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return other.value // self.value + + +# resolution in ns +Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) +Timedelta.max = Timedelta(np.iinfo(np.int64).max) From 60c9a9d6665f3c903022ef655932a6cb171d93bd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Nov 2017 22:04:53 -0700 Subject: [PATCH 02/11] whitespace fixup --- pandas/_libs/tslibs/timedeltas.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a2dbd2bd08e3b..1893d00e22688 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -529,7 +529,8 @@ cdef _timedelta_value_kwargs(dict kwargs): # Helper for Timedelta.__new__ if not len(kwargs): raise ValueError("cannot construct a Timedelta without a " - "value/unit or descriptive keywords (days,seconds....)") + "value/unit or descriptive keywords " + "(days,seconds....)") kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} From b59264068931b100f3ac395fb9a9b90fdb178454 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Nov 2017 08:08:24 -0700 Subject: [PATCH 03/11] fixes per reviewer comments --- pandas/_libs/tslibs/timedeltas.pxd | 13 ------------- pandas/_libs/tslibs/timedeltas.pyx | 14 +++++--------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index ed23bd81292d6..410e89120189a 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -11,16 +11,3 @@ cpdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t _delta_to_nanoseconds(delta) except? -1 cpdef convert_to_timedelta64(object ts, object unit) cpdef array_to_timedelta64(ndarray[object] values, unit=*, errors=*) - - -# TODO: This may no longer need to be exposed -cdef class _Timedelta(timedelta): - cdef readonly: - int64_t value # nanoseconds - object freq # frequency reference - bint is_populated # are my components populated - int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns - - cpdef timedelta to_pytimedelta(_Timedelta self) - cpdef bint _has_ns(self) - diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 1893d00e22688..46ac8d29d71fb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -475,19 +475,15 @@ def _binary_op_method_timedeltalike(op, name): elif other is NaT: return NaT - elif is_datetime64_object(other): + elif is_datetime64_object(other) or PyDateTime_CheckExact(other): + # the PyDateTime_CheckExact case is for a datetime object that + # is specifically *not* a Timestamp, as the Timestamp case will be + # handled after `_validate_ops_compat` returns False below from ..tslib import Timestamp return op(self, Timestamp(other)) # We are implicitly requiring the canonical behavior to be # defined by Timestamp methods. - elif PyDateTime_CheckExact(other): - # a datetimelike, but specifically not a Timestamp - # (Timestamp case will be deferred to after - # `_validate_ops_compat` returns False below) - from ..tslib import Timestamp - return op(self, Timestamp(other)) - elif hasattr(other, 'dtype'): # nd-array like if other.dtype.kind not in ['m', 'M']: @@ -516,7 +512,7 @@ def _binary_op_method_timedeltalike(op, name): cdef _to_py_int_float(v): # Note: This used to be defined inside _timedelta_value_kwargs # (and Timedelta.__new__ before that), but cython - # will not allow dynamically-defined functions nested that way. + # will not allow `cdef` functions to be defined dynamically. if is_integer_object(v): return int(v) elif is_float_object(v): From 73b5d2113c5d9e2c235829111a4418622d173d64 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Nov 2017 11:11:35 -0700 Subject: [PATCH 04/11] update imports of _delta_to_nanoseconds; de-privatize --- pandas/_libs/tslib.pyx | 6 +++--- pandas/_libs/tslibs/timedeltas.pxd | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 8 ++++---- pandas/core/indexes/datetimelike.py | 5 +++-- pandas/core/indexes/period.py | 11 ++++++----- pandas/core/resample.py | 6 +++--- pandas/tseries/offsets.py | 6 +++--- 7 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5ed7cb415df19..fc7b939e31682 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -84,9 +84,9 @@ import_array() cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT -from tslibs.timedeltas cimport cast_from_unit, _delta_to_nanoseconds +from tslibs.timedeltas cimport cast_from_unit, delta_to_nanoseconds from tslibs.timedeltas import (Timedelta, convert_to_timedelta64, - _delta_to_nanoseconds, array_to_timedelta64) + array_to_timedelta64) from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, @@ -1070,7 +1070,7 @@ cdef class _Timestamp(datetime): return Timestamp((self.freq * other).apply(self), freq=self.freq) elif PyDelta_Check(other) or hasattr(other, 'delta'): - nanos = _delta_to_nanoseconds(other) + nanos = delta_to_nanoseconds(other) result = Timestamp(self.value + nanos, tz=self.tzinfo, freq=self.freq) if getattr(other, 'normalize', False): diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 410e89120189a..3e7b88b208e89 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -8,6 +8,6 @@ from numpy cimport int64_t, ndarray # Exposed for tslib, not intended for outside use. cdef parse_timedelta_string(object ts) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 -cpdef int64_t _delta_to_nanoseconds(delta) except? -1 +cpdef int64_t delta_to_nanoseconds(delta) except? -1 cpdef convert_to_timedelta64(object ts, object unit) cpdef array_to_timedelta64(ndarray[object] values, unit=*, errors=*) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 46ac8d29d71fb..13353afe22acd 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -81,7 +81,7 @@ _no_input = object() # ---------------------------------------------------------------------- -cpdef int64_t _delta_to_nanoseconds(delta) except? -1: +cpdef int64_t delta_to_nanoseconds(delta) except? -1: if util.is_array(delta): return delta.astype('m8[ns]').astype('int64') if hasattr(delta, 'nanos'): @@ -147,10 +147,10 @@ cpdef convert_to_timedelta64(object ts, object unit): elif is_string_object(ts): ts = np.timedelta64(parse_timedelta_string(ts)) elif hasattr(ts, 'delta'): - ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') + ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') if PyDelta_Check(ts): - ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') + ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') elif not is_timedelta64_object(ts): raise ValueError("Invalid type for timedelta " "scalar: %s" % type(ts)) @@ -933,7 +933,7 @@ class Timedelta(_Timedelta): value = value.astype('timedelta64[{0}]'.format(unit)) value = value.astype('timedelta64[ns]') elif hasattr(value, 'delta'): - value = np.timedelta64(_delta_to_nanoseconds(value.delta), 'ns') + value = np.timedelta64(delta_to_nanoseconds(value.delta), 'ns') elif is_integer_object(value) or util.is_float_object(value): # unit=None is de-facto 'ns' value = convert_to_timedelta64(value, unit) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cc9361b550c5b..6da2e8231bd0a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -24,8 +24,9 @@ from pandas.core.common import AbstractMethodError import pandas.io.formats.printing as printing -from pandas._libs import (tslib as libts, lib, iNaT, NaT) +from pandas._libs import lib, iNaT, NaT from pandas._libs.period import Period +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly @@ -701,7 +702,7 @@ def _add_delta_td(self, other): # add a delta of a timedeltalike # return the i8 result view - inc = libts._delta_to_nanoseconds(other) + inc = delta_to_nanoseconds(other) new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view('i8') if self.hasnans: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 148ca2725fbdc..1b6f089978fb7 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -36,6 +36,7 @@ get_period_field_arr, _validate_end_alias, _quarter_to_myear) from pandas._libs.tslibs.fields import isleapyear_arr +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, _ensure_index @@ -646,10 +647,10 @@ def _maybe_convert_timedelta(self, other): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): if isinstance(other, np.ndarray): - nanos = np.vectorize(tslib._delta_to_nanoseconds)(other) + nanos = np.vectorize(delta_to_nanoseconds)(other) else: - nanos = tslib._delta_to_nanoseconds(other) - offset_nanos = tslib._delta_to_nanoseconds(offset) + nanos = delta_to_nanoseconds(other) + offset_nanos = delta_to_nanoseconds(offset) check = np.all(nanos % offset_nanos == 0) if check: return nanos // offset_nanos @@ -666,8 +667,8 @@ def _maybe_convert_timedelta(self, other): elif is_timedelta64_dtype(other): offset = frequencies.to_offset(self.freq) if isinstance(offset, offsets.Tick): - nanos = tslib._delta_to_nanoseconds(other) - offset_nanos = tslib._delta_to_nanoseconds(offset) + nanos = delta_to_nanoseconds(other) + offset_nanos = delta_to_nanoseconds(offset) if (nanos % offset_nanos).all() == 0: return nanos // offset_nanos elif is_integer(other): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 5a571f9077999..eeb6faf20ffce 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -13,7 +13,7 @@ from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds +from pandas.tseries.offsets import DateOffset, Tick, Day, delta_to_nanoseconds from pandas.core.indexes.period import PeriodIndex import pandas.core.common as com import pandas.core.algorithms as algos @@ -1186,7 +1186,7 @@ def _adjust_bin_edges(self, binner, ax_values): bin_edges = binner.asi8 if self.freq != 'D' and is_superperiod(self.freq, 'D'): - day_nanos = _delta_to_nanoseconds(timedelta(1)) + day_nanos = delta_to_nanoseconds(timedelta(1)) if self.closed == 'right': bin_edges = bin_edges + day_nanos - 1 @@ -1312,7 +1312,7 @@ def _get_range_edges(first, last, offset, closed='left', base=0): if isinstance(offset, Tick): is_day = isinstance(offset, Day) - day_nanos = _delta_to_nanoseconds(timedelta(1)) + day_nanos = delta_to_nanoseconds(timedelta(1)) # #1165 if (is_day and day_nanos % offset.nanos == 0) or not is_day: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 984670005a8c4..e05fd851294e6 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -14,7 +14,7 @@ from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta from pandas.util._decorators import cache_readonly -from pandas._libs.tslib import _delta_to_nanoseconds +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.offsets import ( ApplyTypeError, as_datetime, _is_normalized, @@ -2691,7 +2691,7 @@ def delta(self): @property def nanos(self): - return _delta_to_nanoseconds(self.delta) + return delta_to_nanoseconds(self.delta) def apply(self, other): # Timestamp can handle tz and nano sec, thus no need to use apply_wraps @@ -2736,7 +2736,7 @@ def _delta_to_tick(delta): else: return Second(seconds) else: - nanos = _delta_to_nanoseconds(delta) + nanos = delta_to_nanoseconds(delta) if nanos % 1000000 == 0: return Milli(nanos // 1000000) elif nanos % 1000 == 0: From 98e339a517b1d4a0bd398ef11dfaefd4e6821485 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Nov 2017 11:12:56 -0700 Subject: [PATCH 05/11] fixup uncomment --- pandas/_libs/tslibs/timedeltas.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 13353afe22acd..a96ff27d44875 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -546,11 +546,11 @@ cdef _timedelta_value_kwargs(dict kwargs): # serve as a C extension type that shadows the Python class, where we do any # heavy lifting. cdef class _Timedelta(timedelta): - # cdef readonly: - # int64_t value # nanoseconds - # object freq # frequency reference - # bint is_populated # are my components populated - # int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns + cdef readonly: + int64_t value # nanoseconds + object freq # frequency reference + bint is_populated # are my components populated + int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns # higher than np.ndarray and np.matrix __array_priority__ = 100 From cc1886b7d12d68f607f41556b76a7987f57cee35 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Nov 2017 13:14:22 -0700 Subject: [PATCH 06/11] update import --- pandas/_libs/period.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 0456033dbb731..40d970c7b20f2 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -30,6 +30,7 @@ from pandas._libs import tslib from pandas._libs.tslib import Timestamp, iNaT, NaT from tslibs.timezones cimport ( is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz) +from tslibs.timedeltas cimport delta_to_nanoseconds from tslibs.parsing import parse_time_string, NAT_SENTINEL from tslibs.frequencies cimport get_freq_code @@ -716,8 +717,8 @@ cdef class _Period(object): if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): - nanos = tslib._delta_to_nanoseconds(other) - offset_nanos = tslib._delta_to_nanoseconds(offset) + nanos = delta_to_nanoseconds(other) + offset_nanos = delta_to_nanoseconds(offset) if nanos % offset_nanos == 0: ordinal = self.ordinal + (nanos // offset_nanos) From 017683221e294514593e3e377ef37f16e7ad48b3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Nov 2017 19:16:23 -0700 Subject: [PATCH 07/11] whitespace fixup --- pandas/_libs/tslibs/timedeltas.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a96ff27d44875..14af3a7bf9f91 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -547,9 +547,9 @@ cdef _timedelta_value_kwargs(dict kwargs): # heavy lifting. cdef class _Timedelta(timedelta): cdef readonly: - int64_t value # nanoseconds - object freq # frequency reference - bint is_populated # are my components populated + int64_t value # nanoseconds + object freq # frequency reference + bint is_populated # are my components populated int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns # higher than np.ndarray and np.matrix From 5130c213658d271c96c8278193d7c7d2839dc8e4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 5 Nov 2017 08:39:51 -0800 Subject: [PATCH 08/11] add depends --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bd7c8f175607c..8055e494b14f6 100755 --- a/setup.py +++ b/setup.py @@ -525,6 +525,7 @@ def pxd(name): 'pyxfile': '_libs/period', 'pxdfiles': ['_libs/src/util', '_libs/lib', + '_libs/tslibs/timedeltas' '_libs/tslibs/timezones', '_libs/tslibs/nattype'], 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], @@ -586,7 +587,8 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.timedeltas': { 'pyxfile': '_libs/tslibs/timedeltas', - 'pxdfiles': ['_libs/src/util'], + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype'], 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.timezones': { From 967d60874a471f75d8369a144fb984f24ea26ba0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 5 Nov 2017 09:53:16 -0800 Subject: [PATCH 09/11] typo fixup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8055e494b14f6..ed8eda06c02f6 100755 --- a/setup.py +++ b/setup.py @@ -525,7 +525,7 @@ def pxd(name): 'pyxfile': '_libs/period', 'pxdfiles': ['_libs/src/util', '_libs/lib', - '_libs/tslibs/timedeltas' + '_libs/tslibs/timedeltas', '_libs/tslibs/timezones', '_libs/tslibs/nattype'], 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], From 9a465263f13c89fb032398a0eacfe81a6e728aa5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Nov 2017 16:25:16 -0800 Subject: [PATCH 10/11] edits per reviewer requests --- pandas/_libs/tslibs/timedeltas.pyx | 46 ++++++++++++------------------ 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 14af3a7bf9f91..623babe5422a8 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -38,8 +38,6 @@ cdef int64_t NPY_NAT = util.get_nat() cdef int64_t DAY_NS = 86400000000000LL -cdef str _NDIM_STRING = "ndim" - # components named tuple Components = collections.namedtuple('Components', [ 'days', 'hours', 'minutes', 'seconds', @@ -510,9 +508,8 @@ def _binary_op_method_timedeltalike(op, name): # Timedelta Construction cdef _to_py_int_float(v): - # Note: This used to be defined inside _timedelta_value_kwargs - # (and Timedelta.__new__ before that), but cython - # will not allow `cdef` functions to be defined dynamically. + # Note: This used to be defined inside Timedelta.__new__ + # but cython will not allow `cdef` functions to be defined dynamically. if is_integer_object(v): return int(v) elif is_float_object(v): @@ -521,26 +518,6 @@ cdef _to_py_int_float(v): "float.".format(type(v))) -cdef _timedelta_value_kwargs(dict kwargs): - # Helper for Timedelta.__new__ - if not len(kwargs): - raise ValueError("cannot construct a Timedelta without a " - "value/unit or descriptive keywords " - "(days,seconds....)") - - kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} - - nano = kwargs.pop('nanoseconds', 0) - try: - value = convert_to_timedelta64(timedelta(**kwargs), 'ns') + nano - except TypeError as e: - raise ValueError("cannot construct a Timedelta from the " - "passed arguments, allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]") - return value - - # Similar to Timestamp/datetime, this is a construction requirement for # timedeltas that we need to do object instantiation in python. This will # serve as a C extension type that shadows the Python class, where we do any @@ -571,7 +548,7 @@ cdef class _Timedelta(timedelta): elif PyDelta_Check(other): ots = Timedelta(other) else: - ndim = getattr(other, _NDIM_STRING, -1) + ndim = getattr(other, "ndim", -1) if ndim != -1: if ndim == 0: @@ -920,7 +897,22 @@ class Timedelta(_Timedelta): cdef _Timedelta td_base if value is _no_input: - value = _timedelta_value_kwargs(kwargs) + if not len(kwargs): + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords " + "(days,seconds....)") + + kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} + + nano = kwargs.pop('nanoseconds', 0) + try: + value = nano + convert_to_timedelta64(timedelta(**kwargs), + 'ns') + except TypeError as e: + raise ValueError("cannot construct a Timedelta from the " + "passed arguments, allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]") if isinstance(value, Timedelta): value = value.value From f5e8fe0d04e3511aeafc16f0024538f9f2b7ef52 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Nov 2017 21:44:40 -0800 Subject: [PATCH 11/11] update imports --- pandas/_libs/src/inference.pyx | 3 ++- pandas/_libs/tslib.pxd | 1 - pandas/_libs/tslib.pyx | 3 +-- pandas/core/indexes/timedeltas.py | 3 ++- pandas/core/tools/timedeltas.py | 8 +++++--- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index ec060335c220e..f2edf48a6b829 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -3,7 +3,8 @@ from decimal import Decimal cimport util cimport cython from tslibs.nattype import NaT -from tslib cimport convert_to_tsobject, convert_to_timedelta64 +from tslib cimport convert_to_tsobject +from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone from datetime import datetime, timedelta iNaT = util.get_nat() diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index a2477197faece..1c2c679904868 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -1,7 +1,6 @@ from numpy cimport ndarray, int64_t from tslibs.conversion cimport convert_to_tsobject -from tslibs.timedeltas cimport convert_to_timedelta64 cdef bint _check_all_nulls(obj) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c9cbe095aa984..6d793b6770113 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -84,8 +84,7 @@ cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT from tslibs.timedeltas cimport cast_from_unit, delta_to_nanoseconds -from tslibs.timedeltas import (Timedelta, convert_to_timedelta64, - array_to_timedelta64) +from tslibs.timedeltas import Timedelta from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 729edc81bb642..c9701d0d8dae8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -34,6 +34,7 @@ from pandas.tseries.offsets import Tick, DateOffset from pandas._libs import (lib, index as libindex, tslib as libts, join as libjoin, Timedelta, NaT, iNaT) +from pandas._libs.tslibs.timedeltas import array_to_timedelta64 def _td_index_cmp(opname, nat_result=False): @@ -286,7 +287,7 @@ def _box_func(self): def _simple_new(cls, values, name=None, freq=None, **kwargs): values = np.array(values, copy=False) if values.dtype == np.object_: - values = libts.array_to_timedelta64(values) + values = array_to_timedelta64(values) if values.dtype != _TD_DTYPE: values = _ensure_int64(values).view(_TD_DTYPE) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index f61d9f90d6ca2..94e2f2342bd51 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -5,6 +5,8 @@ import numpy as np import pandas as pd import pandas._libs.tslib as tslib +from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64, + array_to_timedelta64) from pandas.core.dtypes.common import ( _ensure_object, @@ -140,7 +142,7 @@ def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): """Convert string 'r' to a timedelta object.""" try: - result = tslib.convert_to_timedelta64(r, unit) + result = convert_to_timedelta64(r, unit) except ValueError: if errors == 'raise': raise @@ -169,8 +171,8 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): 'timedelta64[ns]', copy=False) else: try: - value = tslib.array_to_timedelta64(_ensure_object(arg), - unit=unit, errors=errors) + value = array_to_timedelta64(_ensure_object(arg), + unit=unit, errors=errors) value = value.astype('timedelta64[ns]', copy=False) except ValueError: if errors == 'ignore':