consolidate Timedelta creation

patrickmckenna · patrickmckenna · commit c9e209e525e7 · 2022-05-10T13:57:38.000-07:00
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -32,6 +32,7 @@ import_datetime()
 
 cimport pandas._libs.tslibs.util as util
 from pandas._libs cimport ops
+from pandas._libs.missing cimport C_NA
 from pandas._libs.tslibs.base cimport ABCTimestamp
 from pandas._libs.tslibs.conversion cimport (
     cast_from_unit,
@@ -308,21 +309,6 @@ cdef convert_to_timedelta64(object ts, str unit):
     return ts.astype("timedelta64[ns]")
 
 
-cpdef to_timedelta64(object value, str unit):
-    """
-    Wrapper around convert_to_timedelta64() that does overflow checks.
-    TODO: also construct non-nano
-    TODO: do all overflow-unsafe operations here
-    TODO: constrain unit to a more specific type
-    """
-    with cython.overflowcheck(True):
-        try:
-            return convert_to_timedelta64(value, unit)
-        except OverflowError as ex:
-            msg = f"{value} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]"
-            raise OutOfBoundsTimedelta(msg) from ex
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def array_to_timedelta64(
@@ -682,8 +668,7 @@ cdef bint _validate_ops_compat(other):
 
 def _op_unary_method(func, name):
     def f(self):
-        new_value = func(self.value)
-        return _timedelta_from_value_and_reso(new_value, self._reso)
+        return create_timedelta(func(self.value), "ignore", self._reso)
     f.__name__ = name
     return f
 
@@ -700,20 +685,6 @@ cpdef int64_t calc_int_int(object op, object a, object b) except? -1:
         raise OutOfBoundsTimedelta(msg) from ex
 
 
-cpdef int64_t calc_int_float(object op, object a, object b) except? -1:
-    """
-    Calculate op(int, double), raising if any of the following aren't safe conversions:
-      - a to int64_t
-      - b to double
-      - result to int64_t
-    """
-    try:
-        return ops.calc_int_float(op, a, b)
-    except OverflowError as ex:
-        msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]"
-        raise OutOfBoundsTimedelta(msg) from ex
-
-
 def _binary_op_method_timedeltalike(op, name):
     # define a binary operation that only works if the other argument is
     # timedelta like or an array of timedeltalike
@@ -758,10 +729,7 @@ def _binary_op_method_timedeltalike(op, name):
         if self._reso != other._reso:
             raise NotImplementedError
 
-        result = calc_int_int(op, self.value, other.value)
-        if result == NPY_NAT:
-            return NaT
-        return _timedelta_from_value_and_reso(result, self._reso)
+        return create_timedelta(op(self.value, other.value), "ignore", self._reso)
 
     f.__name__ = name
     return f
@@ -892,7 +860,7 @@ cdef _to_py_int_float(v):
 
 
 def _timedelta_unpickle(value, reso):
-    return _timedelta_from_value_and_reso(value, reso)
+    return create_timedelta(value, "ignore", reso)
 
 
 cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
@@ -923,6 +891,44 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
     return td_base
 
 
+@cython.overflowcheck(True)
+cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_reso):
+    """
+    Timedelta factory.
+
+    Timedelta.__new__ just does arg validation (at least currently). Also, some internal
+    functions expect to be able to create non-nano reso Timedeltas, but Timedelta.__new__
+    doesn't yet expose that.
+
+    _timedelta_from_value_and_reso does, but only accepts limited args, and doesn't check for overflow.
+    """
+    cdef:
+        int64_t out_value
+
+    if isinstance(value, _Timedelta):
+        return value
+    if value is C_NA:
+        raise ValueError("Not supported")
+
+    try:
+        # if unit == "ns", no need to create an m8[ns] just to read the (same) value back
+        # if unit == "ignore", assume caller wants to invoke an overflow-safe version of
+        # _timedelta_from_value_and_reso, and that any float rounding is acceptable
+        if (is_integer_object(value) or is_float_object(value)) and in_unit in ("ns", "ignore"):
+            if util.is_nan(value):
+                return NaT
+            out_value = <int64_t>value
+        else:
+            out_value = convert_to_timedelta64(value, in_unit).view(np.int64)
+    except OverflowError as ex:
+        msg = f"{value} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]"
+        raise OutOfBoundsTimedelta(msg) from ex
+
+    if out_value == NPY_NAT:
+        return NaT
+    return _timedelta_from_value_and_reso(out_value, out_reso)
+
+
 # Similar to Timestamp/datetime, this is a construction requirement for
 # timedeltas that we need to do object instantiation in python. This will
 # serve as a C extension type that shadows the Python class, where we do any
@@ -1406,7 +1412,7 @@ cdef class _Timedelta(timedelta):
     @classmethod
     def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
         # exposing as classmethod for testing
-        return _timedelta_from_value_and_reso(value, reso)
+        return create_timedelta(value, "ignore", reso)
 
 
 # Python front end to C extension type _Timedelta
@@ -1474,37 +1480,27 @@ class Timedelta(_Timedelta):
     )
 
     def __new__(cls, object value=_no_input, unit=None, **kwargs):
-        cdef _Timedelta td_base
-
-        if isinstance(value, _Timedelta):
-            return value
-        if checknull_with_nat(value):
-            return NaT
-
-        if unit in {"Y", "y", "M"}:
-            raise ValueError(
-                "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
-                "represent unambiguous timedelta values durations."
-            )
-        if isinstance(value, str) and unit is not None:
-            raise ValueError("unit must not be specified if the value is a str")
-        elif value is _no_input and not kwargs:
-            raise ValueError(
-                "cannot construct a Timedelta without a value/unit "
-                "or descriptive keywords (days,seconds....)"
-            )
-        if not kwargs.keys() <= set(cls._allowed_kwargs):
-            raise ValueError(
-                "cannot construct a Timedelta from the passed arguments, "
-                f"allowed keywords are {cls._allowed_kwargs}"
-            )
+        cdef:
+            _Timedelta td_base
+            NPY_DATETIMEUNIT out_reso = NPY_FR_ns
 
-        # GH43764, convert any input to nanoseconds first, to ensure any potential
-        # nanosecond contributions from kwargs parsed as floats are included
-        kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()})
-        if kwargs:
-            value = to_timedelta64(
-                sum((
+        # process kwargs iff no value passed
+        if value is _no_input:
+            if not kwargs:
+                raise ValueError(
+                    "cannot construct a Timedelta without a value/unit "
+                    "or descriptive keywords (days,seconds....)"
+                )
+            if not kwargs.keys() <= set(cls._allowed_kwargs):
+                raise ValueError(
+                    "cannot construct a Timedelta from the passed arguments, "
+                    f"allowed keywords are {cls._allowed_kwargs}"
+                )
+            # GH43764, convert any input to nanoseconds first, to ensure any potential
+            # nanosecond contributions from kwargs parsed as floats are included
+            kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()})
+            ns = sum(
+                (
                     kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000,
                     kwargs["days"] * 24 * 3600 * 1_000_000_000,
                     kwargs["hours"] * 3600 * 1_000_000_000,
@@ -1513,19 +1509,18 @@ class Timedelta(_Timedelta):
                     kwargs["milliseconds"] * 1_000_000,
                     kwargs["microseconds"] * 1_000,
                     kwargs["nanoseconds"],
-                )),
-                "ns",
+                )
             )
-        else:
-            if is_integer_object(value) or is_float_object(value):
-                unit = parse_timedelta_unit(unit)
-            else:
-                unit = "ns"
-            value = to_timedelta64(value, unit)
+            return create_timedelta(ns, "ns", out_reso)
 
-        if is_td64nat(value):
-            return NaT
-        return _timedelta_from_value_and_reso(value.view("i8"), NPY_FR_ns)
+        if isinstance(value, str) and unit is not None:
+            raise ValueError("unit must not be specified if the value is a str")
+        elif unit in {"Y", "y", "M"}:
+            raise ValueError(
+                "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
+                "represent unambiguous timedelta values durations."
+            )
+        return create_timedelta(value, parse_timedelta_unit(unit), out_reso)
 
     def __setstate__(self, state):
         if len(state) == 1:
@@ -1602,14 +1597,14 @@ class Timedelta(_Timedelta):
     # Arithmetic Methods
     # TODO: Can some of these be defined in the cython class?
 
-    __neg__ = _op_unary_method(lambda x: -x, '__neg__')
-    __pos__ = _op_unary_method(lambda x: x, '__pos__')
-    __abs__ = _op_unary_method(lambda x: abs(x), '__abs__')
+    __neg__ = _op_unary_method(operator.neg, "__neg__")
+    __pos__ = _op_unary_method(operator.pos, "__pos__")
+    __abs__ = _op_unary_method(operator.abs, "__abs__")
 
-    __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__')
-    __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__')
-    __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__')
-    __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__')
+    __add__ = _binary_op_method_timedeltalike(operator.add, "__add__")
+    __radd__ = _binary_op_method_timedeltalike(operator.add, "__radd__")
+    __sub__ = _binary_op_method_timedeltalike(operator.sub, "__sub__")
+    __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, "__rsub__")
 
     def __mul__(self, other):
         if util.is_nan(other):
@@ -1618,13 +1613,9 @@ class Timedelta(_Timedelta):
         if is_array(other):
             # ndarray-like
             return other * self.to_timedelta64()
-        if is_integer_object(other):
-            value = calc_int_int(operator.mul, self.value, other)
-            return _timedelta_from_value_and_reso(value, self._reso)
-        if is_float_object(other):
-            value = calc_int_float(operator.mul, self.value, other)
-            return _timedelta_from_value_and_reso(value, self._reso)
-
+        if is_integer_object(other) or is_float_object(other):
+            # can't call Timedelta b/c it doesn't (yet) expose reso
+            return create_timedelta(self.value * other, "ignore", self._reso)
         return NotImplemented
 
     __rmul__ = __mul__
diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py
@@ -339,7 +339,7 @@ def test_from_offset(self, tick_classes):
         offset = tick_classes(1)
         assert Timedelta(offset).value == offset.nanos
 
-    @pytest.mark.parametrize("td_unit", TD_UNITS)
+    @pytest.mark.parametrize("td_unit", chain.from_iterable(TD_UNITS))
     def test_from_td64_ignores_unit(self, td_unit: str, td_overflow_msg: str):
         """
         Ignore the unit, as it may cause silently overflows leading to incorrect
@@ -355,7 +355,6 @@ def test_from_td64_ignores_unit(self, td_unit: str, td_overflow_msg: str):
         ("args", "kwargs"),
         [
             ((), {}),
-            (("ps",), {}),
             (("ns",), {}),
             (("ms",), {}),
             ((), {"seconds": 3}),
@@ -367,8 +366,6 @@ def test_from_td_ignores_other_args(self, args: tuple, kwargs: dict):
         new = Timedelta(original, *args, **kwargs)
 
         assert new == original
-        if not any((args, kwargs)):
-            assert new is original
 
     def test_from_timedelta(self, timedelta_kwarg: str):
         kwargs = {timedelta_kwarg: 1}
@@ -601,11 +598,12 @@ def test_sub_preserves_reso(self, non_nano_td, non_nano_reso):
         assert res == expected
         assert res._reso == non_nano_reso
 
-    def test_mul_preserves_reso(self, non_nano_td, non_nano_reso):
+    @pytest.mark.parametrize("factor", (2, 2.5))
+    def test_mul_preserves_reso(self, non_nano_td, non_nano_reso, factor):
         # The non_nano_td fixture should always be far from the implementation
         #  bound, so doubling does not risk overflow.
-        res = non_nano_td * 2
-        assert res.value == non_nano_td.value * 2
+        res = non_nano_td * factor
+        assert res.value == non_nano_td.value * factor
         assert res._reso == non_nano_reso
 
     def test_cmp_cross_reso(self, non_nano_td):