diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 712119caae6f2..13fa33d5c3412 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -71,6 +71,7 @@ Performance Improvements - Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) - Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) +- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) - .. _whatsnew_0220.docs: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 87be9fa910101..c64b6568a0495 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4,7 +4,7 @@ cimport cython import time -from cpython.datetime cimport timedelta, time as dt_time +from cpython.datetime cimport datetime, timedelta, time as dt_time from dateutil.relativedelta import relativedelta @@ -13,9 +13,9 @@ cimport numpy as np np.import_array() -from util cimport is_string_object +from util cimport is_string_object, is_integer_object -from pandas._libs.tslib import pydt_to_i8 +from pandas._libs.tslib import pydt_to_i8, monthrange from frequencies cimport get_freq_code from conversion cimport tz_convert_single @@ -375,3 +375,56 @@ class BaseOffset(_BaseOffset): # i.e. isinstance(other, (ABCDatetimeIndex, ABCSeries)) return other - self return -self + other + + +# ---------------------------------------------------------------------- +# RelativeDelta Arithmetic + + +cpdef datetime shift_month(datetime stamp, int months, object day_opt=None): + """ + Given a datetime (or Timestamp) `stamp`, an integer `months` and an + option `day_opt`, return a new datetimelike that many months later, + with day determined by `day_opt` using relativedelta semantics. 
+ + Scalar analogue of tslib.shift_months + + Parameters + ---------- + stamp : datetime or Timestamp + months : int + day_opt : None, 'start', 'end', or an integer + None: returned datetimelike has the same day as the input, or the + last day of the month if the new month is too short + 'start': returned datetimelike has day=1 + 'end': returned datetimelike has day on the last day of the month + int: returned datetimelike has day=min(day_opt, days in month) + + Returns + ------- + shifted : datetime or Timestamp (same as input `stamp`) + """ + cdef: + int year, month, day + int dim, dy + + dy = (stamp.month + months) // 12 + month = (stamp.month + months) % 12 + + if month == 0:  # remainder 0 means December; `//` overshot by a year + month = 12 + dy -= 1 + year = stamp.year + dy + + dim = monthrange(year, month)[1]  # days in the target month + if day_opt is None: + day = min(stamp.day, dim) + elif day_opt == 'start': + day = 1 + elif day_opt == 'end': + day = dim + elif is_integer_object(day_opt): + day = min(day_opt, dim) + else: + raise ValueError(day_opt) + return stamp.replace(year=year, month=month, day=day) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 5d1ea71d5cef5..4dc26f4dd69e2 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -22,6 +22,7 @@ _int_to_weekday, _weekday_to_int, _determine_offset, apply_index_wraps, + shift_month, BeginMixin, EndMixin, BaseOffset) @@ -252,6 +253,8 @@ def apply_index(self, i): "applied vectorized".format(kwd=kwd)) def isAnchored(self): + # TODO: Does this make sense for the general case? It would help + # if there were a canonical docstring for what isAnchored means. 
return (self.n == 1) def _params(self): @@ -721,6 +724,7 @@ def apply(self, other): return result else: + # TODO: Figure out the end of this sente raise ApplyTypeError( 'Only know how to combine business hour with ') @@ -927,10 +931,10 @@ def apply(self, other): n = self.n _, days_in_month = tslib.monthrange(other.year, other.month) if other.day != days_in_month: - other = other + relativedelta(months=-1, day=31) + other = shift_month(other, -1, 'end') if n <= 0: n = n + 1 - other = other + relativedelta(months=n, day=31) + other = shift_month(other, n, 'end') return other @apply_index_wraps @@ -956,7 +960,7 @@ def apply(self, other): if other.day > 1 and n <= 0: # then roll forward if n<=0 n += 1 - return other + relativedelta(months=n, day=1) + return shift_month(other, n, 'start') @apply_index_wraps def apply_index(self, i): @@ -1002,12 +1006,12 @@ def apply(self, other): if not self.onOffset(other): _, days_in_month = tslib.monthrange(other.year, other.month) if 1 < other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n > 0: # rollforward so subtract 1 n -= 1 elif self.day_of_month < other.day < days_in_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n < 0: # rollforward in the negative direction so add 1 n += 1 @@ -1084,11 +1088,11 @@ def onOffset(self, dt): def _apply(self, n, other): # if other.day is not day_of_month move to day_of_month and update n if other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n > 0: n -= 1 elif other.day > self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n == 0: n = 1 else: @@ -1096,7 +1100,7 @@ def _apply(self, n, other): months = n // 2 day = 31 if n % 2 else self.day_of_month - return other + relativedelta(months=months, day=day) + return 
shift_month(other, months, day) def _get_roll(self, i, before_day_of_month, after_day_of_month): n = self.n @@ -1141,13 +1145,13 @@ def onOffset(self, dt): def _apply(self, n, other): # if other.day is not day_of_month move to day_of_month and update n if other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n == 0: n = -1 else: n -= 1 elif other.day > self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n == 0: n = 1 elif n < 0: @@ -1155,7 +1159,7 @@ def _apply(self, n, other): months = n // 2 + n % 2 day = 1 if n % 2 else self.day_of_month - return other + relativedelta(months=months, day=day) + return shift_month(other, months, day) def _get_roll(self, i, before_day_of_month, after_day_of_month): n = self.n @@ -1191,7 +1195,7 @@ def apply(self, other): n = n - 1 elif n <= 0 and other.day > lastBDay: n = n + 1 - other = other + relativedelta(months=n, day=31) + other = shift_month(other, n, 'end') if other.weekday() > 4: other = other - BDay() @@ -1215,7 +1219,7 @@ def apply(self, other): other = other + timedelta(days=first - other.day) n -= 1 - other = other + relativedelta(months=n) + other = shift_month(other, n, None) wkday, _ = tslib.monthrange(other.year, other.month) first = _get_firstbday(wkday) result = datetime(other.year, other.month, first, @@ -1520,8 +1524,7 @@ def apply(self, other): else: months = self.n + 1 - other = self.getOffsetOfMonth( - other + relativedelta(months=months, day=1)) + other = self.getOffsetOfMonth(shift_month(other, months, 'start')) other = datetime(other.year, other.month, other.day, base.hour, base.minute, base.second, base.microsecond) return other @@ -1612,8 +1615,7 @@ def apply(self, other): else: months = self.n + 1 - return self.getOffsetOfMonth( - other + relativedelta(months=months, day=1)) + return self.getOffsetOfMonth(shift_month(other, months, 'start')) def 
getOffsetOfMonth(self, dt): m = MonthEnd() @@ -1716,7 +1718,7 @@ def apply(self, other): elif n <= 0 and other.day > lastBDay and monthsToGo == 0: n = n + 1 - other = other + relativedelta(months=monthsToGo + 3 * n, day=31) + other = shift_month(other, monthsToGo + 3 * n, 'end') other = tslib._localize_pydatetime(other, base.tzinfo) if other.weekday() > 4: other = other - BDay() @@ -1761,7 +1763,7 @@ def apply(self, other): n = n - 1 # get the first bday for result - other = other + relativedelta(months=3 * n - monthsSince) + other = shift_month(other, 3 * n - monthsSince, None) wkday, _ = tslib.monthrange(other.year, other.month) first = _get_firstbday(wkday) result = datetime(other.year, other.month, first, @@ -1795,7 +1797,7 @@ def apply(self, other): if n > 0 and not (other.day >= days_in_month and monthsToGo == 0): n = n - 1 - other = other + relativedelta(months=monthsToGo + 3 * n, day=31) + other = shift_month(other, monthsToGo + 3 * n, 'end') return other @apply_index_wraps @@ -1830,7 +1832,7 @@ def apply(self, other): # after start, so come back an extra period as if rolled forward n = n + 1 - other = other + relativedelta(months=3 * n - monthsSince, day=1) + other = shift_month(other, 3 * n - monthsSince, 'start') return other @apply_index_wraps @@ -1889,7 +1891,7 @@ def apply(self, other): (other.month == self.month and other.day > lastBDay)): years += 1 - other = other + relativedelta(years=years) + other = shift_month(other, 12 * years, None) _, days_in_month = tslib.monthrange(other.year, self.month) result = datetime(other.year, self.month, days_in_month, @@ -1927,7 +1929,7 @@ def apply(self, other): years += 1 # set first bday for result - other = other + relativedelta(years=years) + other = shift_month(other, years * 12, None) wkday, days_in_month = tslib.monthrange(other.year, self.month) first = _get_firstbday(wkday) return datetime(other.year, self.month, first, other.hour, @@ -2145,8 +2147,8 @@ def onOffset(self, dt): if self.variation == 
"nearest": # We have to check the year end of "this" cal year AND the previous - return year_end == dt or \ - self.get_year_end(dt - relativedelta(months=1)) == dt + return (year_end == dt or + self.get_year_end(shift_month(dt, -1, None)) == dt) else: return year_end == dt @@ -2226,8 +2228,8 @@ def get_year_end(self, dt): def get_target_month_end(self, dt): target_month = datetime( dt.year, self.startingMonth, 1, tzinfo=dt.tzinfo) - next_month_first_of = target_month + relativedelta(months=+1) - return next_month_first_of + relativedelta(days=-1) + next_month_first_of = shift_month(target_month, 1, None) + return next_month_first_of + timedelta(days=-1) def _get_year_end_nearest(self, dt): target_date = self.get_target_month_end(dt) @@ -2382,7 +2384,7 @@ def apply(self, other): qtr_lens = self.get_weeks(other + self._offset) for weeks in qtr_lens: - start += relativedelta(weeks=weeks) + start += timedelta(weeks=weeks) if start > other: other = start n -= 1 @@ -2399,7 +2401,7 @@ def apply(self, other): qtr_lens = self.get_weeks(other) for weeks in reversed(qtr_lens): - end -= relativedelta(weeks=weeks) + end -= timedelta(weeks=weeks) if end < other: other = end n -= 1 @@ -2442,7 +2444,7 @@ def onOffset(self, dt): current = next_year_end for qtr_len in qtr_lens[0:4]: - current += relativedelta(weeks=qtr_len) + current += timedelta(weeks=qtr_len) if dt == current: return True return False