Skip to content

Commit ba6b4eb

Browse files
authored
REF: dispatch DTI/TDI setops to RangeIndex (#44039)
1 parent 445bb9f commit ba6b4eb

File tree

2 files changed

+62
-18
lines changed

2 files changed

+62
-18
lines changed

pandas/core/indexes/datetimelike.py

+58-17
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
Resolution,
2929
Tick,
3030
parsing,
31+
to_offset,
3132
)
3233
from pandas.compat.numpy import function as nv
3334
from pandas.util._decorators import (
@@ -61,6 +62,7 @@
6162
NDArrayBackedExtensionIndex,
6263
inherit_names,
6364
)
65+
from pandas.core.indexes.range import RangeIndex
6466
from pandas.core.tools.timedeltas import to_timedelta
6567

6668
if TYPE_CHECKING:
@@ -433,12 +435,61 @@ def values(self) -> np.ndarray:
433435
# --------------------------------------------------------------------
434436
# Set Operation Methods
435437

438+
@cache_readonly
def _as_range_index(self) -> RangeIndex:
    """
    Express this index's i8 representation as a RangeIndex.

    The caller is responsible for checking isinstance(self.freq, Tick);
    that check is not repeated here.
    """
    step = cast(Tick, self.freq).delta.value
    first = self[0].value
    # range() excludes its stop, so extend one step past the last element
    # to keep that element in the result.
    stop = self[-1].value + step
    return RangeIndex(range(first, stop, step))
446+
447+
def _can_range_setop(self, other):
    """
    Report whether a set operation with ``other`` may use the RangeIndex path.

    Returns True only when both ``self.freq`` and ``other.freq`` are Tick
    instances. ``other.freq`` is not inspected if ``self.freq`` already fails.
    """
    return all(isinstance(idx.freq, Tick) for idx in (self, other))
449+
450+
def _wrap_range_setop(self, other, res_i8):
    """
    Turn the integer result of a RangeIndex set operation back into our type.

    Parameters
    ----------
    other
        The other operand of the set operation; only forwarded to
        ``self._wrap_setop_result``.
    res_i8
        Result of the RangeIndex set operation. It may or may not still be
        a RangeIndex.

    Returns
    -------
    The value returned by ``self._wrap_setop_result(other, result)``.
    """
    new_freq = None
    if not len(res_i8):
        # RangeIndex defaults to step=1, which we don't want.
        new_freq = self.freq
    elif isinstance(res_i8, RangeIndex):
        # A RangeIndex result has a constant step, which becomes the freq.
        new_freq = to_offset(Timedelta(res_i8.step))
    # Otherwise the result is not a RangeIndex, so no freq is attached.

    # TODO: we cannot just do
    #  type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
    # because test_setops_preserve_freq fails with _validate_frequency raising.
    # This raising is incorrect, as 'on_freq' is incorrect. This will
    # be fixed by GH#41493
    res_values = res_i8.values.view(self._data._ndarray.dtype)
    result = type(self._data)._simple_new(
        res_values, dtype=self.dtype, freq=new_freq
    )
    return self._wrap_setop_result(other, result)
469+
470+
def _range_intersect(self, other, sort):
    """
    Intersect with ``other`` by delegating to RangeIndex.intersection.

    Both operands are read through ``_as_range_index``; the integer result
    is converted back via ``self._wrap_range_setop``.
    """
    ours = self._as_range_index
    theirs = other._as_range_index
    return self._wrap_range_setop(other, ours.intersection(theirs, sort=sort))
476+
477+
def _range_union(self, other, sort):
    """
    Union with ``other`` by delegating to RangeIndex.union.

    Both operands are read through ``_as_range_index``; the integer result
    is converted back via ``self._wrap_range_setop``.
    """
    ours = self._as_range_index
    theirs = other._as_range_index
    return self._wrap_range_setop(other, ours.union(theirs, sort=sort))
483+
436484
def _intersection(self, other: Index, sort=False) -> Index:
437485
"""
438486
intersection specialized to the case with matching dtypes and both non-empty.
439487
"""
440488
other = cast("DatetimeTimedeltaMixin", other)
441489

490+
if self._can_range_setop(other):
491+
return self._range_intersect(other, sort=sort)
492+
442493
if not self._can_fast_intersect(other):
443494
result = Index._intersection(self, other, sort=sort)
444495
# We need to invalidate the freq because Index._intersection
@@ -453,7 +504,6 @@ def _intersection(self, other: Index, sort=False) -> Index:
453504
return self._fast_intersect(other, sort)
454505

455506
def _fast_intersect(self, other, sort):
456-
457507
# to make our life easier, "sort" the two ranges
458508
if self[0] <= other[0]:
459509
left, right = self, other
@@ -485,19 +535,9 @@ def _can_fast_intersect(self: _T, other: _T) -> bool:
485535
# Because freq is not None, we must then be monotonic decreasing
486536
return False
487537

488-
elif self.freq.is_anchored():
489-
# this along with matching freqs ensure that we "line up",
490-
# so intersection will preserve freq
491-
# GH#42104
492-
return self.freq.n == 1
493-
494-
elif isinstance(self.freq, Tick):
495-
# We "line up" if and only if the difference between two of our points
496-
# is a multiple of our freq
497-
diff = self[0] - other[0]
498-
remainder = diff % self.freq.delta
499-
return remainder == Timedelta(0)
500-
538+
# this along with matching freqs ensure that we "line up",
539+
# so intersection will preserve freq
540+
# Note we are assuming away Ticks, as those go through _range_intersect
501541
# GH#42104
502542
return self.freq.n == 1
503543

@@ -516,6 +556,7 @@ def _can_fast_union(self: _T, other: _T) -> bool:
516556
return False
517557

518558
if len(self) == 0 or len(other) == 0:
559+
# only reached via union_many
519560
return True
520561

521562
# to make our life easier, "sort" the two ranges
@@ -544,10 +585,7 @@ def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT:
544585
loc = right.searchsorted(left_start, side="left")
545586
right_chunk = right._values[:loc]
546587
dates = concat_compat((left._values, right_chunk))
547-
# With sort being False, we can't infer that result.freq == self.freq
548-
# TODO: no tests rely on the _with_freq("infer"); needed?
549588
result = type(self)._simple_new(dates, name=self.name)
550-
result = result._with_freq("infer")
551589
return result
552590
else:
553591
left, right = other, self
@@ -573,6 +611,9 @@ def _union(self, other, sort):
573611
assert isinstance(other, type(self))
574612
assert self.dtype == other.dtype
575613

614+
if self._can_range_setop(other):
615+
return self._range_union(other, sort=sort)
616+
576617
if self._can_fast_union(other):
577618
result = self._fast_union(other, sort=sort)
578619
# in the case with sort=None, the _can_fast_union check ensures

pandas/core/indexes/range.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -562,8 +562,11 @@ def _intersection(self, other: Index, sort=False):
562562

563563
if (self.step < 0 and other.step < 0) is not (new_index.step < 0):
564564
new_index = new_index[::-1]
565+
565566
if sort is None:
566-
new_index = new_index.sort_values()
567+
# TODO: can revert to just `if sort is None` after GH#43666
568+
if new_index.step < 0:
569+
new_index = new_index[::-1]
567570

568571
return new_index
569572

0 commit comments

Comments
 (0)