78
78
LossySetitemError ,
79
79
can_hold_element ,
80
80
common_dtype_categorical_compat ,
81
- ensure_dtype_can_hold_na ,
82
81
find_result_type ,
83
82
infer_dtype_from ,
84
83
maybe_cast_pointwise_result ,
@@ -351,6 +350,7 @@ def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
351
350
# can_use_libjoin assures sv and ov are ndarrays
352
351
sv = cast (np .ndarray , sv )
353
352
ov = cast (np .ndarray , ov )
353
+ # similar but not identical to ov.searchsorted(sv)
354
354
return libjoin .left_join_indexer_unique (sv , ov )
355
355
356
356
@final
@@ -3128,7 +3128,7 @@ def union(self, other, sort=None):
3128
3128
if not is_dtype_equal (self .dtype , other .dtype ):
3129
3129
if (
3130
3130
isinstance (self , ABCMultiIndex )
3131
- and not is_object_dtype (unpack_nested_dtype (other ))
3131
+ and not is_object_dtype (_unpack_nested_dtype (other ))
3132
3132
and len (other ) > 0
3133
3133
):
3134
3134
raise NotImplementedError (
@@ -3209,6 +3209,8 @@ def _union(self, other: Index, sort):
3209
3209
result_dups = algos .union_with_duplicates (self , other )
3210
3210
return _maybe_try_sort (result_dups , sort )
3211
3211
3212
+ # The rest of this method is analogous to Index._intersection_via_get_indexer
3213
+
3212
3214
# Self may have duplicates; other already checked as unique
3213
3215
# find indexes of things in "other" that are not in "self"
3214
3216
if self ._index_as_unique :
@@ -3796,7 +3798,7 @@ def _should_partial_index(self, target: Index) -> bool:
3796
3798
return False
3797
3799
# See https://github.com/pandas-dev/pandas/issues/47772 the commented
3798
3800
# out code can be restored (instead of hardcoding `return True`)
3799
- # once that issue if fixed
3801
+ # once that issue is fixed
3800
3802
# "Index" has no attribute "left"
3801
3803
# return self.left._should_compare(target) # type: ignore[attr-defined]
3802
3804
return True
@@ -4774,6 +4776,9 @@ def _join_monotonic(
4774
4776
assert other .dtype == self .dtype
4775
4777
4776
4778
if self .equals (other ):
4779
+ # This is a convenient place for this check, but its correctness
4780
+ # does not depend on monotonicity, so it could go earlier
4781
+ # in the calling method.
4777
4782
ret_index = other if how == "right" else self
4778
4783
return ret_index , None , None
4779
4784
@@ -5758,6 +5763,9 @@ def get_indexer_non_unique(
5758
5763
that = target .astype (dtype , copy = False )
5759
5764
return this .get_indexer_non_unique (that )
5760
5765
5766
+ # TODO: get_indexer has fastpaths for both Categorical-self and
5767
+ # Categorical-target. Can we do something similar here?
5768
+
5761
5769
# Note: _maybe_promote ensures we never get here with MultiIndex
5762
5770
# self and non-Multi target
5763
5771
tgt_values = target ._get_engine_target ()
@@ -5918,7 +5926,7 @@ def _get_indexer_non_comparable(
5918
5926
If doing an inequality check, i.e. method is not None.
5919
5927
"""
5920
5928
if method is not None :
5921
- other = unpack_nested_dtype (target )
5929
+ other = _unpack_nested_dtype (target )
5922
5930
raise TypeError (f"Cannot compare dtypes { self .dtype } and { other .dtype } " )
5923
5931
5924
5932
no_matches = - 1 * np .ones (target .shape , dtype = np .intp )
@@ -5994,16 +6002,6 @@ def _find_common_type_compat(self, target) -> DtypeObj:
5994
6002
Implementation of find_common_type that adjusts for Index-specific
5995
6003
special cases.
5996
6004
"""
5997
- if is_valid_na_for_dtype (target , self .dtype ):
5998
- # e.g. setting NA value into IntervalArray[int64]
5999
- dtype = ensure_dtype_can_hold_na (self .dtype )
6000
- if is_dtype_equal (self .dtype , dtype ):
6001
- raise NotImplementedError (
6002
- "This should not be reached. Please report a bug at "
6003
- "github.com/pandas-dev/pandas"
6004
- )
6005
- return dtype
6006
-
6007
6005
target_dtype , _ = infer_dtype_from (target , pandas_dtype = True )
6008
6006
6009
6007
# special case: if one dtype is uint64 and the other a signed int, return object
@@ -6036,7 +6034,7 @@ def _should_compare(self, other: Index) -> bool:
6036
6034
# respectively.
6037
6035
return False
6038
6036
6039
- other = unpack_nested_dtype (other )
6037
+ other = _unpack_nested_dtype (other )
6040
6038
dtype = other .dtype
6041
6039
return self ._is_comparable_dtype (dtype ) or is_object_dtype (dtype )
6042
6040
@@ -6048,6 +6046,8 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
6048
6046
return dtype .kind == "b"
6049
6047
elif is_numeric_dtype (self .dtype ):
6050
6048
return is_numeric_dtype (dtype )
6049
+ # TODO: this was written assuming we only get here with object-dtype,
6050
+ # which is no longer correct. Can we specialize for EA?
6051
6051
return True
6052
6052
6053
6053
@final
@@ -7135,7 +7135,7 @@ def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
7135
7135
return names
7136
7136
7137
7137
7138
- def unpack_nested_dtype (other : _IndexT ) -> _IndexT :
7138
+ def _unpack_nested_dtype (other : Index ) -> Index :
7139
7139
"""
7140
7140
When checking if our dtype is comparable with another, we need
7141
7141
to unpack CategoricalDtype to look at its categories.dtype.
@@ -7149,12 +7149,10 @@ def unpack_nested_dtype(other: _IndexT) -> _IndexT:
7149
7149
Index
7150
7150
"""
7151
7151
dtype = other .dtype
7152
- if is_categorical_dtype (dtype ):
7152
+ if isinstance (dtype , CategoricalDtype ):
7153
7153
# If there is ever a SparseIndex, this could get dispatched
7154
7154
# here too.
7155
- # error: Item "dtype[Any]"/"ExtensionDtype" of "Union[dtype[Any],
7156
- # ExtensionDtype]" has no attribute "categories"
7157
- return dtype .categories # type: ignore[union-attr]
7155
+ return dtype .categories
7158
7156
return other
7159
7157
7160
7158
0 commit comments