From 8569c7e9afc0c3286519a61d0c42c6539cd223c4 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 5 Jun 2021 11:23:03 -0700 Subject: [PATCH 1/3] REF: de-duplicate symmetric_difference --- pandas/core/indexes/base.py | 36 ++++++------------- pandas/core/indexes/category.py | 4 +++ pandas/core/indexes/interval.py | 5 --- pandas/core/indexes/multi.py | 4 +-- pandas/core/indexes/range.py | 12 ------- .../tests/indexes/categorical/test_equals.py | 9 +++++ 6 files changed, 25 insertions(+), 45 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 871c6a4a1c41d..f0c6a5998d108 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3196,6 +3196,10 @@ def difference(self, other, sort=None): # Note: we do not (yet) sort even if sort=None GH#24959 return self.rename(result_name) + if not self._should_compare(other): + # The difference is always going to be everything in self + return self.rename(result_name) + result = self._difference(other, sort=sort) return self._wrap_setop_result(other, result) @@ -3260,33 +3264,13 @@ def symmetric_difference(self, other, result_name=None, sort=None): if result_name is None: result_name = result_name_update - if not self._should_compare(other): - return self.union(other, sort=sort).rename(result_name) - elif not is_dtype_equal(self.dtype, other.dtype): - dtype = find_common_type([self.dtype, other.dtype]) - this = self.astype(dtype, copy=False) - that = other.astype(dtype, copy=False) - return this.symmetric_difference(that, sort=sort).rename(result_name) + left = self.difference(other, sort=False) + right = other.difference(self, sort=False) + result = left.union(right, sort=sort) - this = self._get_unique_index() - other = other._get_unique_index() - indexer = this.get_indexer_for(other) - - # {this} minus {other} - common_indexer = indexer.take((indexer != -1).nonzero()[0]) - left_indexer = np.setdiff1d( - np.arange(this.size), common_indexer, assume_unique=True - ) - left_diff = 
this._values.take(left_indexer) - - # {other} minus {this} - right_indexer = (indexer == -1).nonzero()[0] - right_diff = other._values.take(right_indexer) - - the_diff = concat_compat([left_diff, right_diff]) - the_diff = _maybe_try_sort(the_diff, sort) - - return Index(the_diff, name=result_name) + if result_name is not None: + result = result.rename(result_name) + return result @final def _assert_can_do_setop(self, other) -> bool: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 554cf33e22555..ab44ae832c791 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -269,6 +269,10 @@ def _is_dtype_compat(self, other) -> Categorical: raise TypeError( "categories must match existing categories when appending" ) + + elif other._is_multi: + # preempt raising NotImplementedError in isna call + raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex") else: values = other diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b906f88d98a46..9c40726cbbe17 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -169,11 +169,6 @@ def wrapped(self, other, sort=False): result = getattr(self.astype(object), op_name)(other, sort=sort) return result.astype(self.dtype) - elif not self._should_compare(other): - # GH#19016: ensure set op will not return a prohibited dtype - result = getattr(self.astype(object), op_name)(other, sort=sort) - return result.astype(self.dtype) - return method(self, other, sort) return wrapped diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6825ef4ba4198..a7a5323fee13f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3694,9 +3694,9 @@ def symmetric_difference(self, other, result_name=None, sort=None): return type(self)( levels=[[] for _ in range(self.nlevels)], codes=[[] for _ in range(self.nlevels)], - names=tups.name, + names=tups.names, ) - return 
type(self).from_tuples(tups, names=tups.name) + return tups # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ead1a2a4a544b..da6c061040282 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -730,18 +730,6 @@ def _difference(self, other, sort=None): new_index = new_index[::-1] return new_index - def symmetric_difference(self, other, result_name: Hashable = None, sort=None): - if not isinstance(other, RangeIndex) or sort is not None: - return super().symmetric_difference(other, result_name, sort) - - left = self.difference(other) - right = other.difference(self) - result = left.union(right) - - if result_name is not None: - result = result.rename(result_name) - return result - # -------------------------------------------------------------------- def _concat(self, indexes: list[Index], name: Hashable) -> Index: diff --git a/pandas/tests/indexes/categorical/test_equals.py b/pandas/tests/indexes/categorical/test_equals.py index 2648155c938b0..1ed8f3a903439 100644 --- a/pandas/tests/indexes/categorical/test_equals.py +++ b/pandas/tests/indexes/categorical/test_equals.py @@ -5,6 +5,7 @@ Categorical, CategoricalIndex, Index, + MultiIndex, ) @@ -79,3 +80,11 @@ def test_equals_non_category(self): other = Index(["A", "B", "D", np.nan]) assert not ci.equals(other) + + def test_equals_multiindex(self): + # don't raise NotImplementedError when calling _is_dtype_compat + + mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)]) + ci = mi.to_flat_index().astype("category") + + assert not ci.equals(mi) From 471fba0d13ef0e411a158361aa36deddf91441d7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 5 Jun 2021 12:52:50 -0700 Subject: [PATCH 2/3] REF: share DatetimeIndex._union --- pandas/core/indexes/datetimelike.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py 
b/pandas/core/indexes/datetimelike.py index 19167677257f7..06a3a427e54cf 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -60,7 +60,6 @@ inherit_names, make_wrapped_arith_op, ) -from pandas.core.indexes.numeric import Int64Index from pandas.core.tools.timedeltas import to_timedelta if TYPE_CHECKING: @@ -782,11 +781,7 @@ def _union(self, other, sort): # that result.freq == self.freq return result else: - i8self = Int64Index._simple_new(self.asi8) - i8other = Int64Index._simple_new(other.asi8) - i8result = i8self._union(i8other, sort=sort) - result = type(self)(i8result, dtype=self.dtype, freq="infer") - return result + return super()._union(other, sort=sort)._with_freq("infer") # -------------------------------------------------------------------- # Join Methods From 9dba7da866ae93f6ad36f3f5d8444677105cc993 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Jun 2021 17:04:13 -0700 Subject: [PATCH 3/3] dummy commit to force CI