diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 35785613fb1e2..601eb782653f9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -190,6 +190,7 @@ Datetimelike - Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) - Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`) - Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`) +- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) Timedelta ^^^^^^^^^ @@ -221,7 +222,7 @@ Strings Interval ^^^^^^^^ -- Bug in :meth:`IntervalIndex.intersection` always returning object-dtype when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e425ee1a78de5..275c977e9b37b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2606,6 +2606,7 @@ def _validate_sort_keyword(self, sort): f"None or False; {sort} was passed." ) + @final def union(self, other, sort=None): """ Form the union of two Index objects. @@ -2818,6 +2819,7 @@ def _wrap_setop_result(self, other, result): return self._shallow_copy(result, name=name) # TODO: standardize return type of non-union setops type(self vs other) + @final def intersection(self, other, sort=False): """ Form the intersection of two Index objects. @@ -3035,9 +3037,17 @@ def symmetric_difference(self, other, result_name=None, sort=None): if result_name is None: result_name = result_name_update + if not self._should_compare(other): + return self.union(other).rename(result_name) + elif not is_dtype_equal(self.dtype, other.dtype): + dtype = find_common_type([self.dtype, other.dtype]) + this = self.astype(dtype, copy=False) + that = other.astype(dtype, copy=False) + return this.symmetric_difference(that, sort=sort).rename(result_name) + this = self._get_unique_index() other = other._get_unique_index() - indexer = this.get_indexer(other) + indexer = this.get_indexer_for(other) # {this} minus {other} common_indexer = indexer.take((indexer != -1).nonzero()[0]) @@ -3057,7 +3067,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): except TypeError: pass - return Index(the_diff, dtype=self.dtype, name=result_name) + return Index(the_diff, name=result_name) def _assert_can_do_setop(self, other): if not is_list_like(other): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1b3c4cfcccd2b..824d78d1a8d05 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1016,7 +1016,6 @@ def func(self, other, sort=sort): _union = _setop("union") difference = _setop("difference") - symmetric_difference = _setop("symmetric_difference") # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8df7e6912b1b2..7746d7e617f8b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -14,10 +14,8 @@ from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_any_dtype, - is_dtype_equal, is_float, is_integer, - is_object_dtype, is_scalar, pandas_dtype, ) @@ -635,16 +633,6 @@ def _setop(self, other, sort, opname: str): def _intersection(self, other, sort=False): return self._setop(other, sort, opname="intersection") - def _difference(self, other, sort): - - if is_object_dtype(other): - return self.astype(object).difference(other).astype(self.dtype) - - elif not is_dtype_equal(self.dtype, other.dtype): - return self - - return self._setop(other, sort, opname="difference") - def _union(self, other, sort): return self._setop(other, sort, opname="_union") diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index 7bfe81e0645cb..4b7901407d94a 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ b/pandas/tests/indexes/interval/test_setops.py @@ -158,6 +158,7 @@ def test_symmetric_difference(self, closed, sort): index.left.astype("float64"), index.right, closed=closed ) result = index.symmetric_difference(other, sort=sort) + expected = empty_index(dtype="float64", closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 538e937703de6..1035ac1f0e60b 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -248,13 +248,14 @@ def test_symmetric_difference(self, index): # GH#10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: + result = first.symmetric_difference(case) + if is_datetime64tz_dtype(first): - with pytest.raises(ValueError, match="Tz-aware"): - # `second.values` casts to tznaive - # TODO: should the symmetric_difference then be the union? - first.symmetric_difference(case) + # second.values casts to tznaive + expected = first.union(case) + tm.assert_index_equal(result, expected) continue - result = first.symmetric_difference(case) + assert tm.equalContents(result, answer) if isinstance(index, MultiIndex): @@ -448,7 +449,9 @@ def test_intersection_difference_match_empty(self, index, sort): tm.assert_index_equal(inter, diff, exact=True) -@pytest.mark.parametrize("method", ["intersection", "union"]) +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) def test_setop_with_categorical(index, sort, method): if isinstance(index, MultiIndex): # tested separately in tests.indexes.multi.test_setops