diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 2bc7e13e39ec4..4493ddd0b2822 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1594,19 +1594,16 @@ See :ref:`Advanced Indexing ` for usage of MultiIndexes. Set operations on Index objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The two main operations are ``union (|)`` and ``intersection (&)``. -These can be directly called as instance methods or used via overloaded -operators. Difference is provided via the ``.difference()`` method. +The two main operations are ``union`` and ``intersection``. +Difference is provided via the ``.difference()`` method. .. ipython:: python a = pd.Index(['c', 'b', 'a']) b = pd.Index(['c', 'e', 'd']) - a | b - a & b a.difference(b) -Also available is the ``symmetric_difference (^)`` operation, which returns elements +Also available is the ``symmetric_difference`` operation, which returns elements that appear in either ``idx1`` or ``idx2``, but not in both. This is equivalent to the Index created by ``idx1.difference(idx2).union(idx2.difference(idx1))``, with duplicates dropped. @@ -1616,7 +1613,6 @@ with duplicates dropped. idx1 = pd.Index([1, 2, 3, 4]) idx2 = pd.Index([2, 3, 4, 5]) idx1.symmetric_difference(idx2) - idx1 ^ idx2 .. note:: @@ -1631,7 +1627,7 @@ integer values are converted to float idx1 = pd.Index([0, 1, 2]) idx2 = pd.Index([0.5, 1.5]) - idx1 | idx2 + idx1.union(idx2) .. _indexing.missing: diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index e6d06aa6bd1a0..1621b37f31b23 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -466,7 +466,7 @@ at the new values. 
ser = pd.Series(np.sort(np.random.uniform(size=100))) # interpolate at new_index - new_index = ser.index | pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]) + new_index = ser.index.union(pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])) interp_s = ser.reindex(new_index).interpolate(method="pchip") interp_s[49:51] diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 84f594acf5e4c..aaa39e26f4359 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -338,6 +338,7 @@ Deprecations - Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`) - :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`) - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`) +- :class:`Index` operators ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1938722225b98..e4b2bcb3e7a48 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2512,14 +2512,35 @@ def __iadd__(self, other): @final def __and__(self, other): + warnings.warn( + "Index.__and__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__and__. 
Use index.intersection(other) instead", + FutureWarning, + stacklevel=2, + ) return self.intersection(other) @final def __or__(self, other): + warnings.warn( + "Index.__or__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__or__. Use index.union(other) instead", + FutureWarning, + stacklevel=2, + ) return self.union(other) @final def __xor__(self, other): + warnings.warn( + "Index.__xor__ operating as a set operation is deprecated, " + "in the future this will be a logical operation matching " + "Series.__xor__. Use index.symmetric_difference(other) instead", + FutureWarning, + stacklevel=2, + ) return self.symmetric_difference(other) @final diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index bdd3afe747d1d..29a489866d111 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3139,12 +3139,12 @@ def _convert_to_indexer(r) -> Int64Index: r = r.nonzero()[0] return Int64Index(r) - def _update_indexer(idxr, indexer=indexer): + def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index: if indexer is None: indexer = Index(np.arange(n)) if idxr is None: return indexer - return indexer & idxr + return indexer.intersection(idxr) for i, k in enumerate(seq): @@ -3162,7 +3162,9 @@ def _update_indexer(idxr, indexer=indexer): idxrs = _convert_to_indexer( self._get_level_indexer(x, level=i, indexer=indexer) ) - indexers = idxrs if indexers is None else indexers | idxrs + indexers = (idxrs if indexers is None else indexers).union( + idxrs + ) except KeyError: # ignore not founds diff --git a/pandas/core/series.py b/pandas/core/series.py index 19d07a8c5e6bf..e4a805a18bcdb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -725,7 +725,7 @@ def __array_ufunc__( # it to handle *args. 
index = alignable[0].index for s in alignable[1:]: - index |= s.index + index = index.union(s.index) inputs = tuple( x.reindex(index) if issubclass(t, Series) else x for x, t in zip(inputs, types) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 4cd19800d4e26..5013365896fb2 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -475,10 +475,10 @@ def __init__( if cols is not None: # all missing, raise - if not len(Index(cols) & df.columns): + if not len(Index(cols).intersection(df.columns)): raise KeyError("passes columns are not ALL present dataframe") - if len(Index(cols) & df.columns) != len(cols): + if len(Index(cols).intersection(df.columns)) != len(cols): # Deprecated in GH#17295, enforced in 1.0.0 raise KeyError("Not all names specified in 'columns' are found") diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 288bc0adc5162..98b9a585d890e 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -268,7 +268,7 @@ def __init__( if ( (obj.ndim == 1) and (obj.name in set(obj.index.names)) - or len(obj.columns & obj.index.names) + or len(obj.columns.intersection(obj.index.names)) ): msg = "Overlapping names between the index and columns" raise ValueError(msg) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 93c92c0b8f1ab..3dbfd8b64cbba 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -300,7 +300,8 @@ def test_intersection_bug_1708(self): index_1 = date_range("1/1/2012", periods=4, freq="12H") index_2 = index_1 + DateOffset(hours=1) - result = index_1 & index_2 + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 assert len(result) == 0 @pytest.mark.parametrize("tz", tz) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 0b17c1c4c9679..4ac9a27069a3f 100644 --- 
a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -105,11 +105,13 @@ def test_symmetric_difference(idx, sort): def test_multiindex_symmetric_difference(): # GH 13490 idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"]) - result = idx ^ idx + with tm.assert_produces_warning(FutureWarning): + result = idx ^ idx assert result.names == idx.names idx2 = idx.copy().rename(["A", "B"]) - result = idx ^ idx2 + with tm.assert_produces_warning(FutureWarning): + result = idx ^ idx2 assert result.names == [None, None] diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f37c3dff1e338..3750df4b65066 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1008,7 +1008,8 @@ def test_symmetric_difference(self, sort): tm.assert_index_equal(result, expected) # __xor__ syntax - expected = index1 ^ index2 + with tm.assert_produces_warning(FutureWarning): + expected = index1 ^ index2 assert tm.equalContents(result, expected) assert result.name is None diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 1a40fe550be61..7b886a9353322 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -93,5 +93,18 @@ def test_union_dtypes(left, right, expected): right = pandas_dtype(right) a = pd.Index([], dtype=left) b = pd.Index([], dtype=right) - result = (a | b).dtype + result = a.union(b).dtype assert result == expected + + +def test_dunder_inplace_setops_deprecated(index): + # GH#37374 these will become logical ops, not setops + + with tm.assert_produces_warning(FutureWarning): + index |= index + + with tm.assert_produces_warning(FutureWarning): + index &= index + + with tm.assert_produces_warning(FutureWarning): + index ^= index diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 16ac70d9f23f2..94fdfefa497a3 100644 --- 
a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -97,13 +97,15 @@ def test_intersection_bug_1708(self): index_1 = timedelta_range("1 day", periods=4, freq="h") index_2 = index_1 + pd.offsets.Hour(5) - result = index_1 & index_2 + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 assert len(result) == 0 index_1 = timedelta_range("1 day", periods=4, freq="h") index_2 = index_1 + pd.offsets.Hour(1) - result = index_1 & index_2 + with tm.assert_produces_warning(FutureWarning): + result = index_1 & index_2 expected = timedelta_range("1 day 01:00:00", periods=3, freq="h") tm.assert_index_equal(result, expected) assert result.freq == expected.freq diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 7681807b60989..d3d33d6fe847e 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1112,9 +1112,9 @@ def test_resample_anchored_multiday(): # # See: https://github.com/pandas-dev/pandas/issues/8683 - index = pd.date_range( - "2014-10-14 23:06:23.206", periods=3, freq="400L" - ) | pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L") + index1 = pd.date_range("2014-10-14 23:06:23.206", periods=3, freq="400L") + index2 = pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L") + index = index1.union(index2) s = Series(np.random.randn(5), index=index) diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 3d53e7ac26338..7cfda2464f21a 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -269,11 +269,13 @@ def test_reversed_xor_with_index_returns_index(self): idx2 = Index([1, 0, 1, 0]) expected = Index.symmetric_difference(idx1, ser) - result = idx1 ^ ser + with tm.assert_produces_warning(FutureWarning): + result = idx1 ^ ser tm.assert_index_equal(result, expected) expected = 
Index.symmetric_difference(idx2, ser) - result = idx2 ^ ser + with tm.assert_produces_warning(FutureWarning): + result = idx2 ^ ser tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -304,11 +306,13 @@ def test_reversed_logical_op_with_index_returns_series(self, op): idx2 = Index([1, 0, 1, 0]) expected = Series(op(idx1.values, ser.values)) - result = op(ser, idx1) + with tm.assert_produces_warning(FutureWarning): + result = op(ser, idx1) tm.assert_series_equal(result, expected) expected = Series(op(idx2.values, ser.values)) - result = op(ser, idx2) + with tm.assert_produces_warning(FutureWarning): + result = op(ser, idx2) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -324,7 +328,9 @@ def test_reverse_ops_with_index(self, op, expected): # multi-set Index ops are buggy, so let's avoid duplicates... ser = Series([True, False]) idx = Index([False, True]) - result = op(ser, idx) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # behaving as set ops is deprecated, will become logical ops + result = op(ser, idx) tm.assert_index_equal(result, expected) def test_logical_ops_label_based(self): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f52d0d0fccab8..538a52d84b73a 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -645,7 +645,9 @@ def test_str_cat_align_mixed_inputs(self, join): u = np.array(["A", "B", "C", "D"]) expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"]) # joint index of rhs [t, u]; u will be forced have index of s - rhs_idx = t.index & s.index if join == "inner" else t.index | s.index + rhs_idx = ( + t.index.intersection(s.index) if join == "inner" else t.index.union(s.index) + ) expected = expected_outer.loc[s.index.join(rhs_idx, how=join)] result = s.str.cat([t, u], join=join, na_rep="-")