Skip to content

Commit 7aa0ccf

Browse files
committed
Add optional sort parameter to difference method in subclasses
1 parent 9de04bf commit 7aa0ccf

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ Other Enhancements
213213
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
214214
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
215215
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`)
216+
- :meth:`Index.difference` now has an optional ``sort`` parameter to specify whether the results should be sorted if possible (:issue:`17839`)
216217

217218
.. _whatsnew_0240.api_breaking:
218219

pandas/core/indexes/multi.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -2767,10 +2767,18 @@ def intersection(self, other):
27672767
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
27682768
names=result_names)
27692769

2770-
def difference(self, other):
2770+
def difference(self, other, sort=True):
27712771
"""
27722772
Compute set difference of two MultiIndex objects, sorted by default
27732773
2774+
Parameters
2775+
----------
2776+
other : MultiIndex
2777+
sort : bool, default True
2778+
Sort the resulting MultiIndex if possible
2779+
2780+
.. versionadded:: 0.24.0
2781+
27742782
Returns
27752783
-------
27762784
diff : MultiIndex
@@ -2786,8 +2794,10 @@ def difference(self, other):
27862794
labels=[[]] * self.nlevels,
27872795
names=result_names, verify_integrity=False)
27882796

2789-
difference = sorted(set(self._ndarray_values) -
2790-
set(other._ndarray_values))
2797+
difference = set(self._ndarray_values) - set(other._ndarray_values)
2798+
2799+
if sort:
2800+
difference = sorted(difference)
27912801

27922802
if len(difference) == 0:
27932803
return MultiIndex(levels=[[]] * self.nlevels,

pandas/tests/indexes/test_base.py

+20-10
Original file line numberDiff line numberDiff line change
@@ -1044,15 +1044,17 @@ def test_iadd_string(self):
10441044

10451045
@pytest.mark.parametrize("second_name,expected", [
10461046
(None, None), ('name', 'name')])
1047-
def test_difference_name_preservation(self, second_name, expected):
1047+
@pytest.mark.parametrize("sort", [
1048+
(True, False)])
1049+
def test_difference_name_preservation(self, second_name, expected, sort):
10481050
# TODO: replace with fixture
10491051
first = self.strIndex[5:20]
10501052
second = self.strIndex[:10]
10511053
answer = self.strIndex[10:20]
10521054

10531055
first.name = 'name'
10541056
second.name = second_name
1055-
result = first.difference(second)
1057+
result = first.difference(second, sort)
10561058

10571059
assert tm.equalContents(result, answer)
10581060

@@ -1061,18 +1063,22 @@ def test_difference_name_preservation(self, second_name, expected):
10611063
else:
10621064
assert result.name == expected
10631065

1064-
def test_difference_empty_arg(self):
1066+
@pytest.mark.parametrize("sort", [
1067+
(True, False)])
1068+
def test_difference_empty_arg(self, sort):
10651069
first = self.strIndex[5:20]
10661070
first.name == 'name'
1067-
result = first.difference([])
1071+
result = first.difference([], sort=sort)
10681072

10691073
assert tm.equalContents(result, first)
10701074
assert result.name == first.name
10711075

1072-
def test_difference_identity(self):
1076+
@pytest.mark.parametrize("sort", [
1077+
(True, False)])
1078+
def test_difference_identity(self, sort):
10731079
first = self.strIndex[5:20]
10741080
first.name == 'name'
1075-
result = first.difference(first)
1081+
result = first.difference(first, sort)
10761082

10771083
assert len(result) == 0
10781084
assert result.name == first.name
@@ -1121,13 +1127,15 @@ def test_symmetric_difference_non_index(self):
11211127
assert tm.equalContents(result, expected)
11221128
assert result.name == 'new_name'
11231129

1124-
def test_difference_type(self):
1130+
@pytest.mark.parametrize("sort", [
1131+
(True, False)])
1132+
def test_difference_type(self, sort):
11251133
# GH 20040
11261134
# If taking difference of a set and itself, it
11271135
# needs to preserve the type of the index
11281136
skip_index_keys = ['repeats']
11291137
for key, index in self.generate_index_types(skip_index_keys):
1130-
result = index.difference(index)
1138+
result = index.difference(index, sort)
11311139
expected = index.drop(index)
11321140
tm.assert_index_equal(result, expected)
11331141

@@ -2341,13 +2349,15 @@ def test_intersection_different_type_base(self, klass):
23412349
result = first.intersection(klass(second.values))
23422350
assert tm.equalContents(result, second)
23432351

2344-
def test_difference_base(self):
2352+
@pytest.mark.parametrize("sort", [
2353+
(True, False)])
2354+
def test_difference_base(self, sort):
23452355
# (same results for py2 and py3 but sortedness not tested elsewhere)
23462356
index = self.create_index()
23472357
first = index[:4]
23482358
second = index[3:]
23492359

2350-
result = first.difference(second)
2360+
result = first.difference(second, sort)
23512361
expected = Index([0, 1, 'a'])
23522362
tm.assert_index_equal(result, expected)
23532363

0 commit comments

Comments
 (0)