Skip to content

Commit e301671

Browse files
committed
ENH: per-level sort order in sort/sort_index. close #928
1 parent c091a8a commit e301671

File tree

5 files changed

+90
-16
lines changed

5 files changed

+90
-16
lines changed

RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ pandas 0.9.1
2727

2828
**Release date:** NOT YET RELEASED
2929

30+
**New features**
31+
32+
- Can specify a separate ascending/descending order for each sort key or MultiIndex level by passing a list to ``ascending`` in DataFrame/Series.sort/sort_index (#928)
33+
3034
**Improvements to existing features**
3135

3236
- Improve performance of datetime + business day offset with large number of

pandas/core/frame.py

+21-8
Original file line numberDiff line numberDiff line change
@@ -2796,13 +2796,18 @@ def sort(self, columns=None, column=None, axis=0, ascending=True,
27962796
columns : object
27972797
Column name(s) in frame. Accepts a column name or a list or tuple
27982798
for a nested sort.
2799-
ascending : boolean, default True
2800-
Sort ascending vs. descending
2799+
ascending : boolean or list, default True
2800+
Sort ascending vs. descending. Specify list for multiple sort
2801+
orders
28012802
axis : {0, 1}
28022803
Sort index/rows versus columns
28032804
inplace : boolean, default False
28042805
Sort the DataFrame without creating a new instance
28052806
2807+
Examples
2808+
--------
2809+
>>> result = df.sort(['A', 'B'], ascending=[1, 0])
2810+
28062811
Returns
28072812
-------
28082813
sorted : DataFrame
@@ -2826,11 +2831,16 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False):
28262831
by : object
28272832
Column name(s) in frame. Accepts a column name or a list or tuple
28282833
for a nested sort.
2829-
ascending : boolean, default True
2830-
Sort ascending vs. descending
2834+
ascending : boolean or list, default True
2835+
Sort ascending vs. descending. Specify list for multiple sort
2836+
orders
28312837
inplace : boolean, default False
28322838
Sort the DataFrame without creating a new instance
28332839
2840+
Examples
2841+
--------
2842+
>>> result = df.sort_index(by=['A', 'B'], ascending=[1, 0])
2843+
28342844
Returns
28352845
-------
28362846
sorted : DataFrame
@@ -2846,14 +2856,17 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False):
28462856
assert(axis == 0)
28472857
if isinstance(by, (tuple, list)):
28482858
keys = [self[x].values for x in by]
2849-
indexer = _lexsort_indexer(keys)
2859+
indexer = _lexsort_indexer(keys, orders=ascending)
28502860
else:
28512861
indexer = self[by].values.argsort()
2862+
if not ascending:
2863+
indexer = indexer[::-1]
2864+
elif isinstance(labels, MultiIndex):
2865+
indexer = _lexsort_indexer(labels.labels, orders=ascending)
28522866
else:
28532867
indexer = labels.argsort()
2854-
2855-
if not ascending:
2856-
indexer = indexer[::-1]
2868+
if not ascending:
2869+
indexer = indexer[::-1]
28572870

28582871
if inplace:
28592872
if axis == 1:

pandas/core/groupby.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -2103,18 +2103,31 @@ def _indexer_from_factorized(labels, shape, compress=True):
21032103
return indexer
21042104

21052105

2106-
def _lexsort_indexer(keys):
2106+
def _lexsort_indexer(keys, orders=None):
21072107
labels = []
21082108
shape = []
2109-
for key in keys:
2109+
2110+
if isinstance(orders, bool):
2111+
orders = [orders] * len(keys)
2112+
elif orders is None:
2113+
orders = [True] * len(keys)
2114+
2115+
for key, order in zip(keys, orders):
21102116
rizer = lib.Factorizer(len(key))
21112117

21122118
if not key.dtype == np.object_:
21132119
key = key.astype('O')
21142120

21152121
ids, _ = rizer.factorize(key, sort=True)
2122+
2123+
n = len(rizer.uniques)
2124+
shape.append(n)
2125+
if not order:
2126+
mask = ids == -1
2127+
ids = np.where(mask, -1, n - ids)
2128+
21162129
labels.append(ids)
2117-
shape.append(len(rizer.uniques))
2130+
21182131
return _indexer_from_factorized(labels, shape)
21192132

21202133

pandas/core/series.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -1749,15 +1749,28 @@ def sort_index(self, ascending=True):
17491749
17501750
Parameters
17511751
----------
1752-
ascending : boolean, default True
1753-
Sort ascending vs. descending
1752+
ascending : boolean or list, default True
1753+
Sort ascending vs. descending. Specify list for multiple sort
1754+
orders
1755+
1756+
Examples
1757+
--------
1758+
>>> result1 = s.sort_index(ascending=False)
1759+
>>> result2 = s.sort_index(ascending=[1, 0])
17541760
17551761
Returns
17561762
-------
17571763
sorted_obj : Series
17581764
"""
1759-
new_labels, indexer = self.index.order(return_indexer=True,
1760-
ascending=ascending)
1765+
index = self.index
1766+
if isinstance(index, MultiIndex):
1767+
from pandas.core.groupby import _lexsort_indexer
1768+
indexer = _lexsort_indexer(index.labels, orders=ascending)
1769+
new_labels = index.take(indexer)
1770+
else:
1771+
new_labels, indexer = index.order(return_indexer=True,
1772+
ascending=ascending)
1773+
17611774
new_values = self.values.take(indexer)
17621775
return Series(new_values, new_labels, name=self.name)
17631776

pandas/tests/test_frame.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -5657,7 +5657,9 @@ def test_sort_index_multicolumn(self):
56575657
assert_frame_equal(result, expected)
56585658

56595659
result = frame.sort_index(by=['A', 'B'], ascending=False)
5660-
expected = frame.take(indexer[::-1])
5660+
indexer = np.lexsort((frame['B'].rank(ascending=False),
5661+
frame['A'].rank(ascending=False)))
5662+
expected = frame.take(indexer)
56615663
assert_frame_equal(result, expected)
56625664

56635665
result = frame.sort_index(by=['B', 'A'])
@@ -5695,6 +5697,35 @@ def test_sort_index_inplace(self):
56955697
expected = frame.ix[:, ::-1]
56965698
assert_frame_equal(df, expected)
56975699

5700+
def test_sort_index_different_sortorder(self):
5701+
import random
5702+
A = np.arange(20).repeat(5)
5703+
B = np.tile(np.arange(5), 20)
5704+
5705+
indexer = np.random.permutation(100)
5706+
A = A.take(indexer)
5707+
B = B.take(indexer)
5708+
5709+
df = DataFrame({'A' : A, 'B' : B,
5710+
'C' : np.random.randn(100)})
5711+
5712+
result = df.sort_index(by=['A', 'B'], ascending=[1, 0])
5713+
5714+
ex_indexer = np.lexsort((df.B.max() - df.B, df.A))
5715+
expected = df.take(ex_indexer)
5716+
assert_frame_equal(result, expected)
5717+
5718+
# test with multiindex, too
5719+
idf = df.set_index(['A', 'B'])
5720+
5721+
result = idf.sort_index(ascending=[1, 0])
5722+
expected = idf.take(ex_indexer)
5723+
assert_frame_equal(result, expected)
5724+
5725+
# also, Series!
5726+
result = idf['C'].sort_index(ascending=[1, 0])
5727+
assert_series_equal(result, expected['C'])
5728+
56985729
def test_sort_inplace(self):
56995730
frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4],
57005731
columns=['A', 'B', 'C', 'D'])

0 commit comments

Comments
 (0)