Skip to content

Commit 65849d3

Browse files
IamJeffG authored and jorisvandenbossche committed
TST: Clean up tests of DataFrame.sort_{index,values} (#13496)
* TST: Clean up tests of DataFrame.sort_{index,values}
* Factor out Series sorting tests to own file.
* Delegate deprecated sort() and order() to their own tests.
  Before this commit, the `Series.sort_values()` tests relied on deprecated `Series.sort()` and `Series.order()` as the source of truth. However, they both merely called `Series.sort_values()` under the hood. This commit consolidates the core test logic against `.sort_values()` directly, while `.sort()` and `.order()` merely check for equivalence with `.sort_values()`. Also removes some no-op assertions that had rotted from the old days of `sort()`/`order()`.
* Remove 'by' docstring from Series.sort_values
* Document defaults for optional sorting args
* Move more sort_values, sort_index tests to be together.
* Add test for Series.sort_index(sort_remaining=True)
* Improve `sort_values` tests when multiple `by`s
  Duplicate values in the test DataFrame are necessary to fully test this feature.
* PEP8 cleanup
* Annotate tests with GH issue
* Fix indentation - docstring string replacement
1 parent 2f7fdd0 commit 65849d3

File tree

6 files changed

+226
-215
lines changed

6 files changed

+226
-215
lines changed

pandas/core/frame.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -68,8 +68,12 @@
6868
# ---------------------------------------------------------------------
6969
# Docstring templates
7070

71-
_shared_doc_kwargs = dict(axes='index, columns', klass='DataFrame',
72-
axes_single_arg="{0, 1, 'index', 'columns'}")
71+
_shared_doc_kwargs = dict(
72+
axes='index, columns', klass='DataFrame',
73+
axes_single_arg="{0, 1, 'index', 'columns'}",
74+
optional_by="""
75+
by : str or list of str
76+
Name or list of names which refer to the axis items.""")
7377

7478
_numeric_only_doc = """numeric_only : boolean, default None
7579
Include only float, int, boolean data. If None, will attempt to use

pandas/core/generic.py

+17-15
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,13 @@
3737
# goal is to be able to define the docs close to function, while still being
3838
# able to share
3939
_shared_docs = dict()
40-
_shared_doc_kwargs = dict(axes='keywords for axes', klass='NDFrame',
41-
axes_single_arg='int or labels for object',
42-
args_transpose='axes to permute (int or label for'
43-
' object)')
40+
_shared_doc_kwargs = dict(
41+
axes='keywords for axes', klass='NDFrame',
42+
axes_single_arg='int or labels for object',
43+
args_transpose='axes to permute (int or label for object)',
44+
optional_by="""
45+
by : str or list of str
46+
Name or list of names which refer to the axis items.""")
4447

4548

4649
def is_dictlike(x):
@@ -1961,21 +1964,20 @@ def add_suffix(self, suffix):
19611964
.. versionadded:: 0.17.0
19621965
19631966
Parameters
1964-
----------
1965-
by : string name or list of names which refer to the axis items
1966-
axis : %(axes)s to direct sorting
1967-
ascending : bool or list of bool
1967+
----------%(optional_by)s
1968+
axis : %(axes)s to direct sorting, default 0
1969+
ascending : bool or list of bool, default True
19681970
Sort ascending vs. descending. Specify list for multiple sort
19691971
orders. If this is a list of bools, must match the length of
19701972
the by.
1971-
inplace : bool
1973+
inplace : bool, default False
19721974
if True, perform operation in-place
1973-
kind : {`quicksort`, `mergesort`, `heapsort`}
1975+
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
19741976
Choice of sorting algorithm. See also numpy.sort for more
19751977
information. `mergesort` is the only stable algorithm. For
19761978
DataFrames, this option is only applied when sorting on a single
19771979
column or label.
1978-
na_position : {'first', 'last'}
1980+
na_position : {'first', 'last'}, default 'last'
19791981
`first` puts NaNs at the beginning, `last` puts NaNs at the end
19801982
19811983
Returns
@@ -1997,16 +1999,16 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
19971999
if not None, sort on values in specified index level(s)
19982000
ascending : boolean, default True
19992001
Sort ascending vs. descending
2000-
inplace : bool
2002+
inplace : bool, default False
20012003
if True, perform operation in-place
2002-
kind : {`quicksort`, `mergesort`, `heapsort`}
2004+
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
20032005
Choice of sorting algorithm. See also numpy.sort for more
20042006
information. `mergesort` is the only stable algorithm. For
20052007
DataFrames, this option is only applied when sorting on a single
20062008
column or label.
2007-
na_position : {'first', 'last'}
2009+
na_position : {'first', 'last'}, default 'last'
20082010
`first` puts NaNs at the beginning, `last` puts NaNs at the end
2009-
sort_remaining : bool
2011+
sort_remaining : bool, default True
20102012
if true and sorting by level and index is multilevel, sort by other
20112013
levels too (in order) after sorting by specified level
20122014

pandas/core/series.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,8 @@
6262
axes='index', klass='Series', axes_single_arg="{0, 'index'}",
6363
inplace="""inplace : boolean, default False
6464
If True, performs operation inplace and returns None.""",
65-
duplicated='Series')
65+
duplicated='Series',
66+
optional_by='')
6667

6768

6869
def _coerce_method(converter):

pandas/tests/frame/test_sorting.py

+55-61
Original file line number | Diff line number | Diff line change
@@ -21,75 +21,68 @@ class TestDataFrameSorting(tm.TestCase, TestData):
2121

2222
_multiprocess_can_split_ = True
2323

24-
def test_sort_values(self):
25-
# API for 9816
24+
def test_sort_index(self):
25+
# GH13496
2626

27-
# sort_index
2827
frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
2928
columns=['A', 'B', 'C', 'D'])
3029

31-
# 9816 deprecated
32-
with tm.assert_produces_warning(FutureWarning):
33-
frame.sort(columns='A')
34-
with tm.assert_produces_warning(FutureWarning):
35-
frame.sort()
36-
30+
# axis=0 : sort rows by index labels
3731
unordered = frame.ix[[3, 2, 4, 1]]
38-
expected = unordered.sort_index()
39-
4032
result = unordered.sort_index(axis=0)
33+
expected = frame
4134
assert_frame_equal(result, expected)
4235

43-
unordered = frame.ix[:, [2, 1, 3, 0]]
44-
expected = unordered.sort_index(axis=1)
36+
result = unordered.sort_index(ascending=False)
37+
expected = frame[::-1]
38+
assert_frame_equal(result, expected)
4539

40+
# axis=1 : sort columns by column names
41+
unordered = frame.ix[:, [2, 1, 3, 0]]
4642
result = unordered.sort_index(axis=1)
47-
assert_frame_equal(result, expected)
43+
assert_frame_equal(result, frame)
44+
45+
result = unordered.sort_index(axis=1, ascending=False)
46+
expected = frame.ix[:, ::-1]
4847
assert_frame_equal(result, expected)
4948

50-
# sortlevel
51-
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
49+
def test_sort_index_multiindex(self):
50+
# GH13496
51+
52+
# sort rows by specified level of multi-index
53+
mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC'))
5254
df = DataFrame([[1, 2], [3, 4]], mi)
5355

5456
result = df.sort_index(level='A', sort_remaining=False)
5557
expected = df.sortlevel('A', sort_remaining=False)
5658
assert_frame_equal(result, expected)
5759

60+
# sort columns by specified level of multi-index
5861
df = df.T
5962
result = df.sort_index(level='A', axis=1, sort_remaining=False)
6063
expected = df.sortlevel('A', axis=1, sort_remaining=False)
6164
assert_frame_equal(result, expected)
6265

63-
# MI sort, but no by
66+
# MI sort, but no level: sort_remaining has no effect
6467
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
6568
df = DataFrame([[1, 2], [3, 4]], mi)
6669
result = df.sort_index(sort_remaining=False)
6770
expected = df.sort_index()
6871
assert_frame_equal(result, expected)
6972

70-
def test_sort_index(self):
73+
def test_sort(self):
7174
frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
7275
columns=['A', 'B', 'C', 'D'])
7376

74-
# axis=0
75-
unordered = frame.ix[[3, 2, 4, 1]]
76-
sorted_df = unordered.sort_index(axis=0)
77-
expected = frame
78-
assert_frame_equal(sorted_df, expected)
79-
80-
sorted_df = unordered.sort_index(ascending=False)
81-
expected = frame[::-1]
82-
assert_frame_equal(sorted_df, expected)
83-
84-
# axis=1
85-
unordered = frame.ix[:, ['D', 'B', 'C', 'A']]
86-
sorted_df = unordered.sort_index(axis=1)
87-
expected = frame
88-
assert_frame_equal(sorted_df, expected)
77+
# 9816 deprecated
78+
with tm.assert_produces_warning(FutureWarning):
79+
frame.sort(columns='A')
80+
with tm.assert_produces_warning(FutureWarning):
81+
frame.sort()
8982

90-
sorted_df = unordered.sort_index(axis=1, ascending=False)
91-
expected = frame.ix[:, ::-1]
92-
assert_frame_equal(sorted_df, expected)
83+
def test_sort_values(self):
84+
frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]],
85+
index=[1, 2, 3], columns=list('ABC'))
9386

9487
# by column
9588
sorted_df = frame.sort_values(by='A')
@@ -109,16 +102,17 @@ def test_sort_index(self):
109102
sorted_df = frame.sort_values(by=['A'], ascending=[False])
110103
assert_frame_equal(sorted_df, expected)
111104

112-
# check for now
113-
sorted_df = frame.sort_values(by='A')
114-
assert_frame_equal(sorted_df, expected[::-1])
115-
expected = frame.sort_values(by='A')
105+
# multiple bys
106+
sorted_df = frame.sort_values(by=['B', 'C'])
107+
expected = frame.loc[[2, 1, 3]]
116108
assert_frame_equal(sorted_df, expected)
117109

118-
expected = frame.sort_values(by=['A', 'B'], ascending=False)
119-
sorted_df = frame.sort_values(by=['A', 'B'])
110+
sorted_df = frame.sort_values(by=['B', 'C'], ascending=False)
120111
assert_frame_equal(sorted_df, expected[::-1])
121112

113+
sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False])
114+
assert_frame_equal(sorted_df, expected)
115+
122116
self.assertRaises(ValueError, lambda: frame.sort_values(
123117
by=['A', 'B'], axis=2, inplace=True))
124118

@@ -130,6 +124,25 @@ def test_sort_index(self):
130124
with assertRaisesRegexp(ValueError, msg):
131125
frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5)
132126

127+
def test_sort_values_inplace(self):
128+
frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4],
129+
columns=['A', 'B', 'C', 'D'])
130+
131+
sorted_df = frame.copy()
132+
sorted_df.sort_values(by='A', inplace=True)
133+
expected = frame.sort_values(by='A')
134+
assert_frame_equal(sorted_df, expected)
135+
136+
sorted_df = frame.copy()
137+
sorted_df.sort_values(by='A', ascending=False, inplace=True)
138+
expected = frame.sort_values(by='A', ascending=False)
139+
assert_frame_equal(sorted_df, expected)
140+
141+
sorted_df = frame.copy()
142+
sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True)
143+
expected = frame.sort_values(by=['A', 'B'], ascending=False)
144+
assert_frame_equal(sorted_df, expected)
145+
133146
def test_sort_index_categorical_index(self):
134147

135148
df = (DataFrame({'A': np.arange(6, dtype='int64'),
@@ -361,25 +374,6 @@ def test_sort_index_different_sortorder(self):
361374
result = idf['C'].sort_index(ascending=[1, 0])
362375
assert_series_equal(result, expected['C'])
363376

364-
def test_sort_inplace(self):
365-
frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4],
366-
columns=['A', 'B', 'C', 'D'])
367-
368-
sorted_df = frame.copy()
369-
sorted_df.sort_values(by='A', inplace=True)
370-
expected = frame.sort_values(by='A')
371-
assert_frame_equal(sorted_df, expected)
372-
373-
sorted_df = frame.copy()
374-
sorted_df.sort_values(by='A', ascending=False, inplace=True)
375-
expected = frame.sort_values(by='A', ascending=False)
376-
assert_frame_equal(sorted_df, expected)
377-
378-
sorted_df = frame.copy()
379-
sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True)
380-
expected = frame.sort_values(by=['A', 'B'], ascending=False)
381-
assert_frame_equal(sorted_df, expected)
382-
383377
def test_sort_index_duplicates(self):
384378

385379
# with 9816, these are all translated to .sort_values

0 commit comments

Comments
 (0)