pandas-dev · jreback · Mar 22, 2018 · Feb 19, 2018 · Mar 2, 2018 · Mar 2, 2018
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -2,6 +2,7 @@
 import numpy as np
 
 from pandas.errors import AbstractMethodError
+from pandas.compat.numpy import function as nv
 
 _not_implemented_message = "{} does not implement {}."
 
@@ -236,6 +237,57 @@ def isna(self):
         """
         raise AbstractMethodError(self)
 
+    def _values_for_argsort(self):
+        # type: () -> ndarray
+        """Return values for sorting.
+
+        Returns
+        -------
+        ndarray
+            The transformed values should maintain the ordering between values
+            within the array.
+
+        See Also
+        --------
+        ExtensionArray.argsort
+        """
+        # Note: this is used in `ExtensionArray.argsort`.
+        return np.array(self)
+
+    def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
+        """
+        Return the indices that would sort this array.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            Whether the indices should result in an ascending
+            or descending sort.
+        kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+            Sorting algorithm.
+        *args, **kwargs:
+            passed through to :func:`numpy.argsort`.
+
+        Returns
+        -------
+        index_array : ndarray
+            Array of indices that sort ``self``.
+
+        See Also
+        --------
+        numpy.argsort : Sorting implementation used internally.
+        """
+        # Implementor note: You have two places to override the behavior of
+        # argsort.
+        # 1. _values_for_argsort : construct the values passed to np.argsort
+        # 2. argsort : total control over sorting.
+        ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
+        values = self._values_for_argsort()
+        result = np.argsort(values, kind=kind, **kwargs)
+        if not ascending:
+            result = result[::-1]
+        return result
+
     def fillna(self, value=None, method=None, limit=None):
         """ Fill NA/NaN values using the specified method.
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1378,17 +1378,24 @@ def check_for_ordered(self, op):
                             "you can use .as_ordered() to change the "
                             "Categorical to an ordered one\n".format(op=op))
 
-    def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
-        """
-        Returns the indices that would sort the Categorical instance if
-        'sort_values' was called. This function is implemented to provide
-        compatibility with numpy ndarray objects.
+    def _values_for_argsort(self):
+        return self._codes.copy()
 
-        While an ordering is applied to the category values, arg-sorting
-        in this context refers more to organizing and grouping together
-        based on matching category values. Thus, this function can be
-        called on an unordered Categorical instance unlike the functions
-        'Categorical.min' and 'Categorical.max'.
+    def argsort(self, *args, **kwargs):
+        # TODO(PY2): use correct signature
+        # We have to do *args, **kwargs to avoid a a py2-only signature
+        # issue since np.argsort differs from argsort.
+        """Return the indicies that would sort the Categorical.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            Whether the indices should result in an ascending
+            or descending sort.
+        kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+            Sorting algorithm.
+        *args, **kwargs:
+            passed through to :func:`numpy.argsort`.
 
         Returns
         -------
@@ -1397,12 +1404,28 @@ def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
         See also
         --------
         numpy.ndarray.argsort
+
+        Notes
+        -----
+        While an ordering is applied to the category values, arg-sorting
+        in this context refers more to organizing and grouping together
+        based on matching category values. Thus, this function can be
+        called on an unordered Categorical instance unlike the functions
+        'Categorical.min' and 'Categorical.max'.
+
+        Examples
+        --------
+        >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort()
+        array([2, 0, 1, 3])
+
+        >>> cat = pd.Categorical(['b', 'b', 'a', 'c'],
+        ...                      categories=['c', 'b', 'a'],
+        ...                      ordered=True)
+        >>> cat.argsort()
+        array([3, 0, 1, 2])
         """
-        ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
-        result = np.argsort(self._codes.copy(), kind=kind, **kwargs)
-        if not ascending:
-            result = result[::-1]
-        return result
+        # Keep the implementation here just for the docstring.
+        return super(Categorical, self).argsort(*args, **kwargs)
 
     def sort_values(self, inplace=False, ascending=True, na_position='last'):
         """ Sorts the Categorical by category value returning a new

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -32,6 +32,46 @@ def test_apply_simple_series(self, data):
         result = pd.Series(data).apply(id)
         assert isinstance(result, pd.Series)
 
+    def test_argsort(self, data_for_sorting):
+        result = pd.Series(data_for_sorting).argsort()
+        expected = pd.Series(np.array([2, 0, 1], dtype=np.int64))
+        self.assert_series_equal(result, expected)
+
+    def test_argsort_missing(self, data_missing_for_sorting):
+        result = pd.Series(data_missing_for_sorting).argsort()
+        expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
+        self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values(self, data_for_sorting, ascending):
+        ser = pd.Series(data_for_sorting)
+        result = ser.sort_values(ascending=ascending)
+        expected = ser.iloc[[2, 0, 1]]
+        if not ascending:
+            expected = expected[::-1]
+
+        self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
+        ser = pd.Series(data_missing_for_sorting)
+        result = ser.sort_values(ascending=ascending)
+        if ascending:
+            expected = ser.iloc[[2, 0, 1]]
+        else:
+            expected = ser.iloc[[0, 2, 1]]
+        self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values_frame(self, data_for_sorting, ascending):
+        df = pd.DataFrame({"A": [1, 2, 1],
+                           "B": data_for_sorting})
+        result = df.sort_values(['A', 'B'])
+        expected = pd.DataFrame({"A": [1, 1, 2],
+                                 'B': data_for_sorting.take([2, 0, 1])},
+                                index=[2, 0, 1])
+        self.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize('box', [pd.Series, lambda x: x])
     @pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
     def test_unique(self, data, box, method):

diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
@@ -29,6 +29,18 @@ def data_missing():
     return Categorical([np.nan, 'A'])
 
 
+@pytest.fixture
+def data_for_sorting():
+    return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],
+                       ordered=True)
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return Categorical(['A', None, 'B'], categories=['B', 'A'],
+                       ordered=True)
+
+
 @pytest.fixture
 def na_value():
     return np.nan

diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py
@@ -30,6 +30,26 @@ def all_data(request, data, data_missing):
         return data_missing
 
 
+@pytest.fixture
+def data_for_sorting():
+    """Length-3 array with a known sort order.
+
+    This should be three items [B, C, A] with
+    A < B < C
+    """
+    raise NotImplementedError
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    """Length-3 array with a known sort order.
+
+    This should be three items [B, NA, A] with
+    A < B and NA missing.
+    """
+    raise NotImplementedError
+
+
 @pytest.fixture
 def na_cmp():
     """Binary operator for comparing NA values.

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -25,6 +25,20 @@ def data_missing():
     return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
 
 
+@pytest.fixture
+def data_for_sorting():
+    return DecimalArray([decimal.Decimal('1'),
+                         decimal.Decimal('2'),
+                         decimal.Decimal('0')])
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return DecimalArray([decimal.Decimal('1'),
+                         decimal.Decimal('NaN'),
+                         decimal.Decimal('0')])
+
+
 @pytest.fixture
 def na_cmp():
     return lambda x, y: x.is_nan() and y.is_nan()
@@ -48,11 +62,17 @@ def assert_series_equal(self, left, right, *args, **kwargs):
                                       *args, **kwargs)
 
     def assert_frame_equal(self, left, right, *args, **kwargs):
-        self.assert_series_equal(left.dtypes, right.dtypes)
-        for col in left.columns:
+        # TODO(EA): select_dtypes
+        decimals = (left.dtypes == 'decimal').index
+
+        for col in decimals:
             self.assert_series_equal(left[col], right[col],
                                      *args, **kwargs)
 
+        left = left.drop(columns=decimals)
+        right = right.drop(columns=decimals)
+        tm.assert_frame_equal(left, right, *args, **kwargs)
+
 
 class TestDtype(BaseDecimal, base.BaseDtypeTests):
     pass

diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
@@ -44,7 +44,11 @@ def __getitem__(self, item):
             return self._constructor_from_sequence([
                 x for x, m in zip(self, item) if m
             ])
+        elif isinstance(item, collections.Iterable):
+            # fancy indexing
+            return type(self)([self.data[i] for i in item])
         else:
+            # slice
             return type(self)(self.data[item])
 
     def __setitem__(self, key, value):
@@ -104,6 +108,13 @@ def _concat_same_type(cls, to_concat):
         data = list(itertools.chain.from_iterable([x.data for x in to_concat]))
         return cls(data)
 
+    def _values_for_argsort(self):
+        # Disable NumPy's shape inference by including an empty tuple...
+        # If all the elemnts of self are the same size P, NumPy will
+        # cast them to an (N, P) array, instead of an (N,) array of tuples.
+        frozen = [()] + list(tuple(x.items()) for x in self)
+        return np.array(frozen, dtype=object)[1:]
+
 
 def make_data():
     # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -29,6 +29,16 @@ def data_missing():
     return JSONArray([{}, {'a': 10}])
 
 
+@pytest.fixture
+def data_for_sorting():
+    return JSONArray([{'b': 1}, {'c': 4}, {'a': 2, 'c': 3}])
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return JSONArray([{'b': 1}, {}, {'a': 4}])
+
+
 @pytest.fixture
 def na_value():
     return {}
@@ -70,10 +80,39 @@ def test_fillna_frame(self):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.skip(reason="Unhashable")
+    unhashable = pytest.mark.skip(reason="Unhashable")
+    unstable = pytest.mark.skipif(sys.version_info <= (3, 5),
+                                  reason="Dictionary order unstable")
+
+    @unhashable
     def test_value_counts(self, all_data, dropna):
         pass
 
+    @unhashable
+    def test_sort_values_frame(self):
+        # TODO (EA.factorize): see if _values_for_factorize allows this.
+        pass
+
+    @unstable
+    def test_argsort(self, data_for_sorting):
+        super(TestMethods, self).test_argsort(data_for_sorting)
+
+    @unstable
+    def test_argsort_missing(self, data_missing_for_sorting):
+        super(TestMethods, self).test_argsort_missing(
+            data_missing_for_sorting)
+
+    @unstable
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values(self, data_for_sorting, ascending):
+        super(TestMethods, self).test_sort_values(
+            data_for_sorting, ascending)
+
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
+        super(TestMethods, self).test_sort_values_missing(
+            data_missing_for_sorting, ascending)
+
 
 class TestCasting(base.BaseCastingTests):
     pass