diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3f8d9d3916797..ee5098749105f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -260,6 +260,26 @@ The following methods now also correctly output values for unobserved categories df.groupby(["cat_1", "cat_2"], observed=False)["value"].count() +By default :meth:`Categorical.min` now returns the minimum instead of np.nan +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :class:`Categorical` contains ``np.nan``, +:meth:`Categorical.min` no longer return ``np.nan`` by default (skipna=True) (:issue:`25303`) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min() + Out[1]: nan + + +*pandas 1.0.0* + +.. ipython:: python + + pd.Categorical([1, 2, np.nan], ordered=True).min() + .. _whatsnew_1000.api_breaking.deps: Increased minimum versions for dependencies @@ -366,6 +386,8 @@ Deprecations - :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) - :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) - :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`) +- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`) +- .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ec1f5d2d6214c..6a7a225845440 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2157,7 +2157,8 @@ def _reduce(self, name, axis=0, **kwargs): raise TypeError(f"Categorical cannot perform the operation {name}") return func(**kwargs) - def min(self, numeric_only=None, **kwargs): + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def min(self, skipna=True): """ The minimum value of the object. @@ -2173,17 +2174,18 @@ def min(self, numeric_only=None, **kwargs): min : the minimum of this `Categorical` """ self.check_for_ordered("min") - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].min(**kwargs) - else: - pointer = self._codes.min(**kwargs) - if pointer == -1: - return np.nan + good = self._codes != -1 + if not good.all(): + if skipna: + pointer = self._codes[good].min() + else: + return np.nan else: - return self.categories[pointer] + pointer = self._codes.min() + return self.categories[pointer] - def max(self, numeric_only=None, **kwargs): + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def max(self, skipna=True): """ The maximum value of the object. @@ -2199,15 +2201,15 @@ def max(self, numeric_only=None, **kwargs): max : the maximum of this `Categorical` """ self.check_for_ordered("max") - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].max(**kwargs) - else: - pointer = self._codes.max(**kwargs) - if pointer == -1: - return np.nan + good = self._codes != -1 + if not good.all(): + if skipna: + pointer = self._codes[good].max() + else: + return np.nan else: - return self.categories[pointer] + pointer = self._codes.max() + return self.categories[pointer] def mode(self, dropna=True): """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 56039605651ac..01a8231f5fd02 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3869,9 +3869,7 @@ def _reduce( self._get_axis_number(axis) if isinstance(delegate, Categorical): - # TODO deprecate numeric_only argument for Categorical and use - # skipna as well, see GH25303 - return delegate._reduce(name, numeric_only=numeric_only, **kwds) + return delegate._reduce(name, skipna=skipna, **kwds) elif isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface return delegate._reduce(name, skipna=skipna, **kwds) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 279f1492d7dad..637a47eba0597 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -35,31 +35,43 @@ def test_min_max(self): assert _min == "d" assert _max == "a" + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_with_nan(self, skipna): + # GH 25303 cat = Categorical( [np.nan, "b", "c", np.nan], categories=["d", "c", "b", "a"], ordered=True ) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == "b" + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) - _min = cat.min(numeric_only=True) - assert _min == "c" - _max = cat.max(numeric_only=True) - assert _max == "b" + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == "c" + assert _max == "b" cat = Categorical( [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True ) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == 1 - - _min = cat.min(numeric_only=True) - assert _min == 2 - _max = cat.max(numeric_only=True) - assert _max == 1 + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) + + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == 2 + assert _max == 1 + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_deprecate_numeric_only_min_max(self, method): + # GH 25303 + cat = Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + with tm.assert_produces_warning(expected_warning=FutureWarning): + getattr(cat, method)(numeric_only=True) @pytest.mark.parametrize( "values,categories,exp_mode", diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index b0ef0c58ca65a..80d148c919ab2 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1043,7 +1043,7 @@ def test_min_max(self): ) _min = cat.min() _max = cat.max() - assert np.isnan(_min) + assert _min == "c" assert _max == "b" cat = Series( @@ -1053,30 +1053,24 @@ def test_min_max(self): ) _min = cat.min() _max = cat.max() - assert np.isnan(_min) + assert _min == 2 assert _max == 1 - def test_min_max_numeric_only(self): - # TODO deprecate numeric_only argument for Categorical and use - # skipna as well, see GH25303 + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_skipna(self, skipna): + # GH 25303 cat = Series( Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True) ) + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == "a" - - _min = cat.min(numeric_only=True) - _max = cat.max(numeric_only=True) - assert _min == "b" - assert _max == "a" - - _min = cat.min(numeric_only=False) - _max = cat.max(numeric_only=False) - assert np.isnan(_min) - assert _max == "a" + if skipna is True: + assert _min == "b" + assert _max == "a" + else: + assert np.isnan(_min) + assert np.isnan(_max) class TestSeriesMode: