From bb3d23aed621fa4c154e366e8abd6b0cf448e11a Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:38:48 -0500 Subject: [PATCH 01/18] Update test_reductions.py --- pandas/tests/arrays/sparse/test_reductions.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/tests/arrays/sparse/test_reductions.py b/pandas/tests/arrays/sparse/test_reductions.py index a33a282bb4869..712816b28f33e 100644 --- a/pandas/tests/arrays/sparse/test_reductions.py +++ b/pandas/tests/arrays/sparse/test_reductions.py @@ -268,3 +268,28 @@ def test_na_value_if_no_valid_values(self, func, data, dtype, expected): assert result is NaT or np.isnat(result) else: assert np.isnan(result) + + +class TestArgmaxArgmin: + @pytest.mark.parametrize( + "arr,argmax_expected,argmin_expected", + [ + (SparseArray([1, 2, 0, 1, 2]), 1, 2), + (SparseArray([-1, -2, 0, -1, -2]), 2, 1), + (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2), + (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2), + (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10), + (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10), + (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10), + ], + ) + def test_argmax_argmin(self, arr, argmax_expected, argmin_expected): + argmax_result = arr.argmax() + argmin_result = arr.argmin() + assert argmax_result == argmax_expected + assert argmin_result == argmin_expected From f7f00fc44950a9f7f229fd354aa6d24ef6d71375 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:42:04 -0500 Subject: [PATCH 02/18] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index f313b49cd198d..15a969b09c352 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,6 +279,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- Implemented faster :meth:`ExtensionArray.argmax` and :meth:`ExtensionArray.argmin` for :meth:`SparseArray` (:issue:`34197`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From fdf7e96da2f763f3494f3fbcf16bdaf3ccfa0a47 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:53:16 -0500 Subject: [PATCH 03/18] Update array.py --- pandas/core/arrays/sparse/array.py | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 5653d87a4570b..762eb6f019c39 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1179,6 +1179,13 @@ def copy(self: SparseArrayT) -> SparseArrayT: values = self.sp_values.copy() return self._simple_new(values, self.sp_index, self.dtype) + def _values_for_argsort(self) -> np.ndarray: + return self._sparse_values + + def _mask_for_argsort(self) -> np.ndarray: + mask = ExtensionArray._mask_for_argsort(self) + return mask[self._sparse_index.indices] + @classmethod def _concat_same_type( cls: type[SparseArrayT], to_concat: Sequence[SparseArrayT] @@ -1636,6 +1643,30 @@ def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar: else: return na_value_for_dtype(self.dtype.subtype, compat=False) + def _argmin_argmax(self, candidate: int, kind: Literal["min", "max"]) -> int: + if isna(self.fill_value): + return candidate + if kind == "min" and self[candidate] < self.fill_value: + return candidate + if kind == "max" and self[candidate] > self.fill_value: + return candidate + _loc = self._first_fill_value_loc() + if _loc == -1: + # fill_value doesn't exist + return candidate + else: + return _loc + + def argmax(self, skipna: bool = True) -> int: + _candidate = ExtensionArray.argmax(self, skipna=skipna) + candidate = self._sparse_index.indices[_candidate] + return self._argmin_argmax(candidate, "max") + + def argmin(self, skipna: bool = True) -> int: + _candidate = ExtensionArray.argmin(self, skipna=skipna) + candidate = self._sparse_index.indices[_candidate] + return self._argmin_argmax(candidate, "min") + # ------------------------------------------------------------------------ # Ufuncs # ------------------------------------------------------------------------ From 301c5d95d46c46e03964e52e464571f6dcebcf8f Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:54:51 -0500 Subject: [PATCH 04/18] Update base.py --- pandas/core/arrays/base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6c9b7adadb7b0..18fed169946da 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -666,6 +666,9 @@ def _values_for_argsort(self) -> np.ndarray: # Note: this is used in `ExtensionArray.argsort/argmin/argmax`. return np.array(self) + def _mask_for_argsort(self) -> np.ndarray: + return np.asarray(isna(self)) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def argsort( self, @@ -705,7 +708,8 @@ def argsort( # it is recommended to also override argmax/argmin ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) - values = self._values_for_argsort() + # TODO: ENH argsort for SparseArray + values = ExtensionArray._values_for_argsort(self) return nargsort( values, kind=kind, From e36f60590e7456d442efb3056a56b509e78ebdfb Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:56:49 -0500 Subject: [PATCH 05/18] Update sorting.py --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 16facfc915e40..9d9c53538dcab 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -465,7 +465,7 @@ def nargminmax(values: ExtensionArray, method: str, axis: int = 0): assert method in {"argmax", "argmin"} func = np.argmax if method == "argmax" else np.argmin - mask = np.asarray(isna(values)) + mask = values._mask_for_argsort() arr_values = values._values_for_argsort() if arr_values.ndim > 1: From 11421bd85b27ed714ff019d9b7df9fd7f15fead2 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:06:44 -0500 Subject: [PATCH 06/18] fix format --- pandas/tests/arrays/sparse/test_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/sparse/test_reductions.py b/pandas/tests/arrays/sparse/test_reductions.py index 712816b28f33e..c9cd480916af7 100644 --- a/pandas/tests/arrays/sparse/test_reductions.py +++ b/pandas/tests/arrays/sparse/test_reductions.py @@ -268,7 +268,7 @@ def test_na_value_if_no_valid_values(self, func, data, dtype, expected): assert result is NaT or np.isnat(result) else: assert np.isnan(result) - + class TestArgmaxArgmin: @pytest.mark.parametrize( From 196a4c31533880c9ef603d35188984ef1aee8855 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:09:09 -0500 Subject: [PATCH 07/18] Update array.py --- pandas/core/arrays/sparse/array.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 762eb6f019c39..26dff52d6686e 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1643,12 +1643,12 @@ def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar: else: return na_value_for_dtype(self.dtype.subtype, compat=False) - def _argmin_argmax(self, candidate: int, kind: Literal["min", "max"]) -> int: + def _argmin_argmax(self, candidate: int, kind: Literal["argmin", "argmax"]) -> int: if isna(self.fill_value): return candidate - if kind == "min" and self[candidate] < self.fill_value: + if kind == "argmin" and self[candidate] < self.fill_value: return candidate - if kind == "max" and self[candidate] > self.fill_value: + if kind == "argmax" and self[candidate] > self.fill_value: return candidate _loc = self._first_fill_value_loc() if _loc == -1: @@ -1660,12 +1660,12 @@ def _argmin_argmax(self, candidate: int, kind: Literal["min", "max"]) -> int: def argmax(self, skipna: bool = True) -> int: _candidate = ExtensionArray.argmax(self, skipna=skipna) candidate = self._sparse_index.indices[_candidate] - return self._argmin_argmax(candidate, "max") + return self._argmin_argmax(candidate, "argmax") def argmin(self, skipna: bool = True) -> int: _candidate = ExtensionArray.argmin(self, skipna=skipna) candidate = self._sparse_index.indices[_candidate] - return self._argmin_argmax(candidate, "min") + return self._argmin_argmax(candidate, "argmin") # ------------------------------------------------------------------------ # Ufuncs From 89e24e647c40a580c434f6ab4432bcb5d09ae22d Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:55:40 -0500 Subject: [PATCH 08/18] Update base.py --- pandas/core/arrays/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 18fed169946da..d481389ecf615 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -709,7 +709,10 @@ def argsort( ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) # TODO: ENH argsort for SparseArray - values = ExtensionArray._values_for_argsort(self) + if type(self).__name__ == "SparseArray": + values = ExtensionArray._values_for_argsort(self) + else: + values = self._values_for_argsort() return nargsort( values, kind=kind, From 2861307252885e5e3933c203b2d6911c485404ba Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:32:25 -0500 Subject: [PATCH 09/18] Update sorting.py --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 9d9c53538dcab..16facfc915e40 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -465,7 +465,7 @@ def nargminmax(values: ExtensionArray, method: str, axis: int = 0): assert method in {"argmax", "argmin"} func = np.argmax if method == "argmax" else np.argmin - mask = values._mask_for_argsort() + mask = np.asarray(isna(values)) arr_values = values._values_for_argsort() if arr_values.ndim > 1: From 784ecb797a0b404f69e1f4aa849f469076f69237 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:33:23 -0500 Subject: [PATCH 10/18] Update base.py --- pandas/core/arrays/base.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d481389ecf615..6c9b7adadb7b0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -666,9 +666,6 @@ def _values_for_argsort(self) -> np.ndarray: # Note: this is used in `ExtensionArray.argsort/argmin/argmax`. return np.array(self) - def _mask_for_argsort(self) -> np.ndarray: - return np.asarray(isna(self)) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def argsort( self, @@ -708,11 +705,7 @@ def argsort( # it is recommended to also override argmax/argmin ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) - # TODO: ENH argsort for SparseArray - if type(self).__name__ == "SparseArray": - values = ExtensionArray._values_for_argsort(self) - else: - values = self._values_for_argsort() + values = self._values_for_argsort() return nargsort( values, kind=kind, From 96bd36847f208cf6e8c7e4de414afe247b7db0a7 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:34:41 -0500 Subject: [PATCH 11/18] Update array.py --- pandas/core/arrays/sparse/array.py | 35 ++++++++++++++++++------------ 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 26dff52d6686e..a7d0db3af5804 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1179,13 +1179,6 @@ def copy(self: SparseArrayT) -> SparseArrayT: values = self.sp_values.copy() return self._simple_new(values, self.sp_index, self.dtype) - def _values_for_argsort(self) -> np.ndarray: - return self._sparse_values - - def _mask_for_argsort(self) -> np.ndarray: - mask = ExtensionArray._mask_for_argsort(self) - return mask[self._sparse_index.indices] - @classmethod def _concat_same_type( cls: type[SparseArrayT], to_concat: Sequence[SparseArrayT] @@ -1643,7 +1636,19 @@ def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar: else: return na_value_for_dtype(self.dtype.subtype, compat=False) - def _argmin_argmax(self, candidate: int, kind: Literal["argmin", "argmax"]) -> int: + def _argmin_argmax(self, kind: Literal["argmin", "argmax"]) -> int: + + values = self._sparse_values + mask = np.asarray(isna(self))[self._sparse_index.indices] + func = np.argmax if kind == "argmax" else np.argmin + + idx = np.arange(values.shape[0]) + non_nans = values[~mask] + non_nan_idx = idx[~mask] + + _candidate = non_nan_idx[func(non_nans)] + candidate = self._sparse_index.indices[_candidate] + if isna(self.fill_value): return candidate if kind == "argmin" and self[candidate] < self.fill_value: @@ -1658,14 +1663,16 @@ def _argmin_argmax(self, candidate: int, kind: Literal["argmin", "argmax"]) -> i return _loc def argmax(self, skipna: bool = True) -> int: - _candidate = ExtensionArray.argmax(self, skipna=skipna) - candidate = self._sparse_index.indices[_candidate] - return self._argmin_argmax(candidate, "argmax") + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return self._argmin_argmax("argmax") def argmin(self, skipna: bool = True) -> int: - _candidate = ExtensionArray.argmin(self, skipna=skipna) - candidate = self._sparse_index.indices[_candidate] - return self._argmin_argmax(candidate, "argmin") + validate_bool_kwarg(skipna, "skipna") + if not skipna and self._hasna: + raise NotImplementedError + return self._argmin_argmax("argmin") # ------------------------------------------------------------------------ # Ufuncs From dfccadf8d1db62c6def080c58a8b26f5a98eb990 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:41:00 -0500 Subject: [PATCH 12/18] Update test_reductions.py --- pandas/tests/arrays/sparse/test_reductions.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/arrays/sparse/test_reductions.py b/pandas/tests/arrays/sparse/test_reductions.py index c9cd480916af7..2be76bfc7de7b 100644 --- a/pandas/tests/arrays/sparse/test_reductions.py +++ b/pandas/tests/arrays/sparse/test_reductions.py @@ -293,3 +293,11 @@ def test_argmax_argmin(self, arr, argmax_expected, argmin_expected): argmin_result = arr.argmin() assert argmax_result == argmax_expected assert argmin_result == argmin_expected + + @pytest.mark.parametrize( + "arr,method", [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")], + ) + def test_empty_array(self, arr, method): + msg = "attempt to get {} of an empty sequence".format(method) + with pytest.raises(ValueError, match=msg): + arr.argmax() if method == "argmax" else arr.argmin() From 393f35879fe961b7daf8b6fbe98d3f5fb0c64e1a Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:51:37 -0500 Subject: [PATCH 13/18] fix format --- pandas/tests/arrays/sparse/test_reductions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_reductions.py b/pandas/tests/arrays/sparse/test_reductions.py index 2be76bfc7de7b..969efdcda26a2 100644 --- a/pandas/tests/arrays/sparse/test_reductions.py +++ b/pandas/tests/arrays/sparse/test_reductions.py @@ -295,9 +295,10 @@ def test_argmax_argmin(self, arr, argmax_expected, argmin_expected): assert argmin_result == argmin_expected @pytest.mark.parametrize( - "arr,method", [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")], + "arr,method", + [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")], ) def test_empty_array(self, arr, method): - msg = "attempt to get {} of an empty sequence".format(method) + msg = f"attempt to get {method} of an empty sequence" with pytest.raises(ValueError, match=msg): arr.argmax() if method == "argmax" else arr.argmin() From 9f43ec6a98580d8f1c74d068afb31ec902c63e8c Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:52:12 -0500 Subject: [PATCH 14/18] fix import --- pandas/core/arrays/sparse/array.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index a7d0db3af5804..69c05cfc4c645 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -42,7 +42,10 @@ from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level -from pandas.util._validators import validate_insert_loc +from pandas.util._validators import ( + validate_bool_kwarg, + validate_insert_loc, +) from pandas.core.dtypes.astype import astype_nansafe from pandas.core.dtypes.cast import ( From f1fb365b07c9abee9b026f425eab909c0702f682 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Sun, 24 Jul 2022 20:49:27 -0500 Subject: [PATCH 15/18] Update test_reductions.py --- pandas/tests/arrays/sparse/test_reductions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/arrays/sparse/test_reductions.py b/pandas/tests/arrays/sparse/test_reductions.py index 969efdcda26a2..2dd80c52f1419 100644 --- a/pandas/tests/arrays/sparse/test_reductions.py +++ b/pandas/tests/arrays/sparse/test_reductions.py @@ -286,6 +286,10 @@ class TestArgmaxArgmin: (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10), (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10), (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10), + (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0), + (SparseArray([1] + [0] * 10, fill_value=0), 0, 1), + (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0), + (SparseArray([1] + [0] * 10, fill_value=1), 0, 1), ], ) def test_argmax_argmin(self, arr, argmax_expected, argmin_expected): From 0057def5441685669ad7cf9418c778cc4ded6b4c Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 26 Jul 2022 15:27:01 -0500 Subject: [PATCH 16/18] Update array.py --- pandas/core/arrays/sparse/array.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 69c05cfc4c645..bf65da0412642 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1642,7 +1642,8 @@ def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar: def _argmin_argmax(self, kind: Literal["argmin", "argmax"]) -> int: values = self._sparse_values - mask = np.asarray(isna(self))[self._sparse_index.indices] + index = self._sparse_index.indices + mask = np.asarray(isna(values)) func = np.argmax if kind == "argmax" else np.argmin idx = np.arange(values.shape[0]) @@ -1650,7 +1651,7 @@ def _argmin_argmax(self, kind: Literal["argmin", "argmax"]) -> int: non_nan_idx = idx[~mask] _candidate = non_nan_idx[func(non_nans)] - candidate = self._sparse_index.indices[_candidate] + candidate = index[_candidate] if isna(self.fill_value): return candidate From b80ea9e37a99900a90c0226480ef48d21c80e99c Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 26 Jul 2022 22:09:20 -0500 Subject: [PATCH 17/18] move to perf --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 15a969b09c352..fdc9219e37e4c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,7 +279,6 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) -- Implemented faster :meth:`ExtensionArray.argmax` and :meth:`ExtensionArray.argmin` for :meth:`SparseArray` (:issue:`34197`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: @@ -803,6 +802,7 @@ Performance improvements - Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`) - Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`) - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47404`, :issue:`47405`) +- Performance improvement in ``argmax`` and ``argmin`` for :meth:`SparseArray` (:issue:`34197`) - .. --------------------------------------------------------------------------- From 8ee4987f48f6ffbb0407abc0d836e6bfa6ba7072 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 27 Jul 2022 09:40:02 -0700 Subject: [PATCH 18/18] Update doc/source/whatsnew/v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index fdc9219e37e4c..7a90d96926475 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -802,7 +802,7 @@ Performance improvements - Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`) - Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`) - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47404`, :issue:`47405`) -- Performance improvement in ``argmax`` and ``argmin`` for :meth:`SparseArray` (:issue:`34197`) +- Performance improvement in ``argmax`` and ``argmin`` for :class:`arrays.SparseArray` (:issue:`34197`) - .. ---------------------------------------------------------------------------