PERF: extract_array (#52351)

jbrockmendel · web-flow · commit 91c2cb5b4b43 · 2023-04-02T17:24:35.000+02:00
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -907,17 +907,12 @@ def _map_values(self, mapper, na_action=None, convert: bool = True):
             If the function returns a tuple with more than one element
             a MultiIndex will be returned.
         """
-        arr = extract_array(self, extract_numpy=True, extract_range=True)
+        arr = self._values
 
         if isinstance(arr, ExtensionArray):
             return arr.map(mapper, na_action=na_action)
 
-        # Argument 1 to "map_array" has incompatible type
-        # "Union[IndexOpsMixin, ndarray[Any, Any]]";
-        # expected "Union[ExtensionArray, ndarray[Any, Any]]
-        return algorithms.map_array(
-            arr, mapper, na_action=na_action, convert=convert  # type: ignore[arg-type]
-        )
+        return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)
 
     @final
     def value_counts(
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -55,8 +55,6 @@
     ABCDataFrame,
     ABCExtensionArray,
     ABCIndex,
-    ABCPandasArray,
-    ABCRangeIndex,
     ABCSeries,
 )
 from pandas.core.dtypes.missing import isna
@@ -379,6 +377,21 @@ def array(
     return PandasArray._from_sequence(data, dtype=dtype, copy=copy)
 
 
+_typs = frozenset(
+    {
+        "index",
+        "rangeindex",
+        "multiindex",
+        "datetimeindex",
+        "timedeltaindex",
+        "periodindex",
+        "categoricalindex",
+        "intervalindex",
+        "series",
+    }
+)
+
+
 @overload
 def extract_array(
     obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
@@ -438,19 +451,22 @@ def extract_array(
     >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
     array([1, 2, 3])
     """
-    if isinstance(obj, (ABCIndex, ABCSeries)):
-        if isinstance(obj, ABCRangeIndex):
+    typ = getattr(obj, "_typ", None)
+    if typ in _typs:
+        # i.e. isinstance(obj, (ABCIndex, ABCSeries))
+        if typ == "rangeindex":
             if extract_range:
-                return obj._values
-            # https://github.com/python/mypy/issues/1081
-            # error: Incompatible return value type (got "RangeIndex", expected
-            # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]")
-            return obj  # type: ignore[return-value]
+                # error: "T" has no attribute "_values"
+                return obj._values  # type: ignore[attr-defined]
+            return obj
 
-        return obj._values
+        # error: "T" has no attribute "_values"
+        return obj._values  # type: ignore[attr-defined]
 
-    elif extract_numpy and isinstance(obj, ABCPandasArray):
-        return obj.to_numpy()
+    elif extract_numpy and typ == "npy_extension":
+        # i.e. isinstance(obj, ABCPandasArray)
+        # error: "T" has no attribute "to_numpy"
+        return obj.to_numpy()  # type: ignore[attr-defined]
 
     return obj
 
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -57,8 +57,6 @@
     notna,
 )
 
-from pandas.core.construction import extract_array
-
 bn = import_optional_dependency("bottleneck", errors="warn")
 _BOTTLENECK_INSTALLED = bn is not None
 _USE_BOTTLENECK = False
@@ -308,9 +306,6 @@ def _get_values(
     #  with scalar fill_value.  This guarantee is important for the
     #  np.where call below
     assert is_scalar(fill_value)
-    # error: Incompatible types in assignment (expression has type "Union[Any,
-    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
-    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
 
     mask = _maybe_get_mask(values, skipna, mask)
 
@@ -522,12 +517,12 @@ def nanany(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, 2])
-    >>> nanops.nanany(s)
+    >>> nanops.nanany(s.values)
     True
 
     >>> from pandas.core import nanops
     >>> s = pd.Series([np.nan])
-    >>> nanops.nanany(s)
+    >>> nanops.nanany(s.values)
     False
     """
     if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
@@ -577,12 +572,12 @@ def nanall(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, 2, np.nan])
-    >>> nanops.nanall(s)
+    >>> nanops.nanall(s.values)
     True
 
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, 0])
-    >>> nanops.nanall(s)
+    >>> nanops.nanall(s.values)
     False
     """
     if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
@@ -637,7 +632,7 @@ def nansum(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, 2, np.nan])
-    >>> nanops.nansum(s)
+    >>> nanops.nansum(s.values)
     3.0
     """
     values, mask, dtype, dtype_max, _ = _get_values(
@@ -705,7 +700,7 @@ def nanmean(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, 2, np.nan])
-    >>> nanops.nanmean(s)
+    >>> nanops.nanmean(s.values)
     1.5
     """
     values, mask, dtype, dtype_max, _ = _get_values(
@@ -761,7 +756,7 @@ def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, np.nan, 2, 2])
-    >>> nanops.nanmedian(s)
+    >>> nanops.nanmedian(s.values)
     2.0
     """
 
@@ -928,7 +923,7 @@ def nanstd(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, np.nan, 2, 3])
-    >>> nanops.nanstd(s)
+    >>> nanops.nanstd(s.values)
     1.0
     """
     if values.dtype == "M8[ns]":
@@ -944,7 +939,7 @@ def nanstd(
 @disallow("M8", "m8")
 @bottleneck_switch(ddof=1)
 def nanvar(
-    values,
+    values: np.ndarray,
     *,
     axis: AxisInt | None = None,
     skipna: bool = True,
@@ -975,10 +970,9 @@ def nanvar(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, np.nan, 2, 3])
-    >>> nanops.nanvar(s)
+    >>> nanops.nanvar(s.values)
     1.0
     """
-    values = extract_array(values, extract_numpy=True)
     dtype = values.dtype
     mask = _maybe_get_mask(values, skipna, mask)
     if is_any_int_dtype(dtype):
@@ -1050,7 +1044,7 @@ def nansem(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, np.nan, 2, 3])
-    >>> nanops.nansem(s)
+    >>> nanops.nansem(s.values)
      0.5773502691896258
     """
     # This checks if non-numeric-like data is passed with numeric_only=False
@@ -1229,12 +1223,9 @@ def nanskew(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, np.nan, 1, 2])
-    >>> nanops.nanskew(s)
+    >>> nanops.nanskew(s.values)
     1.7320508075688787
     """
-    # error: Incompatible types in assignment (expression has type "Union[Any,
-    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
-    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
     mask = _maybe_get_mask(values, skipna, mask)
     if not is_float_dtype(values.dtype):
         values = values.astype("f8")
@@ -1319,12 +1310,9 @@ def nankurt(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, np.nan, 1, 3, 2])
-    >>> nanops.nankurt(s)
+    >>> nanops.nankurt(s.values)
     -1.2892561983471076
     """
-    # error: Incompatible types in assignment (expression has type "Union[Any,
-    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
-    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
     mask = _maybe_get_mask(values, skipna, mask)
     if not is_float_dtype(values.dtype):
         values = values.astype("f8")
@@ -1413,7 +1401,7 @@ def nanprod(
     --------
     >>> from pandas.core import nanops
     >>> s = pd.Series([1, 2, 3, np.nan])
-    >>> nanops.nanprod(s)
+    >>> nanops.nanprod(s.values)
     6.0
     """
     mask = _maybe_get_mask(values, skipna, mask)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -17,7 +17,6 @@
     date_range,
 )
 import pandas._testing as tm
-from pandas.core import nanops
 from pandas.tests.groupby import get_groupby_method_args
 from pandas.util import _test_decorators as td
 
@@ -365,7 +364,7 @@ def test_cython_median():
     labels[::17] = np.nan
 
     result = df.groupby(labels).median()
-    exp = df.groupby(labels).agg(nanops.nanmedian)
+    exp = df.groupby(labels).agg(np.nanmedian)
     tm.assert_frame_equal(result, exp)
 
     df = DataFrame(np.random.randn(1000, 5))
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
@@ -1238,8 +1238,8 @@ def test_nanops_independent_of_mask_param(operation):
     # GH22764
     ser = Series([1, 2, np.nan, 3, np.nan, 4])
     mask = ser.isna()
-    median_expected = operation(ser)
-    median_result = operation(ser, mask=mask)
+    median_expected = operation(ser._values)
+    median_result = operation(ser._values, mask=mask)
     assert median_expected == median_result