Skip to content

Commit 91c2cb5

Browse files
authored
PERF: extract_array (#52351)
1 parent 9051aee commit 91c2cb5

File tree

5 files changed

+47
-49
lines changed

5 files changed

+47
-49
lines changed

pandas/core/base.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -907,17 +907,12 @@ def _map_values(self, mapper, na_action=None, convert: bool = True):
907907
If the function returns a tuple with more than one element
908908
a MultiIndex will be returned.
909909
"""
910-
arr = extract_array(self, extract_numpy=True, extract_range=True)
910+
arr = self._values
911911

912912
if isinstance(arr, ExtensionArray):
913913
return arr.map(mapper, na_action=na_action)
914914

915-
# Argument 1 to "map_array" has incompatible type
916-
# "Union[IndexOpsMixin, ndarray[Any, Any]]";
917-
# expected "Union[ExtensionArray, ndarray[Any, Any]]
918-
return algorithms.map_array(
919-
arr, mapper, na_action=na_action, convert=convert # type: ignore[arg-type]
920-
)
915+
return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)
921916

922917
@final
923918
def value_counts(

pandas/core/construction.py

Lines changed: 28 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -55,8 +55,6 @@
5555
ABCDataFrame,
5656
ABCExtensionArray,
5757
ABCIndex,
58-
ABCPandasArray,
59-
ABCRangeIndex,
6058
ABCSeries,
6159
)
6260
from pandas.core.dtypes.missing import isna
@@ -379,6 +377,21 @@ def array(
379377
return PandasArray._from_sequence(data, dtype=dtype, copy=copy)
380378

381379

380+
_typs = frozenset(
381+
{
382+
"index",
383+
"rangeindex",
384+
"multiindex",
385+
"datetimeindex",
386+
"timedeltaindex",
387+
"periodindex",
388+
"categoricalindex",
389+
"intervalindex",
390+
"series",
391+
}
392+
)
393+
394+
382395
@overload
383396
def extract_array(
384397
obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
@@ -438,19 +451,22 @@ def extract_array(
438451
>>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
439452
array([1, 2, 3])
440453
"""
441-
if isinstance(obj, (ABCIndex, ABCSeries)):
442-
if isinstance(obj, ABCRangeIndex):
454+
typ = getattr(obj, "_typ", None)
455+
if typ in _typs:
456+
# i.e. isinstance(obj, (ABCIndex, ABCSeries))
457+
if typ == "rangeindex":
443458
if extract_range:
444-
return obj._values
445-
# https://github.com/python/mypy/issues/1081
446-
# error: Incompatible return value type (got "RangeIndex", expected
447-
# "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]")
448-
return obj # type: ignore[return-value]
459+
# error: "T" has no attribute "_values"
460+
return obj._values # type: ignore[attr-defined]
461+
return obj
449462

450-
return obj._values
463+
# error: "T" has no attribute "_values"
464+
return obj._values # type: ignore[attr-defined]
451465

452-
elif extract_numpy and isinstance(obj, ABCPandasArray):
453-
return obj.to_numpy()
466+
elif extract_numpy and typ == "npy_extension":
467+
# i.e. isinstance(obj, ABCPandasArray)
468+
# error: "T" has no attribute "to_numpy"
469+
return obj.to_numpy() # type: ignore[attr-defined]
454470

455471
return obj
456472

pandas/core/nanops.py

Lines changed: 14 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,6 @@
5757
notna,
5858
)
5959

60-
from pandas.core.construction import extract_array
61-
6260
bn = import_optional_dependency("bottleneck", errors="warn")
6361
_BOTTLENECK_INSTALLED = bn is not None
6462
_USE_BOTTLENECK = False
@@ -308,9 +306,6 @@ def _get_values(
308306
# with scalar fill_value. This guarantee is important for the
309307
# np.where call below
310308
assert is_scalar(fill_value)
311-
# error: Incompatible types in assignment (expression has type "Union[Any,
312-
# Union[ExtensionArray, ndarray]]", variable has type "ndarray")
313-
values = extract_array(values, extract_numpy=True) # type: ignore[assignment]
314309

315310
mask = _maybe_get_mask(values, skipna, mask)
316311

@@ -522,12 +517,12 @@ def nanany(
522517
--------
523518
>>> from pandas.core import nanops
524519
>>> s = pd.Series([1, 2])
525-
>>> nanops.nanany(s)
520+
>>> nanops.nanany(s.values)
526521
True
527522
528523
>>> from pandas.core import nanops
529524
>>> s = pd.Series([np.nan])
530-
>>> nanops.nanany(s)
525+
>>> nanops.nanany(s.values)
531526
False
532527
"""
533528
if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
@@ -577,12 +572,12 @@ def nanall(
577572
--------
578573
>>> from pandas.core import nanops
579574
>>> s = pd.Series([1, 2, np.nan])
580-
>>> nanops.nanall(s)
575+
>>> nanops.nanall(s.values)
581576
True
582577
583578
>>> from pandas.core import nanops
584579
>>> s = pd.Series([1, 0])
585-
>>> nanops.nanall(s)
580+
>>> nanops.nanall(s.values)
586581
False
587582
"""
588583
if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
@@ -637,7 +632,7 @@ def nansum(
637632
--------
638633
>>> from pandas.core import nanops
639634
>>> s = pd.Series([1, 2, np.nan])
640-
>>> nanops.nansum(s)
635+
>>> nanops.nansum(s.values)
641636
3.0
642637
"""
643638
values, mask, dtype, dtype_max, _ = _get_values(
@@ -705,7 +700,7 @@ def nanmean(
705700
--------
706701
>>> from pandas.core import nanops
707702
>>> s = pd.Series([1, 2, np.nan])
708-
>>> nanops.nanmean(s)
703+
>>> nanops.nanmean(s.values)
709704
1.5
710705
"""
711706
values, mask, dtype, dtype_max, _ = _get_values(
@@ -761,7 +756,7 @@ def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=
761756
--------
762757
>>> from pandas.core import nanops
763758
>>> s = pd.Series([1, np.nan, 2, 2])
764-
>>> nanops.nanmedian(s)
759+
>>> nanops.nanmedian(s.values)
765760
2.0
766761
"""
767762

@@ -928,7 +923,7 @@ def nanstd(
928923
--------
929924
>>> from pandas.core import nanops
930925
>>> s = pd.Series([1, np.nan, 2, 3])
931-
>>> nanops.nanstd(s)
926+
>>> nanops.nanstd(s.values)
932927
1.0
933928
"""
934929
if values.dtype == "M8[ns]":
@@ -944,7 +939,7 @@ def nanstd(
944939
@disallow("M8", "m8")
945940
@bottleneck_switch(ddof=1)
946941
def nanvar(
947-
values,
942+
values: np.ndarray,
948943
*,
949944
axis: AxisInt | None = None,
950945
skipna: bool = True,
@@ -975,10 +970,9 @@ def nanvar(
975970
--------
976971
>>> from pandas.core import nanops
977972
>>> s = pd.Series([1, np.nan, 2, 3])
978-
>>> nanops.nanvar(s)
973+
>>> nanops.nanvar(s.values)
979974
1.0
980975
"""
981-
values = extract_array(values, extract_numpy=True)
982976
dtype = values.dtype
983977
mask = _maybe_get_mask(values, skipna, mask)
984978
if is_any_int_dtype(dtype):
@@ -1050,7 +1044,7 @@ def nansem(
10501044
--------
10511045
>>> from pandas.core import nanops
10521046
>>> s = pd.Series([1, np.nan, 2, 3])
1053-
>>> nanops.nansem(s)
1047+
>>> nanops.nansem(s.values)
10541048
0.5773502691896258
10551049
"""
10561050
# This checks if non-numeric-like data is passed with numeric_only=False
@@ -1229,12 +1223,9 @@ def nanskew(
12291223
--------
12301224
>>> from pandas.core import nanops
12311225
>>> s = pd.Series([1, np.nan, 1, 2])
1232-
>>> nanops.nanskew(s)
1226+
>>> nanops.nanskew(s.values)
12331227
1.7320508075688787
12341228
"""
1235-
# error: Incompatible types in assignment (expression has type "Union[Any,
1236-
# Union[ExtensionArray, ndarray]]", variable has type "ndarray")
1237-
values = extract_array(values, extract_numpy=True) # type: ignore[assignment]
12381229
mask = _maybe_get_mask(values, skipna, mask)
12391230
if not is_float_dtype(values.dtype):
12401231
values = values.astype("f8")
@@ -1319,12 +1310,9 @@ def nankurt(
13191310
--------
13201311
>>> from pandas.core import nanops
13211312
>>> s = pd.Series([1, np.nan, 1, 3, 2])
1322-
>>> nanops.nankurt(s)
1313+
>>> nanops.nankurt(s.values)
13231314
-1.2892561983471076
13241315
"""
1325-
# error: Incompatible types in assignment (expression has type "Union[Any,
1326-
# Union[ExtensionArray, ndarray]]", variable has type "ndarray")
1327-
values = extract_array(values, extract_numpy=True) # type: ignore[assignment]
13281316
mask = _maybe_get_mask(values, skipna, mask)
13291317
if not is_float_dtype(values.dtype):
13301318
values = values.astype("f8")
@@ -1413,7 +1401,7 @@ def nanprod(
14131401
--------
14141402
>>> from pandas.core import nanops
14151403
>>> s = pd.Series([1, 2, 3, np.nan])
1416-
>>> nanops.nanprod(s)
1404+
>>> nanops.nanprod(s.values)
14171405
6.0
14181406
"""
14191407
mask = _maybe_get_mask(values, skipna, mask)

pandas/tests/groupby/test_function.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
date_range,
1818
)
1919
import pandas._testing as tm
20-
from pandas.core import nanops
2120
from pandas.tests.groupby import get_groupby_method_args
2221
from pandas.util import _test_decorators as td
2322

@@ -365,7 +364,7 @@ def test_cython_median():
365364
labels[::17] = np.nan
366365

367366
result = df.groupby(labels).median()
368-
exp = df.groupby(labels).agg(nanops.nanmedian)
367+
exp = df.groupby(labels).agg(np.nanmedian)
369368
tm.assert_frame_equal(result, exp)
370369

371370
df = DataFrame(np.random.randn(1000, 5))

pandas/tests/test_nanops.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1238,8 +1238,8 @@ def test_nanops_independent_of_mask_param(operation):
12381238
# GH22764
12391239
ser = Series([1, 2, np.nan, 3, np.nan, 4])
12401240
mask = ser.isna()
1241-
median_expected = operation(ser)
1242-
median_result = operation(ser, mask=mask)
1241+
median_expected = operation(ser._values)
1242+
median_result = operation(ser._values, mask=mask)
12431243
assert median_expected == median_result
12441244

12451245

0 commit comments

Comments
 (0)