Skip to content

Commit 4a34497

Browse files
authored
API: rolling.apply will pass Series to function (#20584)
closes #5071
1 parent da33359 commit 4a34497

File tree

5 files changed

+479
-306
lines changed

5 files changed

+479
-306
lines changed

doc/source/whatsnew/v0.23.0.txt

+32
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,35 @@ The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtyp
6565
pd.get_dummies(df, columns=['c'], dtype=bool).dtypes
6666

6767

68+
.. _whatsnew_0230.enhancements.window_raw:
69+
70+
Rolling/Expanding.apply() accepts a ``raw`` keyword to pass a ``Series`` to the function
71+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
72+
73+
:func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
74+
:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` have gained a ``raw=None`` parameter.
75+
This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function. If ``False``, a ``Series`` will be passed. The
76+
default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``.
77+
In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`, :issue:`20584`)
78+
79+
.. ipython:: python
80+
81+
s = pd.Series(np.arange(5), np.arange(5) + 1)
82+
s
83+
84+
Pass a ``Series``:
85+
86+
.. ipython:: python
87+
88+
s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False)
89+
90+
Mimic the original behavior of passing an ndarray:
91+
92+
.. ipython:: python
93+
94+
s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True)
95+
96+
6897
.. _whatsnew_0230.enhancements.merge_on_columns_and_levels:
6998

7099
Merging on a combination of columns and index levels
@@ -817,6 +846,7 @@ Other API Changes
817846
- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`)
818847
- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`).
819848
- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`)
849+
- A user-defined function that is passed to :func:`Series.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, or their expanding cousins, will now *always* be passed a ``Series``, rather than an ``np.ndarray``; only ``.apply()`` has the ``raw`` keyword, see :ref:`here <whatsnew_0230.enhancements.window_raw>`. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`)
820850

821851
.. _whatsnew_0230.deprecations:
822852

@@ -845,6 +875,8 @@ Deprecations
845875
- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`)
846876
- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`)
847877
- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`).
878+
- :func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
879+
:func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` have deprecated passing an ``np.ndarray`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`)
848880

849881
.. _whatsnew_0230.prior_deprecations:
850882

pandas/_libs/window.pyx

+32-14
Original file line numberDiff line numberDiff line change
@@ -1432,39 +1432,44 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
14321432
return output
14331433

14341434

1435-
def roll_generic(ndarray[float64_t, cast=True] input,
1435+
def roll_generic(object obj,
14361436
int64_t win, int64_t minp, object index, object closed,
1437-
int offset, object func,
1437+
int offset, object func, bint raw,
14381438
object args, object kwargs):
14391439
cdef:
14401440
ndarray[double_t] output, counts, bufarr
1441+
ndarray[float64_t, cast=True] arr
14411442
float64_t *buf
14421443
float64_t *oldbuf
14431444
int64_t nobs = 0, i, j, s, e, N
14441445
bint is_variable
14451446
ndarray[int64_t] start, end
14461447

1447-
if not input.flags.c_contiguous:
1448-
input = input.copy('C')
1449-
1450-
n = len(input)
1448+
n = len(obj)
14511449
if n == 0:
1452-
return input
1450+
return obj
1451+
1452+
arr = np.asarray(obj)
1453+
1454+
# ndarray input
1455+
if raw:
1456+
if not arr.flags.c_contiguous:
1457+
arr = arr.copy('C')
14531458

1454-
counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
1459+
counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float),
14551460
np.array([0.] * offset)]),
14561461
win, minp, index, closed)[offset:]
14571462

1458-
start, end, N, win, minp, is_variable = get_window_indexer(input, win,
1463+
start, end, N, win, minp, is_variable = get_window_indexer(arr, win,
14591464
minp, index,
14601465
closed,
14611466
floor=0)
14621467

14631468
output = np.empty(N, dtype=float)
14641469

14651470
if is_variable:
1471+
# variable window arr or series
14661472

1467-
# variable window
14681473
if offset != 0:
14691474
raise ValueError("unable to roll_generic with a non-zero offset")
14701475

@@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input,
14731478
e = end[i]
14741479

14751480
if counts[i] >= minp:
1476-
output[i] = func(input[s:e], *args, **kwargs)
1481+
if raw:
1482+
output[i] = func(arr[s:e], *args, **kwargs)
1483+
else:
1484+
output[i] = func(obj.iloc[s:e], *args, **kwargs)
1485+
else:
1486+
output[i] = NaN
1487+
1488+
elif not raw:
1489+
# series
1490+
for i from 0 <= i < N:
1491+
if counts[i] >= minp:
1492+
sl = slice(int_max(i + offset - win + 1, 0),
1493+
int_min(i + offset + 1, N))
1494+
output[i] = func(obj.iloc[sl], *args, **kwargs)
14771495
else:
14781496
output[i] = NaN
14791497

@@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input,
14821500
# truncated windows at the beginning, through first full-length window
14831501
for i from 0 <= i < (int_min(win, N) - offset):
14841502
if counts[i] >= minp:
1485-
output[i] = func(input[0: (i + offset + 1)], *args, **kwargs)
1503+
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
14861504
else:
14871505
output[i] = NaN
14881506

14891507
# remaining full-length windows
1490-
buf = <float64_t *> input.data
1508+
buf = <float64_t *> arr.data
14911509
bufarr = np.empty(win, dtype=float)
14921510
oldbuf = <float64_t *> bufarr.data
14931511
for i from (win - offset) <= i < (N - offset):
@@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
15021520
# truncated windows at the end
15031521
for i from int_max(N - offset, 0) <= i < N:
15041522
if counts[i] >= minp:
1505-
output[i] = func(input[int_max(i + offset - win + 1, 0): N],
1523+
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
15061524
*args,
15071525
**kwargs)
15081526
else:

pandas/core/generic.py

+2
Original file line numberDiff line numberDiff line change
@@ -4292,6 +4292,8 @@ def pipe(self, func, *args, **kwargs):
42924292
Notes
42934293
-----
42944294
`agg` is an alias for `aggregate`. Use the alias.
4295+
4296+
A passed user-defined-function will be passed a Series for evaluation.
42954297
""")
42964298

42974299
_shared_docs['transform'] = ("""

pandas/core/window.py

+43-11
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def _center_window(self, result, window):
314314
def aggregate(self, arg, *args, **kwargs):
315315
result, how = self._aggregate(arg, *args, **kwargs)
316316
if result is None:
317-
return self.apply(arg, args=args, kwargs=kwargs)
317+
return self.apply(arg, raw=False, args=args, kwargs=kwargs)
318318
return result
319319

320320
agg = aggregate
@@ -954,23 +954,53 @@ def count(self):
954954
Parameters
955955
----------
956956
func : function
957-
Must produce a single value from an ndarray input
958-
\*args and \*\*kwargs are passed to the function""")
957+
Must produce a single value from an ndarray input if ``raw=True``
958+
or a Series if ``raw=False``
959+
raw : bool, default None
960+
* ``False`` : passes each row or column as a Series to the
961+
function.
962+
* ``True`` or ``None`` : the passed function will receive ndarray
963+
objects instead.
964+
If you are just applying a NumPy reduction function this will
965+
achieve much better performance.
966+
967+
The `raw` parameter is required and will show a FutureWarning if
968+
not passed. In the future `raw` will default to False.
969+
970+
.. versionadded:: 0.23.0
971+
972+
\*args and \*\*kwargs are passed to the function""")
973+
974+
def apply(self, func, raw=None, args=(), kwargs={}):
975+
from pandas import Series
959976

960-
def apply(self, func, args=(), kwargs={}):
961977
# TODO: _level is unused?
962978
_level = kwargs.pop('_level', None) # noqa
963979
window = self._get_window()
964980
offset = _offset(window, self.center)
965981
index, indexi = self._get_index()
966982

983+
# TODO: default is for backward compat
984+
# change to False in the future
985+
if raw is None:
986+
warnings.warn(
987+
"Currently, 'apply' passes the values as ndarrays to the "
988+
"applied function. In the future, this will change to passing "
989+
"it as Series objects. You need to specify 'raw=True' to keep "
990+
"the current behaviour, and you can pass 'raw=False' to "
991+
"silence this warning", FutureWarning, stacklevel=3)
992+
raw = True
993+
967994
def f(arg, window, min_periods, closed):
968995
minp = _use_window(min_periods, window)
969-
return _window.roll_generic(arg, window, minp, indexi, closed,
970-
offset, func, args, kwargs)
996+
if not raw:
997+
arg = Series(arg, index=self.obj.index)
998+
return _window.roll_generic(
999+
arg, window, minp, indexi,
1000+
closed, offset, func, raw, args, kwargs)
9711001

9721002
return self._apply(f, func, args=args, kwargs=kwargs,
973-
center=False)
1003+
center=False, raw=raw)
9741004

9751005
def sum(self, *args, **kwargs):
9761006
nv.validate_window_func('sum', args, kwargs)
@@ -1498,8 +1528,9 @@ def count(self):
14981528
@Substitution(name='rolling')
14991529
@Appender(_doc_template)
15001530
@Appender(_shared_docs['apply'])
1501-
def apply(self, func, args=(), kwargs={}):
1502-
return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
1531+
def apply(self, func, raw=None, args=(), kwargs={}):
1532+
return super(Rolling, self).apply(
1533+
func, raw=raw, args=args, kwargs=kwargs)
15031534

15041535
@Substitution(name='rolling')
15051536
@Appender(_shared_docs['sum'])
@@ -1756,8 +1787,9 @@ def count(self, **kwargs):
17561787
@Substitution(name='expanding')
17571788
@Appender(_doc_template)
17581789
@Appender(_shared_docs['apply'])
1759-
def apply(self, func, args=(), kwargs={}):
1760-
return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
1790+
def apply(self, func, raw=None, args=(), kwargs={}):
1791+
return super(Expanding, self).apply(
1792+
func, raw=raw, args=args, kwargs=kwargs)
17611793

17621794
@Substitution(name='expanding')
17631795
@Appender(_shared_docs['sum'])

0 commit comments

Comments
 (0)