Skip to content

Allow rolling API to accept BaseIndexer subclass #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
62 changes: 62 additions & 0 deletions pandas/_libs/custom_window.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import abc
from typing import Optional, Sequence, Tuple, Union

import numpy as np

from pandas.tseries.offsets import DateOffset

BeginEnd = Tuple[np.ndarray, np.ndarray]

# TODO: Refactor MockFixedWindowIndexer, FixedWindowIndexer,
# VariableWindowIndexer to also have `get_window_bounds` methods that
# only calculate start & stop

# TODO: Currently, when win_type is specified, it calls a special routine,
# `roll_window`, while None win_type ops dispatch to specific methods.
# Consider consolidating?


class BaseIndexer(abc.ABC):
    """
    Abstract base class for custom rolling-window boundary calculations.

    An instance stores the ``index``, ``offset`` and ``keys`` it was
    constructed with; subclasses implement ``get_window_bounds`` and may
    read that state to compute the bounds.

    Parameters
    ----------
    index
        Stored unchanged on ``self.index``.
    offset : str or DateOffset
        Stored unchanged on ``self.offset``.
    keys : sequence of ndarray
        Stored unchanged on ``self.keys``.
    """

    def __init__(self, index, offset, keys):
        # TODO: The alternative is for the `rolling` API to accept
        # index, offset, and keys as keyword arguments
        self.index = index
        self.offset = offset  # type: Union[str, DateOffset]
        self.keys = keys  # type: Sequence[np.ndarray]

    # An instance method (not a classmethod): implementations need access
    # to the per-instance state set in ``__init__``, and the rolling code
    # invokes this on an indexer instance.
    @abc.abstractmethod
    def get_window_bounds(
        self,
        win_type: Optional[str] = None,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
    ) -> BeginEnd:
        """
        Compute the bounds of a window.

        Users should subclass this class to implement a custom method
        to calculate window bounds.

        Parameters
        ----------
        win_type : str, default None
            win_type passed from the top level rolling API

        min_periods : int, default None
            min_periods passed from the top level rolling API

        center : bool, default None
            center passed from the top level rolling API

        closed : str, default None
            closed passed from the top level rolling API

        Returns
        -------
        BeginEnd
            A tuple of ndarray[int64]s, indicating the boundaries of each
            window

        Raises
        ------
        NotImplementedError
            Always, in this base implementation; subclasses must override.
        """
        raise NotImplementedError
22 changes: 19 additions & 3 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import numpy as np

import pandas._libs.custom_window as libwindow_custom
import pandas._libs.window as libwindow
from pandas.compat._optional import import_optional_dependency
from pandas.compat.numpy import function as nv
Expand Down Expand Up @@ -481,14 +482,19 @@ class Window(_Window):

Parameters
----------
window : int, or offset
window : int, offset, or BaseIndexer subclass
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally type as much as possible (again I know that things are not typed now), maybe makes sense to do a pre-cursor to type what we have now

Size of the moving window. This is the number of observations used for
calculating the statistic. Each window will be a fixed size.

If it's an offset then this will be the time period of each window. Each
window will be variable-sized based on the observations included in
the time period. This is only valid for datetimelike indexes. This is
new in 0.19.0

If a BaseIndexer subclass is passed, calculates the window boundaries
based on the defined ``get_window_bounds`` method. Additional rolling
keyword arguments, namely `min_periods`, `center`, `win_type`, and
`closed` will be passed to `get_window_bounds`.
min_periods : int, default None
Minimum number of observations in window required to have a value
(otherwise result is NA). For a window that is specified by an offset,
Expand Down Expand Up @@ -631,7 +637,7 @@ def validate(self):
super().validate()

window = self.window
if isinstance(window, (list, tuple, np.ndarray)):
if isinstance(window, (list, tuple, np.ndarray, libwindow_custom.BaseIndexer)):
pass
elif is_integer(window):
if window <= 0:
Expand Down Expand Up @@ -693,6 +699,13 @@ def _pop_args(win_type, arg_names, kwargs):
win_type = _validate_win_type(self.win_type, kwargs)
# GH #15662. `False` makes symmetric window, rather than periodic.
return sig.get_window(win_type, window, False).astype(float)
elif isinstance(window, libwindow_custom.BaseIndexer):
return window.get_window_span(
win_type=self.win_type,
min_periods=self.min_periods,
center=self.center,
closed=self.closed,
)

def _apply_window(self, mean=True, **kwargs):
"""
Expand Down Expand Up @@ -1731,7 +1744,8 @@ def validate(self):
# min_periods must be an integer
if self.min_periods is None:
self.min_periods = 1

elif isinstance(self.window, libwindow_custom.BaseIndexer):
pass
elif not is_integer(self.window):
raise ValueError("window must be an integer")
elif self.window < 0:
Expand Down Expand Up @@ -2782,6 +2796,8 @@ def _get_center_of_mass(comass, span, halflife, alpha):


def _offset(window, center):
# TODO: (MATT) If the window is a BaseIndexer subclass,
# we need to pass in the materialized window
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the type of window here? It looks like it can be a sequence or an integer?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. After a light audit, I anticipate that sometimes the materialized window may be passed here and other times an Indexer class might be passed here.

Overall I think this routine is for label formatting.

if not is_integer(window):
window = len(window)
offset = (window - 1) / 2.0 if center else 0
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/window/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import pytest

import pandas._libs.custom_window as libwindow_custom

from pandas import date_range, offsets


@pytest.fixture(params=[True, False])
def raw(request):
Expand Down Expand Up @@ -47,3 +51,18 @@ def center(request):
@pytest.fixture(params=[None, 1])
def min_periods(request):
return request.param


@pytest.fixture
def dummy_custom_indexer():
    """
    Provide an instance of a minimal concrete ``BaseIndexer`` subclass.

    The instance is built from a 3-period daily ``date_range`` starting at
    "2019", a one-business-day offset, and the single key ``"A"``.
    """

    class DummyIndexer(libwindow_custom.BaseIndexer):
        # Pass-through constructor: all state handling is left to the base.
        def __init__(self, index, offset, keys):
            super().__init__(index, offset, keys)

        # Exists only to satisfy the abstract interface; computes nothing.
        def get_window_bounds(self, **kwargs):
            return None

    return DummyIndexer(
        index=date_range("2019", freq="D", periods=3),
        offset=offsets.BusinessDay(1),
        keys=["A"],
    )
15 changes: 15 additions & 0 deletions pandas/tests/window/test_custom_indexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pandas import Series


def test_custom_indexer_validates(
    dummy_custom_indexer, win_types, closed, min_periods, center
):
    """Check that ``rolling`` accepts a BaseIndexer subclass without raising."""
    # Collect the parametrized rolling options supplied by the fixtures.
    rolling_options = {
        "win_type": win_types,
        "center": center,
        "min_periods": min_periods,
        "closed": closed,
    }
    # Only construction is exercised; no aggregation is performed.
    Series(range(10)).rolling(dummy_custom_indexer, **rolling_options)