-
Notifications
You must be signed in to change notification settings - Fork 3
Allow rolling API to accept BaseIndexer subclass #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
60f41db
f269944
6b45a0d
f2e815b
266dc4e
6fab256
010aeb6
97b1386
691f72b
f94ae89
7e34fa0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import abc | ||
from typing import Optional, Sequence, Tuple, Union | ||
|
||
import numpy as np | ||
|
||
from pandas.tseries.offsets import DateOffset | ||
|
||
BeginEnd = Tuple[np.ndarray, np.ndarray] | ||
|
||
# TODO: Refactor MockFixedWindowIndexer, FixedWindowIndexer, | ||
# VariableWindowIndexer to also have `get_window_bounds` methods that | ||
# only calculate start & stop | ||
|
||
# TODO: Currently, when win_type is specified, it calls a special routine, | ||
# `roll_window`, while None win_type ops dispatch to specific methods. | ||
# Consider consolidating? | ||
|
||
|
||
class BaseIndexer(abc.ABC):
    """
    Abstract base class for custom rolling-window boundary calculations.

    Users should subclass this class and implement ``get_window_bounds``
    to define how the start and end of each window are computed.

    Parameters
    ----------
    index
        Stored unchanged on the instance as ``index``.
    offset : str or DateOffset
        Stored unchanged on the instance as ``offset``.
    keys : sequence of np.ndarray
        Stored unchanged on the instance as ``keys``.
    """

    def __init__(self, index, offset, keys):
        # TODO: The alternative is for the `rolling` API to accept
        # index, offset, and keys as keyword arguments
        self.index = index
        self.offset = offset  # type: Union[str, DateOffset]
        self.keys = keys  # type: Sequence[np.ndarray]

    @classmethod
    @abc.abstractmethod
    def get_window_bounds(
        cls,
        win_type: Optional[str] = None,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
    ) -> "BeginEnd":
        """
        Compute the bounds of a window.

        Users should subclass this class to implement a custom method
        to calculate window bounds.

        Parameters
        ----------
        win_type : str, default None
            win_type passed from the top level rolling API

        min_periods : int, default None
            min_periods passed from the top level rolling API

        center : bool, default None
            center passed from the top level rolling API

        closed : str, default None
            closed passed from the top level rolling API

        Returns
        -------
        BeginEnd
            A tuple of ndarray[int64]s, indicating the boundaries of each
            window

        Raises
        ------
        NotImplementedError
            Always, in this base implementation; subclasses must override.
        """
        # A classmethod can be invoked on the abstract class itself, which
        # bypasses the ABC instantiation check. Fail loudly rather than
        # silently returning None in place of the documented tuple.
        raise NotImplementedError
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
|
||
import numpy as np | ||
|
||
import pandas._libs.custom_window as libwindow_custom | ||
import pandas._libs.window as libwindow | ||
from pandas.compat._optional import import_optional_dependency | ||
from pandas.compat.numpy import function as nv | ||
|
@@ -481,14 +482,19 @@ class Window(_Window): | |
|
||
Parameters | ||
---------- | ||
window : int, or offset | ||
window : int, offset, or BaseIndexer subclass | ||
Size of the moving window. This is the number of observations used for | ||
calculating the statistic. Each window will be a fixed size. | ||
|
||
If it's an offset then this will be the time period of each window. Each | ||
window will be variable-sized, based on the observations included in | ||
the time period. This is only valid for datetimelike indexes. This is | ||
new in 0.19.0. | ||
|
||
If a BaseIndexer subclass is passed, calculates the window boundaries | ||
based on the defined ``get_window_bounds`` method. Additional rolling | ||
keyword arguments, namely `min_periods`, `center`, `win_type`, and | ||
`closed` will be passed to `get_window_bounds`. | ||
min_periods : int, default None | ||
Minimum number of observations in window required to have a value | ||
(otherwise result is NA). For a window that is specified by an offset, | ||
|
@@ -631,7 +637,7 @@ def validate(self): | |
super().validate() | ||
|
||
window = self.window | ||
if isinstance(window, (list, tuple, np.ndarray)): | ||
if isinstance(window, (list, tuple, np.ndarray, libwindow_custom.BaseIndexer)): | ||
pass | ||
elif is_integer(window): | ||
if window <= 0: | ||
|
@@ -693,6 +699,13 @@ def _pop_args(win_type, arg_names, kwargs): | |
win_type = _validate_win_type(self.win_type, kwargs) | ||
# GH #15662. `False` makes symmetric window, rather than periodic. | ||
return sig.get_window(win_type, window, False).astype(float) | ||
elif isinstance(window, libwindow_custom.BaseIndexer): | ||
return window.get_window_span( | ||
win_type=self.win_type, | ||
min_periods=self.min_periods, | ||
center=self.center, | ||
closed=self.closed, | ||
) | ||
|
||
def _apply_window(self, mean=True, **kwargs): | ||
""" | ||
|
@@ -1731,7 +1744,8 @@ def validate(self): | |
# min_periods must be an integer | ||
if self.min_periods is None: | ||
self.min_periods = 1 | ||
|
||
elif isinstance(self.window, libwindow_custom.BaseIndexer): | ||
pass | ||
elif not is_integer(self.window): | ||
raise ValueError("window must be an integer") | ||
elif self.window < 0: | ||
|
@@ -2782,6 +2796,8 @@ def _get_center_of_mass(comass, span, halflife, alpha): | |
|
||
|
||
def _offset(window, center): | ||
# TODO: (MATT) If the window is a BaseIndexer subclass, | ||
# we need to pass in the materialized window | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the type of window here? It looks like it can be a sequence or an integer? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Correct. After a light audit, I anticipate the materialized window may be passed here sometimes and an integer at other times. Overall, I think this routine is for label formatting. |
||
if not is_integer(window): | ||
window = len(window) | ||
offset = (window - 1) / 2.0 if center else 0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from pandas import Series | ||
|
||
|
||
def test_custom_indexer_validates(
    dummy_custom_indexer, win_types, closed, min_periods, center
):
    """Check that ``rolling`` accepts a BaseIndexer subclass without raising."""
    rolling_kwargs = {
        "win_type": win_types,
        "center": center,
        "min_periods": min_periods,
        "closed": closed,
    }
    ser = Series(range(10))
    # Only construction is exercised; the test passes as long as building the
    # rolling object raises no validation error. The result is discarded.
    ser.rolling(dummy_custom_indexer, **rolling_kwargs)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally, type as much as possible (again, I know that things are not typed now); maybe it makes sense to do a precursor PR to type what we have now.