Skip to content

Enable origin and offset arguments in resample #7284

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
06b2c83
Initial work toward enabling origin and offset arguments in resample
spencerkclark Nov 13, 2022
0339084
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2022
ffeb7a8
Fix _convert_offset_to_timedelta
spencerkclark Nov 13, 2022
9b850a1
Merge branch 'resample-update' of https://github.com/spencerkclark/xa…
spencerkclark Nov 13, 2022
9590458
Reduce number of tests
spencerkclark Nov 23, 2022
6db613e
Address initial review comments
spencerkclark Nov 23, 2022
85eb312
Add more typing information
spencerkclark Nov 23, 2022
04b1633
Make cftime import lazy
spencerkclark Nov 23, 2022
23b3fb6
Fix module_available import and test
spencerkclark Nov 23, 2022
0ac422f
Remove old origin argument
spencerkclark Nov 23, 2022
4db89cf
Add type annotations for resample_cftime.py
spencerkclark Nov 25, 2022
92c949e
Add None as a possibility for closed and label
spencerkclark Nov 25, 2022
fb724ce
Add what's new entry
spencerkclark Nov 25, 2022
e2fb20f
Add missing type annotation
spencerkclark Nov 25, 2022
cd655df
Delete added line
spencerkclark Nov 25, 2022
572799e
Fix typing errors
spencerkclark Nov 25, 2022
3e74c4a
Add comment and test for as_timedelta stub
spencerkclark Nov 25, 2022
cdc59c3
Remove old code
spencerkclark Nov 25, 2022
9026054
Merge branch 'main' into resample-update
dcherian Nov 28, 2022
fd13ba1
Merge remote-tracking branch 'upstream/main' into resample-update
dcherian Nov 28, 2022
4dbf694
[test-upstream]
dcherian Nov 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ v2022.11.1 (unreleased)

New Features
~~~~~~~~~~~~

- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample`
and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer
Clark <https://github.com/spencerkclark>`_.
- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`).
By `Gregory Lee <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.

Expand Down
4 changes: 4 additions & 0 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ def __mul__(self, other):
return new_self * other
return type(self)(n=other * self.n)

def as_timedelta(self):
    """Stub that every Tick subclass is required to override.

    Raises
    ------
    NotImplementedError
        Always; this base version provides no implementation.
    """
    raise NotImplementedError()


def _get_day_of_month(other, day_option):
"""Find the day in `other`'s month that satisfies a BaseCFTimeOffset's
Expand Down
42 changes: 38 additions & 4 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,13 @@
from .indexes import Index
from .resample import Resample
from .rolling_exp import RollingExp
from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords
from .types import (
DatetimeLike,
DTypeLikeSave,
ScalarOrArray,
SideOptions,
T_DataWithCoords,
)
from .variable import Variable

DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
Expand Down Expand Up @@ -817,7 +823,9 @@ def _resample(
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
base: int,
base: int | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
keep_attrs: bool | None,
loffset: datetime.timedelta | str | None,
restore_coord_dims: bool | None,
Expand Down Expand Up @@ -845,6 +853,18 @@ def _resample(
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.

If a datetime is not used, these values are also supported:
- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is the first day at midnight of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -960,10 +980,24 @@ def _resample(
if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
from .resample_cftime import CFTimeGrouper

grouper = CFTimeGrouper(freq, closed, label, base, loffset)
grouper = CFTimeGrouper(
freq=freq,
closed=closed,
label=label,
base=base,
loffset=loffset,
origin=origin,
offset=offset,
)
else:
grouper = pd.Grouper(
freq=freq, closed=closed, label=label, base=base, loffset=loffset
freq=freq,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,
)
group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
Expand Down
21 changes: 19 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from .rolling import DataArrayCoarsen, DataArrayRolling
from .types import (
CoarsenBoundaryOptions,
DatetimeLike,
DatetimeUnitOptions,
Dims,
ErrorOptions,
Expand Down Expand Up @@ -6531,7 +6532,9 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int = 0,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
keep_attrs: bool | None = None,
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
Expand All @@ -6555,10 +6558,22 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, default = 0
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.

If a datetime is not used, these values are also supported:
- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is the first day at midnight of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -6640,6 +6655,8 @@ def resample(
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
keep_attrs=keep_attrs,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
Expand Down
21 changes: 19 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
CoarsenBoundaryOptions,
CombineAttrsOptions,
CompatOptions,
DatetimeLike,
DatetimeUnitOptions,
Dims,
ErrorOptions,
Expand Down Expand Up @@ -9128,7 +9129,9 @@ def resample(
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int = 0,
base: int | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
keep_attrs: bool | None = None,
loffset: datetime.timedelta | str | None = None,
restore_coord_dims: bool | None = None,
Expand All @@ -9152,10 +9155,22 @@ def resample(
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, default = 0
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.

If a datetime is not used, these values are also supported:
- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is the first day at midnight of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
Expand Down Expand Up @@ -9190,6 +9205,8 @@ def resample(
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
keep_attrs=keep_attrs,
loffset=loffset,
restore_coord_dims=restore_coord_dims,
Expand Down
Loading