From 673169b182d96318c3fefeb99d76c61733579011 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Jun 2024 09:07:45 -0600 Subject: [PATCH 1/8] Add test for #9108 --- xarray/tests/test_groupby.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 47cda064143..e93a49fc9d2 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -22,6 +22,7 @@ create_test_data, has_cftime, has_flox, + requires_cftime, requires_dask, requires_flox, requires_scipy, @@ -2606,3 +2607,12 @@ def test_default_flox_method() -> None: assert kwargs["method"] == "cohorts" else: assert "method" not in kwargs + + +@requires_cftime +def test_cftime_resample_gh_9108(): + ds = Dataset( + {"pr": ("time", np.random.random((10,)))}, + coords={"time": xr.date_range("0001-01-01", periods=10, freq="D")}, + ) + ds.resample(time="ME") From 0b071e369a88f6672accc2a99c40d3b4b334f2f9 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Jun 2024 09:29:43 -0600 Subject: [PATCH 2/8] Update xarray/tests/test_groupby.py Co-authored-by: Spencer Clark --- xarray/tests/test_groupby.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index e93a49fc9d2..66657258e01 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2610,6 +2610,7 @@ def test_default_flox_method() -> None: @requires_cftime +@pytest.mark.filterwarnings("ignore") def test_cftime_resample_gh_9108(): ds = Dataset( {"pr": ("time", np.random.random((10,)))}, From 356d112037d2c76f47504ea2529bdbf2fe178ce0 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 1 Aug 2024 08:04:07 -0400 Subject: [PATCH 3/8] Take has_year_zero into account in offset arithmetic --- xarray/coding/cftime_offsets.py | 46 ++++++----- xarray/tests/test_cftime_offsets.py | 114 +++++++++++++++++++--------- 2 files changed, 104 insertions(+), 56 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index c2712569782..7d136de8cc2 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -43,6 +43,7 @@ from __future__ import annotations import re +import warnings from datetime import datetime, timedelta from functools import partial from typing import TYPE_CHECKING, ClassVar @@ -252,7 +253,7 @@ def _get_day_of_month(other, day_option): if day_option == "start": return 1 elif day_option == "end": - return _days_in_month(other) + return other.daysinmonth elif day_option is None: # Note: unlike `_shift_month`, _get_day_of_month does not # allow day_option = None @@ -261,15 +262,6 @@ def _get_day_of_month(other, day_option): raise ValueError(day_option) -def _days_in_month(date): - """The number of days in the month of the given date""" - if date.month == 12: - reference = type(date)(date.year + 1, 1, 1) - else: - reference = type(date)(date.year, date.month + 1, 1) - return (reference - timedelta(days=1)).day - - def _adjust_n_months(other_day, n, reference_day): """Adjust the number of times a monthly offset is applied based on the day of a given date, and the reference day provided. @@ -298,22 +290,34 @@ def _shift_month(date, months, day_option="start"): if cftime is None: raise ModuleNotFoundError("No module named 'cftime'") + has_year_zero = date.has_year_zero delta_year = (date.month + months) // 12 month = (date.month + months) % 12 if month == 0: month = 12 delta_year = delta_year - 1 + + if not has_year_zero: + if date.year < 0 and date.year + delta_year >= 0: + delta_year = delta_year + 1 + elif date.year > 0 and date.year + delta_year <= 0: + delta_year = delta_year - 1 + year = date.year + delta_year - if day_option == "start": - day = 1 - elif day_option == "end": - reference = type(date)(year, month, 1) - day = _days_in_month(reference) - else: - raise ValueError(day_option) - return date.replace(year=year, month=month, day=day) + # Silence warnings associated with generating dates with years < 1. + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="this date/calendar/year zero") + + if day_option == "start": + day = 1 + elif day_option == "end": + reference = type(date)(year, month, 1, has_year_zero=has_year_zero) + day = reference.daysinmonth + else: + raise ValueError(day_option) + return date.replace(year=year, month=month, day=day) def roll_qtrday(other, n, month, day_option, modby=3): @@ -391,13 +395,13 @@ class MonthEnd(BaseCFTimeOffset): _freq = "ME" def __apply__(self, other): - n = _adjust_n_months(other.day, self.n, _days_in_month(other)) + n = _adjust_n_months(other.day, self.n, other.daysinmonth) return _shift_month(other, n, "end") def onOffset(self, date): """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" - return date.day == _days_in_month(date) + return date.day == date.daysinmonth _MONTH_ABBREVIATIONS = { @@ -589,7 +593,7 @@ class YearEnd(YearOffset): def onOffset(self, date): """Check if the given date is in the set of possible dates created using a length-one version of this offset class.""" - return date.day == _days_in_month(date) and date.month == self.month + return date.day == date.daysinmonth and date.month == self.month def rollforward(self, date): """Roll date forward to nearest end of year""" diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index eabb7d2f4d6..5375bc249e4 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from itertools import product from typing import Callable, Literal @@ -24,7 +25,6 @@ Tick, YearBegin, YearEnd, - _days_in_month, _legacy_to_new_freq, _new_to_legacy_freq, cftime_range, @@ -589,22 +589,6 @@ def test_minus_offset_error(a, b): b - a -def test_days_in_month_non_december(calendar): - date_type = get_date_type(calendar) - reference = date_type(1, 4, 1) - assert _days_in_month(reference) == 30 - - -def test_days_in_month_december(calendar): - if calendar == "360_day": - expected = 30 - else: - expected = 31 - date_type = get_date_type(calendar) - reference = date_type(1, 12, 5) - assert _days_in_month(reference) == expected - - @pytest.mark.parametrize( ("initial_date_args", "offset", "expected_date_args"), [ @@ -657,7 +641,7 @@ def test_add_month_end( # Here the days at the end of each month varies based on the calendar used expected_date_args = ( - expected_year_month + (_days_in_month(reference),) + expected_sub_day + expected_year_month + (reference.daysinmonth,) + expected_sub_day ) expected = date_type(*expected_date_args) assert result == expected @@ -694,9 +678,7 @@ def test_add_month_end_onOffset( date_type = get_date_type(calendar) reference_args = initial_year_month + (1,) reference = date_type(*reference_args) - initial_date_args = ( - initial_year_month + (_days_in_month(reference),) + initial_sub_day - ) + initial_date_args = initial_year_month + (reference.daysinmonth,) + initial_sub_day initial = date_type(*initial_date_args) result = initial + offset reference_args = expected_year_month + (1,) @@ -704,7 +686,7 @@ def test_add_month_end_onOffset( # Here the days at the end of each month varies based on the calendar used expected_date_args = ( - expected_year_month + (_days_in_month(reference),) + expected_sub_day + expected_year_month + (reference.daysinmonth,) + expected_sub_day ) expected = date_type(*expected_date_args) assert result == expected @@ -756,7 +738,7 @@ def test_add_year_end( # Here the days at the end of each month varies based on the calendar used expected_date_args = ( - expected_year_month + (_days_in_month(reference),) + expected_sub_day + expected_year_month + (reference.daysinmonth,) + expected_sub_day ) expected = date_type(*expected_date_args) assert result == expected @@ -792,9 +774,7 @@ def test_add_year_end_onOffset( date_type = get_date_type(calendar) reference_args = initial_year_month + (1,) reference = date_type(*reference_args) - initial_date_args = ( - initial_year_month + (_days_in_month(reference),) + initial_sub_day - ) + initial_date_args = initial_year_month + (reference.daysinmonth,) + initial_sub_day initial = date_type(*initial_date_args) result = initial + offset reference_args = expected_year_month + (1,) @@ -802,7 +782,7 @@ def test_add_year_end_onOffset( # Here the days at the end of each month varies based on the calendar used expected_date_args = ( - expected_year_month + (_days_in_month(reference),) + expected_sub_day + expected_year_month + (reference.daysinmonth,) + expected_sub_day ) expected = date_type(*expected_date_args) assert result == expected @@ -854,7 +834,7 @@ def test_add_quarter_end( # Here the days at the end of each month varies based on the calendar used expected_date_args = ( - expected_year_month + (_days_in_month(reference),) + expected_sub_day + expected_year_month + (reference.daysinmonth,) + expected_sub_day ) expected = date_type(*expected_date_args) assert result == expected @@ -890,9 +870,7 @@ def test_add_quarter_end_onOffset( date_type = get_date_type(calendar) reference_args = initial_year_month + (1,) reference = date_type(*reference_args) - initial_date_args = ( - initial_year_month + (_days_in_month(reference),) + initial_sub_day - ) + initial_date_args = initial_year_month + (reference.daysinmonth,) + initial_sub_day initial = date_type(*initial_date_args) result = initial + offset reference_args = expected_year_month + (1,) @@ -900,7 +878,7 @@ def test_add_quarter_end_onOffset( # Here the days at the end of each month varies based on the calendar used expected_date_args = ( - expected_year_month + (_days_in_month(reference),) + expected_sub_day + expected_year_month + (reference.daysinmonth,) + expected_sub_day ) expected = date_type(*expected_date_args) assert result == expected @@ -957,7 +935,7 @@ def test_onOffset_month_or_quarter_or_year_end( date_type = get_date_type(calendar) reference_args = year_month_args + (1,) reference = date_type(*reference_args) - date_args = year_month_args + (_days_in_month(reference),) + sub_day_args + date_args = year_month_args + (reference.daysinmonth,) + sub_day_args date = date_type(*date_args) result = offset.onOffset(date) assert result @@ -1005,7 +983,7 @@ def test_rollforward(calendar, offset, initial_date_args, partial_expected_date_ elif isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)): reference_args = partial_expected_date_args + (1,) reference = date_type(*reference_args) - expected_date_args = partial_expected_date_args + (_days_in_month(reference),) + expected_date_args = partial_expected_date_args + (reference.daysinmonth,) else: expected_date_args = partial_expected_date_args expected = date_type(*expected_date_args) @@ -1056,7 +1034,7 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg elif isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)): reference_args = partial_expected_date_args + (1,) reference = date_type(*reference_args) - expected_date_args = partial_expected_date_args + (_days_in_month(reference),) + expected_date_args = partial_expected_date_args + (reference.daysinmonth,) else: expected_date_args = partial_expected_date_args expected = date_type(*expected_date_args) @@ -1787,3 +1765,69 @@ def test_date_range_no_freq(start, end, periods): expected = pd.date_range(start=start, end=end, periods=periods) np.testing.assert_array_equal(result, expected) + + +@pytest.mark.parametrize( + "offset", + [ + MonthBegin(n=1), + MonthEnd(n=1), + QuarterBegin(n=1), + QuarterEnd(n=1), + YearBegin(n=1), + YearEnd(n=1), + ], + ids=lambda x: f"{x}", +) +@pytest.mark.parametrize("has_year_zero", [False, True]) +def test_offset_addition_preserves_has_year_zero(offset, has_year_zero): + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="this date/calendar/year zero") + datetime = cftime.DatetimeGregorian(-1, 12, 31, has_year_zero=has_year_zero) + + result = datetime + offset + assert result.has_year_zero == datetime.has_year_zero + if has_year_zero: + assert result.year == 0 + else: + assert result.year == 1 + + +@pytest.mark.parametrize( + "offset", + [ + MonthBegin(n=1), + MonthEnd(n=1), + QuarterBegin(n=1), + QuarterEnd(n=1), + YearBegin(n=1), + YearEnd(n=1), + ], + ids=lambda x: f"{x}", +) +@pytest.mark.parametrize("has_year_zero", [False, True]) +def test_offset_subtraction_preserves_has_year_zero(offset, has_year_zero): + datetime = cftime.DatetimeGregorian(1, 1, 1, has_year_zero=has_year_zero) + result = datetime - offset + assert result.has_year_zero == datetime.has_year_zero + if has_year_zero: + assert result.year == 0 + else: + assert result.year == -1 + + +@pytest.mark.parametrize("has_year_zero", [False, True]) +def test_offset_day_option_end_accounts_for_has_year_zero(has_year_zero): + offset = MonthEnd(n=1) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="this date/calendar/year zero") + datetime = cftime.DatetimeGregorian(-1, 1, 31, has_year_zero=has_year_zero) + + result = datetime + offset + assert result.has_year_zero == datetime.has_year_zero + if has_year_zero: + assert result.day == 28 + else: + assert result.day == 29 From 8d3b2bda0fa530391ab424bb328680be9b2222ba Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 1 Aug 2024 06:58:49 -0600 Subject: [PATCH 4/8] Fix test --- xarray/tests/test_groupby.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 84ca2953501..6c9254966d9 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2507,11 +2507,18 @@ def test_default_flox_method() -> None: @requires_cftime @pytest.mark.filterwarnings("ignore") def test_cftime_resample_gh_9108(): + import cftime + ds = Dataset( {"pr": ("time", np.random.random((10,)))}, coords={"time": xr.date_range("0001-01-01", periods=10, freq="D")}, ) - ds.resample(time="ME") + actual = ds.resample(time="ME").mean() + expected = ds.mean("time").expand_dims( + time=[cftime.DatetimeGregorian(1, 1, 31, 0, 0, 0, 0, has_year_zero=False)] + ) + assert actual.time.data[0].has_year_zero == ds.time.data[0].has_year_zero + assert_equal(actual, expected) def test_custom_grouper() -> None: From 6f4f07d9bdeca19463f12d25958a00da417ae969 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 1 Aug 2024 06:59:52 -0600 Subject: [PATCH 5/8] Add whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0d146a7fd0d..23dc87cda82 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,8 @@ Bug fixes - Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). By `Tom Nicholas `_. +- Fix resampling with cftime when the time vector contains the "0001-01-01" date. (:issue:`9108`, :pull:`9116`) + By `Spencer Clark `_ and `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ From 62ec05dd3be96c5d4e9a7d11e4f5e7024f2b9596 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 1 Aug 2024 10:04:34 -0600 Subject: [PATCH 6/8] Update doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 23dc87cda82..2a1a3b049a1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,7 +38,7 @@ Bug fixes - Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). By `Tom Nicholas `_. - Fix resampling with cftime when the time vector contains the "0001-01-01" date. (:issue:`9108`, :pull:`9116`) - By `Spencer Clark `_ and `Deepak Cherian `_. + By `Spencer Clark `_ and `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ From 8761ca8170011049b35d3138186bb9750520c6fd Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 1 Aug 2024 20:49:58 -0400 Subject: [PATCH 7/8] Add more detail to what's new entry --- doc/whats-new.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2a1a3b049a1..03237b3d1c7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,8 +37,12 @@ Bug fixes - Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). By `Tom Nicholas `_. -- Fix resampling with cftime when the time vector contains the "0001-01-01" date. (:issue:`9108`, :pull:`9116`) - By `Spencer Clark `_ and `Deepak Cherian `_. +- Fix resampling error with monthly, quarterly, or yearly frequencies with + cftime when the time bins straddle the date "0001-01-01". A common situation + where this occurs is when the time coordinate is bounded by the date + "0001-01-01" (:issue:`9108`, :pull:`9116`) By `Spencer Clark + `_ and `Deepak Cherian + `_. Documentation ~~~~~~~~~~~~~ From 7d8a9a3f7c8cebeb2e142a68019fe65d4f0c177f Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 1 Aug 2024 20:54:49 -0400 Subject: [PATCH 8/8] Modify wording slightly, since this does not always happen when the time coordinate includes "0001-01-01". --- doc/whats-new.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 03237b3d1c7..4cd34c4cf54 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,9 +38,9 @@ Bug fixes - Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). By `Tom Nicholas `_. - Fix resampling error with monthly, quarterly, or yearly frequencies with - cftime when the time bins straddle the date "0001-01-01". A common situation - where this occurs is when the time coordinate is bounded by the date - "0001-01-01" (:issue:`9108`, :pull:`9116`) By `Spencer Clark + cftime when the time bins straddle the date "0001-01-01". For example, this + can happen in certain circumstances when the time coordinate contains the + date "0001-01-01". (:issue:`9108`, :pull:`9116`) By `Spencer Clark `_ and `Deepak Cherian `_.