From e56ede2515eeb208f984c28963b7857919b700ba Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Mon, 10 May 2021 20:25:19 +0200 Subject: [PATCH 1/3] Raise error for invalid reference date for encoding time units --- xarray/coding/times.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 54400414ebc..bf9c2270c45 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -104,6 +104,8 @@ def _ensure_padded_year(ref_date): # No four-digit strings, assume the first digits are the year and pad # appropriately matches_start_digits = re.match(r"(\d+)(.*)", ref_date) + if not matches_start_digits: + raise ValueError(f"invalid reference date for time units: {ref_date}") ref_year, everything_else = [s for s in matches_start_digits.groups()] ref_date_padded = "{:04d}{}".format(int(ref_year), everything_else) From eab4fa817f93b7dc3cdfb91234c8f9d75bd2c2b9 Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Wed, 12 May 2021 20:30:47 +0200 Subject: [PATCH 2/3] Add tests for invalid units and reference date --- xarray/coding/times.py | 1 + xarray/tests/test_coding_times.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index bf9c2270c45..66afd24dc6f 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -127,6 +127,7 @@ def _unpack_netcdf_time_units(units): # whitespace. It also ensures that the year is padded with zeros # so it will be correctly understood by pandas (via dateutil). 
matches = re.match(r"(.+) since (.+)", units) + if not matches: raise ValueError(f"invalid time units: {units}") diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 67ffbf147f0..9f8ccf2edcf 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -823,6 +823,12 @@ def test_encode_cf_datetime_overflow(shape): roundtrip = decode_cf_datetime(num, units, calendar) np.testing.assert_array_equal(dates, roundtrip) + with pytest.raises(ValueError, match="invalid time units"): + encode_cf_datetime(dates, units="days after 2000-01-01") + + with pytest.raises(ValueError, match="invalid reference date"): + encode_cf_datetime(dates, units="days since NO_YEAR") + def test_encode_cf_datetime_pandas_min(): # GH 2623 From 701aeee15f06b36b3d3c968cea8002162d7a89eb Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 13 May 2021 11:42:22 -0600 Subject: [PATCH 3/3] Update whats-new + merge. --- doc/whats-new.rst | 2 ++ xarray/coding/times.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d07a2741eef..c95a4eda004 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,8 @@ New Features - :py:meth:`Dataset.interp` now allows interpolation with non-numerical datatypes, such as booleans, instead of dropping them. (:issue:`4761` :pull:`5008`). By `Jimmy Westling <https://github.com/illviljan>`_. +- Raise more informative error when decoding time variables with invalid reference dates. + (:issue:`5199`, :pull:`5288`). By `Giacomo Caria <https://github.com/gcaria>`_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 29097067cac..f62a3961207 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -127,7 +127,6 @@ def _unpack_netcdf_time_units(units): # whitespace. It also ensures that the year is padded with zeros # so it will be correctly understood by pandas (via dateutil). 
matches = re.match(r"(.+) since (.+)", units) - if not matches: raise ValueError(f"invalid time units: {units}")