From 89aa2e2a8ad9a89aae52199617c8f693881252a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 13 Feb 2025 11:57:28 +0100 Subject: [PATCH 01/13] mask/scale datetimes/timedeltas only if they will be decoded, better handle partial coding --- xarray/coding/times.py | 16 +++++++-- xarray/coding/variables.py | 57 ++++++++++++++++++++++++++----- xarray/conventions.py | 8 +++-- xarray/tests/test_coding_times.py | 18 ++++++++-- xarray/tests/test_conventions.py | 5 +-- 5 files changed, 84 insertions(+), 20 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 68369dac0d7..47f2d8ee19a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1315,9 +1315,11 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: units = encoding.pop("units", None) calendar = encoding.pop("calendar", None) - dtype = encoding.get("dtype", None) + dtype = encoding.pop("dtype", None) (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) - + # if no dtype is provided, preserve data.dtype in encoding + if dtype is None: + safe_setitem(encoding, "dtype", data.dtype, name=name) safe_setitem(attrs, "units", units, name=name) safe_setitem(attrs, "calendar", calendar, name=name) @@ -1369,8 +1371,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + dtype = None + if "add_offset" in encoding or "scale_factor" in encoding: + encoding.pop("dtype") + dtype = data.dtype if data.dtype.kind == "f" else "float64" + data, units = encode_cf_timedelta( - data, encoding.pop("units", None), encoding.get("dtype", None) + data, encoding.pop("units", None), encoding.get("dtype", dtype) ) safe_setitem(attrs, "units", units, name=name) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 83112628dbb..5a4f8b38aa4 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -234,6 +234,8 @@ def _apply_mask( def _is_time_like(units): # test for time-like + # return "datetime" for datetetime-like + # return "timedelta" for timedelta-like if units is None: return False time_strings = [ @@ -255,9 +257,9 @@ def _is_time_like(units): _unpack_netcdf_time_units(units) except ValueError: return False - return True + return "datetime" else: - return any(tstr == units for tstr in time_strings) + return "timedelta" if any(tstr == units for tstr in time_strings) else False def _check_fill_values(attrs, name, dtype): @@ -367,6 +369,14 @@ def _encode_unsigned_fill_value( class CFMaskCoder(VariableCoder): """Mask or unmask fill values according to CF conventions.""" + def __init__( + self, + decode_times: bool = False, + decode_timedelta: bool = False, + ) -> None: + self.decode_times = decode_times + self.decode_timedelta = decode_timedelta + def encode(self, variable: Variable, name: T_Name = None): dims, data, attrs, encoding = unpack_for_encoding(variable) @@ -393,10 +403,13 @@ def encode(self, variable: Variable, name: T_Name = None): if fv_exists: # Ensure _FillValue is cast to same dtype as data's + # but not for packed data encoding["_FillValue"] = ( _encode_unsigned_fill_value(name, fv, dtype) if has_unsigned else dtype.type(fv) + if "add_offset" not in encoding and "scale_factor" not in encoding + else fv ) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) @@ -409,6 +422,8 @@ def encode(self, variable: Variable, name: T_Name = None): _encode_unsigned_fill_value(name, mv, dtype) if has_unsigned else dtype.type(mv) + if "add_offset" not in encoding and "scale_factor" not in encoding + else mv ), ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) @@ -416,10 +431,17 @@ def encode(self, variable: Variable, name: T_Name = None): # apply fillna if fill_value is not None and not pd.isnull(fill_value): # special case DateTime to properly handle NaT - if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": - data = duck_array_ops.where( - data != np.iinfo(np.int64).min, data, fill_value - ) + if _is_time_like(attrs.get("units")): + if data.dtype.kind in "iu": + data = duck_array_ops.where( + data != np.iinfo(np.int64).min, data, fill_value + ) + else: + data = duck_array_ops.fillna(data, fill_value) + if np.array(fill_value).dtype.kind in "iu": + data = duck_array_ops.astype( + duck_array_ops.around(data), type(fill_value) + ) else: data = duck_array_ops.fillna(data, fill_value) @@ -458,9 +480,15 @@ def decode(self, variable: Variable, name: T_Name = None): if encoded_fill_values: # special case DateTime to properly handle NaT + # we need to check if time-like will be decoded or not + # in further processing dtype: np.typing.DTypeLike decoded_fill_value: Any - if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": + is_time_like = _is_time_like(attrs.get("units")) + if ( + (is_time_like == "datetime" and self.decode_times) + or (is_time_like == "timedelta" and self.decode_timedelta) + ) and data.dtype.kind in "iu": dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min else: if "scale_factor" not in attrs and "add_offset" not in attrs: @@ -549,6 +577,14 @@ class CFScaleOffsetCoder(VariableCoder): decode_values = encoded_values * scale_factor + add_offset """ + def __init__( + self, + decode_times: bool = False, + decode_timedelta: bool = False, + ) -> None: + self.decode_times = decode_times + self.decode_timedelta = decode_timedelta + def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) @@ -580,8 +616,13 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: add_offset = np.asarray(add_offset).item() # if we have a _FillValue/masked_value we already have the wanted # floating point dtype here (via CFMaskCoder), so no check is necessary - # only check in other cases + # only check in other cases and for time-like dtype = data.dtype + is_time_like = _is_time_like(attrs.get("units")) + if (is_time_like == "datetime" and self.decode_times) or ( + is_time_like == "timedelta" and self.decode_timedelta + ): + dtype = _choose_float_dtype(dtype, encoding) if "_FillValue" not in encoding and "missing_value" not in encoding: dtype = _choose_float_dtype(dtype, encoding) diff --git a/xarray/conventions.py b/xarray/conventions.py index f67af95b4ce..071dab43c28 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -194,8 +194,12 @@ def decode_cf_variable( if mask_and_scale: for coder in [ - variables.CFMaskCoder(), - variables.CFScaleOffsetCoder(), + variables.CFMaskCoder( + decode_times=decode_times, decode_timedelta=decode_timedelta + ), + variables.CFScaleOffsetCoder( + decode_times=decode_times, decode_timedelta=decode_timedelta + ), ]: var = coder.decode(var, name=name) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 2e61e5d853e..778c0090955 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1429,9 +1429,9 @@ def test_roundtrip_datetime64_nanosecond_precision_warning( ) -> None: # test warning if times can't be serialized faithfully times = [ - np.datetime64("1970-01-01T00:01:00", "ns"), - np.datetime64("NaT"), - np.datetime64("1970-01-02T00:01:00", "ns"), + np.datetime64("1970-01-01T00:01:00", time_unit), + np.datetime64("NaT", time_unit), + np.datetime64("1970-01-02T00:01:00", time_unit), ] units = "days since 1970-01-10T01:01:00" needed_units = "hours" @@ -1901,3 +1901,15 @@ def test_lazy_decode_timedelta_error() -> None: ) with pytest.raises(OutOfBoundsTimedelta, match="overflow"): decoded.load() + + +@pytest.mark.parametrize("decode_timedelta", [True, False]) +@pytest.mark.parametrize("mask_and_scale", [True, False]) +def test_decode_timedelta_mask_and_scale(decode_timedelta, mask_and_scale) -> None: + attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0} + encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) + decoded = conventions.decode_cf_variable( + "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta + ) + result = conventions.encode_cf_variable(decoded, name="foo") + assert_equal(encoded, result) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 8d3827fac54..63b5084ece8 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -511,16 +511,13 @@ def test_decode_dask_times(self) -> None: @pytest.mark.parametrize("time_unit", ["s", "ms", "us", "ns"]) def test_decode_cf_time_kwargs(self, time_unit) -> None: - # todo: if we set timedelta attrs "units": "days" - # this errors on the last decode_cf wrt to the lazy_elemwise_func - # trying to convert twice ds = Dataset.from_dict( { "coords": { "timedelta": { "data": np.array([1, 2, 3], dtype="int64"), "dims": "timedelta", - "attrs": {"units": "seconds"}, + "attrs": {"units": "days"}, }, "time": { "data": np.array([1, 2, 3], dtype="int64"), From d4fe3fe5f8ea2365116e242238f56dc7693d838a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 10:41:06 +0100 Subject: [PATCH 02/13] comments --- xarray/coding/variables.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5a4f8b38aa4..72125da3332 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -416,6 +416,7 @@ def encode(self, variable: Variable, name: T_Name = None): if mv_exists: # try to use _FillValue, if it exists to align both values # or use missing_value and ensure it's cast to same dtype as data's + # but not for packed data encoding["missing_value"] = attrs.get( "_FillValue", ( @@ -437,7 +438,11 @@ def encode(self, variable: Variable, name: T_Name = None): data != np.iinfo(np.int64).min, data, fill_value ) else: + # if we have float data (data was packed prior masking) + # we just fillna data = duck_array_ops.fillna(data, fill_value) + # but if the fill_value is of integer type + # we need to round and cast if np.array(fill_value).dtype.kind in "iu": data = duck_array_ops.astype( duck_array_ops.around(data), type(fill_value) From 058217e358dc7d39ed16937b4dd45943298b794a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 10:56:32 +0100 Subject: [PATCH 03/13] typing --- xarray/coding/variables.py | 9 +++++---- xarray/tests/test_coding_times.py | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 72125da3332..36b8b40acd2 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -10,6 +10,7 @@ import numpy as np import pandas as pd +from build.lib.xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -371,8 +372,8 @@ class CFMaskCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta @@ -584,8 +585,8 @@ class CFScaleOffsetCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 778c0090955..480bd96f3f4 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -638,7 +638,9 @@ def test_cf_timedelta_2d() -> None: @pytest.mark.parametrize("encoding_unit", FREQUENCIES_TO_ENCODING_UNITS.values()) -def test_decode_cf_timedelta_time_unit(time_unit, encoding_unit) -> None: +def test_decode_cf_timedelta_time_unit( + time_unit: PDDatetimeUnitOptions, encoding_unit +) -> None: encoded = 1 encoding_unit_as_numpy = _netcdf_to_numpy_timeunit(encoding_unit) if np.timedelta64(1, time_unit) > np.timedelta64(1, encoding_unit_as_numpy): @@ -652,7 +654,9 @@ def test_decode_cf_timedelta_time_unit(time_unit, encoding_unit) -> None: assert result.dtype == expected.dtype -def test_decode_cf_timedelta_time_unit_out_of_bounds(time_unit) -> None: +def test_decode_cf_timedelta_time_unit_out_of_bounds( + time_unit: PDDatetimeUnitOptions, +) -> None: # Define a scale factor that will guarantee overflow with the given # time_unit. scale_factor = np.timedelta64(1, time_unit) // np.timedelta64(1, "ns") @@ -661,7 +665,7 @@ def test_decode_cf_timedelta_time_unit_out_of_bounds(time_unit) -> None: decode_cf_timedelta(encoded, "days", time_unit) -def test_cf_timedelta_roundtrip_large_value(time_unit) -> None: +def test_cf_timedelta_roundtrip_large_value(time_unit: PDDatetimeUnitOptions) -> None: value = np.timedelta64(np.iinfo(np.int64).max, time_unit) encoded, units = encode_cf_timedelta(value) decoded = decode_cf_timedelta(encoded, units, time_unit=time_unit) @@ -983,7 +987,7 @@ def test_use_cftime_default_standard_calendar_out_of_range( @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) @pytest.mark.parametrize("units_year", [1500, 2000, 2500]) def test_use_cftime_default_non_standard_calendar( - calendar, units_year, time_unit + calendar, units_year, time_unit: PDDatetimeUnitOptions ) -> None: from cftime import num2date @@ -1620,7 +1624,9 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None _ENCODE_DATETIME64_VIA_DASK_TESTS.values(), ids=_ENCODE_DATETIME64_VIA_DASK_TESTS.keys(), ) -def test_encode_cf_datetime_datetime64_via_dask(freq, units, dtype, time_unit) -> None: +def test_encode_cf_datetime_datetime64_via_dask( + freq, units, dtype, time_unit: PDDatetimeUnitOptions +) -> None: import dask.array times_pd = pd.date_range(start="1700", freq=freq, periods=3, unit=time_unit) @@ -1905,7 +1911,9 @@ def test_lazy_decode_timedelta_error() -> None: @pytest.mark.parametrize("decode_timedelta", [True, False]) @pytest.mark.parametrize("mask_and_scale", [True, False]) -def test_decode_timedelta_mask_and_scale(decode_timedelta, mask_and_scale) -> None: +def test_decode_timedelta_mask_and_scale( + decode_timedelta: bool, mask_and_scale: bool +) -> None: attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0} encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( From 18274ad1a7b9b9fc715385d925234baab3ec9d6e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 09:57:13 +0000 Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 36b8b40acd2..995b867db47 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -9,8 +9,8 @@ import numpy as np import pandas as pd - from build.lib.xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder + from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type From b80a578be72b12800d4be18216042ba6f34d3895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 10:58:40 +0100 Subject: [PATCH 05/13] Apply suggestions from code review --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 995b867db47..3d0a10c7275 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from build.lib.xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder +from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable From ff189a9d2ba33762003335350bbb5c0421a85a3c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 09:59:10 +0000 Subject: [PATCH 06/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 3d0a10c7275..83234cd0fa0 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -9,8 +9,8 @@ import numpy as np import pandas as pd -from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder +from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type From b7d61bc02825b7d8f73d10459c6419a7cd48a986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 11:18:26 +0100 Subject: [PATCH 07/13] fix typing and imports --- xarray/coding/variables.py | 9 ++++----- xarray/conventions.py | 6 ++++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 83234cd0fa0..72125da3332 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -10,7 +10,6 @@ import numpy as np import pandas as pd -from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -372,8 +371,8 @@ class CFMaskCoder(VariableCoder): def __init__( self, - decode_times: bool | CFDatetimeCoder = False, - decode_timedelta: bool | CFTimedeltaCoder = False, + decode_times: bool = False, + decode_timedelta: bool = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta @@ -585,8 +584,8 @@ class CFScaleOffsetCoder(VariableCoder): def __init__( self, - decode_times: bool | CFDatetimeCoder = False, - decode_timedelta: bool | CFTimedeltaCoder = False, + decode_times: bool = False, + decode_timedelta: bool = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta diff --git a/xarray/conventions.py b/xarray/conventions.py index 071dab43c28..53169f04457 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -193,12 +193,14 @@ def decode_cf_variable( var = variables.Numpy2StringDTypeCoder().decode(var) if mask_and_scale: + dec_times = True if decode_times else False + dec_timedelta = True if decode_timedelta else False for coder in [ variables.CFMaskCoder( - decode_times=decode_times, decode_timedelta=decode_timedelta + decode_times=dec_times, decode_timedelta=dec_timedelta ), variables.CFScaleOffsetCoder( - decode_times=decode_times, decode_timedelta=decode_timedelta + decode_times=dec_times, decode_timedelta=dec_timedelta ), ]: var = coder.decode(var, name=name) From 031aaa2122bec35408a9b1b808bb8eb9d0895622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 25 Feb 2025 10:18:49 +0100 Subject: [PATCH 08/13] refactor according to review concerns and suggestions --- xarray/coding/times.py | 20 +++++++++++++----- xarray/coding/variables.py | 35 ++++++++++++++++++------------- xarray/tests/test_coding_times.py | 20 +++++++++++++++++- 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 47f2d8ee19a..01e3bd1cf1d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1316,7 +1316,14 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: units = encoding.pop("units", None) calendar = encoding.pop("calendar", None) dtype = encoding.pop("dtype", None) + + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + if "add_offset" in encoding or "scale_factor" in encoding: + dtype = data.dtype if data.dtype.kind == "f" else "float64" (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) + # if no dtype is provided, preserve data.dtype in encoding if dtype is None: safe_setitem(encoding, "dtype", data.dtype, name=name) @@ -1371,17 +1378,20 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) + dtype = encoding.pop("dtype", None) + # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder - dtype = None if "add_offset" in encoding or "scale_factor" in encoding: - encoding.pop("dtype") dtype = data.dtype if data.dtype.kind == "f" else "float64" - data, units = encode_cf_timedelta( - data, encoding.pop("units", None), encoding.get("dtype", dtype) - ) + data, units = encode_cf_timedelta(data, encoding.pop("units", None), dtype) + + # if no dtype is provided, preserve data.dtype in encoding + if dtype is None: + safe_setitem(encoding, "dtype", data.dtype, name=name) + safe_setitem(attrs, "units", units, name=name) return Variable(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 72125da3332..77e2f0602ce 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -484,25 +484,30 @@ def decode(self, variable: Variable, name: T_Name = None): ) if encoded_fill_values: - # special case DateTime to properly handle NaT - # we need to check if time-like will be decoded or not - # in further processing dtype: np.typing.DTypeLike decoded_fill_value: Any - is_time_like = _is_time_like(attrs.get("units")) - if ( - (is_time_like == "datetime" and self.decode_times) - or (is_time_like == "timedelta" and self.decode_timedelta) - ) and data.dtype.kind in "iu": - dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min + # in case of packed data we have to decode into float + # in any case + if "scale_factor" in attrs or "add_offset" in attrs: + dtype, decoded_fill_value = ( + _choose_float_dtype(data.dtype, attrs), + np.nan, + ) else: - if "scale_factor" not in attrs and "add_offset" not in attrs: - dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) - else: + # in case of no-packing special case DateTime/Timedelta to properly + # handle NaT, we need to check if time-like will be decoded + # or not in further processing + is_time_like = _is_time_like(attrs.get("units")) + if ( + (is_time_like == "datetime" and self.decode_times) + or (is_time_like == "timedelta" and self.decode_timedelta) + ) and data.dtype.kind in "iu": dtype, decoded_fill_value = ( - _choose_float_dtype(data.dtype, attrs), - np.nan, - ) + np.int64, + np.iinfo(np.int64).min, + ) # np.dtype(f"{is_time_like}64[s]") + else: + dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) transform = partial( _apply_mask, diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 480bd96f3f4..8f031593a27 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -525,6 +525,24 @@ def test_decoded_cf_datetime_array_2d(time_unit: PDDatetimeUnitOptions) -> None: assert_array_equal(np.asarray(result), expected) +@pytest.mark.parametrize("decode_times", [True, False]) +@pytest.mark.parametrize("mask_and_scale", [True, False]) +def test_decode_datetime_mask_and_scale( + decode_times: bool, mask_and_scale: bool +) -> None: + attrs = { + "units": "nanoseconds since 1970-01-01", + "_FillValue": np.int16(-1), + "add_offset": 100000.0, + } + encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) + decoded = conventions.decode_cf_variable( + "foo", encoded, mask_and_scale=mask_and_scale, decode_times=decode_times + ) + result = conventions.encode_cf_variable(decoded, name="foo") + assert_equal(encoded, result) + + FREQUENCIES_TO_ENCODING_UNITS = { "ns": "nanoseconds", "us": "microseconds", @@ -1914,7 +1932,7 @@ def test_lazy_decode_timedelta_error() -> None: def test_decode_timedelta_mask_and_scale( decode_timedelta: bool, mask_and_scale: bool ) -> None: - attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0} + attrs = {"units": "nanoseconds", "_FillValue": np.int16(-1), "add_offset": 100000.0} encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta From 03aa1b83b198b675fe21fa1144c52671f2a467ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 25 Feb 2025 11:30:55 +0100 Subject: [PATCH 09/13] retain retain dtype for packed data in datetime/timedelta encoding --- xarray/coding/times.py | 16 ++++++++++------ xarray/tests/test_coding_times.py | 7 +++++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 01e3bd1cf1d..06c2163a62a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1320,13 +1320,15 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder + set_dtype_encoding = None if "add_offset" in encoding or "scale_factor" in encoding: + set_dtype_encoding = dtype dtype = data.dtype if data.dtype.kind == "f" else "float64" (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) - # if no dtype is provided, preserve data.dtype in encoding - if dtype is None: - safe_setitem(encoding, "dtype", data.dtype, name=name) + # retain dtype for packed data + if set_dtype_encoding is not None: + safe_setitem(encoding, "dtype", set_dtype_encoding, name=name) safe_setitem(attrs, "units", units, name=name) safe_setitem(attrs, "calendar", calendar, name=name) @@ -1383,14 +1385,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder + set_dtype_encoding = None if "add_offset" in encoding or "scale_factor" in encoding: + set_dtype_encoding = dtype dtype = data.dtype if data.dtype.kind == "f" else "float64" data, units = encode_cf_timedelta(data, encoding.pop("units", None), dtype) - # if no dtype is provided, preserve data.dtype in encoding - if dtype is None: - safe_setitem(encoding, "dtype", data.dtype, name=name) + # retain dtype for packed data + if set_dtype_encoding is not None: + safe_setitem(encoding, "dtype", set_dtype_encoding, name=name) safe_setitem(attrs, "units", units, name=name) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 8f031593a27..00f622309fc 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -532,6 +532,7 @@ def test_decode_datetime_mask_and_scale( ) -> None: attrs = { "units": "nanoseconds since 1970-01-01", + "calendar": "proleptic_gregorian", "_FillValue": np.int16(-1), "add_offset": 100000.0, } @@ -540,7 +541,8 @@ def test_decode_datetime_mask_and_scale( "foo", encoded, mask_and_scale=mask_and_scale, decode_times=decode_times ) result = conventions.encode_cf_variable(decoded, name="foo") - assert_equal(encoded, result) + assert_identical(encoded, result) + assert encoded.dtype == result.dtype FREQUENCIES_TO_ENCODING_UNITS = { @@ -1938,4 +1940,5 @@ def test_decode_timedelta_mask_and_scale( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta ) result = conventions.encode_cf_variable(decoded, name="foo") - assert_equal(encoded, result) + assert_identical(encoded, result) + assert encoded.dtype == result.dtype From 31ed6bbc9015e05fa02364f2b5db02c7ab0dc8eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 25 Feb 2025 13:15:00 +0100 Subject: [PATCH 10/13] simplify code, add whats-new.rst entry --- doc/whats-new.rst | 2 ++ xarray/coding/variables.py | 16 +++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a10a8c8851f..00251ba883b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,8 @@ Bug fixes Haacker `_. - Fix ``isel`` for multi-coordinate Xarray indexes (:issue:`10063`, :pull:`10066`). By `Benoit Bovy `_. +- Improve handling of dtype and NaT when encoding/decoding masked and packaged datetimes and timedeltas (:issue:`8957`, :pull:`10050`). + By `Kai Mühlbauer `_. Documentation diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 77e2f0602ce..a2534df0ec7 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -502,10 +502,8 @@ def decode(self, variable: Variable, name: T_Name = None): (is_time_like == "datetime" and self.decode_times) or (is_time_like == "timedelta" and self.decode_timedelta) ) and data.dtype.kind in "iu": - dtype, decoded_fill_value = ( - np.int64, - np.iinfo(np.int64).min, - ) # np.dtype(f"{is_time_like}64[s]") + dtype = np.int64 + decoded_fill_value = np.iinfo(np.int64).min else: dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) @@ -624,17 +622,17 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: scale_factor = np.asarray(scale_factor).item() if np.ndim(add_offset) > 0: add_offset = np.asarray(add_offset).item() - # if we have a _FillValue/masked_value we already have the wanted + # if we have a _FillValue/masked_value in encoding we already have the wanted # floating point dtype here (via CFMaskCoder), so no check is necessary # only check in other cases and for time-like dtype = data.dtype is_time_like = _is_time_like(attrs.get("units")) - if (is_time_like == "datetime" and self.decode_times) or ( - is_time_like == "timedelta" and self.decode_timedelta + if ( + ("_FillValue" not in encoding and "missing_value" not in encoding) + or (is_time_like == "datetime" and self.decode_times) + or (is_time_like == "timedelta" and self.decode_timedelta) ): dtype = _choose_float_dtype(dtype, encoding) - if "_FillValue" not in encoding and "missing_value" not in encoding: - dtype = _choose_float_dtype(dtype, encoding) transform = partial( _scale_offset_decoding, From dab3b0e68c43054c533bb52c4353fd8f45e39277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 5 Mar 2025 10:32:13 +0100 Subject: [PATCH 11/13] Update xarray/coding/variables.py Co-authored-by: Spencer Clark --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index a2534df0ec7..c428ef8694b 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -234,7 +234,7 @@ def _apply_mask( def _is_time_like(units): # test for time-like - # return "datetime" for datetetime-like + # return "datetime" for datetime-like # return "timedelta" for timedelta-like if units is None: return False From a304375f6ecdf5471ef47a35ef79b4200a1cbe60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 5 Mar 2025 11:43:54 +0100 Subject: [PATCH 12/13] refactor common code into common.py to prevent circular imports when passing decode_times and decode_timedelta to CFMaskCoder and CFScaleOffsetCoder --- xarray/coding/common.py | 136 +++++++++++++++++++++++++++++++ xarray/coding/times.py | 2 +- xarray/coding/variables.py | 160 ++++++------------------------------- xarray/conventions.py | 6 +- 4 files changed, 165 insertions(+), 139 deletions(-) create mode 100644 xarray/coding/common.py diff --git a/xarray/coding/common.py b/xarray/coding/common.py new file mode 100644 index 00000000000..1b455009668 --- /dev/null +++ b/xarray/coding/common.py @@ -0,0 +1,136 @@ +from __future__ import annotations + +from collections.abc import Callable, Hashable, MutableMapping +from typing import TYPE_CHECKING, Any, Union + +import numpy as np + +from xarray.core import indexing +from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array + +if TYPE_CHECKING: + T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] + T_Name = Union[Hashable, None] + + +class SerializationWarning(RuntimeWarning): + """Warnings about encoding/decoding issues in serialization.""" + + +class VariableCoder: + """Base class for encoding and decoding transformations on variables. + + We use coders for transforming variables between xarray's data model and + a format suitable for serialization. For example, coders apply CF + conventions for how data should be represented in netCDF files. + + Subclasses should implement encode() and decode(), which should satisfy + the identity ``coder.decode(coder.encode(variable)) == variable``. If any + options are necessary, they should be implemented as arguments to the + __init__ method. + + The optional name argument to encode() and decode() exists solely for the + sake of better error messages, and should correspond to the name of + variables in the underlying store. + """ + + def encode(self, variable: Variable, name: T_Name = None) -> Variable: + """Convert an encoded variable to a decoded variable""" + raise NotImplementedError() + + def decode(self, variable: Variable, name: T_Name = None) -> Variable: + """Convert a decoded variable to an encoded variable""" + raise NotImplementedError() + + +class _ElementwiseFunctionArray(indexing.ExplicitlyIndexedNDArrayMixin): + """Lazily computed array holding values of elemwise-function. + + Do not construct this object directly: call lazy_elemwise_func instead. + + Values are computed upon indexing or coercion to a NumPy array. + """ + + def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): + assert not is_chunked_array(array) + self.array = indexing.as_indexable(array) + self.func = func + self._dtype = dtype + + @property + def dtype(self) -> np.dtype: + return np.dtype(self._dtype) + + def _oindex_get(self, key): + return type(self)(self.array.oindex[key], self.func, self.dtype) + + def _vindex_get(self, key): + return type(self)(self.array.vindex[key], self.func, self.dtype) + + def __getitem__(self, key): + return type(self)(self.array[key], self.func, self.dtype) + + def get_duck_array(self): + return self.func(self.array.get_duck_array()) + + def __repr__(self) -> str: + return f"{type(self).__name__}({self.array!r}, func={self.func!r}, dtype={self.dtype!r})" + + +def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): + """Lazily apply an element-wise function to an array. + Parameters + ---------- + array : any valid value of Variable._data + func : callable + Function to apply to indexed slices of an array. For use with dask, + this should be a pickle-able object. + dtype : coercible to np.dtype + Dtype for the result of this function. + + Returns + ------- + Either a dask.array.Array or _ElementwiseFunctionArray. + """ + if is_chunked_array(array): + chunkmanager = get_chunked_array_type(array) + + return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] + else: + return _ElementwiseFunctionArray(array, func, dtype) + + +def safe_setitem(dest, key: Hashable, value, name: T_Name = None): + if key in dest: + var_str = f" on variable {name!r}" if name else "" + raise ValueError( + f"failed to prevent overwriting existing key {key} in attrs{var_str}. " + "This is probably an encoding field used by xarray to describe " + "how a variable is serialized. To proceed, remove this key from " + "the variable's attributes manually." + ) + dest[key] = value + + +def pop_to( + source: MutableMapping, dest: MutableMapping, key: Hashable, name: T_Name = None +) -> Any: + """ + A convenience function which pops a key k from source to dest. + None values are not passed on. If k already exists in dest an + error is raised. + """ + value = source.pop(key, None) + if value is not None: + safe_setitem(dest, key, value, name=name) + return value + + +def unpack_for_encoding(var: Variable) -> T_VarTuple: + return var.dims, var.data, var.attrs.copy(), var.encoding.copy() + + +def unpack_for_decoding(var: Variable) -> T_VarTuple: + return var.dims, var._data, var.attrs.copy(), var.encoding.copy() diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 06c2163a62a..997639e9a91 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -11,7 +11,7 @@ import pandas as pd from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta -from xarray.coding.variables import ( +from xarray.coding.common import ( SerializationWarning, VariableCoder, lazy_elemwise_func, diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index c428ef8694b..1b7bc95e2b4 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -3,87 +3,31 @@ from __future__ import annotations import warnings -from collections.abc import Callable, Hashable, MutableMapping +from collections.abc import Hashable, MutableMapping from functools import partial from typing import TYPE_CHECKING, Any, Union import numpy as np import pandas as pd +from xarray.coding.common import ( + SerializationWarning, + VariableCoder, + lazy_elemwise_func, + pop_to, + safe_setitem, + unpack_for_decoding, + unpack_for_encoding, +) +from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable -from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] T_Name = Union[Hashable, None] -class SerializationWarning(RuntimeWarning): - """Warnings about encoding/decoding issues in serialization.""" - - -class VariableCoder: - """Base class for encoding and decoding transformations on variables. - - We use coders for transforming variables between xarray's data model and - a format suitable for serialization. For example, coders apply CF - conventions for how data should be represented in netCDF files. - - Subclasses should implement encode() and decode(), which should satisfy - the identity ``coder.decode(coder.encode(variable)) == variable``. If any - options are necessary, they should be implemented as arguments to the - __init__ method. - - The optional name argument to encode() and decode() exists solely for the - sake of better error messages, and should correspond to the name of - variables in the underlying store. - """ - - def encode(self, variable: Variable, name: T_Name = None) -> Variable: - """Convert an encoded variable to a decoded variable""" - raise NotImplementedError() - - def decode(self, variable: Variable, name: T_Name = None) -> Variable: - """Convert a decoded variable to an encoded variable""" - raise NotImplementedError() - - -class _ElementwiseFunctionArray(indexing.ExplicitlyIndexedNDArrayMixin): - """Lazily computed array holding values of elemwise-function. - - Do not construct this object directly: call lazy_elemwise_func instead. - - Values are computed upon indexing or coercion to a NumPy array. - """ - - def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): - assert not is_chunked_array(array) - self.array = indexing.as_indexable(array) - self.func = func - self._dtype = dtype - - @property - def dtype(self) -> np.dtype: - return np.dtype(self._dtype) - - def _oindex_get(self, key): - return type(self)(self.array.oindex[key], self.func, self.dtype) - - def _vindex_get(self, key): - return type(self)(self.array.vindex[key], self.func, self.dtype) - - def __getitem__(self, key): - return type(self)(self.array[key], self.func, self.dtype) - - def get_duck_array(self): - return self.func(self.array.get_duck_array()) - - def __repr__(self) -> str: - return f"{type(self).__name__}({self.array!r}, func={self.func!r}, dtype={self.dtype!r})" - - class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from non-native to native endianness @@ -161,63 +105,6 @@ def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) -def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): - """Lazily apply an element-wise function to an array. - Parameters - ---------- - array : any valid value of Variable._data - func : callable - Function to apply to indexed slices of an array. For use with dask, - this should be a pickle-able object. - dtype : coercible to np.dtype - Dtype for the result of this function. - - Returns - ------- - Either a dask.array.Array or _ElementwiseFunctionArray. - """ - if is_chunked_array(array): - chunkmanager = get_chunked_array_type(array) - - return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] - else: - return _ElementwiseFunctionArray(array, func, dtype) - - -def unpack_for_encoding(var: Variable) -> T_VarTuple: - return var.dims, var.data, var.attrs.copy(), var.encoding.copy() - - -def unpack_for_decoding(var: Variable) -> T_VarTuple: - return var.dims, var._data, var.attrs.copy(), var.encoding.copy() - - -def safe_setitem(dest, key: Hashable, value, name: T_Name = None): - if key in dest: - var_str = f" on variable {name!r}" if name else "" - raise ValueError( - f"failed to prevent overwriting existing key {key} in attrs{var_str}. " - "This is probably an encoding field used by xarray to describe " - "how a variable is serialized. To proceed, remove this key from " - "the variable's attributes manually." - ) - dest[key] = value - - -def pop_to( - source: MutableMapping, dest: MutableMapping, key: Hashable, name: T_Name = None -) -> Any: - """ - A convenience function which pops a key k from source to dest. - None values are not passed on. If k already exists in dest an - error is raised. - """ - value = source.pop(key, None) - if value is not None: - safe_setitem(dest, key, value, name=name) - return value - - def _apply_mask( data: np.ndarray, encoded_fill_values: list, @@ -371,8 +258,8 @@ class CFMaskCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta @@ -407,9 +294,11 @@ def encode(self, variable: Variable, name: T_Name = None): encoding["_FillValue"] = ( _encode_unsigned_fill_value(name, fv, dtype) if has_unsigned - else dtype.type(fv) - if "add_offset" not in encoding and "scale_factor" not in encoding - else fv + else ( + dtype.type(fv) + if "add_offset" not in encoding and "scale_factor" not in encoding + else fv + ) ) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) @@ -422,9 +311,12 @@ def encode(self, variable: Variable, name: T_Name = None): ( _encode_unsigned_fill_value(name, mv, dtype) if has_unsigned - else dtype.type(mv) - if "add_offset" not in encoding and "scale_factor" not in encoding - else mv + else ( + dtype.type(mv) + if "add_offset" not in encoding + and "scale_factor" not in encoding + else mv + ) ), ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) @@ -587,8 +479,8 @@ class CFScaleOffsetCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta diff --git a/xarray/conventions.py b/xarray/conventions.py index 53169f04457..071dab43c28 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -193,14 +193,12 @@ def decode_cf_variable( var = variables.Numpy2StringDTypeCoder().decode(var) if mask_and_scale: - dec_times = True if decode_times else False - dec_timedelta = True if decode_timedelta else False for coder in [ variables.CFMaskCoder( - decode_times=dec_times, decode_timedelta=dec_timedelta + decode_times=decode_times, decode_timedelta=decode_timedelta ), variables.CFScaleOffsetCoder( - decode_times=dec_times, decode_timedelta=dec_timedelta + decode_times=decode_times, decode_timedelta=decode_timedelta ), ]: var = coder.decode(var, name=name) From 200eae2dc41546ab6966f7eb612242bcd8ac1f52 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Mar 2025 07:20:00 +0000 Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- xarray/tests/test_coding_times.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2c47162992d..994fc70339c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -71,7 +71,7 @@ Bug fixes By `Benoit Bovy `_. - Fix dask tokenization when opening each node in :py:func:`xarray.open_datatree` (:issue:`10098`, :pull:`10100`). By `Sam Levang `_. -- Improve handling of dtype and NaT when encoding/decoding masked and packaged +- Improve handling of dtype and NaT when encoding/decoding masked and packaged datetimes and timedeltas (:issue:`8957`, :pull:`10050`). By `Kai Mühlbauer `_. diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index c5386bab919..e736339da1b 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1949,10 +1949,10 @@ def test_decode_timedelta_mask_and_scale( assert_identical(encoded, result) assert encoded.dtype == result.dtype - + def test_decode_floating_point_timedelta_no_serialization_warning() -> None: attrs = {"units": "seconds"} encoded = Variable(["time"], [0, 0.1, 0.2], attrs=attrs) decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True) with assert_no_warnings(): - decoded.load() \ No newline at end of file + decoded.load()