Improve handling of dtype and NaT when encoding/decoding masked and packed datetimes and timedeltas #10050

Merged · 18 commits · Mar 7, 2025
Changes from 8 commits
16 changes: 13 additions & 3 deletions xarray/coding/times.py
@@ -1315,9 +1315,11 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:

units = encoding.pop("units", None)
calendar = encoding.pop("calendar", None)
dtype = encoding.get("dtype", None)
dtype = encoding.pop("dtype", None)
(data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype)

# if no dtype is provided, preserve data.dtype in encoding
if dtype is None:
safe_setitem(encoding, "dtype", data.dtype, name=name)
safe_setitem(attrs, "units", units, name=name)
safe_setitem(attrs, "calendar", calendar, name=name)

@@ -1369,8 +1371,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
if np.issubdtype(variable.data.dtype, np.timedelta64):
dims, data, attrs, encoding = unpack_for_encoding(variable)

# in the case of packed data we need to encode into
# float first; the correct dtype will be established
# via CFScaleOffsetCoder/CFMaskCoder
dtype = None
if "add_offset" in encoding or "scale_factor" in encoding:
encoding.pop("dtype")
dtype = data.dtype if data.dtype.kind == "f" else "float64"

data, units = encode_cf_timedelta(
data, encoding.pop("units", None), encoding.get("dtype", None)
data, encoding.pop("units", None), encoding.get("dtype", dtype)
)
safe_setitem(attrs, "units", units, name=name)

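As the comment in the hunk notes, packed timedeltas are now encoded to float first so that CFScaleOffsetCoder/CFMaskCoder can establish the final on-disk dtype. A hedged sketch of that intermediate state (assumed coder usage, not from the PR):

```python
# Sketch only: with packing attributes present, the timedelta coder emits
# float data; the int16 target dtype is applied later by the packing coders.
import numpy as np
import xarray as xr
from xarray.coding.times import CFTimedeltaCoder

var = xr.Variable(
    ("time",),
    np.array([1, 2, 3], dtype="timedelta64[s]"),
    encoding={"units": "seconds", "dtype": np.dtype("int16"), "scale_factor": 0.1},
)
encoded = CFTimedeltaCoder().encode(var, name="td")
print(encoded.dtype)  # a float dtype here, per the branch above
```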
62 changes: 54 additions & 8 deletions xarray/coding/variables.py
@@ -234,6 +234,8 @@

def _is_time_like(units):
# test for time-like
# return "datetime" for datetetime-like
# return "timedelta" for timedelta-like
if units is None:
return False
time_strings = [
@@ -255,9 +257,9 @@
_unpack_netcdf_time_units(units)
except ValueError:
return False
return True
return "datetime"
else:
return any(tstr == units for tstr in time_strings)
return "timedelta" if any(tstr == units for tstr in time_strings) else False


def _check_fill_values(attrs, name, dtype):
@@ -345,7 +347,7 @@
# otherwise numpy unsigned ints will silently cast to the signed counterpart
fill_value = fill_value.item()
# passes if provided fill value fits in encoded on-disk type
new_fill = encoded_dtype.type(fill_value)

Check warning on line 350 in xarray/coding/variables.py — GitHub Actions / ubuntu-latest py3.10 min-all-deps (repeated 10×): NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually `np.array(value).astype(dtype)` will give the desired result (the cast overflows).
except OverflowError:
encoded_kind_str = "signed" if encoded_dtype.kind == "i" else "unsigned"
warnings.warn(
@@ -367,6 +369,14 @@
class CFMaskCoder(VariableCoder):
"""Mask or unmask fill values according to CF conventions."""

def __init__(
self,
decode_times: bool = False,
decode_timedelta: bool = False,
) -> None:
self.decode_times = decode_times
self.decode_timedelta = decode_timedelta

def encode(self, variable: Variable, name: T_Name = None):
dims, data, attrs, encoding = unpack_for_encoding(variable)

@@ -393,33 +403,50 @@

if fv_exists:
# Ensure _FillValue is cast to same dtype as data's
# but not for packed data
encoding["_FillValue"] = (
_encode_unsigned_fill_value(name, fv, dtype)
if has_unsigned
else dtype.type(fv)
if "add_offset" not in encoding and "scale_factor" not in encoding
else fv
)
fill_value = pop_to(encoding, attrs, "_FillValue", name=name)

if mv_exists:
# try to use _FillValue if it exists, to align both values
# or use missing_value and ensure it's cast to same dtype as data's
# but not for packed data
encoding["missing_value"] = attrs.get(
"_FillValue",
(
_encode_unsigned_fill_value(name, mv, dtype)
if has_unsigned
else dtype.type(mv)
if "add_offset" not in encoding and "scale_factor" not in encoding
else mv
),
)
fill_value = pop_to(encoding, attrs, "missing_value", name=name)

# apply fillna
if fill_value is not None and not pd.isnull(fill_value):
# special case DateTime to properly handle NaT
if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
data = duck_array_ops.where(
data != np.iinfo(np.int64).min, data, fill_value
)
if _is_time_like(attrs.get("units")):
if data.dtype.kind in "iu":
data = duck_array_ops.where(
data != np.iinfo(np.int64).min, data, fill_value
)
else:
# if we have float data (data was packed prior to masking)
# we just fillna
data = duck_array_ops.fillna(data, fill_value)
# but if the fill_value is of integer type
# we need to round and cast
if np.array(fill_value).dtype.kind in "iu":
data = duck_array_ops.astype(
duck_array_ops.around(data), type(fill_value)
)
else:
data = duck_array_ops.fillna(data, fill_value)
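
For float data that was packed before masking, the new branch fills NaN with the fill value and then rounds and casts when that fill value is an integer. A plain-NumPy sketch of the same logic (illustrative only):

```python
# Mirrors the branch above with NumPy instead of duck_array_ops.
import numpy as np

data = np.array([0.4, np.nan, 2.6])  # float data, already packed
fill_value = np.int16(-1)            # integer fill value from encoding
data = np.where(np.isnan(data), fill_value, data)
if np.array(fill_value).dtype.kind in "iu":
    data = np.around(data).astype(type(fill_value))
print(data)  # [ 0 -1  3] as int16
```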

@@ -458,9 +485,15 @@

if encoded_fill_values:
# special case DateTime to properly handle NaT
# we need to check if time-like will be decoded or not
# in further processing
dtype: np.typing.DTypeLike
decoded_fill_value: Any
if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
is_time_like = _is_time_like(attrs.get("units"))
if (
(is_time_like == "datetime" and self.decode_times)
or (is_time_like == "timedelta" and self.decode_timedelta)
) and data.dtype.kind in "iu":
dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
else:
if "scale_factor" not in attrs and "add_offset" not in attrs:
@@ -549,6 +582,14 @@
decode_values = encoded_values * scale_factor + add_offset
"""

def __init__(
self,
decode_times: bool = False,
decode_timedelta: bool = False,
) -> None:
self.decode_times = decode_times
self.decode_timedelta = decode_timedelta

def encode(self, variable: Variable, name: T_Name = None) -> Variable:
dims, data, attrs, encoding = unpack_for_encoding(variable)

@@ -580,8 +621,13 @@
add_offset = np.asarray(add_offset).item()
# if we have a _FillValue/masked_value we already have the wanted
# floating point dtype here (via CFMaskCoder), so no check is necessary
# only check in other cases
# only check in other cases and for time-like
dtype = data.dtype
is_time_like = _is_time_like(attrs.get("units"))
if (is_time_like == "datetime" and self.decode_times) or (
is_time_like == "timedelta" and self.decode_timedelta
):
dtype = _choose_float_dtype(dtype, encoding)
if "_FillValue" not in encoding and "missing_value" not in encoding:
dtype = _choose_float_dtype(dtype, encoding)
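
For reference, the docstring formula `decode_values = encoded_values * scale_factor + add_offset` worked through on toy numbers (illustrative only):

```python
import numpy as np

encoded = np.array([0, 10, 20], dtype="int16")
scale_factor, add_offset = 0.5, 100.0
decoded = encoded.astype("float64") * scale_factor + add_offset
print(decoded)  # [100. 105. 110.]
```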

10 changes: 8 additions & 2 deletions xarray/conventions.py
@@ -193,9 +193,15 @@ def decode_cf_variable(
var = variables.Numpy2StringDTypeCoder().decode(var)

if mask_and_scale:
dec_times = True if decode_times else False
dec_timedelta = True if decode_timedelta else False
for coder in [
variables.CFMaskCoder(),
variables.CFScaleOffsetCoder(),
variables.CFMaskCoder(
decode_times=dec_times, decode_timedelta=dec_timedelta
),
variables.CFScaleOffsetCoder(
decode_times=dec_times, decode_timedelta=dec_timedelta
),
]:
var = coder.decode(var, name=name)
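
With this change, `decode_cf_variable` tells both packing coders whether times and timedeltas will be decoded downstream. Note that `decode_times` may also be a `CFDatetimeCoder` instance rather than a plain bool, hence the coercion above. A hedged usage sketch (assumed public API):

```python
# Sketch only: passing a CFDatetimeCoder instance as decode_times still
# threads a plain boolean into the mask and scale/offset coders.
import numpy as np
from xarray import Variable, conventions
from xarray.coding.times import CFDatetimeCoder

enc = Variable(
    ("time",),
    np.array([0, 1, -1], dtype="int16"),
    attrs={"units": "days since 2000-01-01", "_FillValue": np.int16(-1)},
)
decoded = conventions.decode_cf_variable(
    "time", enc, decode_times=CFDatetimeCoder(time_unit="s"), decode_timedelta=False
)
print(decoded.dtype)  # datetime64[s], with NaT at the masked entry
```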

36 changes: 28 additions & 8 deletions xarray/tests/test_coding_times.py
@@ -638,7 +638,9 @@ def test_cf_timedelta_2d() -> None:


@pytest.mark.parametrize("encoding_unit", FREQUENCIES_TO_ENCODING_UNITS.values())
def test_decode_cf_timedelta_time_unit(time_unit, encoding_unit) -> None:
def test_decode_cf_timedelta_time_unit(
time_unit: PDDatetimeUnitOptions, encoding_unit
) -> None:
encoded = 1
encoding_unit_as_numpy = _netcdf_to_numpy_timeunit(encoding_unit)
if np.timedelta64(1, time_unit) > np.timedelta64(1, encoding_unit_as_numpy):
@@ -652,7 +654,9 @@ def test_decode_cf_timedelta_time_unit(time_unit, encoding_unit) -> None:
assert result.dtype == expected.dtype


def test_decode_cf_timedelta_time_unit_out_of_bounds(time_unit) -> None:
def test_decode_cf_timedelta_time_unit_out_of_bounds(
time_unit: PDDatetimeUnitOptions,
) -> None:
# Define a scale factor that will guarantee overflow with the given
# time_unit.
scale_factor = np.timedelta64(1, time_unit) // np.timedelta64(1, "ns")
@@ -661,7 +665,7 @@ def test_decode_cf_timedelta_time_unit_out_of_bounds(time_unit) -> None:
decode_cf_timedelta(encoded, "days", time_unit)


def test_cf_timedelta_roundtrip_large_value(time_unit) -> None:
def test_cf_timedelta_roundtrip_large_value(time_unit: PDDatetimeUnitOptions) -> None:
value = np.timedelta64(np.iinfo(np.int64).max, time_unit)
encoded, units = encode_cf_timedelta(value)
decoded = decode_cf_timedelta(encoded, units, time_unit=time_unit)
@@ -983,7 +987,7 @@ def test_use_cftime_default_standard_calendar_out_of_range(
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2000, 2500])
def test_use_cftime_default_non_standard_calendar(
calendar, units_year, time_unit
calendar, units_year, time_unit: PDDatetimeUnitOptions
) -> None:
from cftime import num2date

@@ -1429,9 +1433,9 @@ def test_roundtrip_datetime64_nanosecond_precision_warning(
) -> None:
# test warning if times can't be serialized faithfully
times = [
np.datetime64("1970-01-01T00:01:00", "ns"),
np.datetime64("NaT"),
np.datetime64("1970-01-02T00:01:00", "ns"),
np.datetime64("1970-01-01T00:01:00", time_unit),
np.datetime64("NaT", time_unit),
np.datetime64("1970-01-02T00:01:00", time_unit),
]
units = "days since 1970-01-10T01:01:00"
needed_units = "hours"
@@ -1620,7 +1624,9 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None
_ENCODE_DATETIME64_VIA_DASK_TESTS.values(),
ids=_ENCODE_DATETIME64_VIA_DASK_TESTS.keys(),
)
def test_encode_cf_datetime_datetime64_via_dask(freq, units, dtype, time_unit) -> None:
def test_encode_cf_datetime_datetime64_via_dask(
freq, units, dtype, time_unit: PDDatetimeUnitOptions
) -> None:
import dask.array

times_pd = pd.date_range(start="1700", freq=freq, periods=3, unit=time_unit)
@@ -1901,3 +1907,17 @@ def test_lazy_decode_timedelta_error() -> None:
)
with pytest.raises(OutOfBoundsTimedelta, match="overflow"):
decoded.load()


@pytest.mark.parametrize("decode_timedelta", [True, False])
@pytest.mark.parametrize("mask_and_scale", [True, False])
def test_decode_timedelta_mask_and_scale(
decode_timedelta: bool, mask_and_scale: bool
) -> None:
attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0}
encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs)
decoded = conventions.decode_cf_variable(
"foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta
)
result = conventions.encode_cf_variable(decoded, name="foo")
assert_equal(encoded, result)
5 changes: 1 addition & 4 deletions xarray/tests/test_conventions.py
@@ -511,16 +511,13 @@ def test_decode_dask_times(self) -> None:

@pytest.mark.parametrize("time_unit", ["s", "ms", "us", "ns"])
def test_decode_cf_time_kwargs(self, time_unit) -> None:
# todo: if we set timedelta attrs "units": "days"
# this errors on the last decode_cf wrt to the lazy_elemwise_func
# trying to convert twice
ds = Dataset.from_dict(
{
"coords": {
"timedelta": {
"data": np.array([1, 2, 3], dtype="int64"),
"dims": "timedelta",
"attrs": {"units": "seconds"},
"attrs": {"units": "days"},
},
"time": {
"data": np.array([1, 2, 3], dtype="int64"),