From 00229203421317709ae9260e3cd939e8bdf83163 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 10:16:34 -0700 Subject: [PATCH 1/6] Use to_numpy in time decoding --- xarray/coding/times.py | 8 ++++---- xarray/core/formatting.py | 17 ++++++----------- xarray/namedarray/pycompat.py | 11 +++++++++-- xarray/tests/arrays.py | 7 +++++++ xarray/tests/namespace.py | 5 +++++ xarray/tests/test_coding_times.py | 18 ++++++++++++++++++ 6 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 xarray/tests/namespace.py diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 68369dac0d7..fb859813f7e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -28,7 +28,7 @@ from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray.pycompat import is_chunked_array, to_numpy from xarray.namedarray.utils import is_duck_dask_array try: @@ -310,7 +310,7 @@ def _decode_cf_datetime_dtype( # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) example_value = np.concatenate( - [first_n_items(values, 1) or [0], last_item(values) or [0]] + [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] ) try: @@ -516,7 +516,7 @@ def decode_cf_datetime( -------- cftime.num2date """ - num_dates = np.asarray(num_dates) + num_dates = to_numpy(num_dates) flat_num_dates = ravel(num_dates) if calendar is None: calendar = "standard" @@ -632,7 +632,7 @@ def decode_cf_timedelta( """Given an array of numeric timedeltas in netCDF format, convert it into a numpy timedelta64 ["s", "ms", "us", "ns"] array. 
""" - num_timedeltas = np.asarray(num_timedeltas) + num_timedeltas = to_numpy(num_timedeltas) unit = _netcdf_to_numpy_timeunit(units) with warnings.catch_warnings(): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index a6bacccbeef..993cddf2b57 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -18,12 +18,12 @@ from pandas.errors import OutOfBoundsDatetime from xarray.core.datatree_render import RenderDataTree -from xarray.core.duck_array_ops import array_all, array_any, array_equiv, astype +from xarray.core.duck_array_ops import array_all, array_any, array_equiv, astype, ravel from xarray.core.indexing import MemoryCachedArray from xarray.core.options import OPTIONS, _get_boolean_with_default from xarray.core.treenode import group_subtrees from xarray.core.utils import is_duck_array -from xarray.namedarray.pycompat import array_type, to_duck_array, to_numpy +from xarray.namedarray.pycompat import array_type, to_duck_array if TYPE_CHECKING: from xarray.core.coordinates import AbstractCoordinates @@ -94,7 +94,7 @@ def first_n_items(array, n_desired): # pass Variable._data if isinstance(array, Variable): array = array._data - return np.ravel(to_duck_array(array))[:n_desired] + return ravel(to_duck_array(array))[:n_desired] def last_n_items(array, n_desired): @@ -118,18 +118,13 @@ def last_n_items(array, n_desired): # pass Variable._data if isinstance(array, Variable): array = array._data - return np.ravel(to_duck_array(array))[-n_desired:] + return ravel(to_duck_array(array))[-n_desired:] def last_item(array): - """Returns the last item of an array in a list or an empty list.""" - if array.size == 0: - # work around for https://github.com/numpy/numpy/issues/5195 - return [] - + """Returns the last item of an array.""" indexer = (slice(-1, None),) * array.ndim - # to_numpy since dask doesn't support tolist - return np.ravel(to_numpy(array[indexer])).tolist() + return ravel(to_duck_array(array[indexer])) def calc_max_rows_first(max_rows: int) -> int: diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 3ce33d4d8ea..91903f5cfaf 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -102,6 +102,10 @@ def to_numpy( from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type + if hasattr(data, "to_numpy"): + # for tests only + return data.to_numpy() + if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() # type: ignore[no-untyped-call] @@ -122,7 +126,10 @@ def to_numpy( def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, _DType]: - from xarray.core.indexing import ExplicitlyIndexed + from xarray.core.indexing import ( + ExplicitlyIndexed, + ImplicitToExplicitIndexingAdapter, + ) from xarray.namedarray.parallelcompat import get_chunked_array_type if is_chunked_array(data): @@ -130,7 +137,7 @@ def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated] return loaded_data - if isinstance(data, ExplicitlyIndexed): + if isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter): return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] elif is_duck_array(data): return data diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index 7373b6c75ab..cc4c480c437 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -51,6 +51,10 @@ def 
__init__(self, array: np.ndarray): def __getitem__(self, key): return type(self)(self.array[key]) + def to_numpy(self) -> np.ndarray: + """Allow explicit conversions to numpy in `to_numpy`, but disallow np.asarray etc.""" + return self.array + def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: @@ -58,6 +62,9 @@ def __array__( def __array_namespace__(self): """Present to satisfy is_duck_array test.""" + from xarray.tests import namespace + + return namespace CONCATENATABLEARRAY_HANDLED_ARRAY_FUNCTIONS: dict[str, Callable] = {} diff --git a/xarray/tests/namespace.py b/xarray/tests/namespace.py new file mode 100644 index 00000000000..f0cc28f4b57 --- /dev/null +++ b/xarray/tests/namespace.py @@ -0,0 +1,5 @@ +from xarray.core import duck_array_ops + + +def reshape(array, shape, **kwargs): + return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs)) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 2e61e5d853e..380d5265653 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -42,6 +42,7 @@ from xarray.core.utils import is_duck_dask_array from xarray.testing import assert_equal, assert_identical from xarray.tests import ( + DuckArrayWrapper, FirstElementAccessibleArray, arm_xfail, assert_array_equal, @@ -1901,3 +1902,20 @@ def test_lazy_decode_timedelta_error() -> None: ) with pytest.raises(OutOfBoundsTimedelta, match="overflow"): decoded.load() + + +@pytest.mark.parametrize("calendar", ["standard", "360_day"]) +def test_duck_array_decode_times(calendar) -> None: + from xarray.core.indexing import LazilyIndexedArray + + days = LazilyIndexedArray(DuckArrayWrapper(np.array([1.0, 2.0, 3.0]))) + var = Variable( + ["time"], days, {"units": "days since 2001-01-01", "calendar": calendar} + ) + decoded = conventions.decode_cf_variable( + "foo", var, decode_times=CFDatetimeCoder(use_cftime=None) + ) + if calendar not in _STANDARD_CALENDARS: + assert decoded.dtype == np.dtype("O") + else: + assert decoded.dtype == np.dtype("=M8[ns]") From 856b299a610b0734fd75a2ce5981173aa4714922 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 10:22:09 -0700 Subject: [PATCH 2/6] WIP --- xarray/coding/strings.py | 5 ++-- xarray/coding/times.py | 63 ++++++++++++++++++++++++++------------- xarray/tests/arrays.py | 12 ++++++++ xarray/tests/namespace.py | 27 ++++++++++++++++- 4 files changed, 83 insertions(+), 24 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 4ca6a3f0a46..8c74e0a2dfb 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -18,7 +18,7 @@ from xarray.core.utils import module_available from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray.pycompat import is_chunked_array, to_numpy HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -135,7 +135,8 @@ def decode(self, variable, name=None): if data.dtype == "S1" and dims: encoding["char_dim_name"] = dims[-1] dims = dims[:-1] - data = char_to_bytes(data) + # TODO (duck array encoding) + data = char_to_bytes(to_numpy(data)) return Variable(dims, data, attrs, encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fb859813f7e..57fed1fe52c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -21,14 +21,24 @@ unpack_for_encoding, ) from xarray.core import indexing 
+from xarray.core.array_api_compat import get_array_namespace from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like -from xarray.core.duck_array_ops import array_all, array_any, asarray, ravel, reshape +from xarray.core.duck_array_ops import ( + array_all, + array_any, + asarray, + astype, + concatenate, + isnull, + ravel, + reshape, +) from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import default_precision_timestamp, timestamp_as_unit from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array, to_numpy +from xarray.namedarray.pycompat import is_chunked_array, to_duck_array, to_numpy from xarray.namedarray.utils import is_duck_dask_array try: @@ -100,7 +110,7 @@ def _is_numpy_compatible_time_range(times): if is_np_datetime_like(times.dtype): return True # times array contains cftime objects - times = np.asarray(times) + times = to_duck_array(times) tmin = times.min() tmax = times.max() try: @@ -309,8 +319,9 @@ def _decode_cf_datetime_dtype( # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) - example_value = np.concatenate( - [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] + zero = asarray([0], xp=get_array_namespace(values)) + example_value = concatenate( + [first_n_items(values, 1) or zero, last_item(values) or zero] ) try: @@ -342,7 +353,13 @@ def _decode_datetime_with_cftime( cftime = attempt_import("cftime") if num_dates.size > 0: return np.asarray( - cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) + cftime.num2date( + # cftime uses Cython so we must convert to numpy here. + to_numpy(num_dates), + units, + calendar, + only_use_cftime_datetimes=True, + ) ) else: return np.array([], dtype=object) @@ -357,7 +374,7 @@ def _check_date_for_units_since_refdate( f"Value {date} can't be represented as Datetime/Timedelta." ) delta = date * np.timedelta64(1, unit) - if not np.isnan(delta): + if not isnull(delta): # this will raise on dtype overflow for integer dtypes if date.dtype.kind in "u" and not np.int64(delta) == date: raise OutOfBoundsTimedelta( @@ -381,7 +398,7 @@ def _check_timedelta_range(value, data_unit, time_unit): "ignore", "invalid value encountered in multiply", RuntimeWarning ) delta = value * np.timedelta64(1, data_unit) - if not np.isnan(delta): + if not isnull(delta): # this will raise on dtype overflow for integer dtypes if value.dtype.kind in "u" and not np.int64(delta) == value: raise OutOfBoundsTimedelta( @@ -449,9 +466,9 @@ def _decode_datetime_with_pandas( # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for # more details. 
if flat_num_dates.dtype.kind == "i": - flat_num_dates = flat_num_dates.astype(np.int64) + flat_num_dates = astype(flat_num_dates, np.int64) elif flat_num_dates.dtype.kind == "u": - flat_num_dates = flat_num_dates.astype(np.uint64) + flat_num_dates = astype(flat_num_dates, np.uint64) try: time_unit, ref_date = _unpack_time_unit_and_ref_date(units) @@ -483,9 +500,9 @@ def _decode_datetime_with_pandas( # overflow when converting to np.int64 would not be representable with a # timedelta64 value, and therefore would raise an error in the lines above. if flat_num_dates.dtype.kind in "iu": - flat_num_dates = flat_num_dates.astype(np.int64) + flat_num_dates = astype(flat_num_dates, np.int64) elif flat_num_dates.dtype.kind in "f": - flat_num_dates = flat_num_dates.astype(np.float64) + flat_num_dates = astype(flat_num_dates, np.float64) timedeltas = _numbers_to_timedelta( flat_num_dates, time_unit, ref_date.unit, "datetime" @@ -528,8 +545,12 @@ def decode_cf_datetime( ) except (KeyError, OutOfBoundsDatetime, OutOfBoundsTimedelta, OverflowError): dates = _decode_datetime_with_cftime( - flat_num_dates.astype(float), units, calendar + astype(flat_num_dates, float), units, calendar ) + # This conversion to numpy is only needed for nanarg* below. + # TODO: explore removing it. + # Note that `dates` is already a numpy object array of cftime objects. + num_dates = to_numpy(num_dates) # retrieve cftype dates_min = dates[np.nanargmin(num_dates)] dates_max = dates[np.nanargmax(num_dates)] @@ -586,16 +607,16 @@ def _numbers_to_timedelta( """Transform numbers to np.timedelta64.""" # keep NaT/nan mask if flat_num.dtype.kind == "f": - nan = np.asarray(np.isnan(flat_num)) + nan = isnull(flat_num) elif flat_num.dtype.kind == "i": - nan = np.asarray(flat_num == np.iinfo(np.int64).min) + nan = flat_num == np.iinfo(np.int64).min # in case we need to change the unit, we fix the numbers here # this should be safe, as errors would have been raised above ns_time_unit = _NS_PER_TIME_DELTA[time_unit] ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit] if ns_time_unit > ns_ref_date_unit: - flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit)) + flat_num = flat_num * np.int64(ns_time_unit / ns_ref_date_unit) time_unit = ref_unit # estimate fitting resolution for floating point values @@ -618,12 +639,12 @@ def _numbers_to_timedelta( # to prevent casting NaN to int with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - flat_num = flat_num.astype(np.int64) - if nan.any(): + flat_num = astype(flat_num, np.int64) + if array_any(nan): flat_num[nan] = np.iinfo(np.int64).min # cast to wanted type - return flat_num.astype(f"timedelta64[{time_unit}]") + return astype(flat_num, f"timedelta64[{time_unit}]") def decode_cf_timedelta( @@ -712,8 +733,8 @@ def infer_datetime_units(dates) -> str: 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = ravel(np.asarray(dates)) - if np.issubdtype(np.asarray(dates).dtype, "datetime64"): + dates = ravel(to_duck_array(dates)) + if np.issubdtype(dates.dtype, "datetime64"): dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] reference_date = dates[0] if len(dates) > 0 else "1970-01-01" diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index cc4c480c437..ca7d70c4be5 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -51,6 +51,18 @@ def __init__(self, array: np.ndarray): def __getitem__(self, key): return type(self)(self.array[key]) + def min(self): + 
return self.array.min() + + def max(self): + return self.array.max() + + def __mul__(self, other): + return type(self)(self.array.__mul__(other)) + + def __radd__(self, other): + return type(self)(other + self.array) + def to_numpy(self) -> np.ndarray: """Allow explicit conversions to numpy in `to_numpy`, but disallow np.asarray etc.""" return self.array diff --git a/xarray/tests/namespace.py b/xarray/tests/namespace.py index f0cc28f4b57..ceeb85c2cbc 100644 --- a/xarray/tests/namespace.py +++ b/xarray/tests/namespace.py @@ -1,5 +1,30 @@ -from xarray.core import duck_array_ops +import numpy as np + +from xarray.core import array_api_compat, duck_array_ops def reshape(array, shape, **kwargs): return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs)) + + +def concatenate(arrays, axis): + return type(arrays[0])( + duck_array_ops.concatenate([a.array for a in arrays], axis=axis) + ) + + +def result_type(*arrays_and_dtypes): + parsed = [a.array if hasattr(a, "array") else a for a in arrays_and_dtypes] + return array_api_compat.result_type(*parsed, xp=np) + + +def astype(array, dtype, **kwargs): + return type(array)(duck_array_ops.astype(array.array, dtype=dtype, **kwargs)) + + +def isnan(array): + return type(array)(duck_array_ops.isnull(array.array)) + + +def any(array, *args, **kwargs): # TODO: keepdims + return duck_array_ops.array_any(array.array, *args, **kwargs) From f69ba29367ea502c9f97d0c76fb4c2f11dfc31ed Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 10:22:21 -0700 Subject: [PATCH 3/6] Revert "WIP" This reverts commit 54be9b1b8c1b1f0fa23d8dd7ae0a96bb0834b9dc. --- xarray/coding/strings.py | 5 ++-- xarray/coding/times.py | 63 +++++++++++++-------------------------- xarray/tests/arrays.py | 12 -------- xarray/tests/namespace.py | 27 +---------------- 4 files changed, 24 insertions(+), 83 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 8c74e0a2dfb..4ca6a3f0a46 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -18,7 +18,7 @@ from xarray.core.utils import module_available from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array, to_numpy +from xarray.namedarray.pycompat import is_chunked_array HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -135,8 +135,7 @@ def decode(self, variable, name=None): if data.dtype == "S1" and dims: encoding["char_dim_name"] = dims[-1] dims = dims[:-1] - # TODO (duck array encoding) - data = char_to_bytes(to_numpy(data)) + data = char_to_bytes(data) return Variable(dims, data, attrs, encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 57fed1fe52c..fb859813f7e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -21,24 +21,14 @@ unpack_for_encoding, ) from xarray.core import indexing -from xarray.core.array_api_compat import get_array_namespace from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like -from xarray.core.duck_array_ops import ( - array_all, - array_any, - asarray, - astype, - concatenate, - isnull, - ravel, - reshape, -) +from xarray.core.duck_array_ops import array_all, array_any, asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import default_precision_timestamp, timestamp_as_unit from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import 
Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array, to_duck_array, to_numpy +from xarray.namedarray.pycompat import is_chunked_array, to_numpy from xarray.namedarray.utils import is_duck_dask_array try: @@ -110,7 +100,7 @@ def _is_numpy_compatible_time_range(times): if is_np_datetime_like(times.dtype): return True # times array contains cftime objects - times = to_duck_array(times) + times = np.asarray(times) tmin = times.min() tmax = times.max() try: @@ -319,9 +309,8 @@ def _decode_cf_datetime_dtype( # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) - zero = asarray([0], xp=get_array_namespace(values)) - example_value = concatenate( - [first_n_items(values, 1) or zero, last_item(values) or zero] + example_value = np.concatenate( + [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] ) try: @@ -353,13 +342,7 @@ def _decode_datetime_with_cftime( cftime = attempt_import("cftime") if num_dates.size > 0: return np.asarray( - cftime.num2date( - # cftime uses Cython so we must convert to numpy here. - to_numpy(num_dates), - units, - calendar, - only_use_cftime_datetimes=True, - ) + cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) ) else: return np.array([], dtype=object) @@ -374,7 +357,7 @@ def _check_date_for_units_since_refdate( f"Value {date} can't be represented as Datetime/Timedelta." ) delta = date * np.timedelta64(1, unit) - if not isnull(delta): + if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes if date.dtype.kind in "u" and not np.int64(delta) == date: raise OutOfBoundsTimedelta( @@ -398,7 +381,7 @@ def _check_timedelta_range(value, data_unit, time_unit): "ignore", "invalid value encountered in multiply", RuntimeWarning ) delta = value * np.timedelta64(1, data_unit) - if not isnull(delta): + if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes if value.dtype.kind in "u" and not np.int64(delta) == value: raise OutOfBoundsTimedelta( @@ -466,9 +449,9 @@ def _decode_datetime_with_pandas( # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for # more details. if flat_num_dates.dtype.kind == "i": - flat_num_dates = astype(flat_num_dates, np.int64) + flat_num_dates = flat_num_dates.astype(np.int64) elif flat_num_dates.dtype.kind == "u": - flat_num_dates = astype(flat_num_dates, np.uint64) + flat_num_dates = flat_num_dates.astype(np.uint64) try: time_unit, ref_date = _unpack_time_unit_and_ref_date(units) @@ -500,9 +483,9 @@ def _decode_datetime_with_pandas( # overflow when converting to np.int64 would not be representable with a # timedelta64 value, and therefore would raise an error in the lines above. 
if flat_num_dates.dtype.kind in "iu": - flat_num_dates = astype(flat_num_dates, np.int64) + flat_num_dates = flat_num_dates.astype(np.int64) elif flat_num_dates.dtype.kind in "f": - flat_num_dates = astype(flat_num_dates, np.float64) + flat_num_dates = flat_num_dates.astype(np.float64) timedeltas = _numbers_to_timedelta( flat_num_dates, time_unit, ref_date.unit, "datetime" @@ -545,12 +528,8 @@ def decode_cf_datetime( ) except (KeyError, OutOfBoundsDatetime, OutOfBoundsTimedelta, OverflowError): dates = _decode_datetime_with_cftime( - astype(flat_num_dates, float), units, calendar + flat_num_dates.astype(float), units, calendar ) - # This conversion to numpy is only needed for nanarg* below. - # TODO: explore removing it. - # Note that `dates` is already a numpy object array of cftime objects. - num_dates = to_numpy(num_dates) # retrieve cftype dates_min = dates[np.nanargmin(num_dates)] dates_max = dates[np.nanargmax(num_dates)] @@ -607,16 +586,16 @@ def _numbers_to_timedelta( """Transform numbers to np.timedelta64.""" # keep NaT/nan mask if flat_num.dtype.kind == "f": - nan = isnull(flat_num) + nan = np.asarray(np.isnan(flat_num)) elif flat_num.dtype.kind == "i": - nan = flat_num == np.iinfo(np.int64).min + nan = np.asarray(flat_num == np.iinfo(np.int64).min) # in case we need to change the unit, we fix the numbers here # this should be safe, as errors would have been raised above ns_time_unit = _NS_PER_TIME_DELTA[time_unit] ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit] if ns_time_unit > ns_ref_date_unit: - flat_num = flat_num * np.int64(ns_time_unit / ns_ref_date_unit) + flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit)) time_unit = ref_unit # estimate fitting resolution for floating point values @@ -639,12 +618,12 @@ def _numbers_to_timedelta( # to prevent casting NaN to int with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - flat_num = astype(flat_num, np.int64) - if array_any(nan): + flat_num = flat_num.astype(np.int64) + if nan.any(): flat_num[nan] = np.iinfo(np.int64).min # cast to wanted type - return astype(flat_num, f"timedelta64[{time_unit}]") + return flat_num.astype(f"timedelta64[{time_unit}]") def decode_cf_timedelta( @@ -733,8 +712,8 @@ def infer_datetime_units(dates) -> str: 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = ravel(to_duck_array(dates)) - if np.issubdtype(dates.dtype, "datetime64"): + dates = ravel(np.asarray(dates)) + if np.issubdtype(np.asarray(dates).dtype, "datetime64"): dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] reference_date = dates[0] if len(dates) > 0 else "1970-01-01" diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index ca7d70c4be5..cc4c480c437 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -51,18 +51,6 @@ def __init__(self, array: np.ndarray): def __getitem__(self, key): return type(self)(self.array[key]) - def min(self): - return self.array.min() - - def max(self): - return self.array.max() - - def __mul__(self, other): - return type(self)(self.array.__mul__(other)) - - def __radd__(self, other): - return type(self)(other + self.array) - def to_numpy(self) -> np.ndarray: """Allow explicit conversions to numpy in `to_numpy`, but disallow np.asarray etc.""" return self.array diff --git a/xarray/tests/namespace.py b/xarray/tests/namespace.py index ceeb85c2cbc..f0cc28f4b57 100644 --- a/xarray/tests/namespace.py +++ b/xarray/tests/namespace.py @@ -1,30 +1,5 @@ -import 
numpy as np - -from xarray.core import array_api_compat, duck_array_ops +from xarray.core import duck_array_ops def reshape(array, shape, **kwargs): return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs)) - - -def concatenate(arrays, axis): - return type(arrays[0])( - duck_array_ops.concatenate([a.array for a in arrays], axis=axis) - ) - - -def result_type(*arrays_and_dtypes): - parsed = [a.array if hasattr(a, "array") else a for a in arrays_and_dtypes] - return array_api_compat.result_type(*parsed, xp=np) - - -def astype(array, dtype, **kwargs): - return type(array)(duck_array_ops.astype(array.array, dtype=dtype, **kwargs)) - - -def isnan(array): - return type(array)(duck_array_ops.isnull(array.array)) - - -def any(array, *args, **kwargs): # TODO: keepdims - return duck_array_ops.array_any(array.array, *args, **kwargs) From e092c483b8a4c04e81c44bcd5d31454c3a6e56d8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 11:14:59 -0700 Subject: [PATCH 4/6] fix --- xarray/coding/times.py | 2 +- xarray/namedarray/pycompat.py | 4 ++-- xarray/tests/test_coding_times.py | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fb859813f7e..2c931d6fb5c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -310,7 +310,7 @@ def _decode_cf_datetime_dtype( # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) example_value = np.concatenate( - [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] + [to_numpy(first_n_items(values, 1)), to_numpy(last_item(values))] ) try: diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 91903f5cfaf..f7b60935103 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -103,8 +103,8 @@ def to_numpy( from xarray.namedarray.parallelcompat import get_chunked_array_type if hasattr(data, "to_numpy"): - # for tests only - return data.to_numpy() + # for tests only at the moment + return data.to_numpy() # type: ignore[no-any-return] if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() # type: ignore[no-untyped-call] diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 380d5265653..62fdc7955ba 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1904,7 +1904,15 @@ def test_lazy_decode_timedelta_error() -> None: decoded.load() -@pytest.mark.parametrize("calendar", ["standard", "360_day"]) +@pytest.mark.parametrize( + "calendar", + [ + "standard", + pytest.param( + "360_day", marks=pytest.mark.skipif(not has_cftime, reason="no cftime") + ), + ], +) def test_duck_array_decode_times(calendar) -> None: from xarray.core.indexing import LazilyIndexedArray From d171b44c5bdd4aaa2a040899dd4a724c53ef9341 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 9 Mar 2025 21:28:40 -0600 Subject: [PATCH 5/6] Update xarray/namedarray/pycompat.py --- xarray/namedarray/pycompat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index f7b60935103..35c545080fb 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -102,9 +102,11 @@ def to_numpy( from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type - if hasattr(data, "to_numpy"): + try: # 
for tests only at the moment return data.to_numpy() # type: ignore[no-any-return] + except AttributeError: + pass if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() # type: ignore[no-untyped-call] From 91f5d5ccdcf96a86e466334b6d07c7753c364986 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Mar 2025 13:04:51 -0600 Subject: [PATCH 6/6] Update xarray/namedarray/pycompat.py --- xarray/namedarray/pycompat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 35c545080fb..68b6a7853bf 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -104,7 +104,7 @@ def to_numpy( try: # for tests only at the moment - return data.to_numpy() # type: ignore[no-any-return] + return data.to_numpy() # type: ignore[no-any-return,union-attr] except AttributeError: pass
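
As a rough usage sketch (not part of the patch series): with the to_numpy fast path added to xarray/namedarray/pycompat.py above, an object that only offers an explicit to_numpy() method, like the DuckArrayWrapper test helper, converts cleanly without going through np.asarray. MyDuckArray below is a hypothetical stand-in for that helper, and the snippet assumes the patched pycompat.to_numpy is installed.

    import numpy as np

    from xarray.namedarray.pycompat import to_numpy


    class MyDuckArray:
        """Minimal wrapper: convertible only via an explicit to_numpy() call."""

        def __init__(self, array: np.ndarray):
            self.array = array

        def to_numpy(self) -> np.ndarray:
            # the patched pycompat.to_numpy tries this method first and only
            # falls back to its other code paths when it raises AttributeError
            return self.array


    wrapped = MyDuckArray(np.array([1.0, 2.0, 3.0]))
    print(to_numpy(wrapped))  # array([1., 2., 3.]), taken from the to_numpy() fast path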