From c640030706bcdf45078201560ae5a8fce3897ac6 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Mon, 18 Dec 2023 21:51:31 +0100 Subject: [PATCH 1/9] fix skip decorator --- xarray/tests/__init__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index b3a31b28016..393f5ed1a91 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -58,7 +58,11 @@ def _importorskip( raise ImportError("Minimum version not satisfied") except ImportError: has = False - func = pytest.mark.skipif(not has, reason=f"requires {modname}") + + reason = f"requires {modname}" + if minversion is not None: + reason += f">={minversion}" + func = pytest.mark.skipif(not has, reason=reason) return has, func @@ -107,10 +111,7 @@ def _importorskip( not has_pandas_version_two, reason="requires pandas 2.0.0" ) has_numpy_array_api, requires_numpy_array_api = _importorskip("numpy", "1.26.0") -has_h5netcdf_ros3 = _importorskip("h5netcdf", "1.3.0") -requires_h5netcdf_ros3 = pytest.mark.skipif( - not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0" -) +has_h5netcdf_ros3, requires_h5netcdf_ros3 = _importorskip("h5netcdf", "1.3.0") # change some global options for tests set_options(warn_for_unclosed_files=True) From ffaef21e058f858f4afd73d23832989dc0c90c51 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Mon, 18 Dec 2023 22:07:51 +0100 Subject: [PATCH 2/9] support hashable dims in DataArray --- xarray/core/dataarray.py | 34 ++++++++++++++---------- xarray/tests/test_hashable.py | 50 +++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 14 deletions(-) create mode 100644 xarray/tests/test_hashable.py diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0335ad3bdda..2e625ee62d1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -61,6 +61,7 @@ ReprObject, _default, either_dict_or_kwargs, + hashable, ) from xarray.core.variable import ( IndexVariable, @@ -140,7 +141,9 @@ def _check_coords_dims(shape, coords, dim): def _infer_coords_and_dims( - shape, coords, dims + shape: tuple[int, ...], + coords: Sequence[Sequence[Any] | pd.Index | DataArray] | Mapping[Any, Any] | None, + dims: str | Iterable[Hashable] | None, ) -> tuple[Mapping[Hashable, Any], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" @@ -157,8 +160,7 @@ def _infer_coords_and_dims( if isinstance(dims, str): dims = (dims,) - - if dims is None: + elif dims is None: dims = [f"dim_{n}" for n in range(len(shape))] if coords is not None and len(coords) == len(shape): # try to infer dimensions from coords @@ -168,16 +170,16 @@ def _infer_coords_and_dims( for n, (dim, coord) in enumerate(zip(dims, coords)): coord = as_variable(coord, name=dims[n]).to_index_variable() dims[n] = coord.name - dims = tuple(dims) - elif len(dims) != len(shape): + dims_tuple = tuple(dims) + if len(dims_tuple) != len(shape): raise ValueError( "different number of dimensions on data " - f"and dims: {len(shape)} vs {len(dims)}" + f"and dims: {len(shape)} vs {len(dims_tuple)}" ) else: - for d in dims: - if not isinstance(d, str): - raise TypeError(f"dimension {d} is not a string") + for d in dims_tuple: + if not hashable(d): + raise TypeError(f"Dimension {d} is not hashable") new_coords: Mapping[Hashable, Any] @@ -189,17 +191,21 @@ def _infer_coords_and_dims( for k, v in coords.items(): new_coords[k] = as_variable(v, name=k) elif coords is not None: - for dim, coord in zip(dims, coords): + for dim, coord in zip(dims_tuple, coords): var = as_variable(coord, name=dim) var.dims = (dim,) new_coords[dim] = var.to_index_variable() - _check_coords_dims(shape, new_coords, dims) + _check_coords_dims(shape, new_coords, dims_tuple) - return new_coords, dims + return new_coords, dims_tuple -def _check_data_shape(data, coords, dims): +def _check_data_shape( + data: Any, + coords: Sequence[Sequence[Any] | pd.Index | DataArray] | Mapping[Any, Any] | None, + dims: str | Iterable[Hashable] | None, +) -> Any: if data is dtypes.NA: data = np.nan if coords is not None and utils.is_scalar(data, include_0d=False): @@ -408,7 +414,7 @@ def __init__( coords: Sequence[Sequence[Any] | pd.Index | DataArray] | Mapping[Any, Any] | None = None, - dims: Hashable | Sequence[Hashable] | None = None, + dims: str | Iterable[Hashable] | None = None, name: Hashable | None = None, attrs: Mapping | None = None, # internal parameters diff --git a/xarray/tests/test_hashable.py b/xarray/tests/test_hashable.py new file mode 100644 index 00000000000..2c3e5c245cd --- /dev/null +++ b/xarray/tests/test_hashable.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from enum import Enum +from typing import Union + +import pytest + +from xarray import DataArray, Dataset, Variable +from xarray.core.types import TypeAlias + + +class DEnum(Enum): + dim = "dim" + + +class CustmomHashable: + def __init__(self, a: int) -> None: + self.a = a + + def __hash__(self) -> int: + return self.a + + +parametrize_dim = pytest.mark.parametrize( + "dim", + [ + pytest.param(5, id="int"), + pytest.param(("a", "b"), id="tuple"), + pytest.param(DEnum.dim, id="enum"), + pytest.param(CustmomHashable(3), id="HashableObject"), + ], +) +DimT: TypeAlias = Union[int, tuple, DEnum, CustmomHashable] + + +@parametrize_dim +def test_hashable_dims(dim: DimT) -> None: + v = Variable([dim], [1, 2, 3]) + da = DataArray([1, 2, 3], dims=[dim]) + Dataset({"a": ([dim], [1, 2, 3])}) + + # alternative constructors + DataArray(v) + Dataset({"a": v}) + Dataset({"a": da}) + + +@parametrize_dim +def test_dataset_variable_hashable_names(dim: DimT) -> None: + Dataset({dim: ("x", [1, 2, 3])}) From 1fe3a3b535e17886a0c91499cfdf4f51b826bd89 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Mon, 18 Dec 2023 22:11:08 +0100 Subject: [PATCH 3/9] add whats-new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c0917b7443b..00f6763c355 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,9 @@ Deprecations Bug fixes ~~~~~~~~~ +- Support non-string hashable dimensions in :py:class:`xarray.DataArray` (:issue:`8546`, :pull:`8559`). + By `Michael Niklas `_. + Documentation ~~~~~~~~~~~~~ From 218e14c2ae243f12201c3b6b89a2635842709d82 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Mon, 18 Dec 2023 22:18:47 +0100 Subject: [PATCH 4/9] remove uneccessary except ImportError --- xarray/core/dataset.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9ec39e74ad1..c40dee2631c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -130,6 +130,8 @@ from xarray.util.deprecation_helpers import _deprecate_positional_args if TYPE_CHECKING: + from dask.dataframe import DataFrame as DaskDataFrame + from dask.delayed import Delayed from numpy.typing import ArrayLike from xarray.backends import AbstractDataStore, ZarrStore @@ -164,15 +166,6 @@ ) from xarray.core.weighted import DatasetWeighted - try: - from dask.delayed import Delayed - except ImportError: - Delayed = None # type: ignore - try: - from dask.dataframe import DataFrame as DaskDataFrame - except ImportError: - DaskDataFrame = None # type: ignore - # list of attributes of pd.DatetimeIndex that are ndarrays of time info _DATETIMEINDEX_COMPONENTS = [ From caad4fb98c7d8ab6de0037bdc391c5938abef61a Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Mon, 18 Dec 2023 22:20:21 +0100 Subject: [PATCH 5/9] remove uneccessary except ImportError #2 --- xarray/core/dataarray.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2e625ee62d1..ec9c88e5c66 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -11,6 +11,8 @@ Generic, Literal, NoReturn, + TypeVar, + Union, overload, ) @@ -74,23 +76,11 @@ from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims if TYPE_CHECKING: - from typing import TypeVar, Union - + from dask.dataframe import DataFrame as DaskDataFrame + from dask.delayed import Delayed + from iris.cube import Cube as iris_Cube from numpy.typing import ArrayLike - try: - from dask.dataframe import DataFrame as DaskDataFrame - except ImportError: - DaskDataFrame = None # type: ignore - try: - from dask.delayed import Delayed - except ImportError: - Delayed = None # type: ignore - try: - from iris.cube import Cube as iris_Cube - except ImportError: - iris_Cube = None - from xarray.backends import ZarrStore from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes from xarray.core.groupby import DataArrayGroupBy From eb6621fbebad6ae7132b7c44a35b7483043737f3 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 19 Dec 2023 21:29:10 +0100 Subject: [PATCH 6/9] remove some Anys --- xarray/core/dataarray.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ec9c88e5c66..2875d5aa87d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -132,7 +132,7 @@ def _check_coords_dims(shape, coords, dim): def _infer_coords_and_dims( shape: tuple[int, ...], - coords: Sequence[Sequence[Any] | pd.Index | DataArray] | Mapping[Any, Any] | None, + coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None, dims: str | Iterable[Hashable] | None, ) -> tuple[Mapping[Hashable, Any], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" @@ -193,7 +193,7 @@ def _infer_coords_and_dims( def _check_data_shape( data: Any, - coords: Sequence[Sequence[Any] | pd.Index | DataArray] | Mapping[Any, Any] | None, + coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None, dims: str | Iterable[Hashable] | None, ) -> Any: if data is dtypes.NA: @@ -401,9 +401,7 @@ class DataArray( def __init__( self, data: Any = dtypes.NA, - coords: Sequence[Sequence[Any] | pd.Index | DataArray] - | Mapping[Any, Any] - | None = None, + coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, dims: str | Iterable[Hashable] | None = None, name: Hashable | None = None, attrs: Mapping | None = None, From 1a7f52c65132cd4773ce6dcdeb64270746414e38 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 19 Dec 2023 22:07:33 +0100 Subject: [PATCH 7/9] fix test and checking --- xarray/core/dataarray.py | 7 +++---- xarray/tests/test_dataarray.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2875d5aa87d..1fe58894315 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -166,10 +166,9 @@ def _infer_coords_and_dims( "different number of dimensions on data " f"and dims: {len(shape)} vs {len(dims_tuple)}" ) - else: - for d in dims_tuple: - if not hashable(d): - raise TypeError(f"Dimension {d} is not hashable") + for d in dims_tuple: + if not hashable(d): + raise TypeError(f"Dimension {d} is not hashable") new_coords: Mapping[Hashable, Any] diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6dc85fc5691..ab1fc316f77 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -401,8 +401,8 @@ def test_constructor_invalid(self) -> None: with pytest.raises(ValueError, match=r"not a subset of the .* dim"): DataArray(data, {"x": [0, 1, 2]}) - with pytest.raises(TypeError, match=r"is not a string"): - DataArray(data, dims=["x", None]) + with pytest.raises(TypeError, match=r"is not hashable"): + DataArray(data, dims=["x", []]) # type: ignore[list-item] with pytest.raises(ValueError, match=r"conflicting sizes for dim"): DataArray([1, 2, 3], coords=[("x", [0, 1])]) From f11b9ba326e2c4204dd512b066c5bf8c18ec4438 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 19 Dec 2023 22:24:01 +0100 Subject: [PATCH 8/9] import in typechecking only --- xarray/tests/test_hashable.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_hashable.py b/xarray/tests/test_hashable.py index 2c3e5c245cd..b927bb6f9ee 100644 --- a/xarray/tests/test_hashable.py +++ b/xarray/tests/test_hashable.py @@ -1,12 +1,16 @@ from __future__ import annotations from enum import Enum -from typing import Union +from typing import TYPE_CHECKING, Union import pytest from xarray import DataArray, Dataset, Variable -from xarray.core.types import TypeAlias + +if TYPE_CHECKING: + from xarray.core.types import TypeAlias + + DimT: TypeAlias = Union[int, tuple, "DEnum", "CustmomHashable"] class DEnum(Enum): @@ -30,7 +34,6 @@ def __hash__(self) -> int: pytest.param(CustmomHashable(3), id="HashableObject"), ], ) -DimT: TypeAlias = Union[int, tuple, DEnum, CustmomHashable] @parametrize_dim From b06452e699c09b0458459069d1480ce849f30a06 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Tue, 19 Dec 2023 22:46:04 +0100 Subject: [PATCH 9/9] fix typo --- xarray/tests/test_hashable.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_hashable.py b/xarray/tests/test_hashable.py index b927bb6f9ee..9f92c604dc3 100644 --- a/xarray/tests/test_hashable.py +++ b/xarray/tests/test_hashable.py @@ -10,14 +10,14 @@ if TYPE_CHECKING: from xarray.core.types import TypeAlias - DimT: TypeAlias = Union[int, tuple, "DEnum", "CustmomHashable"] + DimT: TypeAlias = Union[int, tuple, "DEnum", "CustomHashable"] class DEnum(Enum): dim = "dim" -class CustmomHashable: +class CustomHashable: def __init__(self, a: int) -> None: self.a = a @@ -31,7 +31,7 @@ def __hash__(self) -> int: pytest.param(5, id="int"), pytest.param(("a", "b"), id="tuple"), pytest.param(DEnum.dim, id="enum"), - pytest.param(CustmomHashable(3), id="HashableObject"), + pytest.param(CustomHashable(3), id="HashableObject"), ], )