From 690bde86b1524e9a7aca88e364a6aa58d9baef13 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 7 Oct 2021 19:32:24 -0400 Subject: [PATCH] change return type of DataArray.chunks and Dataset.chunks --- xarray/core/common.py | 17 +++++++++++++++++ xarray/core/dataarray.py | 19 ++++++++++++++----- xarray/core/dataset.py | 26 ++++++++++++-------------- xarray/core/variable.py | 19 +++++++++++++++---- 4 files changed, 58 insertions(+), 23 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 2c5d7900ef8..55bd2c03f6a 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1813,6 +1813,23 @@ def ones_like(other, dtype: DTypeLike = None): return full_like(other, 1, dtype) +def get_chunks( + variables: Iterable[Variable], +) -> Mapping[Hashable, Tuple[int, ...]]: + + chunks: Dict[Hashable, Tuple[int, ...]] = {} + for v in variables: + if hasattr(v.data, "chunks"): + for dim, c in v.chunks.items(): + if dim in chunks and c != chunks[dim]: + raise ValueError( + f"Object has inconsistent chunks along dimension {dim}. " + "This can be fixed by calling unify_chunks()." 
+ ) + chunks[dim] = c + return Frozen(chunks) + + def is_np_datetime_like(dtype: DTypeLike) -> bool: """Check if a dtype is a subclass of the numpy datetime types""" return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ed8b393628d..9d1a5bf4d93 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -43,7 +43,7 @@ reindex_like_indexers, ) from .arithmetic import DataArrayArithmetic -from .common import AbstractArray, DataWithCoords +from .common import AbstractArray, DataWithCoords, get_chunks from .computation import unify_chunks from .coordinates import ( DataArrayCoordinates, @@ -1057,11 +1057,20 @@ def __deepcopy__(self, memo=None) -> "DataArray": __hash__ = None # type: ignore[assignment] @property - def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: - """Block dimensions for this array's data or None if it's not a dask - array. + def chunks(self) -> Optional[Mapping[Hashable, Tuple[int, ...]]]: """ - return self.variable.chunks + Mapping from dimension names to block lengths for this dataarray's data; an empty + mapping if neither the data nor any coordinate has chunked (e.g. dask) data. + + Cannot be modified directly, but can be modified by calling .chunk().
+ + See Also + -------- + DataArray.chunk + xarray.unify_chunks + """ + all_variables = [self.variable] + [c.variable for c in self.coords.values()] + return get_chunks(all_variables) def chunk( self, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4b1b1de222d..fa8bc7183c7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -52,7 +52,7 @@ ) from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .arithmetic import DatasetArithmetic -from .common import DataWithCoords, _contains_datetime_like_objects +from .common import DataWithCoords, _contains_datetime_like_objects, get_chunks from .computation import unify_chunks from .coordinates import ( DatasetCoordinates, @@ -2090,20 +2090,18 @@ def info(self, buf=None) -> None: @property def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: - """Block dimensions for this dataset's data or None if it's not a dask - array. """ - chunks: Dict[Hashable, Tuple[int, ...]] = {} - for v in self.variables.values(): - if v.chunks is not None: - for dim, c in zip(v.dims, v.chunks): - if dim in chunks and c != chunks[dim]: - raise ValueError( - f"Object has inconsistent chunks along dimension {dim}. " - "This can be fixed by calling unify_chunks()." - ) - chunks[dim] = c - return Frozen(chunks) + Mapping from dimension names to block lengths for this dataset's data; an empty + mapping if no variable has chunked (e.g. dask) data. + + Cannot be modified directly, but can be modified by calling .chunk().
+ + See Also + -------- + Dataset.chunk + xarray.unify_chunks + """ + return get_chunks(self.variables.values()) def chunk( self, diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 191bb4059f5..a68ce149fd8 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -45,6 +45,7 @@ sparse_array_type, ) from .utils import ( + Frozen, NdimSizeLenMixin, OrderedSet, _default, @@ -997,15 +998,25 @@ def __deepcopy__(self, memo=None): @property def chunks(self): - """Block dimensions for this array's data or None if it's not a dask - array. """ - return getattr(self._data, "chunks", None) + Mapping from dimension names to block lengths for this array's data, or None if + the underlying data is not a dask array. + + Cannot be modified directly, but can be modified by calling .chunk(). + + See Also + -------- + Variable.chunk + """ + if hasattr(self._data, "chunks"): + return Frozen({dim: c for dim, c in zip(self.dims, self.data.chunks)}) + else: + return None _array_counter = itertools.count() def chunk(self, chunks={}, name=None, lock=False): - """Coerce this array's data into a dask arrays with the given chunks. + """Coerce this array's data into a dask array with the given chunks. If this variable is a non-dask array, it will be converted to dask array. If it's a dask array, it will be rechunked to the given chunk