Skip to content

Commit f399a55

Browse files
committed
Add data_vars_only rechunking param; update test
- Add a data_vars_only parameter to chunk_dataset and update_dataset_chunk_encoding.
- Update test_append_time_slice to use this parameter in order to ensure compatibility with xarray 2024.3.0.
Addresses Issue #958.
1 parent 012f760 commit f399a55

File tree

3 files changed

+23
-6
lines changed

3 files changed

+23
-6
lines changed

test/core/test_timeslice.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,18 @@ def write_cube(self, start_date, num_days: int):
3838
cube = self.make_cube(start_date, num_days)
3939
cube.to_zarr(self.CUBE_PATH)
4040

41-
def make_cube(self, start_date, num_days: int) -> xr.Dataset:
41+
@staticmethod
42+
def make_cube(start_date, num_days: int) -> xr.Dataset:
4243
cube = new_cube(
4344
time_periods=num_days,
4445
time_freq="1D",
4546
time_start=start_date,
4647
variables=dict(precipitation=0.1, temperature=270.5, soil_moisture=0.2),
4748
)
4849
chunk_sizes = dict(time=1, lat=90, lon=90)
49-
cube = chunk_dataset(cube, chunk_sizes, format_name="zarr")
50+
cube = chunk_dataset(
51+
cube, chunk_sizes, format_name="zarr", data_vars_only=True
52+
)
5053
return cube
5154

5255
def test_find_time_slice(self):

xcube/core/chunk.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99

1010
def chunk_dataset(
11-
dataset: xr.Dataset, chunk_sizes: Dict[str, int] = None, format_name: str = None
11+
dataset: xr.Dataset,
12+
chunk_sizes: Dict[str, int] = None,
13+
format_name: str = None,
14+
data_vars_only: bool = False,
1215
) -> xr.Dataset:
1316
"""Chunk *dataset* using *chunk_sizes* and optionally
1417
update encodings for given *format_name*.
@@ -17,14 +20,23 @@ def chunk_dataset(
1720
dataset: input dataset
1821
chunk_sizes: mapping from dimension name to new chunk size
1922
format_name: optional format, e.g. "zarr" or "netcdf4"
23+
data_vars_only: only chunk data variables, not coordinates
2024
2125
Returns:
2226
the (re)chunked dataset
2327
"""
24-
dataset = dataset.chunk(chunks=chunk_sizes)
28+
29+
if data_vars_only:
30+
for variable in dataset.data_vars:
31+
dataset[variable] = dataset[variable].chunk(chunk_sizes)
32+
else:
33+
dataset = dataset.chunk(chunks=chunk_sizes)
2534
if format_name:
2635
dataset = update_dataset_chunk_encoding(
27-
dataset, chunk_sizes=chunk_sizes, format_name=format_name
36+
dataset,
37+
chunk_sizes=chunk_sizes,
38+
format_name=format_name,
39+
data_vars_only=data_vars_only,
2840
)
2941
return dataset
3042

xcube/core/update.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ def update_dataset_chunk_encoding(
234234
chunk_sizes: Dict[str, int] = None,
235235
format_name: str = None,
236236
in_place: bool = False,
237+
data_vars_only: bool = False
237238
) -> xr.Dataset:
238239
"""Update each variable's encoding in *dataset* with respect to *chunk_sizes*
239240
so *dataset* is written in chunks for given *format_name*.
@@ -245,6 +246,7 @@ def update_dataset_chunk_encoding(
245246
format_name: format name, e.g. "zarr" or "netcdf4".
246247
in_place: If ``True``, *dataset* will be modified in place and
247248
returned.
249+
data_vars_only: only update the chunk encoding of data variables, not coordinates
248250
"""
249251
if format_name == FORMAT_NAME_ZARR:
250252
chunk_sizes_attr_name = "chunks"
@@ -254,7 +256,7 @@ def update_dataset_chunk_encoding(
254256
return dataset
255257
if not in_place:
256258
dataset = dataset.copy()
257-
for var_name in dataset.variables:
259+
for var_name in dataset.data_vars if data_vars_only else dataset.variables:
258260
var = dataset[var_name]
259261
if chunk_sizes is not None:
260262

0 commit comments

Comments
 (0)