diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2ffbc60622d..7a7117734e7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -136,6 +136,10 @@ Bug fixes the dates must be encoded using cftime rather than NumPy (:issue:`2272`). By `Spencer Clark `_. +- Chunked datasets can now roundtrip to Zarr storage continually + with ``to_zarr`` and ``open_zarr`` (:issue:`2300`). + By `Lily Wang `_. + .. _whats-new.0.10.9: v0.10.9 (21 September 2018) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5f19c826289..06fe7f04e4f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -79,14 +79,14 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if var_chunks and enc_chunks is None: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes excpet for final chunk." - " Variable %r has incompatible chunks. Consider " + "Zarr requires uniform chunk sizes except for final chunk." + " Variable dask chunks %r are incompatible. Consider " "rechunking using `chunk()`." % (var_chunks,)) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( - "Final chunk of Zarr array must be smaller than first. " - "Variable %r has incompatible chunks. Consider rechunking " - "using `chunk()`." % var_chunks) + "Final chunk of Zarr array must be the same size or smaller " + "than the first. Variable Dask chunks %r are incompatible. " + "Consider rechunking using `chunk()`."
% var_chunks) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) @@ -126,7 +126,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): # threads if var_chunks and enc_chunks_tuple: for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks): - for dchunk in dchunks: + for dchunk in dchunks[:-1]: if dchunk % zchunk: raise NotImplementedError( "Specified zarr chunks %r would overlap multiple dask " @@ -134,6 +134,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): " Consider rechunking the data using " "`chunk()` or specifying different chunks in encoding." % (enc_chunks_tuple, var_chunks)) + if dchunks[-1] > zchunk: + raise ValueError( + "Final chunk of Zarr array must be the same size or " + "smaller than the first. The specified Zarr chunk " + "encoding is %r, but %r in variable Dask chunks %r is " + "incompatible. Consider rechunking using `chunk()`." + % (enc_chunks_tuple, dchunks, var_chunks)) return enc_chunks_tuple raise AssertionError( diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c6a2df733fa..8963ed4edd3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1389,6 +1389,10 @@ def test_chunk_encoding_with_dask(self): ds_chunk_irreg = ds.chunk({'x': (5, 5, 2)}) with self.roundtrip(ds_chunk_irreg) as actual: assert (5,) == actual['var1'].encoding['chunks'] + # re-save Zarr arrays + with self.roundtrip(ds_chunk_irreg) as original: + with self.roundtrip(original) as actual: + assert_identical(original, actual) # - encoding specified - # specify compatible encodings