Open
Description
See upstream: zarr-developers/zarr-python#551
It seems that using a ZipStore creates 1 byte objects for Unicode string attributes.
For example, saving the same Dataset with a DirectoryStore and with a ZipStore creates an attribute for a Unicode array that is 20 bytes in size in the former and 1 byte in size in the latter.
In fact, Ubuntu's File Roller won't even let me extract the files.
I have a feeling it is due to this note in the zarr documentation:
Note that Zip files do not provide any way to remove or replace existing entries.
https://zarr.readthedocs.io/en/stable/api/storage.html#zarr.storage.ZipStore
MCVE Code Sample
ZipStore
import xarray as xr
import zarr
x = xr.Dataset()
x['hello'] = 'world'
x
with zarr.ZipStore('test_store.zip', mode='w') as store:
x.to_zarr(store)
with zarr.ZipStore('test_store.zip', mode='r') as store:
x_read = xr.open_zarr(store).compute()
Issued error
---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
<ipython-input-1-2a92a6db56ab> in <module>
7 x.to_zarr(store)
8 with zarr.ZipStore('test_store.zip', mode='r') as store:
----> 9 x_read = xr.open_zarr(store).compute()
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/dataset.py in compute(self, **kwargs)
803 """
804 new = self.copy(deep=False)
--> 805 return new.load(**kwargs)
806
807 def _persist_inplace(self, **kwargs) -> "Dataset":
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/dataset.py in load(self, **kwargs)
655 for k, v in self.variables.items():
656 if k not in lazy_data:
--> 657 v.load()
658
659 return self
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/variable.py in load(self, **kwargs)
370 self._data = as_compatible_data(self._data.compute(**kwargs))
371 elif not hasattr(self._data, "__array_function__"):
--> 372 self._data = np.asarray(self._data)
373 return self
374
~/miniconda3/envs/dev/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
545 def __array__(self, dtype=None):
546 array = as_indexable(self.array)
--> 547 return np.asarray(array[self.key], dtype=None)
548
549 def transpose(self, order):
~/miniconda3/envs/dev/lib/python3.7/site-packages/xarray/backends/zarr.py in __getitem__(self, key)
46 array = self.get_array()
47 if isinstance(key, indexing.BasicIndexer):
---> 48 return array[key.tuple]
49 elif isinstance(key, indexing.VectorizedIndexer):
50 return array.vindex[
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/core.py in __getitem__(self, selection)
570
571 fields, selection = pop_fields(selection)
--> 572 return self.get_basic_selection(selection, fields=fields)
573
574 def get_basic_selection(self, selection=Ellipsis, out=None, fields=None):
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/core.py in get_basic_selection(self, selection, out, fields)
693 if self._shape == ():
694 return self._get_basic_selection_zd(selection=selection, out=out,
--> 695 fields=fields)
696 else:
697 return self._get_basic_selection_nd(selection=selection, out=out,
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/core.py in _get_basic_selection_zd(self, selection, out, fields)
709 # obtain encoded data for chunk
710 ckey = self._chunk_key((0,))
--> 711 cdata = self.chunk_store[ckey]
712
713 except KeyError:
~/miniconda3/envs/dev/lib/python3.7/site-packages/zarr/storage.py in __getitem__(self, key)
1249 with self.mutex:
1250 with self.zf.open(key) as f: # will raise KeyError
-> 1251 return f.read()
1252
1253 def __setitem__(self, key, value):
~/miniconda3/envs/dev/lib/python3.7/zipfile.py in read(self, n)
914 self._offset = 0
915 while not self._eof:
--> 916 buf += self._read1(self.MAX_N)
917 return buf
918
~/miniconda3/envs/dev/lib/python3.7/zipfile.py in _read1(self, n)
1018 if self._left <= 0:
1019 self._eof = True
-> 1020 self._update_crc(data)
1021 return data
1022
~/miniconda3/envs/dev/lib/python3.7/zipfile.py in _update_crc(self, newdata)
946 # Check the CRC if we're at the end of the file
947 if self._eof and self._running_crc != self._expected_crc:
--> 948 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
949
950 def read1(self, n):
BadZipFile: Bad CRC-32 for file 'hello/0'
0
2
Untitled10.ipynb
Working Directory Store example
import xarray as xr
import zarr
x = xr.Dataset()
x['hello'] = 'world'
x
store = zarr.DirectoryStore('test_store2.zarr')
x.to_zarr(store)
x_read = xr.open_zarr(store)
x_read.compute()
assert x_read.hello == x.hello
Expected Output
The string metadata should work.
Output of xr.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 3.7.6 | packaged by conda-forge | (default, Jan 7 2020, 22:33:48)
[GCC 7.3.0]
python-bits: 64
OS: Linux
OS-release: 5.3.0-40-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_CA.UTF-8
LOCALE: en_CA.UTF-8
libhdf5: None
libnetcdf: None
xarray: 0.14.1
pandas: 1.0.0
numpy: 1.17.5
scipy: 1.4.1
netCDF4: None
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: 2.4.0
cftime: None
nc_time_axis: None
PseudoNetCDF: None
rasterio: None
cfgrib: None
iris: None
bottleneck: None
dask: 2.10.1
distributed: 2.10.0
matplotlib: 3.1.3
cartopy: None
seaborn: None
numbagg: None
setuptools: 45.1.0.post20200119
pip: 20.0.2
conda: None
pytest: 5.3.1
IPython: 7.12.0
sphinx: 2.3.1