Skip to content

Zarr v3: support root path #1085

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 8, 2022
10 changes: 5 additions & 5 deletions zarr/_storage/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,23 +420,23 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str

def _prefix_to_array_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
sfx = _get_metadata_suffix(store) # type: ignore
if prefix:
sfx = _get_metadata_suffix(store) # type: ignore
key = meta_root + prefix.rstrip("/") + ".array" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 array key")
key = meta_root[:-1] + '.array' + sfx
else:
key = prefix + array_meta_key
return key


def _prefix_to_group_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
sfx = _get_metadata_suffix(store) # type: ignore
if prefix:
sfx = _get_metadata_suffix(store) # type: ignore
key = meta_root + prefix.rstrip('/') + ".group" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 group key")
key = meta_root[:-1] + '.group' + sfx
else:
key = prefix + group_meta_key
return key
Expand All @@ -449,7 +449,7 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str:
if prefix:
key = meta_root + prefix.rstrip('/') + ".array" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 array key")
key = meta_root[:-1] + '.array' + sfx
else:
key = prefix + attrs_key
return key
2 changes: 1 addition & 1 deletion zarr/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
dimension_separator = normalize_dimension_separator(dimension_separator)

if zarr_version > 2 and path is None:
raise ValueError("path must be supplied to initialize a zarr v3 array")
path = '/'

# initialize array metadata
init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
Expand Down
6 changes: 0 additions & 6 deletions zarr/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1247,8 +1247,6 @@ def group(store=None, overwrite=False, chunk_store=None,
if zarr_version != 2:
assert_zarr_v3_api_available()

if zarr_version == 3 and path is None:
raise ValueError(f"path must be provided for a v{zarr_version} group")
path = normalize_storage_path(path)

if zarr_version == 2:
Expand Down Expand Up @@ -1332,10 +1330,6 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N
"zarr_version of store and chunk_store must match"
)

store_version = getattr(store, '_store_version', 2)
if store_version == 3 and path is None:
raise ValueError("path must be supplied to initialize a zarr v3 group")

path = normalize_storage_path(path)

# ensure store is initialized
Expand Down
10 changes: 8 additions & 2 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,14 @@ def _getsize(store: BaseStore, path: Path = None) -> int:
size = 0
store_version = getattr(store, '_store_version', 2)
if store_version == 3:
members = store.list_prefix(data_root + path) # type: ignore
members += store.list_prefix(meta_root + path) # type: ignore
if path == '':
# have to list the root folders without trailing / in this case
members = store.list_prefix(data_root.rstrip('/')) # type: ignore
members += store.list_prefix(meta_root.rstrip('/')) # type: ignore
else:
members = store.list_prefix(data_root + path) # type: ignore
members += store.list_prefix(meta_root + path) # type: ignore
# also include zarr.json?
# members += ['zarr.json']
else:
members = listdir(store, path)
Expand Down
10 changes: 0 additions & 10 deletions zarr/tests/test_convenience.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,6 @@ def test_open_array(path_type, zarr_version):
assert isinstance(z, Array)
assert z.shape == (200,)

if zarr_version == 3:
# cannot open a v3 array without path
with pytest.raises(ValueError):
open(store, mode='w', shape=200, zarr_version=3)

# open array, read-only
z = open(store, mode='r', **kwargs)
assert isinstance(z, Array)
Expand Down Expand Up @@ -108,11 +103,6 @@ def test_open_group(path_type, zarr_version):
assert isinstance(g, Group)
assert 'foo' not in g

if zarr_version == 3:
# cannot open a v3 group without path
with pytest.raises(ValueError):
open(store, mode='w', zarr_version=3)

# open group, read-only
g = open(store, mode='r', **kwargs)
assert isinstance(g, Group)
Expand Down
117 changes: 64 additions & 53 deletions zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@

from zarr._storage.store import (
v3_api_available,
_prefix_to_array_key,
_prefix_to_attrs_key,
_prefix_to_group_key
)
from zarr.core import Array
from zarr.errors import ArrayNotFoundError, ContainsGroupError
Expand Down Expand Up @@ -64,56 +61,64 @@
class TestArray(unittest.TestCase):

version = 2
root = ''
KVStoreClass = KVStore

def test_array_init(self):

# normal initialization
store = KVStore(dict())
store = self.KVStoreClass(dict())
init_array(store, shape=100, chunks=10, dtype="<f8")
a = Array(store)
a = Array(store, zarr_version=self.version)
assert isinstance(a, Array)
assert (100,) == a.shape
assert (10,) == a.chunks
assert '' == a.path
assert a.name is None
assert a.basename is None
assert store is a.store
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
if self.version == 2:
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
else:
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest()
store.close()

# initialize at path
store = KVStore(dict())
store = self.KVStoreClass(dict())
init_array(store, shape=100, chunks=10, path='foo/bar', dtype='<f8')
a = Array(store, path='foo/bar')
a = Array(store, path='foo/bar', zarr_version=self.version)
assert isinstance(a, Array)
assert (100,) == a.shape
assert (10,) == a.chunks
assert 'foo/bar' == a.path
assert '/foo/bar' == a.name
assert 'bar' == a.basename
assert store is a.store
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()

if self.version == 2:
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
else:
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest()
# store not initialized
store = KVStore(dict())
store = self.KVStoreClass(dict())
with pytest.raises(ValueError):
Array(store)
Array(store, zarr_version=self.version)

# group is in the way
store = KVStore(dict())
store = self.KVStoreClass(dict())
init_group(store, path='baz')
with pytest.raises(ValueError):
Array(store, path='baz')
Array(store, path='baz', zarr_version=self.version)

def create_array(self, read_only=False, **kwargs):
store = KVStore(dict())
store = self.KVStoreClass(dict())
kwargs.setdefault('compressor', Zlib(level=1))
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
write_empty_chunks = kwargs.pop('write_empty_chunks', True)
init_array(store, **kwargs)
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks)
cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks,
zarr_version=self.version)

def test_store_has_text_keys(self):
# Initialize array
Expand Down Expand Up @@ -162,15 +167,28 @@ def test_nbytes_stored(self):

# dict as store
z = self.create_array(shape=1000, chunks=100)
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
if self.version == 3:
expect_nbytes_stored = sum(
buffer_size(v) for k, v in z.store.items() if k != 'zarr.json'
)
else:
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
assert expect_nbytes_stored == z.nbytes_stored
z[:] = 42
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
if self.version == 3:
expect_nbytes_stored = sum(
buffer_size(v) for k, v in z.store.items() if k != 'zarr.json'
)
else:
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
assert expect_nbytes_stored == z.nbytes_stored

# mess with store
try:
z.store[z._key_prefix + 'foo'] = list(range(10))
if self.version == 2:
z.store[z._key_prefix + 'foo'] = list(range(10))
else:
z.store['meta/root/foo'] = list(range(10))
assert -1 == z.nbytes_stored
except TypeError:
pass
Expand Down Expand Up @@ -1003,7 +1021,7 @@ def test_nchunks_initialized(self):

assert 0 == z.nchunks_initialized
# manually put something into the store to confuse matters
z.store['foo'] = b'bar'
z.store[self.root + 'foo'] = b'bar'
assert 0 == z.nchunks_initialized
z[:] = 42
assert 10 == z.nchunks_initialized
Expand Down Expand Up @@ -2703,36 +2721,25 @@ def test_read_from_all_blocks(self):
# StoreV3 test classes inheriting from the above below this point
####

# Start with TestArrayWithPathV3 not TestArrayV3 since path must be supplied

@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
class TestArrayV3(unittest.TestCase):
class TestArrayV3(TestArray):

version = 3
root = meta_root
KVStoreClass = KVStoreV3

def test_array_init(self):

# normal initialization
store = KVStoreV3(dict())
with pytest.raises(ValueError):
# cannot init_array for v3 without a path
init_array(store, shape=100, chunks=10, dtype="<f8")

init_array(store, path='x', shape=100, chunks=10, dtype="<f8")
with pytest.raises(ValueError):
# cannot initialize a v3 array without a path
Array(store)

def test_prefix_exceptions(self):
store = KVStoreV3(dict())
with pytest.raises(ValueError):
_prefix_to_array_key(store, '')

with pytest.raises(ValueError):
_prefix_to_group_key(store, '')
def expected(self):
# tests for array without path will not be run for v3 stores
assert self.version == 3
return [
"73ab8ace56719a5c9308c3754f5e2d57bc73dc20",
"5fb3d02b8f01244721582929b3cad578aec5cea5",
"26b098bedb640846e18dc2fbc1c27684bb02b532",
"799a458c287d431d747bec0728987ca4fe764549",
"c780221df84eb91cb62f633f12d3f1eaa9cee6bd"
]

with pytest.raises(ValueError):
_prefix_to_attrs_key(store, '')
# TODO: fix test_nbytes_stored


@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
Expand All @@ -2754,10 +2761,19 @@ def create_array(array_path='arr1', read_only=False, **kwargs):

def test_array_init(self):

# should not be able to initialize without a path in V3
store = KVStoreV3(dict())
with pytest.raises(ValueError):
init_array(store, shape=100, chunks=10, dtype="<f8")
# can initialize an array without a path
init_array(store, shape=100, chunks=10, dtype="<f8")
b = Array(store)
assert not b.is_view
assert isinstance(b, Array)
assert (100,) == b.shape
assert (10,) == b.chunks
assert '' == b.path
assert b.name is None
assert b.basename is None
assert store is b.store
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == b.hexdigest()

# initialize at path
store = KVStoreV3(dict())
Expand Down Expand Up @@ -2797,11 +2813,6 @@ def test_array_init(self):
assert group_key not in store
assert (meta_root + path + '.array.json') in store

def test_array_no_path(self):
# passing path=None to init_array will raise an exception
with pytest.raises(ValueError):
self.create_array(shape=1000, chunks=100, array_path=None)

def expected(self):
return [
"73ab8ace56719a5c9308c3754f5e2d57bc73dc20",
Expand Down
Loading