Skip to content

Add StoreV3 support to creation.py (zarr v3 support part 5) #897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
64bbbd8
add v3 store classes
grlee77 Nov 16, 2021
85b8e23
add TODO comment to meta.py
grlee77 Nov 17, 2021
5fee6fb
Merge remote-tracking branch 'upstream/master' into v3-store-part1
grlee77 Nov 17, 2021
983d190
fix flake8 errors
grlee77 Nov 30, 2021
4057088
Merge remote-tracking branch 'upstream/master' into v3-store-part1
grlee77 Nov 30, 2021
9ed6181
follow zarr v3 spec when dealing with extension data types
grlee77 Dec 15, 2021
662e310
fixes to v3 dtype handling
grlee77 Dec 15, 2021
9950a4d
Merge remote-tracking branch 'upstream/master' into v3-store-part1
grlee77 Dec 15, 2021
450c575
flake8 cleanup
grlee77 Dec 15, 2021
63a9e3c
remove duplicate lines in Metadata2.encode_array_metadata
grlee77 Dec 16, 2021
1a2a1ef
Fix fields in array metadata
grlee77 Dec 17, 2021
bcb2d5f
Fix encode/decode of codec metadata
grlee77 Dec 17, 2021
9bfeacb
add missing level to Zlib in _decode_codec_metadata
grlee77 Dec 17, 2021
23aca42
add extensions entry to v3 array metadata
grlee77 Dec 17, 2021
e47035d
dimension_separator should not be in the array metadata for v3
grlee77 Dec 17, 2021
7549d5b
update Attributes, adding StoreV3 support
grlee77 Nov 30, 2021
765fe7d
add StoreV3 support to core Array object
grlee77 Nov 30, 2021
5efc786
update hexdigests
grlee77 Dec 17, 2021
6a308c8
handle additional codecs that were not implemented in zarrita
grlee77 Dec 17, 2021
f7883fc
fix
grlee77 Dec 17, 2021
5040119
fix hexdigests
grlee77 Dec 17, 2021
7c3736d
fix indentation
grlee77 Dec 17, 2021
d07caed
add StoreV3 support to Group, open_group, etc.
grlee77 Dec 1, 2021
00b2cc0
add StoreV3 support to creation routines
grlee77 Dec 1, 2021
504913a
Handle dimension_separator appropriately in open_array
grlee77 Dec 16, 2021
7142974
TST: add tests for open_array and dimension_separator
grlee77 Dec 16, 2021
e4ce79f
only allow Codec not a simple str as compressor during array initiali…
grlee77 Dec 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 242 additions & 8 deletions zarr/_storage/store.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import sys
from collections.abc import MutableMapping
from string import ascii_letters, digits
from typing import Any, List, Optional, Union

from zarr.meta import Metadata2
from zarr.meta import Metadata2, Metadata3, _default_entry_point_metadata_v3
from zarr.util import normalize_storage_path

# v2 store keys
Expand Down Expand Up @@ -131,6 +133,169 @@ def rmdir(self, path: str = "") -> None:
_rmdir_from_keys(self, path)


class StoreV3(BaseStore):
_store_version = 3
_metadata_class = Metadata3

@staticmethod
def _valid_key(key: str) -> bool:
"""
Verify that a key conforms to the specification.

A key is any string containing only character in the range a-z, A-Z,
0-9, or in the set /.-_ it will return True if that's the case, False
otherwise.

In addition, in spec v3, keys can only start with the prefix meta/,
data/ or be exactly zarr.json and should not end with /. This should
not be exposed to the user, and is a store implementation detail, so
this method will raise a ValueError in that case.
"""
if sys.version_info > (3, 7):
if not key.isascii():
return False
if set(key) - set(ascii_letters + digits + "/.-_"):
return False

if (
not key.startswith("data/")
and (not key.startswith("meta/"))
and (not key == "zarr.json")
):
raise ValueError("keys starts with unexpected value: `{}`".format(key))

if key.endswith('/'):
raise ValueError("keys may not end in /")

return True

def list_prefix(self, prefix):
if prefix.startswith('/'):
raise ValueError("prefix must not begin with /")
# TODO: force prefix to end with /?
return [k for k in self.list() if k.startswith(prefix)]

def erase(self, key):
self.__delitem__(key)

def erase_prefix(self, prefix):
assert prefix.endswith("/")

if prefix == "/":
all_keys = self.list()
else:
all_keys = self.list_prefix(prefix)
for key in all_keys:
self.erase(key)

def list_dir(self, prefix):
"""
Note: carefully test this with trailing/leading slashes
"""
if prefix: # allow prefix = "" ?
assert prefix.endswith("/")

all_keys = self.list_prefix(prefix)
len_prefix = len(prefix)
keys = []
prefixes = []
for k in all_keys:
trail = k[len_prefix:]
if "/" not in trail:
keys.append(prefix + trail)
else:
prefixes.append(prefix + trail.split("/", maxsplit=1)[0] + "/")
return keys, list(set(prefixes))

def list(self):
if hasattr(self, 'keys'):
return list(self.keys())
raise NotImplementedError(
"The list method has not been implemented for this store type."
)

# TODO: Remove listdir? This method is just to match the current V2 stores
# The v3 spec mentions: list, list_dir, list_prefix
def listdir(self, path: str = ""):
if path and not path.endswith("/"):
path = path + "/"
keys, prefixes = self.list_dir(path)
prefixes = [p[len(path):].rstrip("/") for p in prefixes]
keys = [k[len(path):] for k in keys]
return keys + prefixes

# TODO: rmdir here is identical to the rmdir on Store so could potentially
# move to BaseStore instead.
def rmdir(self, path: str = "") -> None:
if not self.is_erasable():
raise NotImplementedError(
f'{type(self)} is not erasable, cannot call "rmdir"'
) # pragma: no cover
path = normalize_storage_path(path)
_rmdir_from_keys(self, path)

def __contains__(self, key):
# TODO: re-enable this check?
# if not key.startswith(("meta/", "data/")):
# raise ValueError(
# f'Key must start with either "meta/" or "data/". '
# f'Got {key}'
# )
return key in self.list()

def clear(self):
"""Remove all items from store."""
self.erase_prefix("/")

def __eq__(self, other):
from zarr.storage import KVStoreV3 # avoid circular import
if isinstance(other, KVStoreV3):
return self._mutable_mapping == other._mutable_mapping
else:
return NotImplemented

@staticmethod
def _ensure_store(store):
"""
We want to make sure internally that zarr stores are always a class
with a specific interface derived from ``Store``, which is slightly
different than ``MutableMapping``.

We'll do this conversion in a few places automatically
"""
from zarr.storage import KVStoreV3 # avoid circular import
if store is None:
return None
elif isinstance(store, StoreV3):
return store
elif isinstance(store, MutableMapping):
return KVStoreV3(store)
else:
for attr in [
"keys",
"values",
"get",
"__setitem__",
"__getitem__",
"__delitem__",
"__contains__",
]:
if not hasattr(store, attr):
break
else:
return KVStoreV3(store)

raise ValueError(
"v3 stores must be subclasses of StoreV3, "
"if your store exposes the MutableMapping interface wrap it in "
f"Zarr.storage.KVStoreV3. Got {store}"
)


# allow MutableMapping for backwards compatibility
StoreLike = Union[BaseStore, MutableMapping]


def _path_to_prefix(path: Optional[str]) -> str:
# assume path already normalized
if path:
Expand All @@ -140,17 +305,49 @@ def _path_to_prefix(path: Optional[str]) -> str:
return prefix


# TODO: Should this return default metadata or raise an Error if zarr.json
# is absent?
def _get_hierarchy_metadata(store=None):
meta = _default_entry_point_metadata_v3
if store is not None:
version = getattr(store, '_store_version', 2)
if version < 3:
raise ValueError("zarr.json hierarchy metadata not stored for "
f"zarr v{version} stores")
if 'zarr.json' in store:
meta = store._metadata_class.decode_hierarchy_metadata(store['zarr.json'])
return meta


def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None:
# assume path already normalized
src_prefix = _path_to_prefix(src_path)
dst_prefix = _path_to_prefix(dst_path)
for key in list(store.keys()):
if key.startswith(src_prefix):
new_key = dst_prefix + key.lstrip(src_prefix)
store[new_key] = store.pop(key)


def _rmdir_from_keys(store: Union[BaseStore, MutableMapping], path: Optional[str] = None) -> None:
version = getattr(store, '_store_version', 2)
if version == 2:
root_prefixes = ['']
elif version == 3:
root_prefixes = ['meta/root/', 'data/root/']
for root_prefix in root_prefixes:
_src_prefix = root_prefix + src_prefix
_dst_prefix = root_prefix + dst_prefix
for key in list(store.keys()):
if key.startswith(_src_prefix):
new_key = _dst_prefix + key.lstrip(_src_prefix)
store[new_key] = store.pop(key)
if version == 3:
sfx = _get_hierarchy_metadata(store)['metadata_key_suffix']
_src_array_json = 'meta/root/' + src_prefix[:-1] + '.array' + sfx
if _src_array_json in store:
new_key = 'meta/root/' + dst_prefix[:-1] + '.array' + sfx
store[new_key] = store.pop(_src_array_json)
_src_group_json = 'meta/root/' + src_prefix[:-1] + '.group' + sfx
if _src_group_json in store:
new_key = 'meta/root/' + dst_prefix[:-1] + '.group' + sfx
store[new_key] = store.pop(_src_group_json)


def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None:
# assume path already normalized
prefix = _path_to_prefix(path)
for key in list(store.keys()):
Expand All @@ -168,3 +365,40 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str
child = suffix.split('/')[0]
children.add(child)
return sorted(children)


def _prefix_to_array_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
if prefix:
sfx = _get_hierarchy_metadata(store)['metadata_key_suffix']
key = "meta/root/" + prefix.rstrip("/") + ".array" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 array key")
else:
key = prefix + array_meta_key
return key


def _prefix_to_group_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
if prefix:
sfx = _get_hierarchy_metadata(store)['metadata_key_suffix']
key = "meta/root/" + prefix.rstrip('/') + ".group" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 group key")
else:
key = prefix + group_meta_key
return key


def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
# for v3, attributes are stored in the array metadata
sfx = _get_hierarchy_metadata(store)['metadata_key_suffix']
if prefix:
key = "meta/root/" + prefix.rstrip('/') + ".array" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 array key")
else:
key = prefix + attrs_key
return key
Loading