Skip to content

Commit b13a6b3

Browse files
committed
add StoreV3 support to most convenience routines
consolidated metadata functions haven't been updated yet
1 parent 8e3c443 commit b13a6b3

File tree

2 files changed

+290
-91
lines changed

2 files changed

+290
-91
lines changed

zarr/convenience.py

Lines changed: 125 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -7,22 +7,28 @@
77

88
from zarr.core import Array
99
from zarr.creation import array as _create_array
10-
from zarr.creation import normalize_store_arg, open_array
10+
from zarr.creation import open_array
1111
from zarr.errors import CopyError, PathNotFoundError
1212
from zarr.hierarchy import Group
1313
from zarr.hierarchy import group as _create_group
1414
from zarr.hierarchy import open_group
1515
from zarr.meta import json_dumps, json_loads
16-
from zarr.storage import contains_array, contains_group, BaseStore
16+
from zarr.storage import contains_array, contains_group, normalize_store_arg, BaseStore
1717
from zarr.util import TreeViewer, buffer_size, normalize_storage_path
1818

1919
from typing import Union
2020

2121
StoreLike = Union[BaseStore, MutableMapping, str, None]
2222

2323

24+
def _check_and_update_path(store: BaseStore, path):
25+
if getattr(store, '_store_version', 2) > 2 and not path:
26+
raise ValueError("path must be provided for v3 stores")
27+
return normalize_storage_path(path)
28+
29+
2430
# noinspection PyShadowingBuiltins
25-
def open(store: StoreLike = None, mode: str = "a", **kwargs):
31+
def open(store: StoreLike = None, mode: str = "a", *, zarr_version=2, path=None, **kwargs):
2632
"""Convenience function to open a group or array using file-mode-like semantics.
2733
2834
Parameters
@@ -34,6 +40,10 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs):
3440
read/write (must exist); 'a' means read/write (create if doesn't
3541
exist); 'w' means create (overwrite if exists); 'w-' means create
3642
(fail if exists).
43+
zarr_version : {2, 3}
44+
The zarr protocol version to use.
45+
path : str
46+
The path within the store to open.
3747
**kwargs
3848
Additional parameters are passed through to :func:`zarr.creation.open_array` or
3949
:func:`zarr.hierarchy.open_group`.
@@ -75,15 +85,16 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs):
7585
7686
"""
7787

78-
path = kwargs.get('path')
7988
# handle polymorphic store arg
8089
clobber = mode == 'w'
8190
# we pass storage options explicitly, since normalize_store_arg might construct
8291
# a store if the input is a fsspec-compatible URL
8392
_store: BaseStore = normalize_store_arg(
84-
store, clobber=clobber, storage_options=kwargs.pop("storage_options", {})
93+
store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}),
94+
zarr_version=zarr_version,
8595
)
86-
path = normalize_storage_path(path)
96+
path = _check_and_update_path(_store, path)
97+
kwargs['path'] = path
8798

8899
if mode in {'w', 'w-', 'x'}:
89100
if 'shape' in kwargs:
@@ -110,7 +121,7 @@ def _might_close(path):
110121
return isinstance(path, (str, os.PathLike))
111122

112123

113-
def save_array(store: StoreLike, arr, **kwargs):
124+
def save_array(store: StoreLike, arr, *, zarr_version=2, path=None, **kwargs):
114125
"""Convenience function to save a NumPy array to the local file system, following a
115126
similar API to the NumPy save() function.
116127
@@ -120,6 +131,10 @@ def save_array(store: StoreLike, arr, **kwargs):
120131
Store or path to directory in file system or name of zip file.
121132
arr : ndarray
122133
NumPy array with data to save.
134+
zarr_version : {2, 3}
135+
The zarr protocol version to use when saving.
136+
path : str
137+
The path within the store where the array will be saved.
123138
kwargs
124139
Passed through to :func:`create`, e.g., compressor.
125140
@@ -142,16 +157,18 @@ def save_array(store: StoreLike, arr, **kwargs):
142157
143158
"""
144159
may_need_closing = _might_close(store)
145-
_store: BaseStore = normalize_store_arg(store, clobber=True)
160+
_store: BaseStore = normalize_store_arg(store, clobber=True, zarr_version=zarr_version)
161+
path = _check_and_update_path(_store, path)
146162
try:
147-
_create_array(arr, store=_store, overwrite=True, **kwargs)
163+
_create_array(arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path,
164+
**kwargs)
148165
finally:
149166
if may_need_closing:
150167
# needed to ensure zip file records are written
151168
_store.close()
152169

153170

154-
def save_group(store: StoreLike, *args, **kwargs):
171+
def save_group(store: StoreLike, *args, zarr_version=2, path=None, **kwargs):
155172
"""Convenience function to save several NumPy arrays to the local file system, following a
156173
similar API to the NumPy savez()/savez_compressed() functions.
157174
@@ -161,6 +178,10 @@ def save_group(store: StoreLike, *args, **kwargs):
161178
Store or path to directory in file system or name of zip file.
162179
args : ndarray
163180
NumPy arrays with data to save.
181+
zarr_version : {2, 3}
182+
The zarr protocol version to use when saving.
183+
path : str
184+
Path within the store where the group will be saved.
164185
kwargs
165186
NumPy arrays with data to save.
166187
@@ -213,21 +234,22 @@ def save_group(store: StoreLike, *args, **kwargs):
213234
raise ValueError('at least one array must be provided')
214235
# handle polymorphic store arg
215236
may_need_closing = _might_close(store)
216-
_store: BaseStore = normalize_store_arg(store, clobber=True)
237+
_store: BaseStore = normalize_store_arg(store, clobber=True, zarr_version=zarr_version)
238+
path = _check_and_update_path(_store, path)
217239
try:
218-
grp = _create_group(_store, overwrite=True)
240+
grp = _create_group(_store, path=path, overwrite=True, zarr_version=zarr_version)
219241
for i, arr in enumerate(args):
220242
k = 'arr_{}'.format(i)
221-
grp.create_dataset(k, data=arr, overwrite=True)
243+
grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version)
222244
for k, arr in kwargs.items():
223-
grp.create_dataset(k, data=arr, overwrite=True)
245+
grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version)
224246
finally:
225247
if may_need_closing:
226248
# needed to ensure zip file records are written
227249
_store.close()
228250

229251

230-
def save(store: StoreLike, *args, **kwargs):
252+
def save(store: StoreLike, *args, zarr_version=2, path=None, **kwargs):
231253
"""Convenience function to save an array or group of arrays to the local file system.
232254
233255
Parameters
@@ -236,6 +258,10 @@ def save(store: StoreLike, *args, **kwargs):
236258
Store or path to directory in file system or name of zip file.
237259
args : ndarray
238260
NumPy arrays with data to save.
261+
zarr_version : {2, 3}
262+
The zarr protocol version to use when saving.
263+
path : str
264+
The path within the group where the arrays will be saved.
239265
kwargs
240266
NumPy arrays with data to save.
241267
@@ -302,9 +328,10 @@ def save(store: StoreLike, *args, **kwargs):
302328
if len(args) == 0 and len(kwargs) == 0:
303329
raise ValueError('at least one array must be provided')
304330
if len(args) == 1 and len(kwargs) == 0:
305-
save_array(store, args[0])
331+
save_array(store, args[0], zarr_version=zarr_version, path=path)
306332
else:
307-
save_group(store, *args, **kwargs)
333+
save_group(store, *args, zarr_version=zarr_version, path=path,
334+
**kwargs)
308335

309336

310337
class LazyLoader(Mapping):
@@ -337,7 +364,7 @@ def __repr__(self):
337364
return r
338365

339366

340-
def load(store: StoreLike):
367+
def load(store: StoreLike, zarr_version=2, path=None):
341368
"""Load data from an array or group into memory.
342369
343370
Parameters
@@ -363,11 +390,12 @@ def load(store: StoreLike):
363390
364391
"""
365392
# handle polymorphic store arg
366-
_store = normalize_store_arg(store)
367-
if contains_array(_store, path=None):
368-
return Array(store=_store, path=None)[...]
369-
elif contains_group(_store, path=None):
370-
grp = Group(store=_store, path=None)
393+
_store = normalize_store_arg(store, zarr_version=zarr_version)
394+
path = _check_and_update_path(_store, path)
395+
if contains_array(_store, path=path):
396+
return Array(store=_store, path=path)[...]
397+
elif contains_group(_store, path=path):
398+
grp = Group(store=_store, path=path)
371399
return LazyLoader(grp)
372400

373401

@@ -601,59 +629,79 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
601629
# setup counting variables
602630
n_copied = n_skipped = n_bytes_copied = 0
603631

632+
source_store_version = getattr(source, '_store_version', 2)
633+
dest_store_version = getattr(dest, '_store_version', 2)
634+
if source_store_version != dest_store_version:
635+
raise ValueError("zarr stores must share the same protocol version")
636+
if source_store_version > 2:
637+
if not source_path or not dest_path:
638+
raise ValueError("v3 stores require specifying a non-empty "
639+
"source_path and dest_path")
640+
604641
# setup logging
605642
with _LogWriter(log) as log:
606643

607644
# iterate over source keys
608645
for source_key in sorted(source.keys()):
609646

610647
# filter to keys under source path
611-
if source_key.startswith(source_path):
648+
if source_store_version == 2:
649+
if not source_key.startswith(source_path):
650+
continue
651+
elif source_store_version == 3:
652+
# 'meta/root/' or 'data/root/' have length 10
653+
if not source_key[10:].startswith(source_path):
654+
continue
612655

613-
# process excludes and includes
614-
exclude = False
615-
for prog in excludes:
656+
# process excludes and includes
657+
exclude = False
658+
for prog in excludes:
659+
if prog.search(source_key):
660+
exclude = True
661+
break
662+
if exclude:
663+
for prog in includes:
616664
if prog.search(source_key):
617-
exclude = True
665+
exclude = False
618666
break
619-
if exclude:
620-
for prog in includes:
621-
if prog.search(source_key):
622-
exclude = False
623-
break
624-
if exclude:
625-
continue
667+
if exclude:
668+
continue
626669

627-
# map key to destination path
670+
# map key to destination path
671+
if source_store_version == 2:
628672
key_suffix = source_key[len(source_path):]
629673
dest_key = dest_path + key_suffix
630-
631-
# create a descriptive label for this operation
632-
descr = source_key
633-
if dest_key != source_key:
634-
descr = descr + ' -> ' + dest_key
635-
636-
# decide what to do
637-
do_copy = True
638-
if if_exists != 'replace':
639-
if dest_key in dest:
640-
if if_exists == 'raise':
641-
raise CopyError('key {!r} exists in destination'
642-
.format(dest_key))
643-
elif if_exists == 'skip':
644-
do_copy = False
645-
646-
# take action
647-
if do_copy:
648-
log('copy {}'.format(descr))
649-
if not dry_run:
650-
data = source[source_key]
651-
n_bytes_copied += buffer_size(data)
652-
dest[dest_key] = data
653-
n_copied += 1
654-
else:
655-
log('skip {}'.format(descr))
656-
n_skipped += 1
674+
elif source_store_version == 3:
675+
# 10 is length of 'meta/root/' or 'data/root/'
676+
key_suffix = source_key[10 + len(source_path):]
677+
dest_key = source_key[:10] + dest_path + key_suffix
678+
679+
# create a descriptive label for this operation
680+
descr = source_key
681+
if dest_key != source_key:
682+
descr = descr + ' -> ' + dest_key
683+
684+
# decide what to do
685+
do_copy = True
686+
if if_exists != 'replace':
687+
if dest_key in dest:
688+
if if_exists == 'raise':
689+
raise CopyError('key {!r} exists in destination'
690+
.format(dest_key))
691+
elif if_exists == 'skip':
692+
do_copy = False
693+
694+
# take action
695+
if do_copy:
696+
log('copy {}'.format(descr))
697+
if not dry_run:
698+
data = source[source_key]
699+
n_bytes_copied += buffer_size(data)
700+
dest[dest_key] = data
701+
n_copied += 1
702+
else:
703+
log('skip {}'.format(descr))
704+
n_skipped += 1
657705

658706
# log a final message with a summary of what happened
659707
_log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied)
@@ -908,7 +956,15 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists,
908956

909957
# copy attributes
910958
if not without_attrs:
911-
ds.attrs.update(source.attrs)
959+
if dest_h5py and 'filters' in source.attrs:
960+
# No filters key in v3 metadata so it was stored in the
961+
# attributes instead. We cannot copy this key to
962+
# HDF5 attrs, though!
963+
source_attrs = source.attrs.asdict().copy()
964+
source_attrs.pop('filters', None)
965+
else:
966+
source_attrs = source.attrs
967+
ds.attrs.update(source_attrs)
912968

913969
n_copied += 1
914970

@@ -1064,6 +1120,8 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None,
10641120
# setup counting variables
10651121
n_copied = n_skipped = n_bytes_copied = 0
10661122

1123+
zarr_version = getattr(source, '_version', 2)
1124+
10671125
# setup logging
10681126
with _LogWriter(log) as log:
10691127

@@ -1075,15 +1133,16 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None,
10751133
n_copied += c
10761134
n_skipped += s
10771135
n_bytes_copied += b
1078-
dest.attrs.update(**source.attrs)
1136+
if zarr_version == 2:
1137+
dest.attrs.update(**source.attrs)
10791138

10801139
# log a final message with a summary of what happened
10811140
_log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied)
10821141

10831142
return n_copied, n_skipped, n_bytes_copied
10841143

10851144

1086-
def consolidate_metadata(store: StoreLike, metadata_key=".zmetadata"):
1145+
def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata"):
10871146
"""
10881147
Consolidate all metadata for groups and arrays within the given store
10891148
into a single resource and put it under the given key.

0 commit comments

Comments (0)