7
7
8
8
from zarr .core import Array
9
9
from zarr .creation import array as _create_array
10
- from zarr .creation import normalize_store_arg , open_array
10
+ from zarr .creation import open_array
11
11
from zarr .errors import CopyError , PathNotFoundError
12
12
from zarr .hierarchy import Group
13
13
from zarr .hierarchy import group as _create_group
14
14
from zarr .hierarchy import open_group
15
15
from zarr .meta import json_dumps , json_loads
16
- from zarr .storage import contains_array , contains_group , BaseStore
16
+ from zarr .storage import contains_array , contains_group , normalize_store_arg , BaseStore
17
17
from zarr .util import TreeViewer , buffer_size , normalize_storage_path
18
18
19
19
from typing import Union
20
20
21
21
StoreLike = Union [BaseStore , MutableMapping , str , None ]
22
22
23
23
24
def _check_and_update_path(store: BaseStore, path):
    """Validate ``path`` against the store's version and return it normalized.

    Parameters
    ----------
    store : BaseStore
        Store whose ``_store_version`` attribute is inspected. A store
        without that attribute is treated as version 2.
    path : str or None
        Path within the store.

    Returns
    -------
    The result of ``normalize_storage_path(path)``.

    Raises
    ------
    ValueError
        If the store version is greater than 2 and ``path`` is falsy.

    """
    store_version = getattr(store, '_store_version', 2)
    # The emptiness check is made on the raw value, before normalization.
    path_missing = not path
    if store_version > 2 and path_missing:
        raise ValueError("path must be provided for v3 stores")
    return normalize_storage_path(path)
28
+
29
+
24
30
# noinspection PyShadowingBuiltins
25
- def open (store : StoreLike = None , mode : str = "a" , ** kwargs ):
31
+ def open (store : StoreLike = None , mode : str = "a" , * , zarr_version = 2 , path = None , * *kwargs ):
26
32
"""Convenience function to open a group or array using file-mode-like semantics.
27
33
28
34
Parameters
@@ -34,6 +40,10 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs):
34
40
read/write (must exist); 'a' means read/write (create if doesn't
35
41
exist); 'w' means create (overwrite if exists); 'w-' means create
36
42
(fail if exists).
43
+ zarr_version : {2, 3}
44
+ The zarr protocol version to use.
45
+ path : str
46
+ The path within the store to open.
37
47
**kwargs
38
48
Additional parameters are passed through to :func:`zarr.creation.open_array` or
39
49
:func:`zarr.hierarchy.open_group`.
@@ -75,15 +85,16 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs):
75
85
76
86
"""
77
87
78
- path = kwargs .get ('path' )
79
88
# handle polymorphic store arg
80
89
clobber = mode == 'w'
81
90
# we pass storage options explicitly, since normalize_store_arg might construct
82
91
# a store if the input is a fsspec-compatible URL
83
92
_store : BaseStore = normalize_store_arg (
84
- store , clobber = clobber , storage_options = kwargs .pop ("storage_options" , {})
93
+ store , clobber = clobber , storage_options = kwargs .pop ("storage_options" , {}),
94
+ zarr_version = zarr_version ,
85
95
)
86
- path = normalize_storage_path (path )
96
+ path = _check_and_update_path (_store , path )
97
+ kwargs ['path' ] = path
87
98
88
99
if mode in {'w' , 'w-' , 'x' }:
89
100
if 'shape' in kwargs :
@@ -110,7 +121,7 @@ def _might_close(path):
110
121
return isinstance (path , (str , os .PathLike ))
111
122
112
123
113
- def save_array (store : StoreLike , arr , ** kwargs ):
124
+ def save_array (store : StoreLike , arr , * , zarr_version = 2 , path = None , * *kwargs ):
114
125
"""Convenience function to save a NumPy array to the local file system, following a
115
126
similar API to the NumPy save() function.
116
127
@@ -120,6 +131,10 @@ def save_array(store: StoreLike, arr, **kwargs):
120
131
Store or path to directory in file system or name of zip file.
121
132
arr : ndarray
122
133
NumPy array with data to save.
134
+ zarr_version : {2, 3}
135
+ The zarr protocol version to use when saving.
136
+ path : str
137
+ The path within the store where the array will be saved.
123
138
kwargs
124
139
Passed through to :func:`create`, e.g., compressor.
125
140
@@ -142,16 +157,18 @@ def save_array(store: StoreLike, arr, **kwargs):
142
157
143
158
"""
144
159
may_need_closing = _might_close (store )
145
- _store : BaseStore = normalize_store_arg (store , clobber = True )
160
+ _store : BaseStore = normalize_store_arg (store , clobber = True , zarr_version = zarr_version )
161
+ path = _check_and_update_path (_store , path )
146
162
try :
147
- _create_array (arr , store = _store , overwrite = True , ** kwargs )
163
+ _create_array (arr , store = _store , overwrite = True , zarr_version = zarr_version , path = path ,
164
+ ** kwargs )
148
165
finally :
149
166
if may_need_closing :
150
167
# needed to ensure zip file records are written
151
168
_store .close ()
152
169
153
170
154
- def save_group (store : StoreLike , * args , ** kwargs ):
171
+ def save_group (store : StoreLike , * args , zarr_version = 2 , path = None , ** kwargs ):
155
172
"""Convenience function to save several NumPy arrays to the local file system, following a
156
173
similar API to the NumPy savez()/savez_compressed() functions.
157
174
@@ -161,6 +178,10 @@ def save_group(store: StoreLike, *args, **kwargs):
161
178
Store or path to directory in file system or name of zip file.
162
179
args : ndarray
163
180
NumPy arrays with data to save.
181
+ zarr_version : {2, 3}
182
+ The zarr protocol version to use when saving.
183
+ path : str
184
+ Path within the store where the group will be saved.
164
185
kwargs
165
186
NumPy arrays with data to save.
166
187
@@ -213,21 +234,22 @@ def save_group(store: StoreLike, *args, **kwargs):
213
234
raise ValueError ('at least one array must be provided' )
214
235
# handle polymorphic store arg
215
236
may_need_closing = _might_close (store )
216
- _store : BaseStore = normalize_store_arg (store , clobber = True )
237
+ _store : BaseStore = normalize_store_arg (store , clobber = True , zarr_version = zarr_version )
238
+ path = _check_and_update_path (_store , path )
217
239
try :
218
- grp = _create_group (_store , overwrite = True )
240
+ grp = _create_group (_store , path = path , overwrite = True , zarr_version = zarr_version )
219
241
for i , arr in enumerate (args ):
220
242
k = 'arr_{}' .format (i )
221
- grp .create_dataset (k , data = arr , overwrite = True )
243
+ grp .create_dataset (k , data = arr , overwrite = True , zarr_version = zarr_version )
222
244
for k , arr in kwargs .items ():
223
- grp .create_dataset (k , data = arr , overwrite = True )
245
+ grp .create_dataset (k , data = arr , overwrite = True , zarr_version = zarr_version )
224
246
finally :
225
247
if may_need_closing :
226
248
# needed to ensure zip file records are written
227
249
_store .close ()
228
250
229
251
230
- def save (store : StoreLike , * args , ** kwargs ):
252
+ def save (store : StoreLike , * args , zarr_version = 2 , path = None , ** kwargs ):
231
253
"""Convenience function to save an array or group of arrays to the local file system.
232
254
233
255
Parameters
@@ -236,6 +258,10 @@ def save(store: StoreLike, *args, **kwargs):
236
258
Store or path to directory in file system or name of zip file.
237
259
args : ndarray
238
260
NumPy arrays with data to save.
261
+ zarr_version : {2, 3}
262
+ The zarr protocol version to use when saving.
263
+ path : str
264
+ The path within the group where the arrays will be saved.
239
265
kwargs
240
266
NumPy arrays with data to save.
241
267
@@ -302,9 +328,10 @@ def save(store: StoreLike, *args, **kwargs):
302
328
if len (args ) == 0 and len (kwargs ) == 0 :
303
329
raise ValueError ('at least one array must be provided' )
304
330
if len (args ) == 1 and len (kwargs ) == 0 :
305
- save_array (store , args [0 ])
331
+ save_array (store , args [0 ], zarr_version = zarr_version , path = path )
306
332
else :
307
- save_group (store , * args , ** kwargs )
333
+ save_group (store , * args , zarr_version = zarr_version , path = path ,
334
+ ** kwargs )
308
335
309
336
310
337
class LazyLoader (Mapping ):
@@ -337,7 +364,7 @@ def __repr__(self):
337
364
return r
338
365
339
366
340
- def load (store : StoreLike ):
367
+ def load (store : StoreLike , zarr_version = 2 , path = None ):
341
368
"""Load data from an array or group into memory.
342
369
343
370
Parameters
@@ -363,11 +390,12 @@ def load(store: StoreLike):
363
390
364
391
"""
365
392
# handle polymorphic store arg
366
- _store = normalize_store_arg (store )
367
- if contains_array (_store , path = None ):
368
- return Array (store = _store , path = None )[...]
369
- elif contains_group (_store , path = None ):
370
- grp = Group (store = _store , path = None )
393
+ _store = normalize_store_arg (store , zarr_version = zarr_version )
394
+ path = _check_and_update_path (_store , path )
395
+ if contains_array (_store , path = path ):
396
+ return Array (store = _store , path = path )[...]
397
+ elif contains_group (_store , path = path ):
398
+ grp = Group (store = _store , path = path )
371
399
return LazyLoader (grp )
372
400
373
401
@@ -601,59 +629,79 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
601
629
# setup counting variables
602
630
n_copied = n_skipped = n_bytes_copied = 0
603
631
632
+ source_store_version = getattr (source , '_store_version' , 2 )
633
+ dest_store_version = getattr (dest , '_store_version' , 2 )
634
+ if source_store_version != dest_store_version :
635
+ raise ValueError ("zarr stores must share the same protocol version" )
636
+ if source_store_version > 2 :
637
+ if not source_path or not dest_path :
638
+ raise ValueError ("v3 stores require specifying a non-empty "
639
+ "source_path and dest_path" )
640
+
604
641
# setup logging
605
642
with _LogWriter (log ) as log :
606
643
607
644
# iterate over source keys
608
645
for source_key in sorted (source .keys ()):
609
646
610
647
# filter to keys under source path
611
- if source_key .startswith (source_path ):
648
+ if source_store_version == 2 :
649
+ if not source_key .startswith (source_path ):
650
+ continue
651
+ elif source_store_version == 3 :
652
+ # 'meta/root/' or 'data/root/' have length 10
653
+ if not source_key [10 :].startswith (source_path ):
654
+ continue
612
655
613
- # process excludes and includes
614
- exclude = False
615
- for prog in excludes :
656
+ # process excludes and includes
657
+ exclude = False
658
+ for prog in excludes :
659
+ if prog .search (source_key ):
660
+ exclude = True
661
+ break
662
+ if exclude :
663
+ for prog in includes :
616
664
if prog .search (source_key ):
617
- exclude = True
665
+ exclude = False
618
666
break
619
- if exclude :
620
- for prog in includes :
621
- if prog .search (source_key ):
622
- exclude = False
623
- break
624
- if exclude :
625
- continue
667
+ if exclude :
668
+ continue
626
669
627
- # map key to destination path
670
+ # map key to destination path
671
+ if source_store_version == 2 :
628
672
key_suffix = source_key [len (source_path ):]
629
673
dest_key = dest_path + key_suffix
630
-
631
- # create a descriptive label for this operation
632
- descr = source_key
633
- if dest_key != source_key :
634
- descr = descr + ' -> ' + dest_key
635
-
636
- # decide what to do
637
- do_copy = True
638
- if if_exists != 'replace' :
639
- if dest_key in dest :
640
- if if_exists == 'raise' :
641
- raise CopyError ('key {!r} exists in destination'
642
- .format (dest_key ))
643
- elif if_exists == 'skip' :
644
- do_copy = False
645
-
646
- # take action
647
- if do_copy :
648
- log ('copy {}' .format (descr ))
649
- if not dry_run :
650
- data = source [source_key ]
651
- n_bytes_copied += buffer_size (data )
652
- dest [dest_key ] = data
653
- n_copied += 1
654
- else :
655
- log ('skip {}' .format (descr ))
656
- n_skipped += 1
674
+ elif source_store_version == 3 :
675
+ # 10 is length of 'meta/root/' or 'data/root/'
676
+ key_suffix = source_key [10 + len (source_path ):]
677
+ dest_key = source_key [:10 ] + dest_path + key_suffix
678
+
679
+ # create a descriptive label for this operation
680
+ descr = source_key
681
+ if dest_key != source_key :
682
+ descr = descr + ' -> ' + dest_key
683
+
684
+ # decide what to do
685
+ do_copy = True
686
+ if if_exists != 'replace' :
687
+ if dest_key in dest :
688
+ if if_exists == 'raise' :
689
+ raise CopyError ('key {!r} exists in destination'
690
+ .format (dest_key ))
691
+ elif if_exists == 'skip' :
692
+ do_copy = False
693
+
694
+ # take action
695
+ if do_copy :
696
+ log ('copy {}' .format (descr ))
697
+ if not dry_run :
698
+ data = source [source_key ]
699
+ n_bytes_copied += buffer_size (data )
700
+ dest [dest_key ] = data
701
+ n_copied += 1
702
+ else :
703
+ log ('skip {}' .format (descr ))
704
+ n_skipped += 1
657
705
658
706
# log a final message with a summary of what happened
659
707
_log_copy_summary (log , dry_run , n_copied , n_skipped , n_bytes_copied )
@@ -908,7 +956,15 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists,
908
956
909
957
# copy attributes
910
958
if not without_attrs :
911
- ds .attrs .update (source .attrs )
959
+ if dest_h5py and 'filters' in source .attrs :
960
+ # No filters key in v3 metadata so it was stored in the
961
+ # attributes instead. We cannot copy this key to
962
+ # HDF5 attrs, though!
963
+ source_attrs = source .attrs .asdict ().copy ()
964
+ source_attrs .pop ('filters' , None )
965
+ else :
966
+ source_attrs = source .attrs
967
+ ds .attrs .update (source_attrs )
912
968
913
969
n_copied += 1
914
970
@@ -1064,6 +1120,8 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None,
1064
1120
# setup counting variables
1065
1121
n_copied = n_skipped = n_bytes_copied = 0
1066
1122
1123
+ zarr_version = getattr (source , '_version' , 2 )
1124
+
1067
1125
# setup logging
1068
1126
with _LogWriter (log ) as log :
1069
1127
@@ -1075,15 +1133,16 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None,
1075
1133
n_copied += c
1076
1134
n_skipped += s
1077
1135
n_bytes_copied += b
1078
- dest .attrs .update (** source .attrs )
1136
+ if zarr_version == 2 :
1137
+ dest .attrs .update (** source .attrs )
1079
1138
1080
1139
# log a final message with a summary of what happened
1081
1140
_log_copy_summary (log , dry_run , n_copied , n_skipped , n_bytes_copied )
1082
1141
1083
1142
return n_copied , n_skipped , n_bytes_copied
1084
1143
1085
1144
1086
- def consolidate_metadata (store : StoreLike , metadata_key = ".zmetadata" ):
1145
+ def consolidate_metadata (store : BaseStore , metadata_key = ".zmetadata" ):
1087
1146
"""
1088
1147
Consolidate all metadata for groups and arrays within the given store
1089
1148
into a single resource and put it under the given key.
0 commit comments