From 0d2217b4ad8dfd7b5b0e6c68887442f4a3df7bca Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Tue, 22 Oct 2019 16:45:53 +0200 Subject: [PATCH 01/13] Allow disabling filling of missing chunks --- zarr/core.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/zarr/core.py b/zarr/core.py index d64392c264..b7273227ae 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -93,6 +93,7 @@ class Array(object): set_mask_selection get_coordinate_selection set_coordinate_selection + set_options digest hexdigest resize @@ -710,6 +711,8 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None): cdata = self.chunk_store[ckey] except KeyError: + if not self._fill_missing_chunk: + raise # chunk not initialized chunk = np.zeros((), dtype=self._dtype) if self._fill_value is not None: @@ -1376,6 +1379,18 @@ def set_coordinate_selection(self, selection, value, fields=None): self._set_selection(indexer, value, fields=fields) + def set_options(self, fill_missing_chunk=True): + """Set options. + + Parameters + ---------- + fill_missing_chunk : bool + Wether Zarr is supposed to fill missing chunks or not. Defaults to True. + + """ + + self._fill_missing_chunk = fill_missing_chunk + def set_mask_selection(self, selection, value, fields=None): """Modify a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True @@ -1472,6 +1487,8 @@ def _set_basic_selection_zd(self, selection, value, fields=None): cdata = self.chunk_store[ckey] except KeyError: + if not self._fill_missing_chunk: + raise # chunk not initialized chunk = np.zeros((), dtype=self._dtype) if self._fill_value is not None: @@ -1579,6 +1596,8 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, cdata = self.chunk_store[ckey] except KeyError: + if not self._fill_missing_chunk: + raise # chunk not initialized if self._fill_value is not None: if fields: @@ -1692,6 +1711,9 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non except KeyError: + if not self._fill_missing_chunk: + raise + # chunk not initialized if self._fill_value is not None: chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) From c627aaf1edd377b0d6e86aa135979688204b9b88 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Tue, 22 Oct 2019 16:49:21 +0200 Subject: [PATCH 02/13] Fix typo in docstring --- zarr/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/core.py b/zarr/core.py index b7273227ae..895319089e 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1385,7 +1385,7 @@ def set_options(self, fill_missing_chunk=True): Parameters ---------- fill_missing_chunk : bool - Wether Zarr is supposed to fill missing chunks or not. Defaults to True. + Whether Zarr is supposed to fill missing chunks. Defaults to True. 
""" From a73e24834855d01945aca94f9ba872f2a1c704a4 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Fri, 25 Oct 2019 11:42:43 +0200 Subject: [PATCH 03/13] Ensure that options are always initialized --- zarr/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zarr/core.py b/zarr/core.py index 895319089e..0679ec4ef5 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -135,6 +135,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._oindex = OIndex(self) self._vindex = VIndex(self) + # initialize options + self.set_options() + def _load_metadata(self): """(Re)load metadata from store.""" if self._synchronizer is None: From 75139ad45a9acdb9fedb51e2756a4d6075a5c616 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 10:41:59 +0200 Subject: [PATCH 04/13] Add test for array with missing chunk --- zarr/tests/test_missing.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 zarr/tests/test_missing.py diff --git a/zarr/tests/test_missing.py b/zarr/tests/test_missing.py new file mode 100644 index 0000000000..57e6896e40 --- /dev/null +++ b/zarr/tests/test_missing.py @@ -0,0 +1,20 @@ +import unittest +from zarr.creation import array + + +class TestArrayMissingKeys(unittest.TestCase): + def test_raises_on_missing_key(self): + a = array(range(2), chunks=1) + + # configure raise on missing chunk + a.set_options(fill_missing_chunk=False) + + # pop first chunk + a.chunk_store.pop("0") + + # read avaible chunk w/o error + b = a[-1] + + # reading missing chunk should raise + with self.assertRaises(KeyError): + b = a[0] From b69460fa236260ae1ff25ae8d01488b18d799b76 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 10:45:52 +0200 Subject: [PATCH 05/13] Don't raise on setting items on missing chunks --- zarr/core.py | 321 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 197 insertions(+), 124 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index da5102a1a1..6a763c140a 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -146,9 +146,9 @@ def __init__( self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: - self._key_prefix = self._path + '/' + self._key_prefix = self._path + "/" else: - self._key_prefix = '' + self._key_prefix = "" self._read_only = bool(read_only) self._synchronizer = synchronizer self._cache_metadata = cache_metadata @@ -160,8 +160,13 @@ def __init__( # initialize attributes akey = self._key_prefix + attrs_key - self._attrs = Attributes(store, key=akey, read_only=read_only, - synchronizer=synchronizer, cache=cache_attrs) + self._attrs = Attributes( + store, + key=akey, + read_only=read_only, + synchronizer=synchronizer, + cache=cache_attrs, + ) # initialize info reporter self._info_reporter = InfoReporter(self) @@ -193,21 +198,21 @@ def _load_metadata_nosync(self): # decode and store metadata as instance members meta = decode_array_metadata(meta_bytes) self._meta = meta - self._shape = meta['shape'] - self._chunks = meta['chunks'] - self._dtype = meta['dtype'] - self._fill_value = meta['fill_value'] - self._order = meta['order'] + self._shape = meta["shape"] + self._chunks = meta["chunks"] + self._dtype = meta["dtype"] + self._fill_value = meta["fill_value"] + self._order = meta["order"] # setup compressor - config = meta['compressor'] + config = meta["compressor"] if config is None: self._compressor = None else: self._compressor = get_codec(config) # setup filters - filters = meta['filters'] + filters = meta["filters"] if filters: filters = 
[get_codec(config) for config in filters] self._filters = filters @@ -222,7 +227,7 @@ def _refresh_metadata_nosync(self): def _flush_metadata_nosync(self): if self._is_view: - raise PermissionError('operation not permitted for views') + raise PermissionError("operation not permitted for views") if self._compressor: compressor_config = self._compressor.get_config() @@ -232,9 +237,15 @@ def _flush_metadata_nosync(self): filters_config = [f.get_config() for f in self._filters] else: filters_config = None - meta = dict(shape=self._shape, chunks=self._chunks, dtype=self._dtype, - compressor=compressor_config, fill_value=self._fill_value, - order=self._order, filters=filters_config) + meta = dict( + shape=self._shape, + chunks=self._chunks, + dtype=self._dtype, + compressor=compressor_config, + fill_value=self._fill_value, + order=self._order, + filters=filters_config, + ) mkey = self._key_prefix + array_meta_key self._store[mkey] = encode_array_metadata(meta) @@ -254,8 +265,8 @@ def name(self): if self.path: # follow h5py convention: add leading slash name = self.path - if name[0] != '/': - name = '/' + name + if name[0] != "/": + name = "/" + name return name return None @@ -263,7 +274,7 @@ def name(self): def basename(self): """Final component of name.""" if self.name is not None: - return self.name.split('/')[-1] + return self.name.split("/")[-1] return None @property @@ -392,10 +403,9 @@ def nbytes_stored(self): @property def _cdata_shape(self): if self._shape == (): - return 1, + return (1,) else: - return tuple(math.ceil(s / c) - for s, c in zip(self._shape, self._chunks)) + return tuple(math.ceil(s / c) for s, c in zip(self._shape, self._chunks)) @property def cdata_shape(self): @@ -419,7 +429,7 @@ def nchunks_initialized(self): """The number of chunks that have been initialized with some data.""" # key pattern for chunk keys - prog = re.compile(r'\.'.join([r'\d+'] * min(1, self.ndim))) + prog = re.compile(r"\.".join([r"\d+"] * min(1, self.ndim))) # count chunk keys return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) @@ -447,11 +457,11 @@ def vindex(self): def __eq__(self, other): return ( - isinstance(other, Array) and - self.store == other.store and - self.read_only == other.read_only and - self.path == other.path and - not self._is_view + isinstance(other, Array) + and self.store == other.store + and self.read_only == other.read_only + and self.path == other.path + and not self._is_view # N.B., no need to compare other properties, should be covered by # store comparison ) @@ -471,7 +481,7 @@ def __iter__(self): chunk_size = self.chunks[0] for j in range(self.shape[0]): if j % chunk_size == 0: - chunk = self[j: j + chunk_size] + chunk = self[j : j + chunk_size] yield chunk[j % chunk_size] def __len__(self): @@ -479,7 +489,7 @@ def __len__(self): return self.shape[0] else: # 0-dimensional array, same error message as numpy - raise TypeError('len() of unsized object') + raise TypeError("len() of unsized object") def __getitem__(self, selection): """Retrieve data for an item or region of the array. 
@@ -729,11 +739,13 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): # handle zero-dimensional arrays if self._shape == (): - return self._get_basic_selection_zd(selection=selection, out=out, - fields=fields) + return self._get_basic_selection_zd( + selection=selection, out=out, fields=fields + ) else: - return self._get_basic_selection_nd(selection=selection, out=out, - fields=fields) + return self._get_basic_selection_nd( + selection=selection, out=out, fields=fields + ) def _get_basic_selection_zd(self, selection, out=None, fields=None): # special case basic selection for zero-dimensional array @@ -1058,22 +1070,35 @@ def _get_selection(self, indexer, out=None, fields=None): if out is None: out = np.empty(out_shape, dtype=out_dtype, order=self._order) else: - check_array_shape('out', out, out_shape) + check_array_shape("out", out, out_shape) # iterate over chunks - if not hasattr(self.chunk_store, "getitems") or \ - any(map(lambda x: x == 0, self.shape)): + if not hasattr(self.chunk_store, "getitems") or any( + map(lambda x: x == 0, self.shape) + ): # sequentially get one key at a time from storage for chunk_coords, chunk_selection, out_selection in indexer: # load chunk selection into output array - self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection, - drop_axes=indexer.drop_axes, fields=fields) + self._chunk_getitem( + chunk_coords, + chunk_selection, + out, + out_selection, + drop_axes=indexer.drop_axes, + fields=fields, + ) else: # allow storage to get multiple items at once lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) - self._chunk_getitems(lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=indexer.drop_axes, fields=fields) + self._chunk_getitems( + lchunk_coords, + lchunk_selection, + out, + lout_selection, + drop_axes=indexer.drop_axes, + fields=fields, + ) if out.shape: return out @@ -1420,7 +1445,7 @@ def set_coordinate_selection(self, selection, value, fields=None): # handle value - need to flatten if not is_scalar(value, self._dtype): value = np.asanyarray(value) - if hasattr(value, 'shape') and len(value.shape) > 1: + if hasattr(value, "shape") and len(value.shape) > 1: value = value.reshape(-1) self._set_selection(indexer, value, fields=fields) @@ -1586,13 +1611,16 @@ def _set_selection(self, indexer, value, fields=None): # setting a scalar value pass else: - if not hasattr(value, 'shape'): + if not hasattr(value, "shape"): value = np.asanyarray(value) - check_array_shape('value', value, sel_shape) + check_array_shape("value", value, sel_shape) # iterate over chunks in range - if not hasattr(self.store, "setitems") or self._synchronizer is not None \ - or any(map(lambda x: x == 0, self.shape)): + if ( + not hasattr(self.store, "setitems") + or self._synchronizer is not None + or any(map(lambda x: x == 0, self.shape)) + ): # iterative approach for chunk_coords, chunk_selection, out_selection in indexer: @@ -1612,7 +1640,9 @@ def _set_selection(self, indexer, value, fields=None): chunk_value = chunk_value[item] # put data - self._chunk_setitem(chunk_coords, chunk_selection, chunk_value, fields=fields) + self._chunk_setitem( + chunk_coords, chunk_selection, chunk_value, fields=fields + ) else: lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) chunk_values = [] @@ -1632,8 +1662,9 @@ def _set_selection(self, indexer, value, fields=None): cv = chunk_value[item] chunk_values.append(cv) - self._chunk_setitems(lchunk_coords, lchunk_selection, chunk_values, - fields=fields) + 
self._chunk_setitems( + lchunk_coords, lchunk_selection, chunk_values, fields=fields + ) def _process_chunk( self, @@ -1647,20 +1678,19 @@ def _process_chunk( partial_read_decode=False, ): """Take binary data from storage and fill output array""" - if (out_is_ndarray and - not fields and - is_contiguous_selection(out_selection) and - is_total_slice(chunk_selection, self._chunks) and - not self._filters and - self._dtype != object): + if ( + out_is_ndarray + and not fields + and is_contiguous_selection(out_selection) + and is_total_slice(chunk_selection, self._chunks) + and not self._filters + and self._dtype != object + ): dest = out[out_selection] - write_direct = ( - dest.flags.writeable and - ( - (self._order == 'C' and dest.flags.c_contiguous) or - (self._order == 'F' and dest.flags.f_contiguous) - ) + write_direct = dest.flags.writeable and ( + (self._order == "C" and dest.flags.c_contiguous) + or (self._order == "F" and dest.flags.f_contiguous) ) if write_direct: @@ -1718,8 +1748,15 @@ def _process_chunk( # store selected data in output out[out_selection] = tmp - def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, - drop_axes=None, fields=None): + def _chunk_getitem( + self, + chunk_coords, + chunk_selection, + out, + out_selection, + drop_axes=None, + fields=None, + ): """Obtain part or whole of a chunk. Parameters @@ -1765,11 +1802,25 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, out[out_selection] = fill_value else: - self._process_chunk(out, cdata, chunk_selection, drop_axes, - out_is_ndarray, fields, out_selection) + self._process_chunk( + out, + cdata, + chunk_selection, + drop_axes, + out_is_ndarray, + fields, + out_selection, + ) - def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=None, fields=None): + def _chunk_getitems( + self, + lchunk_coords, + lchunk_selection, + out, + lout_selection, + drop_axes=None, + fields=None, + ): """As _chunk_getitem, but for lists of chunks This gets called where the storage supports ``getitems``, so that @@ -1800,7 +1851,9 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, else: partial_read_decode = False cdatas = self.chunk_store.getitems(ckeys, on_error="omit") - for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection): + for ckey, chunk_select, out_select in zip( + ckeys, lchunk_selection, lout_selection + ): if ckey in cdatas: self._process_chunk( out, @@ -1823,8 +1876,10 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, def _chunk_setitems(self, lchunk_coords, lchunk_selection, values, fields=None): ckeys = [self._chunk_key(co) for co in lchunk_coords] - cdatas = [self._process_for_setitem(key, sel, val, fields=fields) - for key, sel, val in zip(ckeys, lchunk_selection, values)] + cdatas = [ + self._process_for_setitem(key, sel, val, fields=fields) + for key, sel, val in zip(ckeys, lchunk_selection, values) + ] values = {k: v for k, v in zip(ckeys, cdatas)} self.chunk_store.setitems(values) @@ -1851,8 +1906,9 @@ def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): lock = self._synchronizer[ckey] with lock: - self._chunk_setitem_nosync(chunk_coords, chunk_selection, value, - fields=fields) + self._chunk_setitem_nosync( + chunk_coords, chunk_selection, value, fields=fields + ) def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): ckey = self._chunk_key(chunk_coords) @@ -1887,10 +1943,6 @@ def 
_process_for_setitem(self, ckey, chunk_selection, value, fields=None): cdata = self.chunk_store[ckey] except KeyError: - - if not self._fill_missing_chunk: - raise - # chunk not initialized if self._fill_value is not None: chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) @@ -1907,7 +1959,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # decode chunk chunk = self._decode_chunk(cdata) if not chunk.flags.writeable: - chunk = chunk.copy(order='K') + chunk = chunk.copy(order="K") # modify if fields: @@ -1921,7 +1973,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, chunk_coords): - return self._key_prefix + '.'.join(map(str, chunk_coords)) + return self._key_prefix + ".".join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress @@ -1954,10 +2006,10 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # codec in the filter chain, i.e., a filter that converts from object # array to something else during encoding, and converts back to object # array during decoding. - raise RuntimeError('cannot read object array without object codec') + raise RuntimeError("cannot read object array without object codec") # ensure correct chunk shape - chunk = chunk.reshape(-1, order='A') + chunk = chunk.reshape(-1, order="A") chunk = chunk.reshape(expected_shape or self._chunks, order=self._order) return chunk @@ -1971,7 +2023,7 @@ def _encode_chunk(self, chunk): # check object encoding if ensure_ndarray(chunk).dtype == object: - raise RuntimeError('cannot write object array without object codec') + raise RuntimeError("cannot write object array without object codec") # compress if self._compressor: @@ -1987,14 +2039,14 @@ def _encode_chunk(self, chunk): def __repr__(self): t = type(self) - r = '<{}.{}'.format(t.__module__, t.__name__) + r = "<{}.{}".format(t.__module__, t.__name__) if self.name: - r += ' %r' % self.name - r += ' %s' % str(self.shape) - r += ' %s' % self.dtype + r += " %r" % self.name + r += " %s" % str(self.shape) + r += " %s" % self.dtype if self._read_only: - r += ' read-only' - r += '>' + r += " read-only" + r += ">" return r @property @@ -2026,13 +2078,12 @@ def info_items(self): return self._synchronized_op(self._info_items_nosync) def _info_items_nosync(self): - def typestr(o): - return '{}.{}'.format(type(o).__module__, type(o).__name__) + return "{}.{}".format(type(o).__module__, type(o).__name__) def bytestr(n): - if n > 2**10: - return '{} ({})'.format(n, human_readable_size(n)) + if n > 2 ** 10: + return "{} ({})".format(n, human_readable_size(n)) else: return str(n) @@ -2040,40 +2091,43 @@ def bytestr(n): # basic info if self.name is not None: - items += [('Name', self.name)] + items += [("Name", self.name)] items += [ - ('Type', typestr(self)), - ('Data type', '%s' % self.dtype), - ('Shape', str(self.shape)), - ('Chunk shape', str(self.chunks)), - ('Order', self.order), - ('Read-only', str(self.read_only)), + ("Type", typestr(self)), + ("Data type", "%s" % self.dtype), + ("Shape", str(self.shape)), + ("Chunk shape", str(self.chunks)), + ("Order", self.order), + ("Read-only", str(self.read_only)), ] # filters if self.filters: for i, f in enumerate(self.filters): - items += [('Filter [%s]' % i, repr(f))] + items += [("Filter [%s]" % i, repr(f))] # compressor - items += [('Compressor', repr(self.compressor))] + items += [("Compressor", repr(self.compressor))] # 
synchronizer if self._synchronizer is not None: - items += [('Synchronizer type', typestr(self._synchronizer))] + items += [("Synchronizer type", typestr(self._synchronizer))] # storage info - items += [('Store type', typestr(self._store))] + items += [("Store type", typestr(self._store))] if self._chunk_store is not None: - items += [('Chunk store type', typestr(self._chunk_store))] - items += [('No. bytes', bytestr(self.nbytes))] + items += [("Chunk store type", typestr(self._chunk_store))] + items += [("No. bytes", bytestr(self.nbytes))] if self.nbytes_stored > 0: items += [ - ('No. bytes stored', bytestr(self.nbytes_stored)), - ('Storage ratio', '%.1f' % (self.nbytes / self.nbytes_stored)), + ("No. bytes stored", bytestr(self.nbytes_stored)), + ("Storage ratio", "%.1f" % (self.nbytes / self.nbytes_stored)), ] items += [ - ('Chunks initialized', '{}/{}'.format(self.nchunks_initialized, self.nchunks)) + ( + "Chunks initialized", + "{}/{}".format(self.nchunks_initialized, self.nchunks), + ) ] return items @@ -2132,13 +2186,20 @@ def hexdigest(self, hashname="sha1"): # This is a bytes object on Python 3 and we want a str. if type(checksum) is not str: - checksum = checksum.decode('utf8') + checksum = checksum.decode("utf8") return checksum def __getstate__(self): - return (self._store, self._path, self._read_only, self._chunk_store, - self._synchronizer, self._cache_metadata, self._attrs.cache) + return ( + self._store, + self._path, + self._read_only, + self._chunk_store, + self._synchronizer, + self._cache_metadata, + self._attrs.cache, + ) def __setstate__(self, state): self.__init__(*state) @@ -2209,8 +2270,7 @@ def _resize_nosync(self, *args): # determine the new number and arrangement of chunks chunks = self._chunks - new_cdata_shape = tuple(math.ceil(s / c) - for s, c in zip(new_shape, chunks)) + new_cdata_shape = tuple(math.ceil(s / c) for s, c in zip(new_shape, chunks)) # remove any chunks not within range chunk_store = self.chunk_store @@ -2265,18 +2325,18 @@ def append(self, data, axis=0): def _append_nosync(self, data, axis=0): # ensure data is array-like - if not hasattr(data, 'shape'): + if not hasattr(data, "shape"): data = np.asanyarray(data) # ensure shapes are compatible for non-append dimensions - self_shape_preserved = tuple(s for i, s in enumerate(self._shape) - if i != axis) - data_shape_preserved = tuple(s for i, s in enumerate(data.shape) - if i != axis) + self_shape_preserved = tuple(s for i, s in enumerate(self._shape) if i != axis) + data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis) if self_shape_preserved != data_shape_preserved: - raise ValueError('shape of data to append is not compatible with the array; ' - 'all dimensions must match except for the dimension being ' - 'appended') + raise ValueError( + "shape of data to append is not compatible with the array; " + "all dimensions must match except for the dimension being " + "appended" + ) # remember old shape old_shape = self._shape @@ -2300,9 +2360,16 @@ def _append_nosync(self, data, axis=0): return new_shape - def view(self, shape=None, chunks=None, dtype=None, - fill_value=None, filters=None, read_only=None, - synchronizer=None): + def view( + self, + shape=None, + chunks=None, + dtype=None, + fill_value=None, + filters=None, + read_only=None, + synchronizer=None, + ): """Return an array sharing the same data. 
Parameters @@ -2417,8 +2484,14 @@ def view(self, shape=None, chunks=None, dtype=None, read_only = self._read_only if synchronizer is None: synchronizer = self._synchronizer - a = Array(store=store, path=path, chunk_store=chunk_store, read_only=read_only, - synchronizer=synchronizer, cache_metadata=True) + a = Array( + store=store, + path=path, + chunk_store=chunk_store, + read_only=read_only, + synchronizer=synchronizer, + cache_metadata=True, + ) a._is_view = True # allow override of some properties From 806e0be850c597449ced97b5121f6f095a09ba81 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 10:46:08 +0200 Subject: [PATCH 06/13] Also test for slicing --- zarr/tests/test_missing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_missing.py b/zarr/tests/test_missing.py index 57e6896e40..076f068712 100644 --- a/zarr/tests/test_missing.py +++ b/zarr/tests/test_missing.py @@ -4,7 +4,7 @@ class TestArrayMissingKeys(unittest.TestCase): def test_raises_on_missing_key(self): - a = array(range(2), chunks=1) + a = array(range(4), chunks=2) # configure raise on missing chunk a.set_options(fill_missing_chunk=False) @@ -12,9 +12,13 @@ def test_raises_on_missing_key(self): # pop first chunk a.chunk_store.pop("0") - # read avaible chunk w/o error + # read from avaible chunk w/o error b = a[-1] + c = a[-2:] # reading missing chunk should raise with self.assertRaises(KeyError): b = a[0] + + with self.assertRaises(KeyError): + c = a[:2] From 843410ca76224276cbb8e955f466ce8033d77734 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 11:41:11 +0200 Subject: [PATCH 07/13] Satisfy linter --- zarr/tests/test_missing.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/zarr/tests/test_missing.py b/zarr/tests/test_missing.py index 076f068712..050a45f7b8 100644 --- a/zarr/tests/test_missing.py +++ b/zarr/tests/test_missing.py @@ -12,13 +12,17 @@ def test_raises_on_missing_key(self): # pop first chunk a.chunk_store.pop("0") - # read from avaible chunk w/o error - b = a[-1] - c = a[-2:] + # read from missing chunk and make sure fill-value is returned + assert a.fill_value == a[0] + assert a.fill_value == a[1] + + # read from avaible chunk w/o error + assert 2 = a[2] + assert 3 = a[3] # reading missing chunk should raise with self.assertRaises(KeyError): - b = a[0] + a[0] with self.assertRaises(KeyError): - c = a[:2] + a[:2] From 4d5d5322967618171cc59065298ca14e9fc1b137 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 11:44:26 +0200 Subject: [PATCH 08/13] Satisfy linter --- zarr/tests/test_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_missing.py b/zarr/tests/test_missing.py index 050a45f7b8..e1f32445fc 100644 --- a/zarr/tests/test_missing.py +++ b/zarr/tests/test_missing.py @@ -16,7 +16,7 @@ def test_raises_on_missing_key(self): assert a.fill_value == a[0] assert a.fill_value == a[1] - # read from avaible chunk w/o error + # read from avaible chunk w/o error assert 2 = a[2] assert 3 = a[3] From 6690a6d93300069825bcbf65df9c55bb6a4be74f Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 11:51:17 +0200 Subject: [PATCH 09/13] Fix test logic --- zarr/tests/test_missing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/zarr/tests/test_missing.py b/zarr/tests/test_missing.py index e1f32445fc..3b3d49ad4b 100644 --- a/zarr/tests/test_missing.py +++ b/zarr/tests/test_missing.py @@ -6,9 +6,6 @@ class 
TestArrayMissingKeys(unittest.TestCase): def test_raises_on_missing_key(self): a = array(range(4), chunks=2) - # configure raise on missing chunk - a.set_options(fill_missing_chunk=False) - # pop first chunk a.chunk_store.pop("0") @@ -16,9 +13,12 @@ def test_raises_on_missing_key(self): assert a.fill_value == a[0] assert a.fill_value == a[1] - # read from avaible chunk w/o error - assert 2 = a[2] - assert 3 = a[3] + # read from avaible chunk w/o error + assert 2 == a[2] + assert 3 == a[3] + + # configure raise on missing chunk + a.set_options(fill_missing_chunk=False) # reading missing chunk should raise with self.assertRaises(KeyError): From 690e0978044bec86c934b356a5820f7c72018757 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 12:10:31 +0200 Subject: [PATCH 10/13] Revert black --- zarr/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index f26af1e562..4937303163 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -516,10 +516,10 @@ def islice(self, start=None, end=None): end = self.shape[0] if not isinstance(start, int) or start < 0: - raise ValueError('start must be a nonnegative integer') + raise ValueError("start must be a nonnegative integer") if not isinstance(end, int) or end < 0: - raise ValueError('end must be a nonnegative integer') + raise ValueError("end must be a nonnegative integer") # Avoid repeatedly decompressing chunks by iterating over the chunks # in the first dimension. @@ -527,7 +527,7 @@ def islice(self, start=None, end=None): chunk = None for j in range(start, end): if j % chunk_size == 0: - chunk = self[j: j + chunk_size] + chunk = self[j : j + chunk_size] # init chunk if we start offset of chunk borders elif chunk is None: chunk_start = j - j % chunk_size @@ -1997,6 +1997,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): cdata = self.chunk_store[ckey] except KeyError: + # chunk not initialized if self._fill_value is not None: chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) @@ -2027,10 +2028,10 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, chunk_coords): - if hasattr(self._store, 'key_separator'): + if hasattr(self._store, "key_separator"): separator = self._store.key_separator else: - separator = '.' + separator = "." 
return self._key_prefix + separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): From 55ae35395ed25c3a586a2e39df1dd5d682bd2318 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 12:22:19 +0200 Subject: [PATCH 11/13] Revert black --- zarr/core.py | 323 ++++++++++++++++++++------------------------------- 1 file changed, 123 insertions(+), 200 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 4937303163..3b4b3de7d4 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -146,9 +146,9 @@ def __init__( self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: - self._key_prefix = self._path + "/" + self._key_prefix = self._path + '/' else: - self._key_prefix = "" + self._key_prefix = '' self._read_only = bool(read_only) self._synchronizer = synchronizer self._cache_metadata = cache_metadata @@ -160,13 +160,8 @@ def __init__( # initialize attributes akey = self._key_prefix + attrs_key - self._attrs = Attributes( - store, - key=akey, - read_only=read_only, - synchronizer=synchronizer, - cache=cache_attrs, - ) + self._attrs = Attributes(store, key=akey, read_only=read_only, + synchronizer=synchronizer, cache=cache_attrs) # initialize info reporter self._info_reporter = InfoReporter(self) @@ -198,21 +193,21 @@ def _load_metadata_nosync(self): # decode and store metadata as instance members meta = decode_array_metadata(meta_bytes) self._meta = meta - self._shape = meta["shape"] - self._chunks = meta["chunks"] - self._dtype = meta["dtype"] - self._fill_value = meta["fill_value"] - self._order = meta["order"] + self._shape = meta['shape'] + self._chunks = meta['chunks'] + self._dtype = meta['dtype'] + self._fill_value = meta['fill_value'] + self._order = meta['order'] # setup compressor - config = meta["compressor"] + config = meta['compressor'] if config is None: self._compressor = None else: self._compressor = get_codec(config) # setup filters - filters = meta["filters"] + filters = meta['filters'] if filters: filters = [get_codec(config) for config in filters] self._filters = filters @@ -227,7 +222,7 @@ def _refresh_metadata_nosync(self): def _flush_metadata_nosync(self): if self._is_view: - raise PermissionError("operation not permitted for views") + raise PermissionError('operation not permitted for views') if self._compressor: compressor_config = self._compressor.get_config() @@ -237,15 +232,9 @@ def _flush_metadata_nosync(self): filters_config = [f.get_config() for f in self._filters] else: filters_config = None - meta = dict( - shape=self._shape, - chunks=self._chunks, - dtype=self._dtype, - compressor=compressor_config, - fill_value=self._fill_value, - order=self._order, - filters=filters_config, - ) + meta = dict(shape=self._shape, chunks=self._chunks, dtype=self._dtype, + compressor=compressor_config, fill_value=self._fill_value, + order=self._order, filters=filters_config) mkey = self._key_prefix + array_meta_key self._store[mkey] = encode_array_metadata(meta) @@ -265,8 +254,8 @@ def name(self): if self.path: # follow h5py convention: add leading slash name = self.path - if name[0] != "/": - name = "/" + name + if name[0] != '/': + name = '/' + name return name return None @@ -274,7 +263,7 @@ def name(self): def basename(self): """Final component of name.""" if self.name is not None: - return self.name.split("/")[-1] + return self.name.split('/')[-1] return None @property @@ -403,9 +392,10 @@ def nbytes_stored(self): @property def _cdata_shape(self): if self._shape == (): - return 
(1,) + return 1, else: - return tuple(math.ceil(s / c) for s, c in zip(self._shape, self._chunks)) + return tuple(math.ceil(s / c) + for s, c in zip(self._shape, self._chunks)) @property def cdata_shape(self): @@ -429,7 +419,7 @@ def nchunks_initialized(self): """The number of chunks that have been initialized with some data.""" # key pattern for chunk keys - prog = re.compile(r"\.".join([r"\d+"] * min(1, self.ndim))) + prog = re.compile(r'\.'.join([r'\d+'] * min(1, self.ndim))) # count chunk keys return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) @@ -457,11 +447,11 @@ def vindex(self): def __eq__(self, other): return ( - isinstance(other, Array) - and self.store == other.store - and self.read_only == other.read_only - and self.path == other.path - and not self._is_view + isinstance(other, Array) and + self.store == other.store and + self.read_only == other.read_only and + self.path == other.path and + not self._is_view # N.B., no need to compare other properties, should be covered by # store comparison ) @@ -516,10 +506,10 @@ def islice(self, start=None, end=None): end = self.shape[0] if not isinstance(start, int) or start < 0: - raise ValueError("start must be a nonnegative integer") + raise ValueError('start must be a nonnegative integer') if not isinstance(end, int) or end < 0: - raise ValueError("end must be a nonnegative integer") + raise ValueError('end must be a nonnegative integer') # Avoid repeatedly decompressing chunks by iterating over the chunks # in the first dimension. @@ -527,7 +517,7 @@ def islice(self, start=None, end=None): chunk = None for j in range(start, end): if j % chunk_size == 0: - chunk = self[j : j + chunk_size] + chunk = self[j: j + chunk_size] # init chunk if we start offset of chunk borders elif chunk is None: chunk_start = j - j % chunk_size @@ -543,7 +533,7 @@ def __len__(self): return self.shape[0] else: # 0-dimensional array, same error message as numpy - raise TypeError("len() of unsized object") + raise TypeError('len() of unsized object') def __getitem__(self, selection): """Retrieve data for an item or region of the array. 
@@ -793,13 +783,11 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): # handle zero-dimensional arrays if self._shape == (): - return self._get_basic_selection_zd( - selection=selection, out=out, fields=fields - ) + return self._get_basic_selection_zd(selection=selection, out=out, + fields=fields) else: - return self._get_basic_selection_nd( - selection=selection, out=out, fields=fields - ) + return self._get_basic_selection_nd(selection=selection, out=out, + fields=fields) def _get_basic_selection_zd(self, selection, out=None, fields=None): # special case basic selection for zero-dimensional array @@ -1124,35 +1112,22 @@ def _get_selection(self, indexer, out=None, fields=None): if out is None: out = np.empty(out_shape, dtype=out_dtype, order=self._order) else: - check_array_shape("out", out, out_shape) + check_array_shape('out', out, out_shape) # iterate over chunks - if not hasattr(self.chunk_store, "getitems") or any( - map(lambda x: x == 0, self.shape) - ): + if not hasattr(self.chunk_store, "getitems") or \ + any(map(lambda x: x == 0, self.shape)): # sequentially get one key at a time from storage for chunk_coords, chunk_selection, out_selection in indexer: # load chunk selection into output array - self._chunk_getitem( - chunk_coords, - chunk_selection, - out, - out_selection, - drop_axes=indexer.drop_axes, - fields=fields, - ) + self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection, + drop_axes=indexer.drop_axes, fields=fields) else: # allow storage to get multiple items at once lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) - self._chunk_getitems( - lchunk_coords, - lchunk_selection, - out, - lout_selection, - drop_axes=indexer.drop_axes, - fields=fields, - ) + self._chunk_getitems(lchunk_coords, lchunk_selection, out, lout_selection, + drop_axes=indexer.drop_axes, fields=fields) if out.shape: return out @@ -1499,7 +1474,7 @@ def set_coordinate_selection(self, selection, value, fields=None): # handle value - need to flatten if not is_scalar(value, self._dtype): value = np.asanyarray(value) - if hasattr(value, "shape") and len(value.shape) > 1: + if hasattr(value, 'shape') and len(value.shape) > 1: value = value.reshape(-1) self._set_selection(indexer, value, fields=fields) @@ -1665,16 +1640,13 @@ def _set_selection(self, indexer, value, fields=None): # setting a scalar value pass else: - if not hasattr(value, "shape"): + if not hasattr(value, 'shape'): value = np.asanyarray(value) - check_array_shape("value", value, sel_shape) + check_array_shape('value', value, sel_shape) # iterate over chunks in range - if ( - not hasattr(self.store, "setitems") - or self._synchronizer is not None - or any(map(lambda x: x == 0, self.shape)) - ): + if not hasattr(self.store, "setitems") or self._synchronizer is not None \ + or any(map(lambda x: x == 0, self.shape)): # iterative approach for chunk_coords, chunk_selection, out_selection in indexer: @@ -1694,9 +1666,7 @@ def _set_selection(self, indexer, value, fields=None): chunk_value = chunk_value[item] # put data - self._chunk_setitem( - chunk_coords, chunk_selection, chunk_value, fields=fields - ) + self._chunk_setitem(chunk_coords, chunk_selection, chunk_value, fields=fields) else: lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) chunk_values = [] @@ -1716,9 +1686,8 @@ def _set_selection(self, indexer, value, fields=None): cv = chunk_value[item] chunk_values.append(cv) - self._chunk_setitems( - lchunk_coords, lchunk_selection, chunk_values, fields=fields - ) + 
self._chunk_setitems(lchunk_coords, lchunk_selection, chunk_values, + fields=fields) def _process_chunk( self, @@ -1732,19 +1701,20 @@ def _process_chunk( partial_read_decode=False, ): """Take binary data from storage and fill output array""" - if ( - out_is_ndarray - and not fields - and is_contiguous_selection(out_selection) - and is_total_slice(chunk_selection, self._chunks) - and not self._filters - and self._dtype != object - ): + if (out_is_ndarray and + not fields and + is_contiguous_selection(out_selection) and + is_total_slice(chunk_selection, self._chunks) and + not self._filters and + self._dtype != object): dest = out[out_selection] - write_direct = dest.flags.writeable and ( - (self._order == "C" and dest.flags.c_contiguous) - or (self._order == "F" and dest.flags.f_contiguous) + write_direct = ( + dest.flags.writeable and + ( + (self._order == 'C' and dest.flags.c_contiguous) or + (self._order == 'F' and dest.flags.f_contiguous) + ) ) if write_direct: @@ -1802,15 +1772,8 @@ def _process_chunk( # store selected data in output out[out_selection] = tmp - def _chunk_getitem( - self, - chunk_coords, - chunk_selection, - out, - out_selection, - drop_axes=None, - fields=None, - ): + def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, + drop_axes=None, fields=None): """Obtain part or whole of a chunk. Parameters @@ -1856,25 +1819,11 @@ def _chunk_getitem( out[out_selection] = fill_value else: - self._process_chunk( - out, - cdata, - chunk_selection, - drop_axes, - out_is_ndarray, - fields, - out_selection, - ) + self._process_chunk(out, cdata, chunk_selection, drop_axes, + out_is_ndarray, fields, out_selection) - def _chunk_getitems( - self, - lchunk_coords, - lchunk_selection, - out, - lout_selection, - drop_axes=None, - fields=None, - ): + def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, + drop_axes=None, fields=None): """As _chunk_getitem, but for lists of chunks This gets called where the storage supports ``getitems``, so that @@ -1905,9 +1854,7 @@ def _chunk_getitems( else: partial_read_decode = False cdatas = self.chunk_store.getitems(ckeys, on_error="omit") - for ckey, chunk_select, out_select in zip( - ckeys, lchunk_selection, lout_selection - ): + for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection): if ckey in cdatas: self._process_chunk( out, @@ -1930,10 +1877,8 @@ def _chunk_getitems( def _chunk_setitems(self, lchunk_coords, lchunk_selection, values, fields=None): ckeys = [self._chunk_key(co) for co in lchunk_coords] - cdatas = [ - self._process_for_setitem(key, sel, val, fields=fields) - for key, sel, val in zip(ckeys, lchunk_selection, values) - ] + cdatas = [self._process_for_setitem(key, sel, val, fields=fields) + for key, sel, val in zip(ckeys, lchunk_selection, values)] values = {k: v for k, v in zip(ckeys, cdatas)} self.chunk_store.setitems(values) @@ -1960,9 +1905,8 @@ def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): lock = self._synchronizer[ckey] with lock: - self._chunk_setitem_nosync( - chunk_coords, chunk_selection, value, fields=fields - ) + self._chunk_setitem_nosync(chunk_coords, chunk_selection, value, + fields=fields) def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): ckey = self._chunk_key(chunk_coords) @@ -2014,7 +1958,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # decode chunk chunk = self._decode_chunk(cdata) if not chunk.flags.writeable: - chunk = chunk.copy(order="K") + 
chunk = chunk.copy(order='K') # modify if fields: @@ -2028,10 +1972,10 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, chunk_coords): - if hasattr(self._store, "key_separator"): + if hasattr(self._store, 'key_separator'): separator = self._store.key_separator else: - separator = "." + separator = '.' return self._key_prefix + separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): @@ -2065,10 +2009,10 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # codec in the filter chain, i.e., a filter that converts from object # array to something else during encoding, and converts back to object # array during decoding. - raise RuntimeError("cannot read object array without object codec") + raise RuntimeError('cannot read object array without object codec') # ensure correct chunk shape - chunk = chunk.reshape(-1, order="A") + chunk = chunk.reshape(-1, order='A') chunk = chunk.reshape(expected_shape or self._chunks, order=self._order) return chunk @@ -2082,7 +2026,7 @@ def _encode_chunk(self, chunk): # check object encoding if ensure_ndarray(chunk).dtype == object: - raise RuntimeError("cannot write object array without object codec") + raise RuntimeError('cannot write object array without object codec') # compress if self._compressor: @@ -2098,14 +2042,14 @@ def _encode_chunk(self, chunk): def __repr__(self): t = type(self) - r = "<{}.{}".format(t.__module__, t.__name__) + r = '<{}.{}'.format(t.__module__, t.__name__) if self.name: - r += " %r" % self.name - r += " %s" % str(self.shape) - r += " %s" % self.dtype + r += ' %r' % self.name + r += ' %s' % str(self.shape) + r += ' %s' % self.dtype if self._read_only: - r += " read-only" - r += ">" + r += ' read-only' + r += '>' return r @property @@ -2137,12 +2081,13 @@ def info_items(self): return self._synchronized_op(self._info_items_nosync) def _info_items_nosync(self): + def typestr(o): - return "{}.{}".format(type(o).__module__, type(o).__name__) + return '{}.{}'.format(type(o).__module__, type(o).__name__) def bytestr(n): - if n > 2 ** 10: - return "{} ({})".format(n, human_readable_size(n)) + if n > 2**10: + return '{} ({})'.format(n, human_readable_size(n)) else: return str(n) @@ -2150,43 +2095,40 @@ def bytestr(n): # basic info if self.name is not None: - items += [("Name", self.name)] + items += [('Name', self.name)] items += [ - ("Type", typestr(self)), - ("Data type", "%s" % self.dtype), - ("Shape", str(self.shape)), - ("Chunk shape", str(self.chunks)), - ("Order", self.order), - ("Read-only", str(self.read_only)), + ('Type', typestr(self)), + ('Data type', '%s' % self.dtype), + ('Shape', str(self.shape)), + ('Chunk shape', str(self.chunks)), + ('Order', self.order), + ('Read-only', str(self.read_only)), ] # filters if self.filters: for i, f in enumerate(self.filters): - items += [("Filter [%s]" % i, repr(f))] + items += [('Filter [%s]' % i, repr(f))] # compressor - items += [("Compressor", repr(self.compressor))] + items += [('Compressor', repr(self.compressor))] # synchronizer if self._synchronizer is not None: - items += [("Synchronizer type", typestr(self._synchronizer))] + items += [('Synchronizer type', typestr(self._synchronizer))] # storage info - items += [("Store type", typestr(self._store))] + items += [('Store type', typestr(self._store))] if self._chunk_store is not None: - items += [("Chunk store type", typestr(self._chunk_store))] - items += [("No. 
bytes", bytestr(self.nbytes))] + items += [('Chunk store type', typestr(self._chunk_store))] + items += [('No. bytes', bytestr(self.nbytes))] if self.nbytes_stored > 0: items += [ - ("No. bytes stored", bytestr(self.nbytes_stored)), - ("Storage ratio", "%.1f" % (self.nbytes / self.nbytes_stored)), + ('No. bytes stored', bytestr(self.nbytes_stored)), + ('Storage ratio', '%.1f' % (self.nbytes / self.nbytes_stored)), ] items += [ - ( - "Chunks initialized", - "{}/{}".format(self.nchunks_initialized, self.nchunks), - ) + ('Chunks initialized', '{}/{}'.format(self.nchunks_initialized, self.nchunks)) ] return items @@ -2245,20 +2187,13 @@ def hexdigest(self, hashname="sha1"): # This is a bytes object on Python 3 and we want a str. if type(checksum) is not str: - checksum = checksum.decode("utf8") + checksum = checksum.decode('utf8') return checksum def __getstate__(self): - return ( - self._store, - self._path, - self._read_only, - self._chunk_store, - self._synchronizer, - self._cache_metadata, - self._attrs.cache, - ) + return (self._store, self._path, self._read_only, self._chunk_store, + self._synchronizer, self._cache_metadata, self._attrs.cache) def __setstate__(self, state): self.__init__(*state) @@ -2329,7 +2264,8 @@ def _resize_nosync(self, *args): # determine the new number and arrangement of chunks chunks = self._chunks - new_cdata_shape = tuple(math.ceil(s / c) for s, c in zip(new_shape, chunks)) + new_cdata_shape = tuple(math.ceil(s / c) + for s, c in zip(new_shape, chunks)) # remove any chunks not within range chunk_store = self.chunk_store @@ -2384,18 +2320,18 @@ def append(self, data, axis=0): def _append_nosync(self, data, axis=0): # ensure data is array-like - if not hasattr(data, "shape"): + if not hasattr(data, 'shape'): data = np.asanyarray(data) # ensure shapes are compatible for non-append dimensions - self_shape_preserved = tuple(s for i, s in enumerate(self._shape) if i != axis) - data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis) + self_shape_preserved = tuple(s for i, s in enumerate(self._shape) + if i != axis) + data_shape_preserved = tuple(s for i, s in enumerate(data.shape) + if i != axis) if self_shape_preserved != data_shape_preserved: - raise ValueError( - "shape of data to append is not compatible with the array; " - "all dimensions must match except for the dimension being " - "appended" - ) + raise ValueError('shape of data to append is not compatible with the array; ' + 'all dimensions must match except for the dimension being ' + 'appended') # remember old shape old_shape = self._shape @@ -2419,16 +2355,9 @@ def _append_nosync(self, data, axis=0): return new_shape - def view( - self, - shape=None, - chunks=None, - dtype=None, - fill_value=None, - filters=None, - read_only=None, - synchronizer=None, - ): + def view(self, shape=None, chunks=None, dtype=None, + fill_value=None, filters=None, read_only=None, + synchronizer=None): """Return an array sharing the same data. 
Parameters @@ -2543,14 +2472,8 @@ def view( read_only = self._read_only if synchronizer is None: synchronizer = self._synchronizer - a = Array( - store=store, - path=path, - chunk_store=chunk_store, - read_only=read_only, - synchronizer=synchronizer, - cache_metadata=True, - ) + a = Array(store=store, path=path, chunk_store=chunk_store, read_only=read_only, + synchronizer=synchronizer, cache_metadata=True) a._is_view = True # allow override of some properties From 5a42221530035dc8b6940cf5ac615aa66c389894 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 20:22:57 +0200 Subject: [PATCH 12/13] Don't raise on setting items on missing chunks --- zarr/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 3b4b3de7d4..79dc677052 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1587,8 +1587,6 @@ def _set_basic_selection_zd(self, selection, value, fields=None): cdata = self.chunk_store[ckey] except KeyError: - if not self._fill_missing_chunk: - raise # chunk not initialized chunk = np.zeros((), dtype=self._dtype) if self._fill_value is not None: From 0a33f442d386f3d04d4123ea4e4f716f4bcb5477 Mon Sep 17 00:00:00 2001 From: Willi Rath Date: Wed, 21 Apr 2021 20:28:06 +0200 Subject: [PATCH 13/13] Add test for zero-dim array --- zarr/tests/test_missing.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_missing.py b/zarr/tests/test_missing.py index 3b3d49ad4b..7c6ed4fd47 100644 --- a/zarr/tests/test_missing.py +++ b/zarr/tests/test_missing.py @@ -3,7 +3,23 @@ class TestArrayMissingKeys(unittest.TestCase): - def test_raises_on_missing_key(self): + def test_raises_on_missing_key_zd(self): + a = array(1, chunks=1) + + # pop first chunk + a.chunk_store.pop("0") + + # read from missing chunk and make sure fill-value is returned + assert a.fill_value == a[()] + + # configure raise on missing chunk + a.set_options(fill_missing_chunk=False) + + # reading missing chunk should raise + with self.assertRaises(KeyError): + a[()] + + def test_raises_on_missing_key_1d(self): a = array(range(4), chunks=2) # pop first chunk
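Taken together, the series adds a single opt-in switch, `Array.set_options(fill_missing_chunk=...)`. By default (`True`) a chunk key that is absent from the store is treated as uninitialized and reads back as `fill_value`; with `False` the store's `KeyError` is re-raised, so lost or never-written chunks become visible to the caller instead of being silently filled. The write paths are deliberately exempt (patches 05 and 12), so assignment can still (re-)create a missing chunk. A minimal end-to-end sketch of the behaviour the tests above exercise, assuming the final state of this branch (`set_options` is the only new public API here; everything else is existing zarr usage):

    import zarr

    a = zarr.array(range(4), chunks=2)

    # Simulate a lost chunk by removing its key from the store.
    a.chunk_store.pop("0")

    # Default behaviour: the missing chunk reads back as fill_value.
    assert a[0] == a.fill_value
    assert a[2] == 2                    # chunk "1" is still present

    # Strict behaviour: surface the missing chunk to the caller.
    a.set_options(fill_missing_chunk=False)
    try:
        a[0]
    except KeyError:
        print("chunk '0' is missing")

    # Writing is unaffected: assigning into the missing chunk
    # re-creates it, after which reads of it succeed again.
    a[0] = 42
    assert a[0] == 42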
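Zero-dimensional arrays go through a separate code path (`_get_basic_selection_zd`), which patch 01 guards as well and patch 13 tests. The same sketch for the 0-d case, under the same assumptions:

    import zarr

    z = zarr.array(1, chunks=1)
    z.chunk_store.pop("0")

    assert z[()] == z.fill_value        # default: filled on read

    z.set_options(fill_missing_chunk=False)
    try:
        z[()]                           # strict: the KeyError propagates
    except KeyError:
        print("0-d chunk is missing")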