diff --git a/docs/release.rst b/docs/release.rst
index 5cdfc96055..215f4f5993 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -124,6 +124,9 @@ Enhancements
 * Ensure contiguous data using ``astype``.
   By :user:`John Kirkham <jakirkham>`; :issue:`513`.
 
+* Add ``meta_array`` argument to ``Array`` and ``Group``.
+  By :user:`John Kirkham <jakirkham>`; :issue:`934`.
+
 * Refactor out ``_tofile``/``_fromfile`` from ``DirectoryStore``.
   By :user:`John Kirkham <jakirkham>`; :issue:`503`.
 
diff --git a/zarr/core.py b/zarr/core.py
index f8e5834070..16c5e8bdcd 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -76,6 +76,9 @@ class Array:
         read and decompressed when possible.
 
         .. versionadded:: 2.7
+    meta_array : array, optional
+        An array instance to use for determining arrays to create and return
+        to users.
 
     Attributes
     ----------
@@ -137,6 +140,7 @@ def __init__(
         cache_metadata=True,
         cache_attrs=True,
         partial_decompress=False,
+        meta_array=None
     ):
         # N.B., expect at this point store is fully initialized with all
         # configuration metadata fully specified and normalized
@@ -148,6 +152,10 @@ def __init__(
             self._key_prefix = self._path + '/'
         else:
             self._key_prefix = ''
+        if meta_array is not None:
+            self._meta_array = np.empty_like(meta_array)
+        else:
+            self._meta_array = np.empty(())
         self._read_only = bool(read_only)
         self._synchronizer = synchronizer
         self._cache_metadata = cache_metadata
@@ -746,7 +754,7 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None):
 
         except KeyError:
             # chunk not initialized
-            chunk = np.zeros((), dtype=self._dtype)
+            chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
             if self._fill_value is not None:
                 chunk.fill(self._fill_value)
 
@@ -1050,7 +1058,8 @@ def _get_selection(self, indexer, out=None, fields=None):
 
         # setup output array
         if out is None:
-            out = np.empty(out_shape, dtype=out_dtype, order=self._order)
+            out = np.empty_like(self._meta_array, shape=out_shape,
+                                dtype=out_dtype, order=self._order)
         else:
             check_array_shape('out', out, out_shape)
 
@@ -1516,7 +1525,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None):
 
         except KeyError:
             # chunk not initialized
-            chunk = np.zeros((), dtype=self._dtype)
+            chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
             if self._fill_value is not None:
                 chunk.fill(self._fill_value)
 
@@ -1746,6 +1755,23 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
         self._process_chunk(out, cdata, chunk_selection, drop_axes,
                             out_is_ndarray, fields, out_selection)
 
+        if (out_is_ndarray and
+                not fields and
+                is_contiguous_selection(out_selection) and
+                is_total_slice(chunk_selection, self._chunks) and
+                not self._filters and
+                self._dtype != object):
+
+            dest = out[out_selection]
+            write_direct = (
+                getattr(getattr(dest, "flags", None), "writeable", True) and (
+                    (self._order == 'C' and dest.flags.c_contiguous) or
+                    (self._order == 'F' and dest.flags.f_contiguous)
+                )
+            )
+
+            if write_direct:
+
     def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection,
                         drop_axes=None, fields=None):
         """As _chunk_getitem, but for lists of chunks
@@ -1848,7 +1874,8 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
         if is_scalar(value, self._dtype):
 
             # setup array filled with value
-            chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
+            chunk = np.empty_like(self._meta_array, shape=self._chunks,
+                                  dtype=self._dtype, order=self._order)
             chunk.fill(value)
 
         else:
@@ -1868,20 +1895,22 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
 
                 # chunk not initialized
                 if self._fill_value is not None:
-                    chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
+                    chunk = np.empty_like(self._meta_array, shape=self._chunks,
+                                          dtype=self._dtype, order=self._order)
                     chunk.fill(self._fill_value)
                 elif self._dtype == object:
                     chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
                 else:
                     # N.B., use zeros here so any region beyond the array has consistent
                     # and compressible data
-                    chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order)
+                    chunk = np.zeros_like(self._meta_array, shape=self._chunks,
+                                          dtype=self._dtype, order=self._order)
 
             else:
 
                 # decode chunk
                 chunk = self._decode_chunk(cdata)
-                if not chunk.flags.writeable:
+                if not getattr(getattr(chunk, "flags", None), "writeable", True):
                     chunk = chunk.copy(order='K')
 
             # modify
@@ -2113,7 +2142,8 @@ def hexdigest(self, hashname="sha1"):
 
     def __getstate__(self):
         return (self._store, self._path, self._read_only, self._chunk_store,
-                self._synchronizer, self._cache_metadata, self._attrs.cache)
+                self._synchronizer, self._cache_metadata, self._attrs.cache,
+                self._meta_array)
 
     def __setstate__(self, state):
         self.__init__(*state)
diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py
index 39dc82c724..7ee58d69bc 100644
--- a/zarr/hierarchy.py
+++ b/zarr/hierarchy.py
@@ -44,6 +44,8 @@ class Group(MutableMapping):
         to all attribute read operations.
     synchronizer : object, optional
         Array synchronizer.
+    meta_array : array, optional
+        An array to base allocations off of.
 
     Attributes
     ----------
@@ -95,7 +97,7 @@ class Group(MutableMapping):
     """
 
     def __init__(self, store, path=None, read_only=False, chunk_store=None,
-                 cache_attrs=True, synchronizer=None):
+                 cache_attrs=True, synchronizer=None, meta_array=None):
         self._store = store
         self._chunk_store = chunk_store
         self._path = normalize_storage_path(path)
@@ -105,6 +107,10 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
             self._key_prefix = ''
         self._read_only = read_only
         self._synchronizer = synchronizer
+        if meta_array is not None:
+            self._meta_array = np.empty_like(meta_array)
+        else:
+            self._meta_array = np.empty(())
 
         # guard conditions
         if contains_array(store, path=self._path):
@@ -282,7 +288,7 @@ def typestr(o):
 
     def __getstate__(self):
         return (self._store, self._path, self._read_only, self._chunk_store,
-                self.attrs.cache, self._synchronizer)
+                self.attrs.cache, self._synchronizer, self._meta_array)
 
     def __setstate__(self, state):
         self.__init__(*state)
@@ -340,11 +346,13 @@ def __getitem__(self, item):
         if contains_array(self._store, path):
             return Array(self._store, read_only=self._read_only, path=path,
                          chunk_store=self._chunk_store,
-                         synchronizer=self._synchronizer, cache_attrs=self.attrs.cache)
+                         synchronizer=self._synchronizer, cache_attrs=self.attrs.cache,
+                         meta_array=self._meta_array)
         elif contains_group(self._store, path):
             return Group(self._store, read_only=self._read_only, path=path,
                          chunk_store=self._chunk_store, cache_attrs=self.attrs.cache,
-                         synchronizer=self._synchronizer)
+                         synchronizer=self._synchronizer,
+                         meta_array=self._meta_array)
         else:
             raise KeyError(item)
 
@@ -863,7 +871,8 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
             cache_attrs = kwargs.get('cache_attrs', self.attrs.cache)
             a = Array(self._store, path=path, read_only=self._read_only,
                       chunk_store=self._chunk_store, synchronizer=synchronizer,
-                      cache_metadata=cache_metadata, cache_attrs=cache_attrs)
+                      cache_metadata=cache_metadata, cache_attrs=cache_attrs,
+                      meta_array=self._meta_array)
             shape = normalize_shape(shape)
             if shape != a.shape:
                 raise TypeError('shape do not match existing array; expected {}, got {}'