Skip to content

WIP: Provide a meta array to change array type returned #501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
3 changes: 3 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ Enhancements
* Ensure contiguous data using ``astype``.
By :user:`John Kirkham <jakirkham>`; :issue:`513`.

* Add a ``meta_array`` option to ``Array`` and ``Group`` so the array type
  used for chunk buffers and returned selections can be customized.
  By :user:`John Kirkham <jakirkham>`; :issue:`501`.

* Refactor out ``_tofile``/``_fromfile`` from ``DirectoryStore``.
By :user:`John Kirkham <jakirkham>`; :issue:`503`.

Expand Down
46 changes: 38 additions & 8 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class Array:
read and decompressed when possible.

.. versionadded:: 2.7
meta_array : array, optional
An array instance whose type determines the type of the arrays that are
allocated internally and returned to users (defaults to NumPy).

Attributes
----------
Expand Down Expand Up @@ -137,6 +140,7 @@ def __init__(
cache_metadata=True,
cache_attrs=True,
partial_decompress=False,
meta_array=None
):
# N.B., expect at this point store is fully initialized with all
# configuration metadata fully specified and normalized
Expand All @@ -148,6 +152,10 @@ def __init__(
self._key_prefix = self._path + '/'
else:
self._key_prefix = ''
if meta_array is not None:
self._meta_array = np.empty_like(meta_array)
else:
self._meta_array = np.empty(())
self._read_only = bool(read_only)
self._synchronizer = synchronizer
self._cache_metadata = cache_metadata
Expand Down Expand Up @@ -746,7 +754,7 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None):

except KeyError:
# chunk not initialized
chunk = np.zeros((), dtype=self._dtype)
chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
if self._fill_value is not None:
chunk.fill(self._fill_value)

Expand Down Expand Up @@ -1050,7 +1058,8 @@ def _get_selection(self, indexer, out=None, fields=None):

# setup output array
if out is None:
out = np.empty(out_shape, dtype=out_dtype, order=self._order)
out = np.empty_like(self._meta_array, shape=out_shape,
dtype=out_dtype, order=self._order)
else:
check_array_shape('out', out, out_shape)

Expand Down Expand Up @@ -1516,7 +1525,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None):

except KeyError:
# chunk not initialized
chunk = np.zeros((), dtype=self._dtype)
chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
if self._fill_value is not None:
chunk.fill(self._fill_value)

Expand Down Expand Up @@ -1746,6 +1755,23 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
self._process_chunk(out, cdata, chunk_selection, drop_axes,
out_is_ndarray, fields, out_selection)

if (out_is_ndarray and
not fields and
is_contiguous_selection(out_selection) and
is_total_slice(chunk_selection, self._chunks) and
not self._filters and
self._dtype != object):

dest = out[out_selection]
write_direct = (
getattr(getattr(dest, "flags", None), "writeable", True) and (
(self._order == 'C' and dest.flags.c_contiguous) or
(self._order == 'F' and dest.flags.f_contiguous)
)
)

if write_direct:

def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection,
drop_axes=None, fields=None):
"""As _chunk_getitem, but for lists of chunks
Expand Down Expand Up @@ -1848,7 +1874,8 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
if is_scalar(value, self._dtype):

# setup array filled with value
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
chunk = np.empty_like(self._meta_array, shape=self._chunks,
dtype=self._dtype, order=self._order)
chunk.fill(value)

else:
Expand All @@ -1868,20 +1895,22 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):

# chunk not initialized
if self._fill_value is not None:
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
chunk = np.empty_like(self._meta_array, shape=self._chunks,
dtype=self._dtype, order=self._order)
chunk.fill(self._fill_value)
elif self._dtype == object:
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
else:
# N.B., use zeros here so any region beyond the array has consistent
# and compressible data
chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order)
chunk = np.zeros_like(self._meta_array, shape=self._chunks,
dtype=self._dtype, order=self._order)

else:

# decode chunk
chunk = self._decode_chunk(cdata)
if not chunk.flags.writeable:
if not getattr(getattr(chunk, "flags", None), "writeable", True):
chunk = chunk.copy(order='K')

# modify
Expand Down Expand Up @@ -2113,7 +2142,8 @@ def hexdigest(self, hashname="sha1"):

def __getstate__(self):
    # Pickle support: capture the constructor arguments that
    # ``__setstate__`` feeds back into ``__init__`` to rebuild the array.
    # NOTE(review): ``self._meta_array`` is appended as the 8th element,
    # but ``__init__``'s 8th positional parameter (after ``cache_attrs``)
    # is ``partial_decompress`` — a plain positional unpack of this tuple
    # would bind the meta array to ``partial_decompress``.  Confirm that
    # ``__setstate__`` passes the last element by keyword.
    return (self._store, self._path, self._read_only, self._chunk_store,
            self._synchronizer, self._cache_metadata, self._attrs.cache,
            self._meta_array)

def __setstate__(self, state):
self.__init__(*state)
Expand Down
19 changes: 14 additions & 5 deletions zarr/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class Group(MutableMapping):
to all attribute read operations.
synchronizer : object, optional
Array synchronizer.
meta_array : array, optional
An array instance whose type determines the type of the arrays that are
allocated internally and returned to users (defaults to NumPy).

Attributes
----------
Expand Down Expand Up @@ -95,7 +97,7 @@ class Group(MutableMapping):
"""

def __init__(self, store, path=None, read_only=False, chunk_store=None,
cache_attrs=True, synchronizer=None):
cache_attrs=True, synchronizer=None, meta_array=None):
self._store = store
self._chunk_store = chunk_store
self._path = normalize_storage_path(path)
Expand All @@ -105,6 +107,10 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
self._key_prefix = ''
self._read_only = read_only
self._synchronizer = synchronizer
if meta_array is not None:
self._meta_array = np.empty_like(meta_array)
else:
self._meta_array = np.empty(())

# guard conditions
if contains_array(store, path=self._path):
Expand Down Expand Up @@ -282,7 +288,7 @@ def typestr(o):

def __getstate__(self):
    """Gather the constructor arguments needed to re-create this group
    when unpickling; consumed positionally by ``__setstate__``."""
    state = (
        self._store,
        self._path,
        self._read_only,
        self._chunk_store,
        self.attrs.cache,
        self._synchronizer,
        self._meta_array,
    )
    return state

def __setstate__(self, state):
self.__init__(*state)
Expand Down Expand Up @@ -340,11 +346,13 @@ def __getitem__(self, item):
if contains_array(self._store, path):
return Array(self._store, read_only=self._read_only, path=path,
chunk_store=self._chunk_store,
synchronizer=self._synchronizer, cache_attrs=self.attrs.cache)
synchronizer=self._synchronizer, cache_attrs=self.attrs.cache,
meta_array=self._meta_array)
elif contains_group(self._store, path):
return Group(self._store, read_only=self._read_only, path=path,
chunk_store=self._chunk_store, cache_attrs=self.attrs.cache,
synchronizer=self._synchronizer)
synchronizer=self._synchronizer,
meta_array=self._meta_array)
else:
raise KeyError(item)

Expand Down Expand Up @@ -863,7 +871,8 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
cache_attrs = kwargs.get('cache_attrs', self.attrs.cache)
a = Array(self._store, path=path, read_only=self._read_only,
chunk_store=self._chunk_store, synchronizer=synchronizer,
cache_metadata=cache_metadata, cache_attrs=cache_attrs)
cache_metadata=cache_metadata, cache_attrs=cache_attrs,
meta_array=self._meta_array)
shape = normalize_shape(shape)
if shape != a.shape:
raise TypeError('shape do not match existing array; expected {}, got {}'
Expand Down