
Bump Numcodecs requirement to 0.6.1 #347

Closed
wants to merge 21 commits into from
Changes from all commits (21 commits)
7eed366
Bump Numcodecs requirement to 0.6.1
jakirkham Nov 30, 2018
2552f62
Assert MsgPack round-trips bytes objects correctly
jakirkham Nov 30, 2018
aee5ace
properly guard against removal of object codec
alimanfoo Dec 1, 2018
bf4eee8
Ensure `chunk` in `_decode_chunk` is an `ndarray`
jakirkham Dec 1, 2018
b741fe1
Reshape `chunk` ourselves since it is an `ndarray`
jakirkham Dec 1, 2018
f3144ae
Refactor `reshape` from `_decode_chunk`
jakirkham Dec 1, 2018
3e3920a
Consolidate type checks in `_decode_chunk`
jakirkham Dec 1, 2018
a61842b
Ensure `DictStore` uses `bytes` to store blobs
jakirkham Dec 2, 2018
0e05be0
Drop `test_getsize_ext`
jakirkham Dec 2, 2018
bbf783e
Change default store to `DictStore`
jakirkham Dec 2, 2018
b56c2dd
Update `DictStore` docs to note `Array` uses it
jakirkham Dec 2, 2018
cf781ad
Update `Array`'s `info` examples
jakirkham Dec 2, 2018
39e3ab8
Drop `ensure_bytes` definition from `zarr.storage`
jakirkham Dec 2, 2018
cb14850
Drop import of `binary_type` in `zarr.storage`
jakirkham Dec 2, 2018
d299b1b
Take flattened array views to avoid some copies
jakirkham Dec 2, 2018
205fa16
Simplify `buffer_size` by using `ensure_ndarray`
jakirkham Dec 2, 2018
f6880b9
Test `getsize` for unknown size
jakirkham Dec 2, 2018
bcee828
Simplify `ensure_str` in `zarr.meta`
jakirkham Dec 2, 2018
dfa51f8
Drop unknown size cases from `DictStore`
jakirkham Dec 2, 2018
d0b8012
Cast datetime/timedelta arrays for buffer protocol
jakirkham Dec 2, 2018
0cf5e5b
Use `ensure_contiguous_ndarray` with stores
jakirkham Dec 2, 2018
8 changes: 4 additions & 4 deletions docs/tutorial.rst
@@ -176,7 +176,7 @@ print some diagnostics, e.g.::
Read-only : False
Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE,
: blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 3242241 (3.1M)
Storage ratio : 123.4
@@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor::
Read-only : False
Filter [0] : Delta(dtype='<i4')
Compressor : Blosc(cname='zstd', clevel=1, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 648605 (633.4K)
Storage ratio : 616.7
@@ -1181,7 +1181,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : C
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 6696010 (6.4M)
Storage ratio : 59.7
@@ -1195,7 +1195,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : F
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 4684636 (4.5M)
Storage ratio : 85.4
2 changes: 1 addition & 1 deletion requirements_dev.txt
@@ -1,3 +1,3 @@
asciitree==0.3.3
fasteners==0.14.1
numcodecs==0.5.5
numcodecs==0.6.1
2 changes: 1 addition & 1 deletion setup.py
@@ -26,7 +26,7 @@
'asciitree',
'numpy>=1.7',
'fasteners',
'numcodecs>=0.5.3',
'numcodecs>=0.6.1',
],
package_dir={'': '.'},
packages=['zarr', 'zarr.tests'],
33 changes: 19 additions & 14 deletions zarr/core.py
@@ -8,6 +8,7 @@


import numpy as np
from numcodecs.compat import ensure_ndarray


from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
@@ -1743,18 +1744,22 @@ def _decode_chunk(self, cdata):
for f in self._filters[::-1]:
chunk = f.decode(chunk)

# view as correct dtype
if self._dtype == object:
if isinstance(chunk, np.ndarray):
chunk = chunk.astype(self._dtype)
else:
raise RuntimeError('cannot read object array without object codec')
elif isinstance(chunk, np.ndarray):
# view as numpy array with correct dtype
chunk = ensure_ndarray(chunk)
# special case object dtype, because incorrect handling can lead to
# segfaults and other bad things happening
if self._dtype != object:
chunk = chunk.view(self._dtype)
else:
chunk = np.frombuffer(chunk, dtype=self._dtype)

# reshape
elif chunk.dtype != object:
# If we end up here, someone must have hacked around with the filters.
# We cannot deal with object arrays unless there is an object
# codec in the filter chain, i.e., a filter that converts from object
# array to something else during encoding, and converts back to object
# array during decoding.
raise RuntimeError('cannot read object array without object codec')

# ensure correct chunk shape
chunk = chunk.reshape(-1, order='A')
chunk = chunk.reshape(self._chunks, order=self._order)

return chunk
@@ -1806,10 +1811,10 @@ def info(self):
Order : C
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 4000000 (3.8M)
No. bytes stored : ...
Storage ratio : ...
No. bytes stored : 320
Storage ratio : 12500.0
Chunks initialized : 0/10

"""
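For reference, a minimal sketch of the decode path this hunk converges on (the function name `decode_chunk_sketch` is illustrative, not zarr API): the decompressed buffer is wrapped with numcodecs' `ensure_ndarray`, viewed as the target dtype unless the dtype is object, and then reshaped via a flat view.

```python
import numpy as np
from numcodecs.compat import ensure_ndarray

def decode_chunk_sketch(buf, dtype, chunks, order='C'):
    # Wrap whatever decompression/filters returned (bytes, memoryview,
    # ndarray, ...) as an ndarray, without copying where possible.
    chunk = ensure_ndarray(buf)
    # Object dtype is special-cased: it is only valid if an object codec
    # in the filter chain already produced an object array.
    if dtype != object:
        chunk = chunk.view(dtype)
    elif chunk.dtype != object:
        raise RuntimeError('cannot read object array without object codec')
    # Flatten in memory order, then reshape to the chunk shape.
    chunk = chunk.reshape(-1, order='A')
    chunk = chunk.reshape(chunks, order=order)
    return chunk
```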
4 changes: 2 additions & 2 deletions zarr/creation.py
@@ -7,7 +7,7 @@


from zarr.core import Array
from zarr.storage import (DirectoryStore, init_array, contains_array, contains_group,
from zarr.storage import (DictStore, DirectoryStore, init_array, contains_array, contains_group,
default_compressor, normalize_storage_path, ZipStore)
from numcodecs.registry import codec_registry
from zarr.errors import err_contains_array, err_contains_group, err_array_not_found
Expand Down Expand Up @@ -125,7 +125,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
return z


def normalize_store_arg(store, clobber=False, default=dict):
def normalize_store_arg(store, clobber=False, default=DictStore):
if store is None:
return default()
elif isinstance(store, str):
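The practical effect of changing `normalize_store_arg`'s default is that arrays created without an explicit store now get a `DictStore` rather than a plain `dict`. An illustrative session:

```python
import zarr

# No store argument: the default in-memory store is now DictStore
z = zarr.zeros(100, chunks=10)
print(type(z.store))   # <class 'zarr.storage.DictStore'> (was <class 'dict'>)
```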
14 changes: 5 additions & 9 deletions zarr/meta.py
@@ -5,24 +5,20 @@


import numpy as np
from numcodecs.compat import ensure_bytes


from zarr.compat import PY2, binary_type, Mapping
from zarr.compat import PY2, Mapping
from zarr.errors import MetadataError


ZARR_FORMAT = 2


def ensure_str(s):
if PY2: # pragma: py3 no cover
# noinspection PyUnresolvedReferences
if isinstance(s, buffer): # noqa
s = str(s)
else: # pragma: py2 no cover
if isinstance(s, memoryview):
s = s.tobytes()
if isinstance(s, binary_type):
if not isinstance(s, str):
s = ensure_bytes(s)
if not PY2: # pragma: py2 no cover
s = s.decode('ascii')
return s

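A rough illustration of what the simplified `ensure_str` does on Python 3 (behaviour inferred from the hunk above; `ensure_str_sketch` and the sample inputs are illustrative): metadata blobs may come back from a store as bytes, memoryview or an ndarray, and are normalised to `str` via numcodecs' `ensure_bytes` before JSON decoding.

```python
from numcodecs.compat import ensure_bytes

def ensure_str_sketch(s):
    # Python 3 variant of the helper above: coerce any buffer-like value
    # to bytes, then decode; str values pass through untouched.
    if not isinstance(s, str):
        s = ensure_bytes(s).decode('ascii')
    return s

print(ensure_str_sketch(b'{"zarr_format": 2}'))              # '{"zarr_format": 2}'
print(ensure_str_sketch(memoryview(b'{"zarr_format": 2}')))  # '{"zarr_format": 2}'
```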
47 changes: 11 additions & 36 deletions zarr/storage.py
@@ -31,15 +31,13 @@
import warnings


import numpy as np


from zarr.util import (normalize_shape, normalize_chunks, normalize_order,
normalize_storage_path, buffer_size,
normalize_fill_value, nolock, normalize_dtype)
from zarr.meta import encode_array_metadata, encode_group_metadata
from zarr.compat import PY2, binary_type, OrderedDict_move_to_end
from zarr.compat import PY2, OrderedDict_move_to_end
from numcodecs.registry import codec_registry
from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray
from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
err_fspath_exists_notdir, err_read_only, MetadataError)

@@ -444,23 +442,6 @@ def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None):
store[key] = encode_group_metadata(meta)


def ensure_bytes(s):
if isinstance(s, binary_type):
return s
if isinstance(s, np.ndarray):
if PY2: # pragma: py3 no cover
# noinspection PyArgumentList
return s.tostring(order='A')
else: # pragma: py2 no cover
# noinspection PyArgumentList
return s.tobytes(order='A')
if hasattr(s, 'tobytes'):
return s.tobytes()
if PY2 and hasattr(s, 'tostring'): # pragma: py3 no cover
return s.tostring()
return memoryview(s).tobytes()


def _dict_store_keys(d, prefix='', cls=dict):
for k in d.keys():
v = d[k]
@@ -484,12 +465,11 @@ class DictStore(MutableMapping):
>>> type(g.store)
<class 'zarr.storage.DictStore'>

Note that the default class when creating an array is the built-in
:class:`dict` class, i.e.::
Also this is the default class when creating an array. E.g.::

>>> z = zarr.zeros(100)
>>> type(z.store)
<class 'dict'>
<class 'zarr.storage.DictStore'>

Notes
-----
@@ -554,6 +534,8 @@ def __getitem__(self, item):
def __setitem__(self, item, value):
with self.write_mutex:
parent, key = self._require_parent(item)
if not isinstance(value, self.cls):
value = ensure_bytes(value)
parent[key] = value

def __delitem__(self, item):
@@ -652,17 +634,11 @@ def getsize(self, path=None):
size = 0
for v in value.values():
if not isinstance(v, self.cls):
try:
size += buffer_size(v)
except TypeError:
return -1
size += buffer_size(v)
return size

else:
try:
return buffer_size(value)
except TypeError:
return -1
return buffer_size(value)

def clear(self):
with self.write_mutex:
@@ -741,9 +717,8 @@ def __getitem__(self, key):

def __setitem__(self, key, value):

# handle F-contiguous numpy arrays
if isinstance(value, np.ndarray) and value.flags.f_contiguous:
value = ensure_bytes(value)
# coerce to flat, contiguous array (ideally without copying)
value = ensure_contiguous_ndarray(value)

# destination path for key
file_path = os.path.join(self.path, key)
@@ -1192,7 +1167,7 @@ def __getitem__(self, key):
def __setitem__(self, key, value):
if self.mode == 'r':
err_read_only()
value = ensure_bytes(value)
value = ensure_contiguous_ndarray(value)
with self.mutex:
self.zf.writestr(key, value)

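As a rough guide to what `ensure_contiguous_ndarray` buys the stores (details as of numcodecs 0.6.x; treat the specifics as assumptions): it accepts bytes-like values and C- or F-contiguous ndarrays and exposes them as a contiguous view over the same memory, so `DirectoryStore` and `ZipStore` can write values without forcing an intermediate `bytes` copy.

```python
import numpy as np
from numcodecs.compat import ensure_contiguous_ndarray

# bytes-like values become an array viewing the same memory
buf = ensure_contiguous_ndarray(b'spam')
print(buf.nbytes)          # 4

# contiguous ndarrays (C or F order) are accepted directly, so no
# tobytes() copy is needed before writing to disk or a zip entry
a = np.asfortranarray(np.arange(10, dtype='i4').reshape(2, 5))
print(ensure_contiguous_ndarray(a).nbytes)   # 40
```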
2 changes: 1 addition & 1 deletion zarr/tests/test_core.py
@@ -982,7 +982,7 @@ def test_object_arrays(self):
z[0] = 'foo'
assert z[0] == 'foo'
z[1] = b'bar'
assert z[1] == 'bar' # msgpack gets this wrong
assert z[1] == b'bar'
z[2] = 1
assert z[2] == 1
z[3] = [2, 4, 6, 'baz']
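The test change reflects the MsgPack fix picked up with numcodecs 0.6.1: bytes stored in an object array now round-trip as bytes instead of being coerced to str. An illustrative session (assuming the `object_codec` keyword used by zarr's object-array tests):

```python
import zarr
from numcodecs import MsgPack

z = zarr.create(shape=10, chunks=3, dtype=object, object_codec=MsgPack())
z[1] = b'bar'
print(z[1] == b'bar')   # True with numcodecs >= 0.6.1; older msgpack decoded this to 'bar'
```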
12 changes: 4 additions & 8 deletions zarr/tests/test_storage.py
@@ -633,14 +633,6 @@ def test_setdel(self):
store = self.create_store()
setdel_hierarchy_checks(store)

def test_getsize_ext(self):
store = self.create_store()
store['a'] = list(range(10))
store['b/c'] = list(range(10))
assert -1 == store.getsize()
assert -1 == store.getsize('a')
assert -1 == store.getsize('b')


class TestDirectoryStore(StoreTests, unittest.TestCase):

@@ -1096,6 +1088,10 @@ def test_getsize():
assert 7 == getsize(store)
assert 5 == getsize(store, 'baz')

store = dict()
store['boo'] = None
assert -1 == getsize(store)


def test_migrate_1to2():
from zarr import meta_v1
16 changes: 3 additions & 13 deletions zarr/util.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
import operator
from textwrap import TextWrapper, dedent
import numbers
import uuid
@@ -10,10 +9,11 @@
from asciitree import BoxStyle, LeftAligned
from asciitree.traversal import Traversal
import numpy as np
from numcodecs.compat import ensure_ndarray
from numcodecs.registry import codec_registry


from zarr.compat import PY2, reduce, text_type, binary_type
from zarr.compat import PY2, text_type, binary_type


# codecs to use for object dtype convenience API
@@ -314,17 +314,7 @@ def normalize_storage_path(path):


def buffer_size(v):
from array import array as _stdlib_array
if PY2 and isinstance(v, _stdlib_array): # pragma: py3 no cover
# special case array.array because does not support buffer
# interface in PY2
return v.buffer_info()[1] * v.itemsize
else: # pragma: py2 no cover
v = memoryview(v)
if v.shape:
return reduce(operator.mul, v.shape) * v.itemsize
else:
return v.itemsize
return ensure_ndarray(v).nbytes


def info_text_report(items):
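A quick sanity check of the simplified helper: `ensure_ndarray` wraps any buffer-exporting object as an ndarray view, so `nbytes` gives the payload size directly (values shown assume standard NumPy itemsizes).

```python
import numpy as np
from numcodecs.compat import ensure_ndarray

def buffer_size(v):
    # Size in bytes of the memory behind any buffer-like object.
    return ensure_ndarray(v).nbytes

print(buffer_size(b'spam'))                         # 4
print(buffer_size(bytearray(100)))                  # 100
print(buffer_size(np.zeros((10, 10), dtype='f8')))  # 800
```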