Skip to content

WIP: Make DictStore the default Array store #351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
8 changes: 4 additions & 4 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ print some diagnostics, e.g.::
Read-only : False
Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE,
: blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 3379344 (3.2M)
Storage ratio : 118.4
Expand Down Expand Up @@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor::
Read-only : False
Filter [0] : Delta(dtype='<i4')
Compressor : Blosc(cname='zstd', clevel=1, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 648605 (633.4K)
Storage ratio : 616.7
Expand Down Expand Up @@ -1198,7 +1198,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : C
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 6696010 (6.4M)
Storage ratio : 59.7
Expand All @@ -1212,7 +1212,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : F
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 4684636 (4.5M)
Storage ratio : 85.4
Expand Down
13 changes: 6 additions & 7 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
# N.B., expect at this point store is fully initialized with all
# configuration metadata fully specified and normalized

if isinstance(store, dict):
raise TypeError("Please use Zarr's DictStore instead")

self._store = store
self._chunk_store = chunk_store
self._path = normalize_storage_path(path)
Expand Down Expand Up @@ -1778,10 +1781,6 @@ def _encode_chunk(self, chunk):
else:
cdata = chunk

# ensure in-memory data is immutable and easy to compare
if isinstance(self.chunk_store, dict):
cdata = ensure_bytes(cdata)

return cdata

def __repr__(self):
Expand Down Expand Up @@ -1812,10 +1811,10 @@ def info(self):
Order : C
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.DictStore
No. bytes : 4000000 (3.8M)
No. bytes stored : ...
Storage ratio : ...
No. bytes stored : 320
Storage ratio : 12500.0
Chunks initialized : 0/10

"""
Expand Down
6 changes: 3 additions & 3 deletions zarr/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


from zarr.core import Array
from zarr.storage import (DirectoryStore, init_array, contains_array, contains_group,
from zarr.storage import (DictStore, DirectoryStore, init_array, contains_array, contains_group,
default_compressor, normalize_storage_path, ZipStore)
from numcodecs.registry import codec_registry
from zarr.errors import err_contains_array, err_contains_group, err_array_not_found
Expand Down Expand Up @@ -98,7 +98,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
Example with some filters, and also storing chunks separately from metadata::

>>> from numcodecs import Quantize, Adler32
>>> store, chunk_store = dict(), dict()
>>> store, chunk_store = DictStore(), DictStore()
>>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='f8',
... filters=[Quantize(digits=2, dtype='f8'), Adler32()],
... store=store, chunk_store=chunk_store)
Expand All @@ -125,7 +125,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
return z


def normalize_store_arg(store, clobber=False, default=dict):
def normalize_store_arg(store, clobber=False, default=DictStore):
if store is None:
return default()
elif isinstance(store, str):
Expand Down
3 changes: 3 additions & 0 deletions zarr/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ class Group(MutableMapping):

def __init__(self, store, path=None, read_only=False, chunk_store=None,
cache_attrs=True, synchronizer=None):
if isinstance(store, dict):
raise TypeError("Please use Zarr's DictStore instead")

self._store = store
self._chunk_store = chunk_store
self._path = normalize_storage_path(path)
Expand Down
5 changes: 2 additions & 3 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,12 +476,11 @@ class DictStore(MutableMapping):
>>> type(g.store)
<class 'zarr.storage.DictStore'>

Note that the default class when creating an array is the built-in
:class:`dict` class, i.e.::
This is also the default class when creating an array, e.g.::

>>> z = zarr.zeros(100)
>>> type(z.store)
<class 'dict'>
<class 'zarr.storage.DictStore'>

Notes
-----
Expand Down
62 changes: 37 additions & 25 deletions zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pytest


from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore,
from zarr.storage import (DictStore, DirectoryStore, init_array, init_group, NestedDirectoryStore,
DBMStore, LMDBStore, SQLiteStore, atexit_rmtree, atexit_rmglob,
LRUStoreCache)
from zarr.core import Array
Expand All @@ -41,7 +41,7 @@ class TestArray(unittest.TestCase):
def test_array_init(self):

# normal initialization
store = dict()
store = DictStore()
init_array(store, shape=100, chunks=10)
a = Array(store)
assert isinstance(a, Array)
Expand All @@ -54,7 +54,7 @@ def test_array_init(self):
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()

# initialize at path
store = dict()
store = DictStore()
init_array(store, shape=100, chunks=10, path='foo/bar')
a = Array(store, path='foo/bar')
assert isinstance(a, Array)
Expand All @@ -67,18 +67,18 @@ def test_array_init(self):
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()

# store not initialized
store = dict()
store = DictStore()
with pytest.raises(ValueError):
Array(store)

# group is in the way
store = dict()
store = DictStore()
init_group(store, path='baz')
with pytest.raises(ValueError):
Array(store, path='baz')

def create_array(self, read_only=False, **kwargs):
store = dict()
store = DictStore()
kwargs.setdefault('compressor', Zlib(level=1))
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
Expand Down Expand Up @@ -1255,7 +1255,7 @@ class TestArrayWithPath(TestArray):

@staticmethod
def create_array(read_only=False, **kwargs):
store = dict()
store = DictStore()
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
init_array(store, path='foo/bar', **kwargs)
Expand Down Expand Up @@ -1299,18 +1299,14 @@ def test_nbytes_stored(self):
if k.startswith('foo/bar/'))
assert expect_nbytes_stored == z.nbytes_stored

# mess with store
z.store[z._key_prefix + 'foo'] = list(range(10))
assert -1 == z.nbytes_stored


class TestArrayWithChunkStore(TestArray):

@staticmethod
def create_array(read_only=False, **kwargs):
store = dict()
store = DictStore()
# separate chunk store
chunk_store = dict()
chunk_store = DictStore()
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
init_array(store, chunk_store=chunk_store, **kwargs)
Expand Down Expand Up @@ -1353,10 +1349,6 @@ def test_nbytes_stored(self):
for v in z.chunk_store.values())
assert expect_nbytes_stored == z.nbytes_stored

# mess with store
z.chunk_store[z._key_prefix + 'foo'] = list(range(10))
assert -1 == z.nbytes_stored


class TestArrayWithDirectoryStore(TestArray):

Expand Down Expand Up @@ -1516,7 +1508,7 @@ def test_nbytes_stored(self):
class TestArrayWithNoCompressor(TestArray):

def create_array(self, read_only=False, **kwargs):
store = dict()
store = DictStore()
kwargs.setdefault('compressor', None)
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
Expand Down Expand Up @@ -1551,7 +1543,7 @@ def test_hexdigest(self):
class TestArrayWithBZ2Compressor(TestArray):

def create_array(self, read_only=False, **kwargs):
store = dict()
store = DictStore()
compressor = BZ2(level=1)
kwargs.setdefault('compressor', compressor)
cache_metadata = kwargs.pop('cache_metadata', True)
Expand Down Expand Up @@ -1587,7 +1579,7 @@ def test_hexdigest(self):
class TestArrayWithBloscCompressor(TestArray):

def create_array(self, read_only=False, **kwargs):
store = dict()
store = DictStore()
compressor = Blosc(cname='zstd', clevel=1, shuffle=1)
kwargs.setdefault('compressor', compressor)
cache_metadata = kwargs.pop('cache_metadata', True)
Expand Down Expand Up @@ -1630,7 +1622,7 @@ def test_hexdigest(self):
class TestArrayWithLZMACompressor(TestArray):

def create_array(self, read_only=False, **kwargs):
store = dict()
store = DictStore()
compressor = LZMA(preset=1)
kwargs.setdefault('compressor', compressor)
cache_metadata = kwargs.pop('cache_metadata', True)
Expand Down Expand Up @@ -1667,7 +1659,7 @@ class TestArrayWithFilters(TestArray):

@staticmethod
def create_array(read_only=False, **kwargs):
store = dict()
store = DictStore()
dtype = kwargs.get('dtype', None)
filters = [
Delta(dtype=dtype),
Expand Down Expand Up @@ -1710,7 +1702,7 @@ def test_astype_no_filters(self):
dtype = np.dtype(np.int8)
astype = np.dtype(np.float32)

store = dict()
store = DictStore()
init_array(store, shape=shape, chunks=10, dtype=dtype)

data = np.arange(np.prod(shape), dtype=dtype).reshape(shape)
Expand Down Expand Up @@ -1830,11 +1822,31 @@ def test_nbytes_stored(self):
assert -1 == z.nbytes_stored


class TestArrayWithCustomChunkStore(TestArray):
    """Run the ``TestArray`` suite with ``CustomMapping`` stores.

    Array metadata and chunk data are kept in two separate
    ``CustomMapping`` instances.
    """

    @staticmethod
    def create_array(read_only=False, **kwargs):
        """Build an ``Array`` whose store and chunk store are custom mappings.

        Parameters
        ----------
        read_only : bool, optional
            Forwarded to the ``Array`` constructor.
        **kwargs
            Forwarded to ``init_array``. ``cache_metadata`` and
            ``cache_attrs`` are popped and passed to ``Array`` instead
            (both default to True). ``compressor`` defaults to ``Zlib(1)``.
            Any caller-supplied ``chunk_store`` is replaced by a fresh
            ``CustomMapping``.

        Returns
        -------
        Array
        """
        store = CustomMapping()
        chunk_store = CustomMapping()
        kwargs["chunk_store"] = chunk_store
        kwargs.setdefault('compressor', Zlib(1))
        cache_metadata = kwargs.pop('cache_metadata', True)
        cache_attrs = kwargs.pop('cache_attrs', True)
        init_array(store, **kwargs)
        # The chunk store must be handed to the Array as well as to
        # init_array; otherwise the Array is never told about it and the
        # separate chunk store is not exercised by the tests.
        return Array(store, read_only=read_only, chunk_store=chunk_store,
                     cache_metadata=cache_metadata, cache_attrs=cache_attrs)

    def test_nbytes_stored(self):
        """``nbytes_stored`` is -1 both before and after writing data."""
        # NOTE(review): presumably -1 signals that the size of a
        # CustomMapping cannot be determined -- confirm against
        # Array.nbytes_stored.
        z = self.create_array(shape=1000, chunks=100)
        assert -1 == z.nbytes_stored
        z[:] = 42
        assert -1 == z.nbytes_stored


class TestArrayNoCache(TestArray):

@staticmethod
def create_array(read_only=False, **kwargs):
store = dict()
store = DictStore()
kwargs.setdefault('compressor', Zlib(level=1))
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
Expand Down Expand Up @@ -1906,7 +1918,7 @@ class TestArrayWithStoreCache(TestArray):

@staticmethod
def create_array(read_only=False, **kwargs):
store = LRUStoreCache(dict(), max_size=None)
store = LRUStoreCache(DictStore(), max_size=None)
kwargs.setdefault('compressor', Zlib(level=1))
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
Expand Down
10 changes: 5 additions & 5 deletions zarr/tests/test_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class TestGroup(unittest.TestCase):
@staticmethod
def create_store():
# can be overridden in sub-classes
return dict(), None
return DictStore(), None

def create_group(self, store=None, path=None, read_only=False,
chunk_store=None, synchronizer=None):
Expand Down Expand Up @@ -948,7 +948,7 @@ class TestGroupWithChunkStore(TestGroup):

@staticmethod
def create_store():
return dict(), dict()
return DictStore(), DictStore()

def test_chunk_store(self):
# setup
Expand Down Expand Up @@ -979,7 +979,7 @@ class TestGroupWithStoreCache(TestGroup):

@staticmethod
def create_store():
store = LRUStoreCache(dict(), max_size=None)
store = LRUStoreCache(DictStore(), max_size=None)
return store, None


Expand All @@ -993,13 +993,13 @@ def test_group():
assert '/' == g.name

# usage with custom store
store = dict()
store = DictStore()
g = group(store=store)
assert isinstance(g, Group)
assert store is g.store

# overwrite behaviour
store = dict()
store = DictStore()
init_array(store, shape=100, chunks=10)
with pytest.raises(ValueError):
group(store)
Expand Down
2 changes: 1 addition & 1 deletion zarr/tests/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
def test_info():

# setup
g = zarr.group(store=dict(), chunk_store=dict(),
g = zarr.group(store=zarr.DictStore(), chunk_store=zarr.DictStore(),
synchronizer=zarr.ThreadSynchronizer())
g.create_group('foo')
z = g.zeros('bar', shape=10, filters=[numcodecs.Adler32()])
Expand Down
4 changes: 2 additions & 2 deletions zarr/tests/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from zarr.sync import ThreadSynchronizer, ProcessSynchronizer
from zarr.core import Array
from zarr.attrs import Attributes
from zarr.storage import init_array, DirectoryStore, init_group, atexit_rmtree
from zarr.storage import init_array, DictStore, DirectoryStore, init_group, atexit_rmtree
from zarr.hierarchy import Group


Expand Down Expand Up @@ -100,7 +100,7 @@ def test_parallel_append(self):
class TestArrayWithThreadSynchronizer(TestArray, MixinArraySyncTests):

def create_array(self, read_only=False, **kwargs):
store = dict()
store = DictStore()
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
init_array(store, **kwargs)
Expand Down