From 4831fe13a00d1704117ac2b54f1584bf97346779 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 15 Apr 2021 09:30:58 +0200 Subject: [PATCH 01/15] Implement `dimension_separator` for Python storage classes (See #715) * All top-level storage classes now take an optional `dimension_separator` parameter which defaults to `None`, but can also be `.` or `/`. * A ValueError is raised at normalization time if this is not the case. * `None`s are normalized to the default of `.` in all except the NestedDirectoryStore case. * The value is stored as `self._dimension_separator` on participating classes so that array creation can lookup the value. * This value deprecates the `key_separator` value from FSStore. * Wrapper classes like LRUCacheStore and ConsolidatedMetadataStore *do not* follow this pattern and instead rely on the value in the underlying store. --- zarr/core.py | 2 + zarr/creation.py | 14 ++- zarr/hierarchy.py | 2 + zarr/meta.py | 2 + zarr/storage.py | 96 +++++++++++++++---- zarr/tests/test_storage.py | 191 +++++++++++++++++++++++++------------ zarr/util.py | 12 ++- 7 files changed, 238 insertions(+), 81 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 02419af63a..3df8043000 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -89,6 +89,7 @@ class Array: dtype compression compression_opts + dimension_separator fill_value order synchronizer @@ -194,6 +195,7 @@ def _load_metadata_nosync(self): self._dtype = meta['dtype'] self._fill_value = meta['fill_value'] self._order = meta['order'] + self._dimension_separator = meta.get('dimension_separator', '.') # setup compressor config = meta['compressor'] diff --git a/zarr/creation.py b/zarr/creation.py index 6fbdaf04c1..913e117ff2 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -13,13 +13,14 @@ from zarr.storage import (DirectoryStore, ZipStore, contains_array, contains_group, default_compressor, init_array, normalize_storage_path, FSStore) +from zarr.util import normalize_dimension_separator def create(shape, chunks=True, dtype=None, compressor='default', fill_value=0, order='C', store=None, synchronizer=None, overwrite=False, path=None, chunk_store=None, filters=None, cache_metadata=True, cache_attrs=True, read_only=False, - object_codec=None, **kwargs): + object_codec=None, dimension_separator=None, **kwargs): """Create an array. Parameters @@ -66,6 +67,9 @@ def create(shape, chunks=True, dtype=None, compressor='default', True if array should be protected against modification. object_codec : Codec, optional A codec to encode object arrays, only needed if dtype=object. + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. + .. versionadded:: 2.8 Returns ------- @@ -117,10 +121,16 @@ def create(shape, chunks=True, dtype=None, compressor='default', # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) + # optional array metadata + if dimension_separator is None: + dimension_separator = getattr(store, "_dimension_separator", ".") + dimension_separator = normalize_dimension_separator(dimension_separator) + # initialize array metadata init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, overwrite=overwrite, path=path, - chunk_store=chunk_store, filters=filters, object_codec=object_codec) + chunk_store=chunk_store, filters=filters, object_codec=object_codec, + dimension_separator=dimension_separator) # instantiate array z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer, diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 39dc82c724..89804d445b 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -783,6 +783,8 @@ def create_dataset(self, name, **kwargs): lifetime of the object. If False, array metadata will be reloaded prior to all data access and modification operations (may incur overhead depending on storage and data access pattern). + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. Returns ------- diff --git a/zarr/meta.py b/zarr/meta.py index d1a1e43bbe..095bfab5e0 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -50,6 +50,7 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: fill_value=fill_value, order=meta['order'], filters=meta['filters'], + dimension_separator=meta.get('dimension_separator', '.'), ) except Exception as e: raise MetadataError('error decoding metadata: %s' % e) @@ -71,6 +72,7 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: fill_value=encode_fill_value(meta['fill_value'], dtype), order=meta['order'], filters=meta['filters'], + dimension_separator=meta.get('dimension_separator', '.'), ) return json_dumps(meta) diff --git a/zarr/storage.py b/zarr/storage.py index c332ee02f5..86b834f56c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -52,6 +52,7 @@ ) from zarr.meta import encode_array_metadata, encode_group_metadata from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, + normalize_dimension_separator, normalize_dtype, normalize_fill_value, normalize_order, normalize_shape, normalize_storage_path, retry_call) @@ -235,6 +236,7 @@ def init_array( chunk_store: MutableMapping = None, filters=None, object_codec=None, + dimension_separator=None, ): """Initialize an array store with the given configuration. Note that this is a low-level function and there should be no need to call this directly from user code. @@ -267,6 +269,8 @@ def init_array( Sequence of filters to use to encode chunk data prior to compression. object_codec : Codec, optional A codec to encode object arrays, only needed if dtype=object. + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. Examples -------- @@ -293,6 +297,7 @@ def init_array( "id": "blosc", "shuffle": 1 }, + "dimension_separator": ".", "dtype": ">> store.close() # don't forget to call this when you're done """ - def __init__(self, path, **kwargs): + def __init__(self, path, dimension_separator=None, **kwargs): import sqlite3 + self._dimension_separator = dimension_separator + # normalize path if path != ':memory:': path = os.path.abspath(path) @@ -2507,6 +2561,8 @@ class MongoDBStore(MutableMapping): Name of database collection : string Name of collection + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. **kwargs Keyword arguments passed through to the `pymongo.MongoClient` function. @@ -2520,11 +2576,12 @@ class MongoDBStore(MutableMapping): _value = 'value' def __init__(self, database='mongodb_zarr', collection='zarr_collection', - **kwargs): + dimension_separator=None, **kwargs): import pymongo self._database = database self._collection = collection + self._dimension_separator = dimension_separator self._kwargs = kwargs self.client = pymongo.MongoClient(**self._kwargs) @@ -2585,14 +2642,17 @@ class RedisStore(MutableMapping): ---------- prefix : string Name of prefix for Redis keys + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. **kwargs Keyword arguments passed through to the `redis.Redis` function. """ - def __init__(self, prefix='zarr', **kwargs): + def __init__(self, prefix='zarr', dimension_separator=None, **kwargs): import redis self._prefix = prefix self._kwargs = kwargs + self._dimension_separator = dimension_separator self.client = redis.Redis(**kwargs) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index a6598f2781..de4b8c6dbe 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -5,7 +5,6 @@ import pickle import shutil import tempfile -import unittest from contextlib import contextmanager from pickle import PicklingError from zipfile import ZipFile @@ -39,6 +38,20 @@ def does_not_raise(): yield +@pytest.fixture(params=[ + (None, "."), + (".", "."), + ("/", "/"), +]) +def dimension_separator_fixture(request): + return request.param + + +def skip_if_nested_chunks(**kwargs): + if kwargs.get("dimension_separator") == "/": + pytest.skip("nested chunks are unsupported") + + class StoreTests(object): """Abstract store tests.""" @@ -371,8 +384,11 @@ def test_hierarchy(self): if hasattr(store, 'close'): store.close() - def test_init_array(self): - store = self.create_store() + def test_init_array(self, dimension_separator_fixture): + + pass_dim_sep, want_dim_sep = dimension_separator_fixture + + store = self.create_store(dimension_separator=pass_dim_sep) init_array(store, shape=1000, chunks=100) # check metadata @@ -384,6 +400,8 @@ def test_init_array(self): assert np.dtype(None) == meta['dtype'] assert default_compressor.get_config() == meta['compressor'] assert meta['fill_value'] is None + # Missing MUST be assumed to be "." + assert meta.get('dimension_separator', ".") is want_dim_sep if hasattr(store, 'close'): store.close() @@ -700,9 +718,10 @@ def _test_init_group_overwrite_chunk_store(self, order): chunk_store.close() -class TestMappingStore(StoreTests, unittest.TestCase): +class TestMappingStore(StoreTests): - def create_store(self): + def create_store(self, **kwargs): + skip_if_nested_chunks(**kwargs) return dict() def test_set_invalid_content(self): @@ -747,10 +766,11 @@ def setdel_hierarchy_checks(store): assert 'r/s' not in store -class TestMemoryStore(StoreTests, unittest.TestCase): +class TestMemoryStore(StoreTests): - def create_store(self): - return MemoryStore() + def create_store(self, **kwargs): + skip_if_nested_chunks(**kwargs) + return MemoryStore(**kwargs) def test_store_contains_bytes(self): store = self.create_store() @@ -762,11 +782,13 @@ def test_setdel(self): setdel_hierarchy_checks(store) -class TestDictStore(StoreTests, unittest.TestCase): +class TestDictStore(StoreTests): + + def create_store(self, **kwargs): + skip_if_nested_chunks(**kwargs) - def create_store(self): with pytest.warns(DeprecationWarning): - return DictStore() + return DictStore(**kwargs) def test_deprecated(self): store = self.create_store() @@ -778,12 +800,14 @@ def test_pickle(self): super().test_pickle() -class TestDirectoryStore(StoreTests, unittest.TestCase): +class TestDirectoryStore(StoreTests): + + def create_store(self, normalize_keys=False, **kwargs): + skip_if_nested_chunks(**kwargs) - def create_store(self, normalize_keys=False): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = DirectoryStore(path, normalize_keys=normalize_keys) + store = DirectoryStore(path, normalize_keys=normalize_keys, **kwargs) return store def test_filesystem_path(self): @@ -863,20 +887,33 @@ def mock_walker_no_slash(_path): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestFSStore(StoreTests, unittest.TestCase): +class TestFSStore(StoreTests): - def create_store(self, normalize_keys=False, key_separator="."): + def create_store(self, normalize_keys=False, dimension_separator="."): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = FSStore( path, normalize_keys=normalize_keys, - key_separator=key_separator) + dimension_separator=dimension_separator) return store - def test_key_separator(self): + def test_init_array(self): + store = self.create_store() + init_array(store, shape=1000, chunks=100) + + # check metadata + assert array_meta_key in store + meta = decode_array_metadata(store[array_meta_key]) + assert ZARR_FORMAT == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunks'] + assert np.dtype(None) == meta['dtype'] + assert meta['dimension_separator'] is "." + + def test_dimension_separator(self): for x in (".", "/"): - store = self.create_store(key_separator=x) + store = self.create_store(dimension_separator=x) norm = store._normalize_key assert ".zarray" == norm(".zarray") assert ".zarray" == norm("/.zarray") @@ -1022,6 +1059,23 @@ def test_s3_complex(self): assert (a[:] == -np.ones((8, 8, 8))).all() +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestFSStoreWithKeySeparator(StoreTests): + + def create_store(self, normalize_keys=False, key_separator=".", **kwargs): + + # Since the user is passing key_separator, that will take priority. + skip_if_nested_chunks(**kwargs) + + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + with pytest.warns(DeprecationWarning): + return FSStore( + path, + normalize_keys=normalize_keys, + key_separator=key_separator) + + @pytest.fixture() def s3(request): # writable local S3 system @@ -1063,7 +1117,7 @@ def s3(request): proc.wait() -class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): +class TestNestedDirectoryStore(TestDirectoryStore): def create_store(self, normalize_keys=False): path = tempfile.mkdtemp() @@ -1071,6 +1125,19 @@ def create_store(self, normalize_keys=False): store = NestedDirectoryStore(path, normalize_keys=normalize_keys) return store + def test_init_array(self): + store = self.create_store() + init_array(store, shape=1000, chunks=100) + + # check metadata + assert array_meta_key in store + meta = decode_array_metadata(store[array_meta_key]) + assert ZARR_FORMAT == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunks'] + assert np.dtype(None) == meta['dtype'] + assert meta['dimension_separator'] is "/" + def test_chunk_nesting(self): store = self.create_store() # any path where last segment looks like a chunk key gets special handling @@ -1084,7 +1151,7 @@ def test_chunk_nesting(self): assert b'zzz' == store['42'] -class TestN5Store(TestNestedDirectoryStore, unittest.TestCase): +class TestN5Store(TestNestedDirectoryStore): def create_store(self, normalize_keys=False): path = tempfile.mkdtemp(suffix='.n5') @@ -1200,12 +1267,12 @@ def test_filters(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestNestedFSStore(TestNestedDirectoryStore): - def create_store(self, normalize_keys=False, path=None): + def create_store(self, normalize_keys=False, path=None, **kwargs): if path is None: path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = FSStore(path, normalize_keys=normalize_keys, - key_separator='/', auto_mkdir=True) + dimension_separator='/', auto_mkdir=True, **kwargs) return store def test_numbered_groups(self): @@ -1222,22 +1289,23 @@ def test_numbered_groups(self): zarr.open_group(store.path)["0"] -class TestTempStore(StoreTests, unittest.TestCase): +class TestTempStore(StoreTests): - def create_store(self): - return TempStore() + def create_store(self, **kwargs): + skip_if_nested_chunks(**kwargs) + return TempStore(**kwargs) def test_setdel(self): store = self.create_store() setdel_hierarchy_checks(store) -class TestZipStore(StoreTests, unittest.TestCase): +class TestZipStore(StoreTests): - def create_store(self): + def create_store(self, **kwargs): path = tempfile.mktemp(suffix='.zip') atexit.register(os.remove, path) - store = ZipStore(path, mode='w') + store = ZipStore(path, mode='w', **kwargs) return store def test_mode(self): @@ -1297,13 +1365,13 @@ def test_permissions(self): z.close() -class TestDBMStore(StoreTests, unittest.TestCase): +class TestDBMStore(StoreTests): - def create_store(self): + def create_store(self, dimension_separator=None): path = tempfile.mktemp(suffix='.anydbm') atexit.register(atexit_rmglob, path + '*') # create store using default dbm implementation - store = DBMStore(path, flag='n') + store = DBMStore(path, flag='n', dimension_separator=dimension_separator) return store def test_context_manager(self): @@ -1315,55 +1383,55 @@ def test_context_manager(self): class TestDBMStoreDumb(TestDBMStore): - def create_store(self): + def create_store(self, **kwargs): path = tempfile.mktemp(suffix='.dumbdbm') atexit.register(atexit_rmglob, path + '*') import dbm.dumb as dumbdbm - store = DBMStore(path, flag='n', open=dumbdbm.open) + store = DBMStore(path, flag='n', open=dumbdbm.open, **kwargs) return store class TestDBMStoreGnu(TestDBMStore): - def create_store(self): + def create_store(self, **kwargs): gdbm = pytest.importorskip("dbm.gnu") path = tempfile.mktemp(suffix=".gdbm") # pragma: no cover atexit.register(os.remove, path) # pragma: no cover store = DBMStore( - path, flag="n", open=gdbm.open, write_lock=False + path, flag="n", open=gdbm.open, write_lock=False, **kwargs ) # pragma: no cover return store # pragma: no cover class TestDBMStoreNDBM(TestDBMStore): - def create_store(self): + def create_store(self, **kwargs): ndbm = pytest.importorskip("dbm.ndbm") path = tempfile.mktemp(suffix=".ndbm") # pragma: no cover atexit.register(atexit_rmglob, path + "*") # pragma: no cover - store = DBMStore(path, flag="n", open=ndbm.open) # pragma: no cover + store = DBMStore(path, flag="n", open=ndbm.open, **kwargs) # pragma: no cover return store # pragma: no cover class TestDBMStoreBerkeleyDB(TestDBMStore): - def create_store(self): + def create_store(self, **kwargs): bsddb3 = pytest.importorskip("bsddb3") path = tempfile.mktemp(suffix='.dbm') atexit.register(os.remove, path) - store = DBMStore(path, flag='n', open=bsddb3.btopen, write_lock=False) + store = DBMStore(path, flag='n', open=bsddb3.btopen, write_lock=False, **kwargs) return store -class TestLMDBStore(StoreTests, unittest.TestCase): +class TestLMDBStore(StoreTests): - def create_store(self): + def create_store(self, **kwargs): pytest.importorskip("lmdb") path = tempfile.mktemp(suffix='.lmdb') atexit.register(atexit_rmtree, path) buffers = True - store = LMDBStore(path, buffers=buffers) + store = LMDBStore(path, buffers=buffers, **kwargs) return store def test_context_manager(self): @@ -1373,13 +1441,13 @@ def test_context_manager(self): assert 2 == len(store) -class TestSQLiteStore(StoreTests, unittest.TestCase): +class TestSQLiteStore(StoreTests): - def create_store(self): + def create_store(self, **kwargs): pytest.importorskip("sqlite3") path = tempfile.mktemp(suffix='.db') atexit.register(atexit_rmtree, path) - store = SQLiteStore(path) + store = SQLiteStore(path, **kwargs) return store def test_underscore_in_name(self): @@ -1392,11 +1460,11 @@ def test_underscore_in_name(self): assert 'a_b' in store -class TestSQLiteStoreInMemory(TestSQLiteStore, unittest.TestCase): +class TestSQLiteStoreInMemory(TestSQLiteStore): - def create_store(self): + def create_store(self, **kwargs): pytest.importorskip("sqlite3") - store = SQLiteStore(':memory:') + store = SQLiteStore(':memory:', **kwargs) return store def test_pickle(self): @@ -1412,33 +1480,35 @@ def test_pickle(self): @skip_test_env_var("ZARR_TEST_MONGO") -class TestMongoDBStore(StoreTests, unittest.TestCase): +class TestMongoDBStore(StoreTests): - def create_store(self): + def create_store(self, **kwargs): pytest.importorskip("pymongo") store = MongoDBStore(host='127.0.0.1', database='zarr_tests', - collection='zarr_tests') + collection='zarr_tests', **kwargs) # start with an empty store store.clear() return store @skip_test_env_var("ZARR_TEST_REDIS") -class TestRedisStore(StoreTests, unittest.TestCase): +class TestRedisStore(StoreTests): - def create_store(self): + def create_store(self, **kwargs): # TODO: this is the default host for Redis on Travis, # we probably want to generalize this though pytest.importorskip("redis") - store = RedisStore(host='localhost', port=6379) + store = RedisStore(host='localhost', port=6379, **kwargs) # start with an empty store store.clear() return store -class TestLRUStoreCache(StoreTests, unittest.TestCase): +class TestLRUStoreCache(StoreTests): - def create_store(self): + def create_store(self, **kwargs): + # wrapper therefore no dimension_separator argument + skip_if_nested_chunks(**kwargs) return LRUStoreCache(dict(), max_size=2**27) def test_cache_values_no_max_size(self): @@ -1831,9 +1901,9 @@ def test_format_compatibility(): @skip_test_env_var("ZARR_TEST_ABS") -class TestABSStore(StoreTests, unittest.TestCase): +class TestABSStore(StoreTests): - def create_store(self, prefix=None): + def create_store(self, prefix=None, **kwargs): asb = pytest.importorskip("azure.storage.blob") blob_client = asb.BlockBlobService(is_emulated=True, socket_timeout=10) blob_client.delete_container("test") @@ -1844,6 +1914,7 @@ def create_store(self, prefix=None): account_name="foo", account_key="bar", blob_service_kwargs={"is_emulated": True, "socket_timeout": 10}, + **kwargs, ) store.rmdir() return store @@ -1880,7 +1951,7 @@ def test_hierarchy(self): return super().test_hierarchy() -class TestConsolidatedMetadataStore(unittest.TestCase): +class TestConsolidatedMetadataStore: def test_bad_format(self): diff --git a/zarr/util.py b/zarr/util.py index 82526ef8c1..7f6b2cf4a6 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -13,7 +13,7 @@ from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo -from typing import Any, Callable, Dict, Tuple, Union +from typing import Any, Callable, Dict, Optional, Tuple, Union # codecs to use for object dtype convenience API @@ -243,6 +243,16 @@ def normalize_order(order: str) -> str: return order +def normalize_dimension_separator(sep: Optional[str]) -> str: + if sep is None: + return "." + elif sep not in (".", "/"): + raise ValueError( + "dimension_separator must be either '.' or '/', found: %r" % sep) + else: + return sep + + def normalize_fill_value(fill_value, dtype: np.dtype): if fill_value is None: From f1a5ca60109d67526eeb27409b46e0577fdcd19f Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 15 Apr 2021 16:13:44 +0200 Subject: [PATCH 02/15] Only store `dimension_separator` if not None All hexdigest tests were failing due to updated array metadata. In the case of NestedDirectoryStore and N5Store, this is necessary. If the dimension_separator key is excluded from the .zarray JSON when None, then most standard tests continue to pass. --- zarr/creation.py | 2 +- zarr/meta.py | 9 ++++++- zarr/storage.py | 19 +++++++++----- zarr/tests/test_core.py | 52 ++++++++++++++++++++++++++++++-------- zarr/tests/test_storage.py | 7 ++--- zarr/util.py | 4 +-- 6 files changed, 70 insertions(+), 23 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 913e117ff2..4b92a2b06a 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -123,7 +123,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', # optional array metadata if dimension_separator is None: - dimension_separator = getattr(store, "_dimension_separator", ".") + dimension_separator = getattr(store, "_dimension_separator", None) dimension_separator = normalize_dimension_separator(dimension_separator) # initialize array metadata diff --git a/zarr/meta.py b/zarr/meta.py index 095bfab5e0..c8c5d77004 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -52,6 +52,7 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: filters=meta['filters'], dimension_separator=meta.get('dimension_separator', '.'), ) + except Exception as e: raise MetadataError('error decoding metadata: %s' % e) else: @@ -63,6 +64,9 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: sdshape = () if dtype.subdtype is not None: dtype, sdshape = dtype.subdtype + + dimension_separator = meta.get('dimension_separator') + meta = dict( zarr_format=ZARR_FORMAT, shape=meta['shape'] + sdshape, @@ -72,8 +76,11 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: fill_value=encode_fill_value(meta['fill_value'], dtype), order=meta['order'], filters=meta['filters'], - dimension_separator=meta.get('dimension_separator', '.'), ) + + if dimension_separator: + meta['dimension_separator'] = dimension_separator + return json_dumps(meta) diff --git a/zarr/storage.py b/zarr/storage.py index 86b834f56c..33a075ef28 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -396,7 +396,7 @@ def _init_array_metadata( # optional array metadata if dimension_separator is None: - dimension_separator = getattr(store, "_dimension_separator", ".") + dimension_separator = getattr(store, "_dimension_separator", None) dimension_separator = normalize_dimension_separator(dimension_separator) # compressor prep @@ -1046,7 +1046,7 @@ class FSStore(MutableMapping): def __init__(self, url, normalize_keys=True, key_separator=None, mode='w', exceptions=(KeyError, PermissionError, IOError), - dimension_separator='.', + dimension_separator=None, **storage_options): import fsspec self.normalize_keys = normalize_keys @@ -1063,8 +1063,10 @@ def __init__(self, url, normalize_keys=True, key_separator=None, "in 2.8.0", DeprecationWarning) dimension_separator = key_separator - # For backwards compatibility + # For backwards compatibility. Guaranteed to be non-None self.key_separator = dimension_separator + if self.key_separator is None: + self.key_separator = "." # Pass attributes to array creation self._dimension_separator = dimension_separator @@ -1078,7 +1080,7 @@ def _normalize_key(self, key): *bits, end = key.split('/') if end not in FSStore._META_KEYS: - end = end.replace('.', self._dimension_separator) + end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) return key.lower() if self.normalize_keys else key @@ -1227,9 +1229,9 @@ class NestedDirectoryStore(DirectoryStore): (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be useful to avoid potential discrepancies between case-senstive and case-insensitive file system. Default value is False. - dimension_separator : {'.', '/'}, optional + dimension_separator : {'/'}, optional Separator placed between the dimensions of a chunk. - Defaults to "/" unlike other implementations. + Only supports "/" unlike other implementations. Examples -------- @@ -1288,6 +1290,11 @@ class NestedDirectoryStore(DirectoryStore): def __init__(self, path, normalize_keys=False, dimension_separator="/"): super().__init__(path, normalize_keys=normalize_keys) + if dimension_separator is None: + dimension_separator = "/" + elif dimension_separator != "/": + raise ValueError( + "NestedDirectoryStore only supports '/' as dimension_separator") self._dimension_separator = dimension_separator def __getitem__(self, key): diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index eaa87800ef..8c5bc6497a 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -39,6 +39,14 @@ # noinspection PyMethodMayBeStatic class TestArray(unittest.TestCase): + DIGESTS = ( + "063b02ff8d9d3bab6da932ad5828b506ef0a6578", + "f97b84dc9ffac807415f750100108764e837bb82", + "c7190ad2bea1e9d2e73eaa2d3ca9187be1ead261", + "14470724dca6c1837edddedc490571b6a7f270bc", + "2a1046dd99b914459b3e86be9dde05027a07d209", + ) + def test_array_init(self): # normal initialization @@ -535,33 +543,33 @@ def test_setitem_data_not_shared(self): def test_hexdigest(self): # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' str: return order -def normalize_dimension_separator(sep: Optional[str]) -> str: +def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: if sep is None: - return "." + return None elif sep not in (".", "/"): raise ValueError( "dimension_separator must be either '.' or '/', found: %r" % sep) From aa939fa9872684b53c20399c96adecbd4fac76ac Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 15 Apr 2021 16:32:14 +0200 Subject: [PATCH 03/15] Fix doctests with optional key --- zarr/storage.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 33a075ef28..c8084ff612 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -297,7 +297,6 @@ def init_array( "id": "blosc", "shuffle": 1 }, - "dimension_separator": ".", "dtype": " Date: Thu, 15 Apr 2021 16:51:41 +0200 Subject: [PATCH 04/15] Add separator to missed LDBMStore --- zarr/storage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index c8084ff612..e17a8ea2e0 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1838,6 +1838,8 @@ class LMDBStore(MutableMapping): buffers : bool, optional If True (default) use support for buffers, which should increase performance by reducing memory copies. + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. **kwargs Keyword arguments passed through to the `lmdb.open` function. @@ -1880,7 +1882,7 @@ class LMDBStore(MutableMapping): """ - def __init__(self, path, buffers=True, **kwargs): + def __init__(self, path, buffers=True, dimension_separator=None, **kwargs): import lmdb # set default memory map size to something larger than the lmdb default, which is @@ -1918,6 +1920,7 @@ def __init__(self, path, buffers=True, **kwargs): self.buffers = buffers self.path = path self.kwargs = kwargs + self._dimension_separator = dimension_separator def __getstate__(self): try: From 7a3d679e916084f4c1f81dd0354db3ffef915c64 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 15 Apr 2021 20:23:38 +0200 Subject: [PATCH 05/15] Fix linting issue --- zarr/tests/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index da25357387..a38bfc4c3c 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -909,7 +909,7 @@ def test_init_array(self): assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] assert np.dtype(None) == meta['dtype'] - assert meta['dimension_separator'] is "." + assert meta['dimension_separator'] == "." def test_dimension_separator(self): for x in (".", "/"): From 64dffdebe5cc29d740f856e2c5f58a0f7dd2b8d4 Mon Sep 17 00:00:00 2001 From: jmoore Date: Fri, 16 Apr 2021 17:23:38 +0200 Subject: [PATCH 06/15] De-deprecate key_separator as public, non-null API --- zarr/storage.py | 13 +++++-------- zarr/tests/test_storage.py | 9 ++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index e17a8ea2e0..c6e38ee052 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1027,8 +1027,9 @@ class FSStore(MutableMapping): The destination to map. Should include protocol and path, like "s3://bucket/root" normalize_keys : bool - key_separator : str (deprecated) - See dimension_separator + key_separator : str + public API for accessing dimension_separator. Never `None` + See dimension_separator for more informatino. mode : str "w" for writable, "r" for read-only exceptions : list of Exception subclasses @@ -1054,14 +1055,10 @@ def __init__(self, url, normalize_keys=True, key_separator=None, self.mode = mode self.exceptions = exceptions - # Handle deprecated properties + # For backwards compatibility. Guaranteed to be non-None if key_separator is not None: - warnings.warn( - "key_separator has been replaced by dimension_separator " - "in 2.8.0", DeprecationWarning) dimension_separator = key_separator - # For backwards compatibility. Guaranteed to be non-None self.key_separator = dimension_separator if self.key_separator is None: self.key_separator = "." @@ -1651,7 +1648,7 @@ class DBMStore(MutableMapping): write_lock: bool, optional Use a lock to prevent concurrent writes from multiple threads (True by default). dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. + Separator placed between the dimensions of a chunk.e **open_kwargs Keyword arguments to pass the `open` function. diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index a38bfc4c3c..dc277dda7e 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1069,11 +1069,10 @@ def create_store(self, normalize_keys=False, key_separator=".", **kwargs): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - with pytest.warns(DeprecationWarning): - return FSStore( - path, - normalize_keys=normalize_keys, - key_separator=key_separator) + return FSStore( + path, + normalize_keys=normalize_keys, + key_separator=key_separator) @pytest.fixture() From caf60ed58a04bf9e7bd6ae98dbe9549db4ab6a7e Mon Sep 17 00:00:00 2001 From: jmoore Date: Sat, 17 Apr 2021 18:21:28 +0200 Subject: [PATCH 07/15] Add test for normalize_dim_sep to appease codecov --- zarr/tests/test_util.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/zarr/tests/test_util.py b/zarr/tests/test_util.py index 10f5889f71..84e09dac8f 100644 --- a/zarr/tests/test_util.py +++ b/zarr/tests/test_util.py @@ -6,12 +6,21 @@ from zarr.util import (guess_chunks, human_readable_size, info_html_report, info_text_report, is_total_slice, normalize_chunks, + normalize_dimension_separator, normalize_fill_value, normalize_order, normalize_resize_args, normalize_shape, retry_call, tree_array_icon, tree_group_icon, tree_get_icon, tree_widget) +def test_normalize_dimension_separator(): + assert None is normalize_dimension_separator(None) + assert '/' == normalize_dimension_separator('/') + assert '.' == normalize_dimension_separator('.') + with pytest.raises(ValueError): + normalize_shape('X') + + def test_normalize_shape(): assert (100,) == normalize_shape((100,)) assert (100,) == normalize_shape([100]) From 17f54d56fdf9dd5dccbe20bf2f7086b1531792a0 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 19 Apr 2021 12:49:55 +0200 Subject: [PATCH 08/15] More tests for codecov --- zarr/tests/test_storage.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index dc277dda7e..0bb9fc40e2 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1151,6 +1151,17 @@ def test_chunk_nesting(self): assert b'zzz' == store['42'] +class TestNestedDirectoryStoreWithWrongValue: + + def test_value_error(self): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + with pytest.raises(ValueError): + NestedDirectoryStore( + path, normalize_keys=True, + dimension_separator=".") + + class TestN5Store(TestNestedDirectoryStore): def create_store(self, normalize_keys=False): From 0da3a03e9e32854d81349fa657d206836bb17198 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 19 Apr 2021 20:03:11 +0200 Subject: [PATCH 09/15] Remove key from n5 array metadata --- zarr/n5.py | 3 +++ zarr/tests/test_core.py | 28 +++++++++++++++------------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index fa01005302..67e39357e7 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -355,6 +355,9 @@ def array_metadata_to_n5(array_metadata): compressor_config = compressor_config_to_n5(compressor_config) array_metadata['compression'] = compressor_config + if 'dimension_separator' in array_metadata: + del array_metadata['dimension_separator'] + return array_metadata diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 8c5bc6497a..7ea41e7ac7 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1677,14 +1677,6 @@ def create_array(read_only=False, **kwargs): class TestArrayWithN5Store(TestArrayWithDirectoryStore): - DIGESTS = ( - "453feae4fa9c7086da9e77982e313a45180e4954", - "35e50e63ec4443b6f73094daee51af9a28b8702f", - "8946a49684c3ca9432c896c6129cb00e5d70ad80", - "c71ad4699147c54cde28a54d23ea83e4c80b14b6", - "eb997d6507c5bf9ab994b75b28e24ad2a99fa3d6", - ) - @staticmethod def create_array(read_only=False, **kwargs): path = mkdtemp() @@ -1935,27 +1927,37 @@ def test_compressors(self): assert np.all(a2[:] == 1) def test_hexdigest(self): + expecting = [ + 'c6b83adfad999fbd865057531d749d87cf138f58', + 'a3d6d187536ecc3a9dd6897df55d258e2f52f9c5', + 'ec2e008525ae09616dbc1d2408cbdb42532005c8', + 'b63f031031dcd5248785616edcb2d6fe68203c28', + '0cfc673215a8292a87f3c505e2402ce75243c601', + ] + found = [] # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' Date: Mon, 19 Apr 2021 20:34:18 +0200 Subject: [PATCH 10/15] Fix minor typo --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index c6e38ee052..aa46e59e79 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1029,7 +1029,7 @@ class FSStore(MutableMapping): normalize_keys : bool key_separator : str public API for accessing dimension_separator. Never `None` - See dimension_separator for more informatino. + See dimension_separator for more information. mode : str "w" for writable, "r" for read-only exceptions : list of Exception subclasses From abc33008d14a28327f9de1997f20056fa8b7a6ff Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 19 Apr 2021 20:34:45 +0200 Subject: [PATCH 11/15] Cleanup DIGESTS in test_core.py --- zarr/tests/test_core.py | 64 ++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 7ea41e7ac7..6914cff87e 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -39,14 +39,6 @@ # noinspection PyMethodMayBeStatic class TestArray(unittest.TestCase): - DIGESTS = ( - "063b02ff8d9d3bab6da932ad5828b506ef0a6578", - "f97b84dc9ffac807415f750100108764e837bb82", - "c7190ad2bea1e9d2e73eaa2d3ca9187be1ead261", - "14470724dca6c1837edddedc490571b6a7f270bc", - "2a1046dd99b914459b3e86be9dde05027a07d209", - ) - def test_array_init(self): # normal initialization @@ -540,39 +532,52 @@ def test_setitem_data_not_shared(self): if hasattr(z.store, 'close'): z.store.close() + def expected(self): + return [ + "063b02ff8d9d3bab6da932ad5828b506ef0a6578", + "f97b84dc9ffac807415f750100108764e837bb82", + "c7190ad2bea1e9d2e73eaa2d3ca9187be1ead261", + "14470724dca6c1837edddedc490571b6a7f270bc", + "2a1046dd99b914459b3e86be9dde05027a07d209", + ] + def test_hexdigest(self): + found = [] + # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' Date: Mon, 19 Apr 2021 20:35:28 +0200 Subject: [PATCH 12/15] Fix cut-n-paste error in test_utils.py --- zarr/tests/test_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_util.py b/zarr/tests/test_util.py index 84e09dac8f..85928fc825 100644 --- a/zarr/tests/test_util.py +++ b/zarr/tests/test_util.py @@ -18,7 +18,7 @@ def test_normalize_dimension_separator(): assert '/' == normalize_dimension_separator('/') assert '.' == normalize_dimension_separator('.') with pytest.raises(ValueError): - normalize_shape('X') + normalize_dimension_separator('X') def test_normalize_shape(): From 7ee834355c776c187e698dc56356f8a85ab02967 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 19 Apr 2021 22:00:01 +0200 Subject: [PATCH 13/15] And hopefully on last codecov fix --- zarr/tests/test_storage.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0bb9fc40e2..a34eb39933 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1151,6 +1151,17 @@ def test_chunk_nesting(self): assert b'zzz' == store['42'] +class TestNestedDirectoryStoreNone: + + def test_value_error(self): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = NestedDirectoryStore( + path, normalize_keys=True, + dimension_separator=None) + assert store._dimension_separator == "/" + + class TestNestedDirectoryStoreWithWrongValue: def test_value_error(self): From 71c0a58c40dc3142fec6017f226619309c500f67 Mon Sep 17 00:00:00 2001 From: jmoore Date: Wed, 21 Apr 2021 20:56:41 +0200 Subject: [PATCH 14/15] Apply review changes --- zarr/creation.py | 2 +- zarr/util.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 4b92a2b06a..d017ec921d 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -69,7 +69,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', A codec to encode object arrays, only needed if dtype=object. dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. - .. versionadded:: 2.8 + .. versionadded:: 2.8 Returns ------- diff --git a/zarr/util.py b/zarr/util.py index 077f047cb2..a48cdfd6b9 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -244,13 +244,11 @@ def normalize_order(order: str) -> str: def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: - if sep is None: - return None - elif sep not in (".", "/"): + if sep in (".", "/", None): + return sep + else: raise ValueError( "dimension_separator must be either '.' or '/', found: %r" % sep) - else: - return sep def normalize_fill_value(fill_value, dtype: np.dtype): From a60a40af2818c8dac147917150b663056b84d511 Mon Sep 17 00:00:00 2001 From: jmoore Date: Sat, 24 Apr 2021 11:13:14 +0200 Subject: [PATCH 15/15] Add 2.8.0 release notes --- docs/release.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 7c73a02911..5929efb317 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -1,6 +1,20 @@ Release notes ============= +.. _release_2.8.0: + +2.8.0 +----- + +V2 Specification Update +~~~~~~~~~~~~~~~~~~~~~~~ + +* Introduce optional dimension_separator .zarray key for nested chunks. + By :user:`Josh Moore `; :issue:`715`, :issue:`716`. + +.. _release_2.7.0: + + .. _release_2.7.1: 2.7.1