From b59ce458f72106239035d54673a4e53169869d40 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 13 Sep 2024 16:14:30 -0700 Subject: [PATCH] fix: opening a group with unspecified format finds either v2 or v3 metadata objects --- src/zarr/api/asynchronous.py | 32 ++++++++++++++++---------------- src/zarr/core/config.py | 21 ++++++++++----------- tests/v3/test_api.py | 21 +++++++++++++++++++++ tests/v3/test_config.py | 1 + 4 files changed, 48 insertions(+), 27 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index d757aa2120..8a1b0c5f36 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -9,6 +9,7 @@ from zarr.core.array import Array, AsyncArray from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat +from zarr.core.config import config from zarr.core.group import AsyncGroup from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata @@ -126,8 +127,7 @@ def _handle_zarr_version_or_format( def _default_zarr_version() -> ZarrFormat: """return the default zarr_version""" - # TODO: set default value from config - return 3 + return cast(ZarrFormat, int(config.get("default_zarr_version", 3))) async def consolidate_metadata(*args: Any, **kwargs: Any) -> AsyncGroup: @@ -337,7 +337,10 @@ async def save_group( kwargs NumPy arrays with data to save. """ - zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") @@ -448,10 +451,7 @@ async def group( The new group. """ - zarr_format = ( - _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - or _default_zarr_version() - ) + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) store_path = await make_store_path(store) if path is not None: @@ -474,7 +474,7 @@ async def group( except (KeyError, FileNotFoundError): return await AsyncGroup.create( store=store_path, - zarr_format=zarr_format, + zarr_format=zarr_format or _default_zarr_version(), exists_ok=overwrite, attributes=attributes, ) @@ -483,7 +483,7 @@ async def group( async def open_group( *, # Note: this is a change from v2 store: StoreLike | None = None, - mode: AccessModeLiteral | None = None, # not used + mode: AccessModeLiteral | None = None, cache_attrs: bool | None = None, # not used, default changed synchronizer: Any = None, # not used path: str | None = None, @@ -538,10 +538,7 @@ async def open_group( The new group. """ - zarr_format = ( - _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - or _default_zarr_version() - ) + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) if cache_attrs is not None: warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) @@ -565,7 +562,10 @@ async def open_group( return await AsyncGroup.open(store_path, zarr_format=zarr_format) except (KeyError, FileNotFoundError): return await AsyncGroup.create( - store_path, zarr_format=zarr_format, exists_ok=True, attributes=attributes + store_path, + zarr_format=zarr_format or _default_zarr_version(), + exists_ok=True, + attributes=attributes, ) @@ -687,7 +687,7 @@ async def create( if zarr_format == 2 and chunks is None: chunks = shape - if zarr_format == 3 and chunk_shape is None: + elif zarr_format == 3 and chunk_shape is None: if chunks is not None: chunk_shape = chunks chunks = None @@ -908,7 +908,7 @@ async def open_array( if store_path.store.mode.create: return await create( store=store_path, - zarr_format=zarr_format, + zarr_format=zarr_format or _default_zarr_version(), overwrite=store_path.store.mode.overwrite, **kwargs, ) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 611d1faea5..45e4114389 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -28,21 +28,20 @@ def reset(self) -> None: self.refresh() -""" -The config module is responsible for managing the configuration of zarr and is based on the Donfig python library. -For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations -in the registry and then select them in the config. -e.g. an implementation of the bytes codec in a class "NewBytesCodec", requires the value of codecs.bytes.name to be -"NewBytesCodec". -Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations -e.g. export ZARR_CODECS__BYTES__NAME="NewBytesCodec" -(for more information see github.com/pytroll/donfig) -Default values below point to the standard implementations of zarr-python -""" +# The config module is responsible for managing the configuration of zarr and is based on the Donfig python library. +# For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations +# in the registry and then select them in the config. +# e.g. an implementation of the bytes codec in a class "NewBytesCodec", requires the value of codecs.bytes.name to be +# "NewBytesCodec". +# Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations +# e.g. export ZARR_CODECS__BYTES__NAME="NewBytesCodec" +# (for more information see github.com/pytroll/donfig) +# Default values below point to the standard implementations of zarr-python config = Config( "zarr", defaults=[ { + "default_zarr_version": 3, "array": {"order": "C"}, "async": {"concurrency": None, "timeout": None}, "json_indent": 2, diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index 239dd1c3e2..ddfab587cc 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -8,6 +8,7 @@ from zarr import Array, Group from zarr.abc.store import Store from zarr.api.synchronous import create, load, open, open_group, save, save_array, save_group +from zarr.core.common import ZarrFormat from zarr.store.memory import MemoryStore @@ -81,6 +82,26 @@ async def test_open_group(memory_store: MemoryStore) -> None: # assert g.read_only +@pytest.mark.parametrize("zarr_format", [None, 2, 3]) +async def test_open_group_unspecified_version( + tmpdir: pathlib.Path, zarr_format: ZarrFormat +) -> None: + """regression test for https://github.com/zarr-developers/zarr-python/issues/2175""" + + # create a group with specified zarr format (could be 2, 3, or None) + _ = await zarr.api.asynchronous.open_group( + store=str(tmpdir), mode="w", zarr_format=zarr_format, attributes={"foo": "bar"} + ) + + # now open that group without specifying the format + g2 = await zarr.api.asynchronous.open_group(store=str(tmpdir), mode="r") + + assert g2.attrs == {"foo": "bar"} + + if zarr_format is not None: + assert g2.metadata.zarr_format == zarr_format + + def test_save_errors() -> None: with pytest.raises(ValueError): # no arrays provided diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py index c0674ecbfd..115487ba87 100644 --- a/tests/v3/test_config.py +++ b/tests/v3/test_config.py @@ -39,6 +39,7 @@ def test_config_defaults_set() -> None: # regression test for available defaults assert config.defaults == [ { + "default_zarr_version": 3, "array": {"order": "C"}, "async": {"concurrency": None, "timeout": None}, "json_indent": 2,