diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py
index 6828377f97..1226a04f06 100644
--- a/src/zarr/codecs/pipeline.py
+++ b/src/zarr/codecs/pipeline.py
@@ -17,6 +17,7 @@
 from zarr.core.common import ChunkCoords, concurrent_map
 from zarr.core.config import config
 from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
+from zarr.core.metadata.v2 import _default_fill_value
 from zarr.registry import register_pipeline
 
 if TYPE_CHECKING:
@@ -247,7 +248,17 @@ async def read_batch(
                 if chunk_array is not None:
                     out[out_selection] = chunk_array
                 else:
-                    out[out_selection] = chunk_spec.fill_value
+                    fill_value = chunk_spec.fill_value
+
+                    if fill_value is None:
+                        # Zarr V2 allowed `fill_value` to be null in the metadata.
+                        # Zarr V3 requires it to be set. This has already been
+                        # validated when decoding the metadata, but we support reading
+                        # Zarr V2 data and need to support the case where fill_value
+                        # is None.
+                        fill_value = _default_fill_value(dtype=chunk_spec.dtype)
+
+                    out[out_selection] = fill_value
         else:
             chunk_bytes_batch = await concurrent_map(
                 [
@@ -274,7 +285,10 @@ async def read_batch(
                         tmp = tmp.squeeze(axis=drop_axes)
                     out[out_selection] = tmp
                 else:
-                    out[out_selection] = chunk_spec.fill_value
+                    fill_value = chunk_spec.fill_value
+                    if fill_value is None:
+                        fill_value = _default_fill_value(dtype=chunk_spec.dtype)
+                    out[out_selection] = fill_value
 
     def _merge_chunk_array(
         self,
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 9f5591ce1e..60508916c6 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -380,7 +380,7 @@ async def _create_v2(
             chunks=chunks,
             order=order,
             dimension_separator=dimension_separator,
-            fill_value=0 if fill_value is None else fill_value,
+            fill_value=fill_value,
             compressor=compressor,
             filters=filters,
             attributes=attributes,
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index ec44673f9d..6d8f2a8ab1 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -300,3 +300,39 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
         raise ValueError(msg) from e
 
     return fill_value
+
+
+def _default_fill_value(dtype: np.dtype[Any]) -> Any:
+    """
+    Get the default fill value for a type.
+
+    Parameters
+    ----------
+    dtype : np.dtype
+        The array data type to produce a default fill value for.
+
+    Returns
+    -------
+    Any
+        ``b""`` for bytes dtypes (kind ``"S"``), ``""`` for unicode dtypes
+        (kind ``"U"``), and the zero-initialised scalar of ``dtype`` for
+        everything else.
+
+    Notes
+    -----
+    This differs from :func:`parse_fill_value`, which parses a fill value
+    stored in the Array metadata into an in-memory value. This only gives
+    the default fill value for some type.
+
+    This is useful for reading Zarr V2 arrays, which allow the fill
+    value to be unspecified.
+    """
+    if dtype.kind == "S":
+        return b""
+    elif dtype.kind == "U":
+        return ""
+    else:
+        # Let NumPy build the zero element instead of calling
+        # ``dtype.type(0)``, which raises for datetime64 dtypes (an integer
+        # needs an explicit unit). This also covers structured dtypes.
+        return np.zeros((), dtype=dtype)[()]
diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
index 234454e289..8494d35939 100644
--- a/src/zarr/testing/strategies.py
+++ b/src/zarr/testing/strategies.py
@@ -140,7 +140,8 @@ def arrays(
     )
 
     assert isinstance(a, Array)
-    assert a.fill_value is not None
+    if a.metadata.zarr_format == 3:
+        assert a.fill_value is not None
     assert isinstance(root[array_path], Array)
     assert nparray.shape == a.shape
     assert chunks == a.chunks
diff --git a/tests/v3/test_metadata/test_v2.py b/tests/v3/test_metadata/test_v2.py
index 4cd222d812..bf6e246668 100644
--- a/tests/v3/test_metadata/test_v2.py
+++ b/tests/v3/test_metadata/test_v2.py
@@ -28,7 +28,7 @@ def test_parse_zarr_format_invalid(data: Any) -> None:
 @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
 @pytest.mark.parametrize("filters", [None, (), (numcodecs.GZip(),)])
 @pytest.mark.parametrize("compressor", [None, numcodecs.GZip()])
-@pytest.mark.parametrize("fill_value", [0, 1])
+@pytest.mark.parametrize("fill_value", [None, 0, 1])
 @pytest.mark.parametrize("order", ["C", "F"])
 @pytest.mark.parametrize("dimension_separator", [".", "/", None])
 def test_metadata_to_dict(
diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py
index f488782d78..d981fbc893 100644
--- a/tests/v3/test_v2.py
+++ b/tests/v3/test_v2.py
@@ -1,5 +1,6 @@
 import json
 from collections.abc import Iterator
+from typing import Any
 
 import numpy as np
 import pytest
@@ -35,6 +36,27 @@ def test_simple(store: StorePath) -> None:
     assert np.array_equal(data, a[:, :])
 
 
+@pytest.mark.parametrize(
+    ("dtype", "fill_value"),
+    [
+        ("bool", False),
+        ("int64", 0),
+        ("float64", 0.0),
+        ("|S1", b""),
+        ("|U1", ""),
+        ("object", 0),
+        (str, ""),
+    ],
+)
+def test_implicit_fill_value(store: StorePath, dtype: Any, fill_value: Any) -> None:
+    arr = zarr.open_array(store=store, shape=(4,), fill_value=None, zarr_format=2, dtype=dtype)
+    assert arr.metadata.fill_value is None
+    assert arr.metadata.to_dict()["fill_value"] is None
+    result = arr[:]
+    expected = np.full(arr.shape, fill_value, dtype=dtype)
+    np.testing.assert_array_equal(result, expected)
+
+
 def test_codec_pipeline() -> None:
     # https://github.com/zarr-developers/zarr-python/issues/2243
     store = MemoryStore(mode="w")