diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index def95b206d..e3ef664b94 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -324,6 +324,22 @@ def __init__( object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + # todo: typedict return type + def __getstate__(self) -> dict[str, Any]: + return self.to_dict() + + def __setstate__(self, state: dict[str, Any]) -> None: + config = state["configuration"] + object.__setattr__(self, "chunk_shape", parse_shapelike(config["chunk_shape"])) + object.__setattr__(self, "codecs", parse_codecs(config["codecs"])) + object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"])) + object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) + + # Use instance-local lru_cache to avoid memory leaks + object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) + object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) + object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "sharding_indexed") diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index ae4aa0681b..1f483e1c15 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -1220,24 +1220,25 @@ def make_slice_selection(selection: Any) -> list[slice]: return ls -def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: - def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords: - # Inspired by compressed morton code as implemented in Neuroglancer - # https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code - bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) - max_coords_bits = max(*bits) - input_bit = 0 - input_value = z - out = [0 for _ in range(len(chunk_shape))] - - for coord_bit in range(max_coords_bits): - for dim in range(len(chunk_shape)): - if coord_bit < bits[dim]: - bit = (input_value >> input_bit) & 1 - out[dim] |= bit << coord_bit - input_bit += 1 - return tuple(out) +def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords: + # Inspired by compressed morton code as implemented in Neuroglancer + # https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code + bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) + max_coords_bits = max(bits) + input_bit = 0 + input_value = z + out = [0] * len(chunk_shape) + + for coord_bit in range(max_coords_bits): + for dim in range(len(chunk_shape)): + if coord_bit < bits[dim]: + bit = (input_value >> input_bit) & 1 + out[dim] |= bit << coord_bit + input_bit += 1 + return tuple(out) + +def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: for i in range(product(chunk_shape)): yield decode_morton(i, chunk_shape) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 6b58cce412..8b75d9f2f8 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -4,7 +4,10 @@ from types import ModuleType from typing import TYPE_CHECKING -from zarr.common import ZarrFormat +from _pytest.compat import LEGACY_PATH + +from zarr.abc.store import Store +from zarr.common import ChunkCoords, 
MemoryOrder, ZarrFormat from zarr.group import AsyncGroup if TYPE_CHECKING: @@ -12,6 +15,7 @@ import pathlib from dataclasses import dataclass, field +import numpy as np import pytest from zarr.store import LocalStore, MemoryStore, StorePath @@ -26,40 +30,40 @@ def parse_store( if store == "memory": return MemoryStore(mode="w") if store == "remote": - return RemoteStore(mode="w") + return RemoteStore(url=path, mode="w") raise AssertionError @pytest.fixture(params=[str, pathlib.Path]) -def path_type(request): +def path_type(request: pytest.FixtureRequest) -> Any: return request.param # todo: harmonize this with local_store fixture @pytest.fixture -def store_path(tmpdir): +def store_path(tmpdir: LEGACY_PATH) -> StorePath: store = LocalStore(str(tmpdir), mode="w") p = StorePath(store) return p @pytest.fixture(scope="function") -def local_store(tmpdir): +def local_store(tmpdir: LEGACY_PATH) -> LocalStore: return LocalStore(str(tmpdir), mode="w") @pytest.fixture(scope="function") -def remote_store(): - return RemoteStore(mode="w") +def remote_store(url: str) -> RemoteStore: + return RemoteStore(url, mode="w") @pytest.fixture(scope="function") -def memory_store(): +def memory_store() -> MemoryStore: return MemoryStore(mode="w") @pytest.fixture(scope="function") -def store(request: str, tmpdir): +def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: param = request.param return parse_store(param, str(tmpdir)) @@ -72,7 +76,7 @@ class AsyncGroupRequest: @pytest.fixture(scope="function") -async def async_group(request: pytest.FixtureRequest, tmpdir) -> AsyncGroup: +async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> AsyncGroup: param: AsyncGroupRequest = request.param store = parse_store(param.store, str(tmpdir)) @@ -90,3 +94,20 @@ def xp(request: pytest.FixtureRequest) -> Iterator[ModuleType]: """Fixture to parametrize over numpy-like libraries""" yield pytest.importorskip(request.param) + + +@dataclass +class ArrayRequest: + shape: ChunkCoords + dtype: str + order: MemoryOrder + + +@pytest.fixture +def array_fixture(request: pytest.FixtureRequest) -> np.ndarray: + array_request: ArrayRequest = request.param + return ( + np.arange(np.prod(array_request.shape)) + .reshape(array_request.shape, order=array_request.order) + .astype(array_request.dtype) + ) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py deleted file mode 100644 index 7cb0d0f804..0000000000 --- a/tests/v3/test_codecs.py +++ /dev/null @@ -1,1049 +0,0 @@ -from __future__ import annotations - -import json -from collections.abc import Iterator -from dataclasses import dataclass -from typing import Literal - -import numpy as np -import pytest - -import zarr.v2 -from zarr.abc.codec import Codec -from zarr.abc.store import Store -from zarr.array import Array, AsyncArray -from zarr.codecs import ( - BloscCodec, - BytesCodec, - GzipCodec, - ShardingCodec, - ShardingCodecIndexLocation, - TransposeCodec, - ZstdCodec, -) -from zarr.config import config -from zarr.indexing import Selection, morton_order_iter -from zarr.store import MemoryStore, StorePath -from zarr.testing.utils import assert_bytes_equal - - -@dataclass(frozen=True) -class _AsyncArrayProxy: - array: AsyncArray - - def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy: - return _AsyncArraySelectionProxy(self.array, selection) - - -@dataclass(frozen=True) -class _AsyncArraySelectionProxy: - array: AsyncArray - selection: Selection - - async def get(self) -> np.ndarray: - return await 
self.array.getitem(self.selection) - - async def set(self, value: np.ndarray): - return await self.array.setitem(self.selection, value) - - -@pytest.fixture -def store() -> Iterator[Store]: - yield StorePath(MemoryStore(mode="w")) - - -@pytest.fixture -def sample_data() -> np.ndarray: - return np.arange(0, 128 * 128 * 128, dtype="uint16").reshape((128, 128, 128), order="F") - - -def order_from_dim(order: Literal["F", "C"], ndim: int) -> tuple[int, ...]: - if order == "F": - return tuple(ndim - x - 1 for x in range(ndim)) - else: - return tuple(range(ndim)) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -def test_sharding( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): - a = Array.create( - store / "sample", - shape=sample_data.shape, - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - - a[:, :, :] = sample_data - - read_data = a[0 : sample_data.shape[0], 0 : sample_data.shape[1], 0 : sample_data.shape[2]] - assert sample_data.shape == read_data.shape - assert np.array_equal(sample_data, read_data) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -def test_sharding_partial( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): - a = Array.create( - store / "sample", - shape=tuple(a + 10 for a in sample_data.shape), - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - - a[10:, 10:, 10:] = sample_data - - read_data = a[0:10, 0:10, 0:10] - assert np.all(read_data == 0) - - read_data = a[10:, 10:, 10:] - assert sample_data.shape == read_data.shape - assert np.array_equal(sample_data, read_data) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -def test_sharding_partial_read( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): - a = Array.create( - store / "sample", - shape=tuple(a + 10 for a in sample_data.shape), - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - - read_data = a[0:10, 0:10, 0:10] - assert np.all(read_data == 1) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -def test_sharding_partial_overwrite( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): - data = sample_data[:10, :10, :10] - - a = Array.create( - store / "sample", - shape=tuple(a + 10 for a in data.shape), - chunk_shape=(64, 64, 64), - dtype=data.dtype, - fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - - a[:10, :10, :10] = data - - read_data = a[0:10, 0:10, 0:10] - assert np.array_equal(data, read_data) - - data = data + 10 - a[:10, :10, :10] = data - read_data = a[0:10, 0:10, 0:10] - assert 
np.array_equal(data, read_data) - - -@pytest.mark.parametrize( - "outer_index_location", - ["start", "end"], -) -@pytest.mark.parametrize( - "inner_index_location", - ["start", "end"], -) -def test_nested_sharding( - store: Store, - sample_data: np.ndarray, - outer_index_location: ShardingCodecIndexLocation, - inner_index_location: ShardingCodecIndexLocation, -): - a = Array.create( - store / "l4_sample" / "color" / "1", - shape=sample_data.shape, - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location) - ], - index_location=outer_index_location, - ) - ], - ) - - a[:, :, :] = sample_data - - read_data = a[0 : sample_data.shape[0], 0 : sample_data.shape[1], 0 : sample_data.shape[2]] - assert sample_data.shape == read_data.shape - assert np.array_equal(sample_data, read_data) - - -@pytest.mark.parametrize("input_order", ["F", "C"]) -@pytest.mark.parametrize("store_order", ["F", "C"]) -@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) -@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) -@pytest.mark.parametrize("with_sharding", [True, False]) -async def test_order( - store: Store, - input_order: Literal["F", "C"], - store_order: Literal["F", "C"], - runtime_write_order: Literal["F", "C"], - runtime_read_order: Literal["F", "C"], - with_sharding: bool, -): - data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) - - codecs_: list[Codec] = ( - [ - ShardingCodec( - chunk_shape=(16, 8), - codecs=[TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()], - ) - ] - if with_sharding - else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] - ) - - with config.set({"array.order": runtime_write_order}): - a = await AsyncArray.create( - store / "order", - shape=data.shape, - chunk_shape=(32, 8), - dtype=data.dtype, - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=codecs_, - ) - - await _AsyncArrayProxy(a)[:, :].set(data) - read_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, read_data) - - with config.set({"array.order": runtime_read_order}): - a = await AsyncArray.open( - store / "order", - ) - read_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, read_data) - - if runtime_read_order == "F": - assert read_data.flags["F_CONTIGUOUS"] - assert not read_data.flags["C_CONTIGUOUS"] - else: - assert not read_data.flags["F_CONTIGUOUS"] - assert read_data.flags["C_CONTIGUOUS"] - - if not with_sharding: - # Compare with zarr-python - z = zarr.v2.create( - shape=data.shape, - chunks=(32, 8), - dtype="u2", "u2", " None: + typesize = np.dtype(dtype).itemsize + path = "blosc_evolve" + spath = StorePath(store, path) + await AsyncArray.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=dtype, + fill_value=0, + codecs=[BytesCodec(), BloscCodec()], + ) + + zarr_json = json.loads( + (await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype)).to_bytes() + ) + blosc_configuration_json = zarr_json["codecs"][1]["configuration"] + assert blosc_configuration_json["typesize"] == typesize + if typesize == 1: + assert blosc_configuration_json["shuffle"] == "bitshuffle" + else: + assert blosc_configuration_json["shuffle"] == "shuffle" + + path2 = "blosc_evolve_sharding" + spath2 = StorePath(store, path2) + await AsyncArray.create( + spath2, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=dtype, 
+ fill_value=0, + codecs=[ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()])], + ) + + zarr_json = json.loads( + (await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype)).to_bytes() + ) + blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1]["configuration"] + assert blosc_configuration_json["typesize"] == typesize + if typesize == 1: + assert blosc_configuration_json["shuffle"] == "bitshuffle" + else: + assert blosc_configuration_json["shuffle"] == "shuffle" diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py new file mode 100644 index 0000000000..1104805d4b --- /dev/null +++ b/tests/v3/test_codecs/test_codecs.py @@ -0,0 +1,486 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass + +import numpy as np +import pytest + +import zarr.v2 +from zarr.abc.codec import Codec +from zarr.abc.store import Store +from zarr.array import Array, AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import ( + BytesCodec, + GzipCodec, + ShardingCodec, + TransposeCodec, +) +from zarr.common import MemoryOrder +from zarr.config import config +from zarr.indexing import Selection, morton_order_iter +from zarr.store import StorePath +from zarr.testing.utils import assert_bytes_equal + + +@dataclass(frozen=True) +class _AsyncArrayProxy: + array: AsyncArray + + def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy: + return _AsyncArraySelectionProxy(self.array, selection) + + +@dataclass(frozen=True) +class _AsyncArraySelectionProxy: + array: AsyncArray + selection: Selection + + async def get(self) -> np.ndarray: + return await self.array.getitem(self.selection) + + async def set(self, value: np.ndarray) -> None: + return await self.array.setitem(self.selection, value) + + +def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: + if order == "F": + return tuple(ndim - x - 1 for x in range(ndim)) + else: + return tuple(range(ndim)) + + +def test_sharding_pickle() -> None: + """ + Test that sharding codecs can be pickled + """ + pass + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("input_order", ["F", "C"]) +@pytest.mark.parametrize("store_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) +@pytest.mark.parametrize("with_sharding", [True, False]) +async def test_order( + store: Store, + input_order: MemoryOrder, + store_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, + with_sharding: bool, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) + path = "order" + spath = StorePath(store, path=path) + codecs_: list[Codec] = ( + [ + ShardingCodec( + chunk_shape=(16, 8), + codecs=[TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()], + ) + ] + if with_sharding + else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] + ) + + with config.set({"array.order": runtime_write_order}): + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + with config.set({"array.order": runtime_read_order}): + a = 
await AsyncArray.open( + spath, + ) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + if not with_sharding: + # Compare with zarr-python + z = zarr.v2.create( + shape=data.shape, + chunks=(32, 8), + dtype=" None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) + path = "order_implicit" + spath = StorePath(store, path) + codecs_: list[Codec] | None = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None + + with config.set({"array.order": runtime_write_order}): + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=codecs_, + ) + + a[:, :] = data + + with config.set({"array.order": runtime_read_order}): + a = Array.open(spath) + read_data = a[:, :] + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_open(store: Store) -> None: + spath = StorePath(store) + a = Array.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype="int32", + fill_value=0, + ) + b = Array.open(spath) + assert a.metadata == b.metadata + + +def test_morton() -> None: + assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] + assert list(morton_order_iter((2, 2, 2))) == [ + (0, 0, 0), + (1, 0, 0), + (0, 1, 0), + (1, 1, 0), + (0, 0, 1), + (1, 0, 1), + (0, 1, 1), + (1, 1, 1), + ] + assert list(morton_order_iter((2, 2, 2, 2))) == [ + (0, 0, 0, 0), + (1, 0, 0, 0), + (0, 1, 0, 0), + (1, 1, 0, 0), + (0, 0, 1, 0), + (1, 0, 1, 0), + (0, 1, 1, 0), + (1, 1, 1, 0), + (0, 0, 0, 1), + (1, 0, 0, 1), + (0, 1, 0, 1), + (1, 1, 0, 1), + (0, 0, 1, 1), + (1, 0, 1, 1), + (0, 1, 1, 1), + (1, 1, 1, 1), + ] + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_write_partial_chunks(store: Store) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + spath = StorePath(store) + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(20, 20), + dtype=data.dtype, + fill_value=1, + ) + a[0:16, 0:16] = data + assert np.array_equal(a[0:16, 0:16], data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_delete_empty_chunks(store: Store) -> None: + data = np.ones((16, 16)) + path = "delete_empty_chunks" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(32, 32), + dtype=data.dtype, + fill_value=1, + ) + await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) + await _AsyncArrayProxy(a)[:16, :16].set(data) + assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) + assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype) is None + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_zarr_compat(store: Store) -> None: + data = np.zeros((16, 18), dtype="uint16") + path = "zarr_compat3" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(10, 10), + dtype=data.dtype, + chunk_key_encoding=("v2", 
"."), + fill_value=1, + ) + + z2 = zarr.v2.create( + shape=data.shape, + chunks=(10, 10), + dtype=data.dtype, + compressor=None, + fill_value=1, + ) + + await _AsyncArrayProxy(a)[:16, :18].set(data) + z2[:16, :18] = data + assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) + assert np.array_equal(data, z2[:16, :18]) + + assert_bytes_equal( + z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype) + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_zarr_compat_F(store: Store) -> None: + data = np.zeros((16, 18), dtype="uint16", order="F") + path = "zarr_compatF3" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(10, 10), + dtype=data.dtype, + chunk_key_encoding=("v2", "."), + fill_value=1, + codecs=[TransposeCodec(order=order_from_dim("F", data.ndim)), BytesCodec()], + ) + + z2 = zarr.v2.create( + shape=data.shape, + chunks=(10, 10), + dtype=data.dtype, + compressor=None, + order="F", + fill_value=1, + ) + + await _AsyncArrayProxy(a)[:16, :18].set(data) + z2[:16, :18] = data + assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) + assert np.array_equal(data, z2[:16, :18]) + + assert_bytes_equal( + z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype) + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_dimension_names(store: Store) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + path = "dimension_names" + spath = StorePath(store, path) + await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + dimension_names=("x", "y"), + ) + + assert (await AsyncArray.open(spath)).metadata.dimension_names == ( + "x", + "y", + ) + path2 = "dimension_names2" + spath2 = StorePath(store, path2) + await AsyncArray.create( + spath2, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + ) + + assert (await AsyncArray.open(spath2)).metadata.dimension_names is None + zarr_json_buffer = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype) + assert zarr_json_buffer is not None + assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_invalid_metadata(store: Store) -> None: + spath = StorePath(store, "invalid_metadata") + with pytest.raises(ValueError): + Array.create( + spath, + shape=(16, 16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + ) + spath2 = StorePath(store, "invalid_endian") + with pytest.raises(ValueError): + Array.create( + spath2, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + 
BytesCodec(endian="big"), + TransposeCodec(order=order_from_dim("F", 2)), + ], + ) + spath3 = StorePath(store, "invalid_order") + with pytest.raises(TypeError): + Array.create( + spath3, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + BytesCodec(), + TransposeCodec(order="F"), + ], + ) + spath4 = StorePath(store, "invalid_missing_bytes_codec") + with pytest.raises(ValueError): + Array.create( + spath4, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + TransposeCodec(order=order_from_dim("F", 2)), + ], + ) + spath5 = StorePath(store, "invalid_inner_chunk_shape") + with pytest.raises(ValueError): + Array.create( + spath5, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8,)), + ], + ) + spath6 = StorePath(store, "invalid_inner_chunk_shape") + with pytest.raises(ValueError): + Array.create( + spath6, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8, 7)), + ], + ) + spath7 = StorePath(store, "warning_inefficient_codecs") + with pytest.warns(UserWarning): + Array.create( + spath7, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8, 8)), + GzipCodec(), + ], + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_resize(store: Store) -> None: + data = np.zeros((16, 18), dtype="uint16") + path = "resize" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(10, 10), + dtype=data.dtype, + chunk_key_encoding=("v2", "."), + fill_value=1, + ) + + await _AsyncArrayProxy(a)[:16, :18].set(data) + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype) is not None + + a = await a.resize((10, 12)) + assert a.metadata.shape == (10, 12) + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype) is None + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype) is None diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py new file mode 100644 index 0000000000..8301a424b9 --- /dev/null +++ b/tests/v3/test_codecs/test_endian.py @@ -0,0 +1,87 @@ +from typing import Literal + +import numpy as np +import pytest + +import zarr.v2 +from zarr.abc.store import Store +from zarr.array import AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import BytesCodec +from zarr.store.core import StorePath +from zarr.testing.utils import assert_bytes_equal + +from .test_codecs import _AsyncArrayProxy + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("endian", ["big", "little"]) +async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + path = "endian" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + 
shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[BytesCodec(endian=endian)], + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + readback_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, readback_data) + + # Compare with v2 + z = zarr.v2.create( + shape=data.shape, + chunks=(16, 16), + dtype=">u2" if endian == "big" else "u2", "u2", " None: + data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16)) + path = "endian" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype="uint16", + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[BytesCodec(endian=dtype_store_endian)], + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + readback_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, readback_data) + + # Compare with zarr-python + z = zarr.v2.create( + shape=data.shape, + chunks=(16, 16), + dtype=">u2" if dtype_store_endian == "big" else " None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + + a = Array.create( + StorePath(store), + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=[BytesCodec(), GzipCodec()], + ) + + a[:, :] = data + assert np.array_equal(data, a[:, :]) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py new file mode 100644 index 0000000000..f0031349cb --- /dev/null +++ b/tests/v3/test_codecs/test_sharding.py @@ -0,0 +1,324 @@ +import pickle + +import numpy as np +import pytest + +from zarr.abc.store import Store +from zarr.array import Array, AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import ( + BloscCodec, + BytesCodec, + ShardingCodec, + ShardingCodecIndexLocation, + TransposeCodec, +) +from zarr.store.core import StorePath + +from ..conftest import ArrayRequest +from .test_codecs import _AsyncArrayProxy, order_from_dim + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("offset", [0, 10]) +def test_sharding( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation, offset: int +) -> None: + """ + Test that we can create an array with a sharding codec, write data to that array, and get + the same data out via indexing. 
+ """ + data = array_fixture + spath = StorePath(store) + arr = Array.create( + spath, + shape=tuple(s + offset for s in data.shape), + chunk_shape=(64,) * data.ndim, + dtype=data.dtype, + fill_value=6, + codecs=[ + ShardingCodec( + chunk_shape=(32,) * data.ndim, + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + write_region = tuple(slice(offset, None) for dim in range(data.ndim)) + arr[write_region] = data + + if offset > 0: + empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) + assert np.all(arr[empty_region] == arr.metadata.fill_value) + + read_data = arr[write_region] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +def test_sharding_partial( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + + a[10:, 10:, 10:] = data + + read_data = a[0:10, 0:10, 0:10] + assert np.all(read_data == 0) + + read_data = a[10:, 10:, 10:] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_sharding_partial_read( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + + read_data = a[0:10, 0:10, 0:10] + assert np.all(read_data == 1) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_sharding_partial_overwrite( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture[:10, :10, :10] + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) 
+ + a[:10, :10, :10] = data + + read_data = a[0:10, 0:10, 0:10] + assert np.array_equal(data, read_data) + + data = data + 10 + a[:10, :10, :10] = data + read_data = a[0:10, 0:10, 0:10] + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize( + "outer_index_location", + ["start", "end"], +) +@pytest.mark.parametrize( + "inner_index_location", + ["start", "end"], +) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_nested_sharding( + store: Store, + array_fixture: np.ndarray, + outer_index_location: ShardingCodecIndexLocation, + inner_index_location: ShardingCodecIndexLocation, +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location) + ], + index_location=outer_index_location, + ) + ], + ) + + a[:, :, :] = data + + read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_open_sharding(store: Store) -> None: + path = "open_sharding" + spath = StorePath(store, path) + a = Array.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype="int32", + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(8, 8), + codecs=[ + TransposeCodec(order=order_from_dim("F", 2)), + BytesCodec(), + BloscCodec(), + ], + ) + ], + ) + b = Array.open(spath) + assert a.metadata == b.metadata + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_write_partial_sharded_chunks(store: Store) -> None: + data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) + spath = StorePath(store) + a = Array.create( + spath, + shape=(40, 40), + chunk_shape=(20, 20), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(10, 10), + codecs=[ + BytesCodec(), + BloscCodec(), + ], + ) + ], + ) + a[0:16, 0:16] = data + assert np.array_equal(a[0:16, 0:16], data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_delete_empty_shards(store: Store) -> None: + path = "delete_empty_shards" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=(16, 16), + chunk_shape=(8, 16), + dtype="uint16", + fill_value=1, + codecs=[ShardingCodec(chunk_shape=(8, 8))], + ) + await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) + await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) + await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8))) + # chunk (0, 0) is full + # chunks (0, 1), (1, 0), (1, 1) are empty + # shard (0, 0) is half-full + # shard (1, 0) is empty + + data = np.ones((16, 16), dtype="uint16") + data[:8, :8] = 0 + assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) + assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype) is None + chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype) + assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 + + +def test_pickle() -> None: + codec = ShardingCodec(chunk_shape=(8, 8)) + assert pickle.loads(pickle.dumps(codec)) == codec diff --git 
a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py new file mode 100644 index 0000000000..3fd4350299 --- /dev/null +++ b/tests/v3/test_codecs/test_transpose.py @@ -0,0 +1,121 @@ +import numpy as np +import pytest + +import zarr.v2 +from zarr.abc.codec import Codec +from zarr.abc.store import Store +from zarr.array import Array, AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec +from zarr.common import MemoryOrder +from zarr.config import config +from zarr.store.core import StorePath + +from .test_codecs import _AsyncArrayProxy + + +@pytest.mark.parametrize("input_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) +@pytest.mark.parametrize("with_sharding", [True, False]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_transpose( + store: Store, + input_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, + with_sharding: bool, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) + spath = StorePath(store, path="transpose") + codecs_: list[Codec] = ( + [ + ShardingCodec( + chunk_shape=(1, 16, 8), + codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], + ) + ] + if with_sharding + else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] + ) + with config.set({"array.order": runtime_write_order}): + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(1, 32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + with config.set({"array.order": runtime_read_order}): + a = await AsyncArray.open( + spath, + ) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + if not with_sharding: + # Compare with zarr-python + z = zarr.v2.create( + shape=data.shape, + chunks=(1, 32, 8), + dtype=" None: + shape = [i + 3 for i in range(len(order))] + data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) + spath = StorePath(store, "transpose_non_self_inverse") + a = Array.create( + spath, + shape=data.shape, + chunk_shape=data.shape, + dtype=data.dtype, + fill_value=0, + codecs=[TransposeCodec(order=order), BytesCodec()], + ) + a[:, :] = data + read_data = a[:, :] + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_transpose_invalid( + store: Store, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8)) + spath = StorePath(store, "transpose_invalid") + for order in [(1, 0), (3, 2, 1), (3, 3, 1)]: + with pytest.raises(ValueError): + Array.create( + spath, + shape=data.shape, + chunk_shape=(1, 32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[TransposeCodec(order=order), BytesCodec()], + ) diff --git a/tests/v3/test_codecs/test_zstd.py b/tests/v3/test_codecs/test_zstd.py new file mode 100644 index 0000000000..1e1b1e02c9 --- /dev/null +++ b/tests/v3/test_codecs/test_zstd.py @@ -0,0 +1,25 @@ +import 
numpy as np +import pytest + +from zarr.abc.store import Store +from zarr.array import Array +from zarr.codecs import BytesCodec, ZstdCodec +from zarr.store.core import StorePath + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("checksum", [True, False]) +def test_zstd(store: Store, checksum: bool) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + + a = Array.create( + StorePath(store, path="zstd"), + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=[BytesCodec(), ZstdCodec(level=0, checksum=checksum)], + ) + + a[:, :] = data + assert np.array_equal(data, a[:, :])
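
A minimal sketch (outside the patch itself) of the behaviour the sharding.py hunk is aimed at: ShardingCodec.__init__ replaces three methods with instance-local functools.lru_cache wrappers, which presumably do not pickle, so the new __getstate__/__setstate__ pair round-trips the codec through its metadata dict and rebuilds those caches. This is essentially what test_pickle in tests/v3/test_codecs/test_sharding.py asserts; the import path below mirrors the tests above.

import pickle

from zarr.codecs import ShardingCodec

codec = ShardingCodec(chunk_shape=(8, 8))

# __getstate__ serialises the codec as its metadata dict (to_dict()); __setstate__
# re-parses that dict and reinstalls the per-instance lru_cache wrappers, so the
# codec survives a pickle round trip and still compares equal.
assert pickle.loads(pickle.dumps(codec)) == codec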
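A second sketch, covering the indexing.py hunk: decode_morton is hoisted out of morton_order_iter to module level, so it can be called and tested on its own (assuming it is importable from zarr.indexing in the same way morton_order_iter already is). The expected (2, 2) ordering matches test_morton above.

from zarr.indexing import decode_morton, morton_order_iter

# z = 3 has bits (1, 1) interleaved across the two dimensions -> coordinate (1, 1).
assert decode_morton(3, (2, 2)) == (1, 1)

# morton_order_iter simply decodes 0 .. product(chunk_shape) - 1 in turn.
assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)]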
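Finally, a hypothetical test illustrating the new ArrayRequest/array_fixture machinery added to tests/v3/conftest.py, which replaces the old sample_data fixture: tests parametrise array_fixture indirectly with an ArrayRequest, as test_sharding does above. The test name and shape here are illustrative only, and the relative import assumes the module lives under tests/v3/ alongside the other test modules.

import numpy as np
import pytest

from ..conftest import ArrayRequest  # relative import, as in test_sharding.py


@pytest.mark.parametrize(
    "array_fixture",
    [ArrayRequest(shape=(64, 64), dtype="uint16", order="C")],
    indirect=["array_fixture"],
)
def test_roundtrip_example(array_fixture: np.ndarray) -> None:
    # array_fixture is np.arange(64 * 64) reshaped to (64, 64) in C order as uint16.
    assert array_fixture.shape == (64, 64)
    assert array_fixture.dtype == np.uint16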