From 8164bfbecf723ca121c05e56137177ae11b5f31e Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 11 Oct 2024 16:53:17 +0200 Subject: [PATCH 1/5] test_sharding_with_empty_inner_chunk --- tests/v3/test_codecs/test_sharding.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index c0dcfbf350..b8291d29ff 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -330,3 +330,25 @@ async def test_delete_empty_shards(store: Store) -> None: def test_pickle() -> None: codec = ShardingCodec(chunk_shape=(8, 8)) assert pickle.loads(pickle.dumps(codec)) == codec + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end]) +async def test_sharding_with_empty_inner_chunk(store: Store, index_location): + data = np.arange(0, 16 * 16, dtype="uint32").reshape((16, 16)) + fill_value = 1 + + path = f"sharding_with_empty_inner_chunk_{index_location}" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=(16, 16), + chunk_shape=(8, 8), + dtype="uint32", + fill_value=fill_value, + codecs=[ShardingCodec(chunk_shape=(4, 4), index_location=index_location, index_codecs=[BytesCodec()])], + ) + data[:4, :4] = fill_value + await a.setitem(..., data) + print("read data") + data_read = await a.getitem(...) + assert np.array_equal(data_read, data) From 74bd774739ad904756b149c3cad332a6b1dfd833 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 11 Oct 2024 17:08:02 +0200 Subject: [PATCH 2/5] only update non-empty chunks offset --- src/zarr/codecs/sharding.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 2181e9eb76..47464b29be 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -265,7 +265,8 @@ async def finalize( ) -> Buffer: index_bytes = await index_encoder(self.index) if index_location == ShardingCodecIndexLocation.start: - self.index.offsets_and_lengths[..., 0] += len(index_bytes) + empty_chunks_mask = self.index.offsets_and_lengths[..., 0] == MAX_UINT_64 + self.index.offsets_and_lengths[~empty_chunks_mask, 0] += len(index_bytes) index_bytes = await index_encoder(self.index) # encode again with corrected offsets out_buf = index_bytes + self.buf else: From 4c5acf1640859aad52cc060fbea459cfd4de38a5 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 11 Oct 2024 18:25:50 +0200 Subject: [PATCH 3/5] format --- src/zarr/codecs/pipeline.py | 1 - tests/v3/test_codecs/test_sharding.py | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py index 1226a04f06..038a2eeac2 100644 --- a/src/zarr/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -162,7 +162,6 @@ async def decode_batch( ) -> Iterable[NDBuffer | None]: chunk_bytes_batch: Iterable[Buffer | None] chunk_bytes_batch, chunk_specs = _unzip2(chunk_bytes_and_specs) - ( aa_codecs_with_spec, ab_codec_with_spec, diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index b8291d29ff..b9ed0aacfe 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -331,9 +331,12 @@ def test_pickle() -> None: codec = ShardingCodec(chunk_shape=(8, 8)) assert pickle.loads(pickle.dumps(codec)) == codec + @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end]) -async def test_sharding_with_empty_inner_chunk(store: Store, index_location): +@pytest.mark.parametrize( + "index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end] +) +async def test_sharding_with_empty_inner_chunk(store: Store, index_location: ShardingCodecIndexLocation) -> None: data = np.arange(0, 16 * 16, dtype="uint32").reshape((16, 16)) fill_value = 1 @@ -345,7 +348,7 @@ async def test_sharding_with_empty_inner_chunk(store: Store, index_location): chunk_shape=(8, 8), dtype="uint32", fill_value=fill_value, - codecs=[ShardingCodec(chunk_shape=(4, 4), index_location=index_location, index_codecs=[BytesCodec()])], + codecs=[ShardingCodec(chunk_shape=(4, 4), index_location=index_location)], ) data[:4, :4] = fill_value await a.setitem(..., data) From 1df6108d64f280aea6809002ce548160b7256dc8 Mon Sep 17 00:00:00 2001 From: Norman Rzepka Date: Fri, 18 Oct 2024 14:15:56 +0200 Subject: [PATCH 4/5] Update test_sharding.py --- tests/v3/test_codecs/test_sharding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index b9ed0aacfe..8f5b0dc3c3 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -332,7 +332,7 @@ def test_pickle() -> None: assert pickle.loads(pickle.dumps(codec)) == codec -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize( "index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end] ) From e8045c98881b5b5763311330e18fc235e3227381 Mon Sep 17 00:00:00 2001 From: Norman Rzepka Date: Fri, 18 Oct 2024 20:22:03 +0200 Subject: [PATCH 5/5] format --- tests/test_codecs/test_sharding.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index 3485ce3697..85315c8780 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -372,7 +372,9 @@ def test_pickle() -> None: @pytest.mark.parametrize( "index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end] ) -async def test_sharding_with_empty_inner_chunk(store: Store, index_location: ShardingCodecIndexLocation) -> None: +async def test_sharding_with_empty_inner_chunk( + store: Store, index_location: ShardingCodecIndexLocation +) -> None: data = np.arange(0, 16 * 16, dtype="uint32").reshape((16, 16)) fill_value = 1