Skip to content

[WIP] Refactor arrays in v3 #1589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
55b47b1
Pull Zarrita into Zarr-Python @ 78274781ad64aef95772eb4b083f7ea9b7d03d06
jhamman Nov 3, 2023
a31a4e5
apply zarr lint rules
jhamman Nov 3, 2023
15f667c
zarrita -> v3
jhamman Nov 3, 2023
c3f8764
v3/abc [wip]
jhamman Nov 3, 2023
0864187
use abcs plus implementation notes
jhamman Nov 3, 2023
664c9ca
refactor arrays
d-v-b Dec 5, 2023
78d0bc0
working on making codecs extensible
normanrz Dec 5, 2023
8b86afa
merge
normanrz Dec 5, 2023
4fac528
adds index_location
normanrz Dec 5, 2023
cb6c9a8
start putting chunk-specific operations in chunks.py
d-v-b Dec 5, 2023
fcf79b6
adds support for codec entry points
normanrz Dec 5, 2023
4ca61fb
move v2 chunk io into chunk.py
d-v-b Dec 5, 2023
9f97282
adds tests from zarrita
normanrz Dec 5, 2023
f8bab5b
fixes types
normanrz Dec 5, 2023
c7cebb0
v2 array IO using v3 routines
d-v-b Dec 5, 2023
6c3e40a
Apply suggestions from code review
normanrz Dec 6, 2023
d30bdbf
rename CoreArrayMetadata to ChunkMetadata; remove v3 suffix from io r…
d-v-b Dec 6, 2023
9749f25
remove test codec from pyproject.toml
normanrz Dec 6, 2023
41cee85
pull in codec tests from @normanrz's branch; try to unify chunk encod…
d-v-b Dec 6, 2023
aef3a4c
Update zarr/v3/array/base.py
d-v-b Dec 6, 2023
93aa706
ChunkMetadata.shape -> ChunkMetadata.array_shape
d-v-b Dec 6, 2023
2324003
remove breakpoint
d-v-b Dec 6, 2023
0a89edf
rename test, fix failures relating to incorrect chunkkeyencoding for …
d-v-b Dec 7, 2023
3e84800
remove conditional statement in json serialization that was breaking …
d-v-b Dec 8, 2023
8d28e8d
remove runtime_configuration from chunk_metadata
d-v-b Dec 8, 2023
79c3ce2
remove runtime_configuration from chunk_metadata in array classes
d-v-b Dec 9, 2023
b2a1515
define codec_pipeline once at top level of v2.array
d-v-b Dec 9, 2023
a7ce7a1
chore: remove fill_value, chunk_key_encoding, and chunk_coords argume…
d-v-b Dec 15, 2023
27740b0
add v3/types.py, bring v2 into closer alignment to v3 api
d-v-b Jan 7, 2024
b4a653b
remove v3x.py
d-v-b Jan 7, 2024
348ddb1
remove v3x tests
d-v-b Jan 7, 2024
4fd50b5
start removing attrs, and using typeddict + to_dict methods instead. …
d-v-b Jan 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ repos:
hooks:
- id: check-yaml
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.3.0
rev: v1.7.1
hooks:
- id: mypy
files: zarr
args: []
additional_dependencies:
- types-redis
- types-setuptools
- attrs
121 changes: 121 additions & 0 deletions zarr/tests/test_array_v3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import zarr.v3.array.v3 as v3
import zarr.v3.array.v2 as v2
import pytest
from typing import Any, Dict, Literal, Tuple, Union
import numpy as np

from zarr.v3.types import Attributes, ChunkCoords
from zarr.v3.metadata.v3 import DefaultChunkKeyEncoding, RegularChunkGrid, RegularChunkGridConfig

# TODO: parametrize by chunks
@pytest.mark.asyncio
@pytest.mark.parametrize("zarr_version", ("2", "3"))
@pytest.mark.parametrize(
    "shape",
    (
        (10,),
        (10, 11),
        (10, 11, 12),
    ),
)
@pytest.mark.parametrize(
    "dtype", (np.dtype("uint8"), "uint8", np.dtype("float32"), "float32", "int64")
)
@pytest.mark.parametrize("attributes", ({}, dict(a=10, b=10)))
@pytest.mark.parametrize("fill_value", (0, 1, 2))
@pytest.mark.parametrize("dimension_separator", (".", "/"))
async def test_array(
    tmpdir,
    zarr_version: Literal["2", "3"],
    shape: Tuple[int, ...],
    dtype: Union[str, np.dtype],
    attributes: Attributes,
    fill_value: float,
    dimension_separator: Literal[".", "/"],
):
    """Round-trip data through a freshly created v2 or v3 async array.

    The array is created in ``tmpdir`` with a single chunk covering the whole
    array (``chunks`` / ``chunk_shape`` equal ``shape``). The test then checks:

    1. an untouched array reads back as ``fill_value`` everywhere;
    2. a full write reads back unchanged;
    3. a partial write of the first slice along axis 0 leaves the array
       contents equal to the data that was written.

    Note that ``dimension_separator`` is only forwarded to the v2 constructor;
    the v3 branch does not use it.
    """
    store_path = str(tmpdir)
    # Both branches build an *async* array class, so annotate accordingly.
    arr: Union[v2.AsyncArray, v3.AsyncArray]
    if zarr_version == "2":
        arr = await v2.AsyncArray.create(
            store=store_path,
            shape=shape,
            dtype=dtype,
            chunks=shape,
            dimension_separator=dimension_separator,
            fill_value=fill_value,
            attributes=attributes,
            exists_ok=True,
        )
    else:
        arr = await v3.AsyncArray.create(
            store=store_path,
            shape=shape,
            dtype=dtype,
            chunk_shape=shape,
            fill_value=fill_value,
            attributes=attributes,
            exists_ok=True,
        )

    # Nothing has been written yet, so every element should be the fill value.
    fill_array = np.zeros(shape, dtype=dtype) + fill_value
    assert np.array_equal(arr[:], fill_array)

    data = np.arange(np.prod(shape)).reshape(shape).astype(dtype)

    # note: if we try to create a prefix called "0/0/0" but an object named "0" already
    # exists in the store, then we will get an unhandled exception
    arr[:] = data
    assert np.array_equal(arr[:], data)

    # partial write: rewrite only the first slice along axis 0 with the same
    # values, then confirm the write neither corrupted that region nor
    # clobbered the rest of the array.
    arr[slice(0, 1)] = data[slice(0, 1)]
    assert np.array_equal(arr[:], data)


@pytest.mark.parametrize("zarr_format", (2, 3))
def test_init_format(zarr_format: Literal[2, 3]):
    """Array metadata must reject a ``zarr_format`` belonging to the other version.

    For ``zarr_format == 2`` the v2 metadata class is constructed with
    ``zarr_format=3``; otherwise the v3 metadata class is constructed with
    ``zarr_format=2``. In both cases a ``ValueError`` is expected.
    """
    array_shape = (10,)
    data_type = "uint8"

    if zarr_format == 2:
        with pytest.raises(ValueError):
            v2.ArrayMetadata(
                shape=array_shape,
                dtype=data_type,
                chunks=array_shape,
                zarr_format=3,
            )
        return

    # Everything is built inside the context manager so that a ValueError
    # raised by any of the nested constructors is also accepted.
    with pytest.raises(ValueError):
        v3.ArrayMetadata(
            shape=array_shape,
            data_type=data_type,
            codecs=[],
            chunk_grid=RegularChunkGrid(
                configuration=RegularChunkGridConfig(chunk_shape=array_shape)
            ),
            fill_value=0,
            chunk_key_encoding=DefaultChunkKeyEncoding(),
            zarr_format=2,
        )


@pytest.mark.parametrize("zarr_format", ("2", "3"))
def test_init_node_type(zarr_format: Literal["2", "3"]):
    """Array metadata must reject ``node_type="group"``.

    The v2 metadata class is exercised for ``zarr_format == "2"`` and the v3
    metadata class otherwise; both are expected to raise ``ValueError``.
    """
    dtype = "uint8"
    shape = (10,)
    # The parameter values are strings, so compare against "2" rather than the
    # integer 2; an int comparison is always False and would silently skip the
    # v2 branch for every parametrization.
    if zarr_format == "2":
        with pytest.raises(ValueError):
            v2.ArrayMetadata(shape=shape, dtype=dtype, chunks=shape, node_type="group")
    else:
        with pytest.raises(ValueError):
            v3.ArrayMetadata(
                shape=shape,
                data_type=dtype,
                codecs=[],
                chunk_grid=RegularChunkGrid(
                    configuration=RegularChunkGridConfig(chunk_shape=shape)
                ),
                fill_value=0,
                chunk_key_encoding=DefaultChunkKeyEncoding(),
                node_type="group",
            )
Loading