Skip to content

Commit 6d54469

Browse files
committed
Ignore extra keys in v2 metadata
Ignore unexpected keys in Zarr V2 metadata, to enable reading Zarr files written by other systems, which might store additional data in the top level of the `.zgroup` and `.zarray` files. Closes zarr-developers#2296
1 parent b1c4e47 commit 6d54469

File tree

5 files changed

+57
-2
lines changed

5 files changed

+57
-2
lines changed

src/zarr/core/group.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import asyncio
44
import json
55
import logging
6-
from dataclasses import asdict, dataclass, field, replace
6+
from dataclasses import asdict, dataclass, field, fields, replace
77
from typing import TYPE_CHECKING, Literal, cast, overload
88

99
import numpy as np
@@ -116,6 +116,15 @@ def __init__(
116116
@classmethod
117117
def from_dict(cls, data: dict[str, Any]) -> GroupMetadata:
118118
assert data.pop("node_type", None) in ("group", None)
119+
120+
zarr_format = data.get("zarr_format")
121+
if zarr_format == 2 or zarr_format is None:
122+
# zarr v2 allowed arbitrary keys here.
123+
# We don't want the GroupMetadata constructor to fail just because someone put an
124+
# extra key in the metadata.
125+
expected = {x.name for x in fields(cls)}
126+
data = {k: v for k, v in data.items() if k in expected}
127+
119128
return cls(**data)
120129

121130
def to_dict(self) -> dict[str, Any]:

src/zarr/core/metadata/v2.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from zarr.core.common import JSON, ChunkCoords
1414

1515
import json
16-
from dataclasses import dataclass, field, replace
16+
from dataclasses import dataclass, field, fields, replace
1717

1818
import numcodecs
1919
import numpy as np
@@ -140,6 +140,13 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
140140
_data = data.copy()
141141
# check that the zarr_format attribute is correct
142142
_ = parse_zarr_format(_data.pop("zarr_format"))
143+
144+
# zarr v2 allowed arbitrary keys here.
145+
# We don't want the ArrayV2Metadata constructor to fail just because someone put an
146+
# extra key in the metadata.
147+
expected = {x.name for x in fields(cls)}
148+
_data = {k: v for k, v in _data.items() if k in expected}
149+
143150
return cls(**_data)
144151

145152
def to_dict(self) -> dict[str, JSON]:

tests/v3/test_array.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pytest
77

88
import zarr.api.asynchronous
9+
import zarr.storage
910
from zarr import Array, AsyncArray, Group
1011
from zarr.codecs.bytes import BytesCodec
1112
from zarr.core.array import chunks_initialized

tests/v3/test_group.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,3 +963,15 @@ async def test_open_mutable_mapping():
963963
def test_open_mutable_mapping_sync():
964964
group = zarr.open_group(store={}, mode="w")
965965
assert isinstance(group.store_path.store, MemoryStore)
966+
967+
968+
class TestGroupMetadata:
969+
def test_from_dict_extra_fields(self):
970+
data = {
971+
"attributes": {"key": "value"},
972+
"_nczarr_superblock": {"version": "2.0.0"},
973+
"zarr_format": 2,
974+
}
975+
result = GroupMetadata.from_dict(data)
976+
expected = GroupMetadata(attributes={"key": "value"}, zarr_format=2)
977+
assert result == expected

tests/v3/test_metadata/test_v2.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,29 @@ def test_metadata_to_dict(
7272
observed.pop("dimension_separator")
7373

7474
assert observed == expected
75+
76+
77+
def test_from_dict_extra_fields() -> None:
78+
data = {
79+
"_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"},
80+
"attributes": {"key": "value"},
81+
"chunks": [8],
82+
"compressor": None,
83+
"dtype": "<f8",
84+
"fill_value": 0.0,
85+
"filters": None,
86+
"order": "C",
87+
"shape": [8],
88+
"zarr_format": 2,
89+
}
90+
91+
result = ArrayV2Metadata.from_dict(data)
92+
expected = ArrayV2Metadata(
93+
attributes={"key": "value"},
94+
shape=(8,),
95+
data_type="float64",
96+
chunk_grid=(8,),
97+
fill_value=0.0,
98+
order="C",
99+
)
100+
assert result == expected

0 commit comments

Comments
 (0)