Skip to content

Commit aecbf06

Browse files
authored
Merge branch 'main' into feat/memory-store-registry
2 parents 1d554f8 + 9c47b6d commit aecbf06

File tree

7 files changed

+162
-11
lines changed

7 files changed

+162
-11
lines changed

changes/3668.feature.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Exposes the array runtime configuration as an attribute called `config` on the `Array` and
2+
`AsyncArray` classes. The previous `AsyncArray._config` attribute is now a deprecated alias for `AsyncArray.config`.
3+
4+
Adds a method for creating a new `Array` / `AsyncArray` instance with a new runtime configuration, and fixes inaccurate documentation about the `write_empty_chunks` configuration parameter.

docs/user-guide/arrays.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,32 @@ z.append(np.vstack([a, a]), axis=1)
153153
print(f"Shape after second append: {z.shape}")
154154
```
155155

156+
## Runtime configuration
157+
158+
Zarr arrays are parametrized with a configuration that determines certain aspects of array behavior.
159+
160+
We currently support two configuration options for arrays: `write_empty_chunks` and `order`.
161+
162+
| field | type | default | description |
163+
| - | - | - | - |
164+
| `write_empty_chunks` | `bool` | `False` | Controls whether empty chunks are written to storage. See [Empty chunks](performance.md#empty-chunks). |
165+
| `order` | `Literal["C", "F"]` | `"C"` | The memory layout of arrays returned when reading data from the store. |
166+
167+
You can specify the configuration when you create an array with the `config` keyword argument.
168+
`config` can be passed as either a `dict` or an `ArrayConfig` object.
169+
170+
```python exec="true" session="arrays" source="above" result="ansi"
171+
arr = zarr.create_array({}, shape=(10,), dtype='int8', config={"write_empty_chunks": True})
172+
print(arr.config)
173+
```
174+
175+
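To get a new array object that refers to the same stored data but uses a different config, use the `with_config` method. Any configuration keys you do not specify are inherited from the existing array's config.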
176+
177+
```python exec="true" session="arrays" source="above" result="ansi"
178+
arr_f = arr.with_config({"order": "F"})
179+
print(arr_f.config)
180+
```
181+
156182
## Compressors
157183

158184
A number of different compressors can be used with Zarr. Zarr includes Blosc,

docs/user-guide/performance.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,14 @@ This optimization prevents storing redundant objects and can speed up reads, but
125125
added computation during array writes, since the contents of
126126
each chunk must be compared to the fill value, and these advantages are contingent on the content of the array.
127127
If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above.
128-
In this case, creating an array with `write_empty_chunks=True` (the default) will instruct Zarr to write every chunk without checking for emptiness.
128+
In this case, creating an array with `write_empty_chunks=True` will instruct Zarr to write every chunk without checking for emptiness.
129+
130+
The default value of `write_empty_chunks` is `False`:
131+
132+
```python exec="true" session="performance" source="above" result="ansi"
133+
arr = zarr.create_array(store={}, shape=(1,), dtype='uint8')
134+
assert arr.config.write_empty_chunks == False
135+
```
129136

130137
The following example illustrates the effect of the `write_empty_chunks` flag on
131138
the time required to write an array with different values:

src/zarr/core/array.py

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@
144144
from zarr.codecs.sharding import ShardingCodecIndexLocation
145145
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar
146146
from zarr.storage import StoreLike
147-
from zarr.types import AnyArray, AnyAsyncArray, AsyncArrayV2, AsyncArrayV3
147+
from zarr.types import AnyArray, AnyAsyncArray, ArrayV2, ArrayV3, AsyncArrayV2, AsyncArrayV3
148148

149149

150150
# Array and AsyncArray are defined in the base ``zarr`` namespace
@@ -300,14 +300,14 @@ class AsyncArray(Generic[T_ArrayMetadata]):
300300
The path to the Zarr store.
301301
codec_pipeline : CodecPipeline
302302
The codec pipeline used for encoding and decoding chunks.
303-
_config : ArrayConfig
303+
config : ArrayConfig
304304
The runtime configuration of the array.
305305
"""
306306

307307
metadata: T_ArrayMetadata
308308
store_path: StorePath
309309
codec_pipeline: CodecPipeline = field(init=False)
310-
_config: ArrayConfig
310+
config: ArrayConfig
311311

312312
@overload
313313
def __init__(
@@ -336,7 +336,7 @@ def __init__(
336336

337337
object.__setattr__(self, "metadata", metadata_parsed)
338338
object.__setattr__(self, "store_path", store_path)
339-
object.__setattr__(self, "_config", config_parsed)
339+
object.__setattr__(self, "config", config_parsed)
340340
object.__setattr__(
341341
self,
342342
"codec_pipeline",
@@ -1012,6 +1012,11 @@ async def example():
10121012
def store(self) -> Store:
10131013
return self.store_path.store
10141014

1015+
@property
1016+
@deprecated("Use AsyncArray.config instead.", category=ZarrDeprecationWarning)
1017+
def _config(self) -> ArrayConfig:
1018+
return self.config
1019+
10151020
@property
10161021
def ndim(self) -> int:
10171022
"""Returns the number of dimensions in the Array.
@@ -1165,7 +1170,7 @@ def order(self) -> MemoryOrder:
11651170
if self.metadata.zarr_format == 2:
11661171
return self.metadata.order
11671172
else:
1168-
return self._config.order
1173+
return self.config.order
11691174

11701175
@property
11711176
def attrs(self) -> dict[str, JSON]:
@@ -1298,6 +1303,35 @@ def _nshards(self) -> int:
12981303
"""
12991304
return product(self._shard_grid_shape)
13001305

1306+
@overload
1307+
def with_config(self: AsyncArrayV2, config: ArrayConfigLike) -> AsyncArrayV2: ...
1308+
1309+
@overload
1310+
def with_config(self: AsyncArrayV3, config: ArrayConfigLike) -> AsyncArrayV3: ...
1311+
1312+
def with_config(self, config: ArrayConfigLike) -> Self:
1313+
"""
1314+
Return a copy of this AsyncArray with a new runtime configuration.
1315+
1316+
Parameters
1317+
----------
1318+
1319+
config : ArrayConfigLike
1320+
The runtime config for the new Array. Any keys not specified will be inherited
1321+
from the current array's config.
1322+
1323+
Returns
1324+
-------
1325+
A new AsyncArray
1326+
"""
1327+
if isinstance(config, ArrayConfig):
1328+
new_config = config
1329+
else:
1330+
# Merge new config with existing config, so missing keys are inherited
1331+
# from the current array rather than from global defaults
1332+
new_config = ArrayConfig(**{**self.config.to_dict(), **config}) # type: ignore[arg-type]
1333+
return type(self)(metadata=self.metadata, store_path=self.store_path, config=new_config)
1334+
13011335
async def nchunks_initialized(self) -> int:
13021336
"""
13031337
Calculate the number of chunks that have been initialized in storage.
@@ -1570,7 +1604,7 @@ async def _get_selection(
15701604
)
15711605
if product(indexer.shape) > 0:
15721606
# need to use the order from the metadata for v2
1573-
_config = self._config
1607+
_config = self.config
15741608
if self.metadata.zarr_format == 2:
15751609
_config = replace(_config, order=self.order)
15761610

@@ -1741,7 +1775,7 @@ async def _set_selection(
17411775
value_buffer = prototype.nd_buffer.from_ndarray_like(value)
17421776

17431777
# need to use the order from the metadata for v2
1744-
_config = self._config
1778+
_config = self.config
17451779
if self.metadata.zarr_format == 2:
17461780
_config = replace(_config, order=self.metadata.order)
17471781

@@ -2063,6 +2097,19 @@ def async_array(self) -> AsyncArray[T_ArrayMetadata]:
20632097
"""
20642098
return self._async_array
20652099

2100+
@property
2101+
def config(self) -> ArrayConfig:
2102+
"""
2103+
The runtime configuration for this array. This is a read-only property. To modify the
2104+
runtime configuration, use `Array.with_config` to create a new `Array` with the modified
2105+
configuration.
2106+
2107+
Returns
2108+
-------
2109+
An `ArrayConfig` object that defines the runtime configuration for the array.
2110+
"""
2111+
return self.async_array.config
2112+
20662113
@classmethod
20672114
@deprecated("Use zarr.create_array instead.", category=ZarrDeprecationWarning)
20682115
def create(
@@ -2524,6 +2571,29 @@ def _nshards(self) -> int:
25242571
"""
25252572
return self.async_array._nshards
25262573

2574+
@overload
2575+
def with_config(self: ArrayV2, config: ArrayConfigLike) -> ArrayV2: ...
2576+
2577+
@overload
2578+
def with_config(self: ArrayV3, config: ArrayConfigLike) -> ArrayV3: ...
2579+
2580+
def with_config(self, config: ArrayConfigLike) -> Self:
2581+
"""
2582+
Return a copy of this Array with a new runtime configuration.
2583+
2584+
Parameters
2585+
----------
2586+
2587+
config : ArrayConfigLike
2588+
The runtime config for the new Array. Any keys not specified will be inherited
2589+
from the current array's config.
2590+
2591+
Returns
2592+
-------
2593+
A new Array
2594+
"""
2595+
return type(self)(self._async_array.with_config(config))
2596+
25272597
@property
25282598
def nbytes(self) -> int:
25292599
"""

src/zarr/core/array_spec.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ def from_dict(cls, data: ArrayConfigParams) -> Self:
6969
kwargs_out[field_name] = data[field_name]
7070
return cls(**kwargs_out)
7171

72+
def to_dict(self) -> ArrayConfigParams:
73+
"""
74+
Serialize an instance of this class to a dict.
75+
"""
76+
return {"order": self.order, "write_empty_chunks": self.write_empty_chunks}
77+
7278

7379
ArrayConfigLike = ArrayConfig | ArrayConfigParams
7480

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def test_open_array_respects_write_empty_chunks_config(zarr_format: ZarrFormat)
232232
arr2 = zarr.open(store=store, path="test_array", config={"write_empty_chunks": True})
233233
assert isinstance(arr2, zarr.Array)
234234

235-
assert arr2.async_array._config.write_empty_chunks is True
235+
assert arr2.async_array.config.write_empty_chunks is True
236236

237237
arr2[0:5] = np.zeros(5)
238238
assert arr2.nchunks_initialized == 1

tests/test_array.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
default_filters_v2,
4545
default_serializer_v3,
4646
)
47+
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams
4748
from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype
4849
from zarr.core.chunk_grids import _auto_partition
4950
from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
@@ -889,7 +890,7 @@ def test_write_empty_chunks_behavior(
889890
config={"write_empty_chunks": write_empty_chunks},
890891
)
891892

892-
assert arr.async_array._config.write_empty_chunks == write_empty_chunks
893+
assert arr.async_array.config.write_empty_chunks == write_empty_chunks
893894

894895
# initialize the store with some non-fill value chunks
895896
arr[:] = fill_value + 1
@@ -1562,7 +1563,7 @@ async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store)
15621563
"""
15631564
with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}):
15641565
arr = await create_array(store, shape=(2, 2), dtype="i4")
1565-
assert arr._config.write_empty_chunks == write_empty_chunks
1566+
assert arr.config.write_empty_chunks == write_empty_chunks
15661567

15671568
@staticmethod
15681569
@pytest.mark.parametrize("path", [None, "", "/", "/foo", "foo", "foo/bar"])
@@ -2194,3 +2195,40 @@ def test_create_array_with_data_num_gets(
21942195
# one get for the metadata and one per shard.
21952196
# Note: we don't actually need one get per shard, but this is the current behavior
21962197
assert store.counter["get"] == 1 + num_shards
2198+
2199+
2200+
@pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True}, {"order": "C"}])
2201+
def test_with_config(config: ArrayConfigParams) -> None:
2202+
"""
2203+
Test that `AsyncArray.with_config` and `Array.with_config` create a copy of the source
2204+
array with a new runtime configuration.
2205+
"""
2206+
# the config we start with
2207+
source_config: ArrayConfigParams = {"write_empty_chunks": False, "order": "F"}
2208+
source_array = zarr.create_array({}, shape=(1,), dtype="uint8", config=source_config)
2209+
2210+
new_async_array_config_dict = source_array._async_array.with_config(config).config.to_dict()
2211+
new_array_config_dict = source_array.with_config(config).config.to_dict()
2212+
2213+
for key in source_config:
2214+
if key in config:
2215+
assert new_async_array_config_dict[key] == config[key] # type: ignore[literal-required]
2216+
assert new_array_config_dict[key] == config[key] # type: ignore[literal-required]
2217+
else:
2218+
assert new_async_array_config_dict[key] == source_config[key] # type: ignore[literal-required]
2219+
assert new_array_config_dict[key] == source_config[key] # type: ignore[literal-required]
2220+
2221+
2222+
def test_with_config_polymorphism() -> None:
2223+
"""
2224+
Test that `AsyncArray.with_config` and `Array.with_config` accept dicts and full array config
2225+
objects.
2226+
"""
2227+
source_config: ArrayConfig = ArrayConfig.from_dict({"write_empty_chunks": False, "order": "F"})
2228+
source_config_dict = source_config.to_dict()
2229+
2230+
arr = zarr.create_array({}, shape=(1,), dtype="uint8")
2231+
arr_source_config = arr.with_config(source_config)
2232+
arr_source_config_dict = arr.with_config(source_config_dict)
2233+
2234+
assert arr_source_config.config == arr_source_config_dict.config

0 commit comments

Comments
 (0)