From 4bd820f16889565cb9fc9a6365473d4e96f1a162 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 24 Aug 2024 10:01:49 -0700 Subject: [PATCH 1/8] feature(h5compat): add create_dataset, require_dataset, require_group, and require_gruops methods to group class --- src/zarr/core/common.py | 1 + src/zarr/core/group.py | 164 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index aaa30cfcb8..99ab58fae9 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -28,6 +28,7 @@ ZATTRS_JSON = ".zattrs" BytesLike = bytes | bytearray | memoryview +ShapeLike = tuple[int, ...] | int ChunkCoords = tuple[int, ...] ChunkCoordsLike = Iterable[int] ZarrFormat = Literal[2, 3] diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 86d27e3a97..df4815c54e 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -7,6 +7,7 @@ from dataclasses import asdict, dataclass, field, replace from typing import TYPE_CHECKING, Literal, cast, overload +import numpy as np import numpy.typing as npt from typing_extensions import deprecated @@ -25,6 +26,8 @@ ZGROUP_JSON, ChunkCoords, ZarrFormat, + concurrent_map, + parse_shapelike, ) from zarr.core.config import config from zarr.core.sync import SyncMixin, sync @@ -323,6 +326,38 @@ async def create_group( zarr_format=self.metadata.zarr_format, ) + async def require_group(self, name: str, overwrite: bool = False) -> AsyncGroup: + """Obtain a sub-group, creating one if it doesn't exist. + + Parameters + ---------- + name : string + Group name. + overwrite : bool, optional + Overwrite any existing group with given `name` if present. + + Returns + ------- + g : AsyncGroup + """ + if overwrite: + # TODO: check that exists_ok=True errors if an array exists where the group is being created + grp = await self.create_group(name, exists_ok=True) + else: + try: + grp = await self.getitem(name) + if not isinstance(grp, AsyncGroup): + raise TypeError( + f"Incompatible object ({grp.__class__.__name__}) already exists" + ) + except KeyError: + grp = await self.create_group(name) + return grp + + async def require_groups(self, *names: str) -> tuple[AsyncGroup, ...]: + """Convenience method to require multiple groups in a single call.""" + return tuple(await concurrent_map(list(names), self.require_group)) + async def create_array( self, path: str, @@ -411,6 +446,74 @@ async def create_array( data=data, ) + async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray: + """Create an array. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the require_dataset() method. + + Parameters + ---------- + name : string + Array name. + kwargs : dict + Additional arguments passed to create_array() + + Returns + ------- + a : AsyncArray + """ + return await self.create_array(name, **kwargs) + + async def require_dataset( + self, + name: str, + *, + shape: ChunkCoords, + dtype: npt.DTypeLike = None, + exact: bool = False, + **kwargs: Any, + ) -> AsyncArray: + """Obtain an array, creating if it doesn't exist. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the create_dataset() method. + + Other `kwargs` are as per :func:`zarr.Group.create_dataset`. + + Parameters + ---------- + name : string + Array name. + shape : int or tuple of ints + Array shape. + dtype : string or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. + """ + try: + ds = await self.getitem(name) + if not isinstance(ds, AsyncArray): + raise TypeError(f"Incompatible object ({ds.__class__.__name__}) already exists") + + shape = parse_shapelike(shape) + if shape != ds.shape: + raise TypeError(f"Incompatible shape ({ds.shape} vs {shape})") + + dtype = np.dtype(dtype) + if exact: + if ds.dtype != dtype: + raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") + else: + if not np.can_cast(ds.dtype, dtype): + raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") + except KeyError: + ds = await self.create_dataset(name, shape=shape, dtype=dtype, exact=exact, **kwargs) + + return ds + async def update_attributes(self, new_attributes: dict[str, Any]) -> AsyncGroup: # metadata.attributes is "frozen" so we simply clear and update the dict self.metadata.attributes.clear() @@ -665,6 +768,26 @@ def tree(self, expand: bool = False, level: int | None = None) -> Any: def create_group(self, name: str, **kwargs: Any) -> Group: return Group(self._sync(self._async_group.create_group(name, **kwargs))) + def require_group(self, name: str, **kwargs: Any) -> Group: + """Obtain a sub-group, creating one if it doesn't exist. + + Parameters + ---------- + name : string + Group name. + overwrite : bool, optional + Overwrite any existing group with given `name` if present. + + Returns + ------- + g : Group + """ + return Group(self._sync(self._async_group.require_group(name, **kwargs))) + + def require_groups(self, *names: str) -> tuple[Group, ...]: + """Convenience method to require multiple groups in a single call.""" + return tuple(map(Group, self._sync(self._async_group.require_groups(*names)))) + def create_array( self, name: str, @@ -759,6 +882,47 @@ def create_array( ) ) + def create_dataset(self, name: str, **kwargs: Any) -> Array: + """Create an array. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the require_dataset() method. + + Parameters + ---------- + name : string + Array name. + kwargs : dict + Additional arguments passed to create_array() + + Returns + ------- + a : Array + """ + return Array(self._sync(self._async_group.create_dataset(name, **kwargs))) + + def require_dataset(self, name: str, **kwargs: Any) -> Array: + """Obtain an array, creating if it doesn't exist. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the create_dataset() method. + + Other `kwargs` are as per :func:`zarr.Group.create_dataset`. + + Parameters + ---------- + name : string + Array name. + shape : int or tuple of ints + Array shape. + dtype : string or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. + """ + return Array(self._sync(self._async_group.require_dataset(name, **kwargs))) + def empty(self, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.empty(**kwargs))) From 0cc022f9c8d9676c55dd3ac23f3a8af058594d81 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 24 Aug 2024 10:14:20 -0700 Subject: [PATCH 2/8] make mypy happy --- src/zarr/core/group.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index df4815c54e..6faf9932fa 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -345,18 +345,20 @@ async def require_group(self, name: str, overwrite: bool = False) -> AsyncGroup: grp = await self.create_group(name, exists_ok=True) else: try: - grp = await self.getitem(name) - if not isinstance(grp, AsyncGroup): + item: AsyncGroup | AsyncArray = await self.getitem(name) + if not isinstance(item, AsyncGroup): raise TypeError( - f"Incompatible object ({grp.__class__.__name__}) already exists" + f"Incompatible object ({item.__class__.__name__}) already exists" ) + assert isinstance(item, AsyncGroup) # make mypy happy + grp = item except KeyError: grp = await self.create_group(name) return grp async def require_groups(self, *names: str) -> tuple[AsyncGroup, ...]: """Convenience method to require multiple groups in a single call.""" - return tuple(await concurrent_map(list(names), self.require_group)) + return tuple(await concurrent_map([names], self.require_group)) async def create_array( self, @@ -457,7 +459,7 @@ async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray: name : string Array name. kwargs : dict - Additional arguments passed to create_array() + Additional arguments passed to AsyncGroup.create_array() Returns ------- @@ -886,14 +888,14 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array: """Create an array. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the require_dataset() method. + with h5py, Zarr groups also implement the :func:`zarr.Group.require_dataset` method. Parameters ---------- name : string Array name. kwargs : dict - Additional arguments passed to create_array() + Additional arguments passed to :func:`zarr.Group.create_array` Returns ------- @@ -905,7 +907,7 @@ def require_dataset(self, name: str, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the create_dataset() method. + with h5py, Zarr groups also implement the :func:`zarr.Group.create_dataset` method. Other `kwargs` are as per :func:`zarr.Group.create_dataset`. From d9314ddbb063ae9f46367fe9787ce0442e9f89b0 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 24 Aug 2024 10:18:50 -0700 Subject: [PATCH 3/8] doc fixes --- src/zarr/core/group.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 6faf9932fa..ac38f2b628 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -452,14 +452,14 @@ async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray: """Create an array. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the require_dataset() method. + with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.require_dataset` method. Parameters ---------- name : string Array name. kwargs : dict - Additional arguments passed to AsyncGroup.create_array() + Additional arguments passed to :func:`zarr.AsyncGroup.create_array`. Returns ------- @@ -479,9 +479,9 @@ async def require_dataset( """Obtain an array, creating if it doesn't exist. Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the create_dataset() method. + with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.create_dataset` method. - Other `kwargs` are as per :func:`zarr.Group.create_dataset`. + Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. Parameters ---------- @@ -494,6 +494,10 @@ async def require_dataset( exact : bool, optional If True, require `dtype` to match exactly. If false, require `dtype` can be cast from array dtype. + + Returns + ------- + a : AsyncArray """ try: ds = await self.getitem(name) From 2f90c9a412bac4223dd33acd40f64a8baaa34b71 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 27 Aug 2024 16:12:02 -0700 Subject: [PATCH 4/8] write initial tests --- src/zarr/core/group.py | 9 +++-- tests/v3/test_group.py | 78 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index ac38f2b628..be5f0bcca7 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -26,7 +26,6 @@ ZGROUP_JSON, ChunkCoords, ZarrFormat, - concurrent_map, parse_shapelike, ) from zarr.core.config import config @@ -358,7 +357,9 @@ async def require_group(self, name: str, overwrite: bool = False) -> AsyncGroup: async def require_groups(self, *names: str) -> tuple[AsyncGroup, ...]: """Convenience method to require multiple groups in a single call.""" - return tuple(await concurrent_map([names], self.require_group)) + if not names: + return () + return tuple(await asyncio.gather(*(self.require_group(name) for name in names))) async def create_array( self, @@ -501,6 +502,7 @@ async def require_dataset( """ try: ds = await self.getitem(name) + print("Found existing dataset", ds) if not isinstance(ds, AsyncArray): raise TypeError(f"Incompatible object ({ds.__class__.__name__}) already exists") @@ -516,7 +518,8 @@ async def require_dataset( if not np.can_cast(ds.dtype, dtype): raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") except KeyError: - ds = await self.create_dataset(name, shape=shape, dtype=dtype, exact=exact, **kwargs) + print(f"Creating dataset {name} with shape {shape} and dtype {dtype}") + ds = await self.create_dataset(name, shape=shape, dtype=dtype, **kwargs) return ds diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 39921c26d8..e82a817683 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -653,3 +653,81 @@ async def test_asyncgroup_update_attributes( agroup_new_attributes = await agroup.update_attributes(attributes_new) assert agroup_new_attributes.attrs == attributes_new + + +async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.create(store=store, zarr_format=zarr_format) + with pytest.raises(KeyError): + await root.getitem("foo") + # create foo group + _ = await root.create_group("foo", attributes={"foo": 100}) + + # test that we can get the group using require_group + foo_group = await root.require_group("foo") + assert foo_group.attrs == {"foo": 100} + + # test that we can get the group using require_group and overwrite=True + foo_group = await root.require_group("foo", overwrite=True) + + _ = await foo_group.create_array( + "bar", shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} + ) + + # test that overwriting a group w/ children fails + # TODO: figure out why ensure_no_existing_node is not catching the foo.bar array + # + # with pytest.raises(ContainsArrayError): + # await root.require_group("foo", overwrite=True) + + # test that requiring a group where an array is fails + with pytest.raises(TypeError): + await foo_group.require_group("bar") + + +async def test_require_groups(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.create(store=store, zarr_format=zarr_format) + # create foo group + _ = await root.create_group("foo", attributes={"foo": 100}) + # create bar group + _ = await root.create_group("bar", attributes={"bar": 200}) + + foo_group, bar_group = await root.require_groups("foo", "bar") + assert foo_group.attrs == {"foo": 100} + assert bar_group.attrs == {"bar": 200} + + # get a mix of existing and new groups + foo_group, spam_group = await root.require_groups("foo", "spam") + assert foo_group.attrs == {"foo": 100} + assert spam_group.attrs == {} + + # no names + no_group = await root.require_groups() + assert no_group == () + + +async def test_create_dataset(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.create(store=store, zarr_format=zarr_format) + foo = await root.create_dataset("foo", shape=(10,), dtype="uint8") + assert foo.shape == (10,) + + with pytest.raises(ContainsArrayError): + await root.create_dataset("foo", shape=(100,), dtype="int8") + + _ = await root.create_group("bar") + with pytest.raises(ContainsGroupError): + await root.create_dataset("bar", shape=(100,), dtype="int8") + + +async def test_require_dataset(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.create(store=store, zarr_format=zarr_format) + foo1 = await root.require_dataset("foo", shape=(10,), dtype="uint8", attributes={"foo": 101}) + assert foo1.attrs == {"foo": 101} + foo2 = await root.require_dataset("foo", shape=(10,), dtype="uint8") + assert foo2.attrs == {"foo": 101} + + # with pytest.raises(TypeError): + # await root.require_dataset("foo", shape=(100,), dtype="int8") + + # _ = await root.create_group("bar") + # with pytest.raises(TypeError): + # await root.require_dataset("bar", shape=(100,), dtype="int8") From 7df67099d309dfe580eeafabb013b8385ab28477 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 27 Aug 2024 17:00:40 -0700 Subject: [PATCH 5/8] more tests --- src/zarr/core/group.py | 7 +++---- tests/v3/test_group.py | 27 ++++++++++++++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index be5f0bcca7..a3547c7b87 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -252,7 +252,7 @@ async def getitem( if zarray is not None: # TODO: update this once the V2 array support is part of the primary array class zarr_json = {**zarray, "attributes": zattrs} - return AsyncArray.from_dict(store_path, zarray) + return AsyncArray.from_dict(store_path, zarr_json) else: zgroup = ( json.loads(zgroup_bytes.to_bytes()) @@ -502,7 +502,6 @@ async def require_dataset( """ try: ds = await self.getitem(name) - print("Found existing dataset", ds) if not isinstance(ds, AsyncArray): raise TypeError(f"Incompatible object ({ds.__class__.__name__}) already exists") @@ -518,7 +517,6 @@ async def require_dataset( if not np.can_cast(ds.dtype, dtype): raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") except KeyError: - print(f"Creating dataset {name} with shape {shape} and dtype {dtype}") ds = await self.create_dataset(name, shape=shape, dtype=dtype, **kwargs) return ds @@ -670,8 +668,9 @@ def create( def open( cls, store: StoreLike, + zarr_format: Literal[2, 3, None] = 3, ) -> Group: - obj = sync(AsyncGroup.open(store)) + obj = sync(AsyncGroup.open(store, zarr_format=zarr_format)) return cls(obj) def __getitem__(self, path: str) -> Array | Group: diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index e82a817683..42ec22c305 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -657,8 +657,7 @@ async def test_asyncgroup_update_attributes( async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.create(store=store, zarr_format=zarr_format) - with pytest.raises(KeyError): - await root.getitem("foo") + # create foo group _ = await root.create_group("foo", attributes={"foo": 100}) @@ -720,14 +719,24 @@ async def test_create_dataset(store: LocalStore | MemoryStore, zarr_format: Zarr async def test_require_dataset(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.create(store=store, zarr_format=zarr_format) - foo1 = await root.require_dataset("foo", shape=(10,), dtype="uint8", attributes={"foo": 101}) + foo1 = await root.require_dataset("foo", shape=(10,), dtype="i8", attributes={"foo": 101}) assert foo1.attrs == {"foo": 101} - foo2 = await root.require_dataset("foo", shape=(10,), dtype="uint8") + foo2 = await root.require_dataset("foo", shape=(10,), dtype="i8") assert foo2.attrs == {"foo": 101} - # with pytest.raises(TypeError): - # await root.require_dataset("foo", shape=(100,), dtype="int8") + # exact = False + _ = await root.require_dataset("foo", shape=10, dtype="f8") + + # errors w/ exact True + with pytest.raises(TypeError, match="Incompatible dtype"): + await root.require_dataset("foo", shape=(10,), dtype="f8", exact=True) - # _ = await root.create_group("bar") - # with pytest.raises(TypeError): - # await root.require_dataset("bar", shape=(100,), dtype="int8") + with pytest.raises(TypeError, match="Incompatible shape"): + await root.require_dataset("foo", shape=(100, 100), dtype="i8") + + with pytest.raises(TypeError, match="Incompatible dtype"): + await root.require_dataset("foo", shape=(10,), dtype="f4") + + _ = await root.create_group("bar") + with pytest.raises(TypeError, match="Incompatible object"): + await root.require_dataset("bar", shape=(10,), dtype="int8") From 3825d3f2cc690f6fc18ba4c7411ead2c1902b3e0 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 4 Sep 2024 14:57:31 -0700 Subject: [PATCH 6/8] add deprecation warnings --- src/zarr/core/group.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 4c68b9f5e5..a6eb0263c3 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -451,6 +451,7 @@ async def create_array( data=data, ) + @deprecated("Use Group.create_array instead.") async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray: """Create an array. @@ -467,9 +468,13 @@ async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray: Returns ------- a : AsyncArray + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `Group.create_array` instead. """ return await self.create_array(name, **kwargs) + @deprecated("Use Group.require_array instead.") async def require_dataset( self, name: str, @@ -486,6 +491,40 @@ async def require_dataset( Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + Parameters + ---------- + name : string + Array name. + shape : int or tuple of ints + Array shape. + dtype : string or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. + + Returns + ------- + a : AsyncArray + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_dataset` instead. + """ + return await self.require_array(name, shape=shape, dtype=dtype, exact=exact, **kwargs) + + async def require_array( + self, + name: str, + *, + shape: ChunkCoords, + dtype: npt.DTypeLike = None, + exact: bool = False, + **kwargs: Any, + ) -> AsyncArray: + """Obtain an array, creating if it doesn't exist. + + Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + Parameters ---------- name : string From 0455729851c193d54ba258a750cb6317152df638 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 4 Sep 2024 15:44:53 -0700 Subject: [PATCH 7/8] add deprecation warnings --- src/zarr/core/group.py | 48 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index a6eb0263c3..8dbad2a496 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -451,7 +451,7 @@ async def create_array( data=data, ) - @deprecated("Use Group.create_array instead.") + @deprecated("Use AsyncGroup.create_array instead.") async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray: """Create an array. @@ -470,11 +470,11 @@ async def create_dataset(self, name: str, **kwargs: Any) -> AsyncArray: a : AsyncArray .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `Group.create_array` instead. + The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead. """ return await self.create_array(name, **kwargs) - @deprecated("Use Group.require_array instead.") + @deprecated("Use AsyncGroup.require_array instead.") async def require_dataset( self, name: str, @@ -508,7 +508,7 @@ async def require_dataset( a : AsyncArray .. deprecated:: 3.0.0 - The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_dataset` instead. + The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.require_dataset` instead. """ return await self.require_array(name, shape=shape, dtype=dtype, exact=exact, **kwargs) @@ -558,7 +558,7 @@ async def require_array( if not np.can_cast(ds.dtype, dtype): raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") except KeyError: - ds = await self.create_dataset(name, shape=shape, dtype=dtype, **kwargs) + ds = await self.create_array(name, shape=shape, dtype=dtype, **kwargs) return ds @@ -981,6 +981,7 @@ def create_array( ) ) + @deprecated("Use Group.create_array instead.") def create_dataset(self, name: str, **kwargs: Any) -> Array: """Create an array. @@ -997,9 +998,13 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array: Returns ------- a : Array + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `Group.create_array` instead. """ return Array(self._sync(self._async_group.create_dataset(name, **kwargs))) + @deprecated("Use Group.require_array instead.") def require_dataset(self, name: str, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. @@ -1019,8 +1024,39 @@ def require_dataset(self, name: str, **kwargs: Any) -> Array: exact : bool, optional If True, require `dtype` to match exactly. If false, require `dtype` can be cast from array dtype. + + Returns + ------- + a : Array + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead. + """ + return Array(self._sync(self._async_group.require_array(name, **kwargs))) + + def require_array(self, name: str, **kwargs: Any) -> Array: + """Obtain an array, creating if it doesn't exist. + + + Other `kwargs` are as per :func:`zarr.Group.create_array`. + + Parameters + ---------- + name : string + Array name. + shape : int or tuple of ints + Array shape. + dtype : string or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. + + Returns + ------- + a : Array """ - return Array(self._sync(self._async_group.require_dataset(name, **kwargs))) + return Array(self._sync(self._async_group.require_array(name, **kwargs))) def empty(self, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.empty(**kwargs))) From 77356d7b6c02e7a15d87c2cb52a7bb368b4c8dc5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 4 Sep 2024 15:46:19 -0700 Subject: [PATCH 8/8] switch up test --- tests/v3/test_group.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 968a04eeeb..80a06febec 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -794,26 +794,26 @@ async def test_create_dataset(store: LocalStore | MemoryStore, zarr_format: Zarr await root.create_dataset("bar", shape=(100,), dtype="int8") -async def test_require_dataset(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_require_array(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.create(store=store, zarr_format=zarr_format) - foo1 = await root.require_dataset("foo", shape=(10,), dtype="i8", attributes={"foo": 101}) + foo1 = await root.require_array("foo", shape=(10,), dtype="i8", attributes={"foo": 101}) assert foo1.attrs == {"foo": 101} - foo2 = await root.require_dataset("foo", shape=(10,), dtype="i8") + foo2 = await root.require_array("foo", shape=(10,), dtype="i8") assert foo2.attrs == {"foo": 101} # exact = False - _ = await root.require_dataset("foo", shape=10, dtype="f8") + _ = await root.require_array("foo", shape=10, dtype="f8") # errors w/ exact True with pytest.raises(TypeError, match="Incompatible dtype"): - await root.require_dataset("foo", shape=(10,), dtype="f8", exact=True) + await root.require_array("foo", shape=(10,), dtype="f8", exact=True) with pytest.raises(TypeError, match="Incompatible shape"): - await root.require_dataset("foo", shape=(100, 100), dtype="i8") + await root.require_array("foo", shape=(100, 100), dtype="i8") with pytest.raises(TypeError, match="Incompatible dtype"): - await root.require_dataset("foo", shape=(10,), dtype="f4") + await root.require_array("foo", shape=(10,), dtype="f4") _ = await root.create_group("bar") with pytest.raises(TypeError, match="Incompatible object"): - await root.require_dataset("bar", shape=(10,), dtype="int8") + await root.require_array("bar", shape=(10,), dtype="int8")