Skip to content

Commit 49f1505

Browse files
jhammand-v-b
andauthored
feature(store): make list_* methods async generators (zarr-developers#110)
* feature(store): make list_* methods async generators * Update src/zarr/v3/store/memory.py * Apply suggestions from code review - simplify code comments - use `removeprefix` instead of `strip` --------- Co-authored-by: Davis Bennett <[email protected]>
1 parent b5a7698 commit 49f1505

File tree

4 files changed

+61
-63
lines changed

4 files changed

+61
-63
lines changed

src/zarr/v3/abc/store.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from abc import abstractmethod, ABC
2+
from collections.abc import AsyncGenerator
23

34
from typing import List, Tuple, Optional
45

@@ -106,17 +107,17 @@ def supports_listing(self) -> bool:
106107
...
107108

108109
@abstractmethod
109-
async def list(self) -> List[str]:
110+
async def list(self) -> AsyncGenerator[str, None]:
110111
"""Retrieve all keys in the store.
111112
112113
Returns
113114
-------
114-
list[str]
115+
AsyncGenerator[str, None]
115116
"""
116117
...
117118

118119
@abstractmethod
119-
async def list_prefix(self, prefix: str) -> List[str]:
120+
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
120121
"""Retrieve all keys in the store with a given prefix.
121122
122123
Parameters
@@ -125,12 +126,12 @@ async def list_prefix(self, prefix: str) -> List[str]:
125126
126127
Returns
127128
-------
128-
list[str]
129+
AsyncGenerator[str, None]
129130
"""
130131
...
131132

132133
@abstractmethod
133-
async def list_dir(self, prefix: str) -> List[str]:
134+
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
134135
"""
135136
Retrieve all keys and prefixes with a given prefix and which do not contain the character
136137
“/” after the given prefix.
@@ -141,6 +142,6 @@ async def list_dir(self, prefix: str) -> List[str]:
141142
142143
Returns
143144
-------
144-
list[str]
145+
AsyncGenerator[str, None]
145146
"""
146147
...

src/zarr/v3/group.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from __future__ import annotations
2+
from collections.abc import AsyncGenerator
23
from typing import TYPE_CHECKING
34
from dataclasses import asdict, dataclass, field, replace
45

@@ -9,7 +10,6 @@
910
if TYPE_CHECKING:
1011
from typing import (
1112
Any,
12-
AsyncGenerator,
1313
Literal,
1414
AsyncIterator,
1515
)
@@ -157,6 +157,7 @@ async def getitem(
157157
key: str,
158158
) -> AsyncArray | AsyncGroup:
159159
store_path = self.store_path / key
160+
logger.warning("key=%s, store_path=%s", key, store_path)
160161

161162
# if `key` names an object in storage, it cannot be an array or group
162163
if await store_path.exists():
@@ -302,22 +303,24 @@ async def members(self) -> AsyncGenerator[tuple[str, AsyncArray | AsyncGroup], N
302303
)
303304

304305
raise ValueError(msg)
305-
subkeys = await self.store_path.store.list_dir(self.store_path.path)
306-
# would be nice to make these special keys accessible programmatically,
307-
# and scoped to specific zarr versions
308-
subkeys_filtered = filter(lambda v: v not in ("zarr.json", ".zgroup", ".zattrs"), subkeys)
309-
# is there a better way to schedule this?
310-
for subkey in subkeys_filtered:
311-
try:
312-
yield (subkey, await self.getitem(subkey))
313-
except KeyError:
314-
# keyerror is raised when `subkey` names an object (in the object storage sense),
315-
# as opposed to a prefix, in the store under the prefix associated with this group
316-
# in which case `subkey` cannot be the name of a sub-array or sub-group.
317-
logger.warning(
318-
"Object at %s is not recognized as a component of a Zarr hierarchy.", subkey
319-
)
320-
pass
306+
307+
async for key in self.store_path.store.list_dir(self.store_path.path):
308+
# these keys are not valid child names so we make sure to skip them
309+
# TODO: it would be nice to make these special keys accessible programmatically,
310+
# and scoped to specific zarr versions
311+
if key not in ("zarr.json", ".zgroup", ".zattrs"):
312+
try:
313+
# TODO: performance optimization -- load children concurrently
314+
child = await self.getitem(key)
315+
yield key, child
316+
except KeyError:
317+
# keyerror is raised when `key` names an object (in the object storage sense),
318+
# as opposed to a prefix, in the store under the prefix associated with this group.
319+
# In this case, `key` cannot be the name of a sub-array or sub-group.
320+
logger.warning(
321+
"Object at %s is not recognized as a component of a Zarr hierarchy.", key
322+
)
323+
pass
321324

322325
async def contains(self, member: str) -> bool:
323326
raise NotImplementedError

src/zarr/v3/store/local.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import io
44
import shutil
5+
from collections.abc import AsyncGenerator
56
from pathlib import Path
67
from typing import Union, Optional, List, Tuple
78

@@ -145,22 +146,19 @@ async def exists(self, key: str) -> bool:
145146
path = self.root / key
146147
return await to_thread(path.is_file)
147148

148-
async def list(self) -> List[str]:
149+
async def list(self) -> AsyncGenerator[str, None]:
149150
"""Retrieve all keys in the store.
150151
151152
Returns
152153
-------
153-
list[str]
154+
AsyncGenerator[str, None]
154155
"""
156+
for p in self.root.rglob(""):
157+
if p.is_file():
158+
yield str(p)
155159

156-
# Q: do we want to return strings or Paths?
157-
def _list(root: Path) -> List[str]:
158-
files = [str(p) for p in root.rglob("") if p.is_file()]
159-
return files
160160

161-
return await to_thread(_list, self.root)
162-
163-
async def list_prefix(self, prefix: str) -> List[str]:
161+
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
164162
"""Retrieve all keys in the store with a given prefix.
165163
166164
Parameters
@@ -169,16 +167,14 @@ async def list_prefix(self, prefix: str) -> List[str]:
169167
170168
Returns
171169
-------
172-
list[str]
170+
AsyncGenerator[str, None]
173171
"""
172+
for p in (self.root / prefix).rglob("*"):
173+
if p.is_file():
174+
yield str(p)
174175

175-
def _list_prefix(root: Path, prefix: str) -> List[str]:
176-
files = [str(p) for p in (root / prefix).rglob("*") if p.is_file()]
177-
return files
178-
179-
return await to_thread(_list_prefix, self.root, prefix)
180176

181-
async def list_dir(self, prefix: str) -> List[str]:
177+
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
182178
"""
183179
Retrieve all keys and prefixes with a given prefix and which do not contain the character
184180
“/” after the given prefix.
@@ -189,15 +185,16 @@ async def list_dir(self, prefix: str) -> List[str]:
189185
190186
Returns
191187
-------
192-
list[str]
188+
AsyncGenerator[str, None]
193189
"""
190+
base = self.root / prefix
191+
to_strip = str(base) + "/"
192+
193+
try:
194+
key_iter = base.iterdir()
195+
except (FileNotFoundError, NotADirectoryError):
196+
key_iter = []
194197

195-
def _list_dir(root: Path, prefix: str) -> List[str]:
196-
base = root / prefix
197-
to_strip = str(base) + "/"
198-
try:
199-
return [str(key).replace(to_strip, "") for key in base.iterdir()]
200-
except (FileNotFoundError, NotADirectoryError):
201-
return []
198+
for key in key_iter:
199+
yield str(key).replace(to_strip, "")
202200

203-
return await to_thread(_list_dir, self.root, prefix)

src/zarr/v3/store/memory.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from collections.abc import AsyncGenerator
34
from typing import Optional, MutableMapping, List, Tuple
45

56
from zarr.v3.common import BytesLike
@@ -67,20 +68,16 @@ async def delete(self, key: str) -> None:
6768
async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None:
6869
raise NotImplementedError
6970

70-
async def list(self) -> List[str]:
71-
return list(self._store_dict.keys())
71+
async def list(self) -> AsyncGenerator[str, None]:
72+
for key in self._store_dict:
73+
yield key
7274

73-
async def list_prefix(self, prefix: str) -> List[str]:
74-
return [key for key in self._store_dict if key.startswith(prefix)]
75+
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
76+
for key in self._store_dict:
77+
if key.startswith(prefix):
78+
yield key
7579

76-
async def list_dir(self, prefix: str) -> List[str]:
77-
if prefix == "":
78-
return list({key.split("/", maxsplit=1)[0] for key in self._store_dict})
79-
else:
80-
return list(
81-
{
82-
key.removeprefix(prefix + "/").split("/")[0]
83-
for key in self._store_dict
84-
if (key.startswith(prefix + "/") and key != prefix)
85-
}
86-
)
80+
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
81+
for key in self._store_dict:
82+
if key.startswith(prefix + "/") and key != prefix:
83+
yield key.removeprefix(prefix + "/").rsplit("/", maxsplit=1)[0]

0 commit comments

Comments
 (0)