-
-
Notifications
You must be signed in to change notification settings - Fork 350
Added info for Group and Array #2400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
a6ef792
b94bff2
73ea1d2
a3b797d
30c4e6a
297a9f3
60a0881
615c025
125129b
19fd7ff
5599da3
d96c202
9118056
0aef240
9ecbbd1
73c304a
cdb1672
035f53a
447dbe5
2811215
f7cab1d
2d0bdd7
f30838c
a1479ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import dataclasses | ||
import textwrap | ||
from typing import Literal | ||
|
||
|
||
@dataclasses.dataclass(kw_only=True) | ||
class GroupInfo: | ||
""" | ||
Information about a group. | ||
|
||
Attributes | ||
---------- | ||
name : str | ||
The path of the group within the Store | ||
type : "Group" | ||
zarr_format : {2, 3} | ||
The zarr format of the Group. | ||
read_only : bool | ||
Whether the Group's access mode is read only. | ||
store_type : str | ||
The name of the Store class containing this group. | ||
count_members : int, optional | ||
The number of child members below this group. This | ||
will be set when the Group has consolidated metadata | ||
or when using :class:`Group.info_complete`. | ||
count_arrays : int, optional | ||
The number of child arrays below this group. This | ||
will be set when the Group has consolidated metadata | ||
or when using :class:`Group.info_complete`. | ||
count_groups : int, optional | ||
The number of child groups below this group. This | ||
will be set when the Group has consolidated metadata | ||
or when using :class:`Group.info_complete`. | ||
""" | ||
|
||
name: str | ||
type: Literal["Group"] = "Group" | ||
zarr_format: Literal[2, 3] | ||
read_only: bool | ||
store_type: str | ||
count_members: int | None = None | ||
count_arrays: int | None = None | ||
count_groups: int | None = None | ||
|
||
def __repr__(self) -> str: | ||
template = textwrap.dedent("""\ | ||
Name : {name} | ||
Type : {type} | ||
Zarr format : {zarr_format} | ||
Read-only : {read_only} | ||
Store type : {store_type}""") | ||
|
||
if self.count_members is not None: | ||
template += "\nNo. members : {count_members}" | ||
if self.count_arrays is not None: | ||
template += "\nNo. arrays : {count_arrays}" | ||
if self.count_groups is not None: | ||
template += "\nNo. groups : {count_groups}" | ||
return template.format(**dataclasses.asdict(self)) | ||
|
||
|
||
def human_readable_size(size: int) -> str:
    """
    Format ``size`` with a binary (power-of-1024) unit suffix.

    Values below ``2**10`` are returned as the plain integer with no suffix.
    Larger values are divided by the largest fitting unit and shown with one
    decimal place followed by ``K``, ``M``, ``G``, ``T`` or ``P``. Anything at
    or above ``2**50`` is expressed in ``P``.
    """
    if size < 2**10:
        return f"{size}"
    # (exponent of the unit, suffix); a unit applies while size is below the next one.
    for exponent, suffix in ((10, "K"), (20, "M"), (30, "G"), (40, "T")):
        if size < 2 ** (exponent + 10):
            return f"{size / float(2**exponent):.1f}{suffix}"
    return f"{size / float(2**50):.1f}P"
|
||
|
||
def byte_info(size: int) -> str:
    """
    Describe a byte count for display.

    Counts of ``2**10`` or more are shown as the exact number followed by the
    ``human_readable_size`` form in parentheses; smaller counts are shown as
    the bare number.
    """
    if size >= 2**10:
        return f"{size} ({human_readable_size(size)})"
    return str(size)
|
||
|
||
@dataclasses.dataclass(kw_only=True) | ||
class ArrayInfo: | ||
type: Literal["Array"] = "Array" | ||
zarr_format: Literal[2, 3] | ||
data_type: str | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For this, I'll look into why I switched over to |
||
shape: tuple[int, ...] | ||
chunk_shape: tuple[int, ...] | None = None | ||
order: Literal["C", "F"] | ||
read_only: bool | ||
store_type: str | ||
compressor: str | None = None | ||
filters: list[str] | None = None | ||
codecs: str | None = None | ||
count_bytes: int | None = None | ||
count_bytes_stored: int | None = None | ||
count_chunks_initialized: int | None = None | ||
|
||
def __repr__(self) -> str: | ||
template = textwrap.dedent("""\ | ||
Type : {type} | ||
Zarr format : {zarr_format} | ||
Data type : {data_type} | ||
Shape : {shape} | ||
Chunk shape : {chunk_shape} | ||
Order : {order} | ||
Read-only : {read_only} | ||
Store type : {store_type}""") | ||
|
||
kwargs = dataclasses.asdict(self) | ||
if self.chunk_shape is None: | ||
# for non-regular chunk grids | ||
kwargs["chunk_shape"] = "<variable>" | ||
if self.compressor is not None: | ||
template += "\nCompressor : {compressor}" | ||
|
||
if self.filters is not None: | ||
template += "\nFilters : {filters}" | ||
|
||
if self.codecs is not None: | ||
template += "\nCodecs : {codecs}" | ||
|
||
if self.count_bytes is not None: | ||
template += "\nNo. bytes : {count_bytes}" | ||
kwargs["count_bytes"] = byte_info(self.count_bytes) | ||
|
||
if self.count_bytes_stored is not None: | ||
template += "\nNo. bytes stored : {count_bytes_stored}" | ||
kwargs["count_stored"] = byte_info(self.count_bytes_stored) | ||
|
||
if ( | ||
self.count_bytes is not None | ||
and self.count_bytes_stored is not None | ||
and self.count_bytes_stored > 0 | ||
): | ||
template += "\nStorage ratio : {storage_ratio}" | ||
kwargs["storage_ratio"] = f"{self.count_bytes / self.count_bytes_stored:.1f}" | ||
|
||
if self.count_chunks_initialized is not None: | ||
template += "\nChunks Initialized : {count_chunks_initialized}" | ||
return template.format(**kwargs) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from zarr.abc.store import Store, set_or_delete | ||
from zarr.codecs import _get_default_array_bytes_codec | ||
from zarr.codecs._v2 import V2Compressor, V2Filters | ||
from zarr.core._info import ArrayInfo | ||
from zarr.core.attributes import Attributes | ||
from zarr.core.buffer import ( | ||
BufferPrototype, | ||
|
@@ -1199,9 +1200,65 @@ async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: | |
def __repr__(self) -> str:
    """Return a short tag showing the store path, shape and dtype of the array."""
    location, shape, dtype = self.store_path, self.shape, self.dtype
    return f"<AsyncArray {location} shape={shape} dtype={dtype}>"
|
||
async def info(self) -> None: | ||
@property
def info(self) -> ArrayInfo:
    """
    Return the statically known information for an array.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.
    """
    return self._info()
|
||
async def info_complete(self) -> ArrayInfo:
    """
    Collect the dynamically determined fields and delegate to ``_info``.

    The extra fields are passed to ``_info`` through its ``extra`` argument.
    Only ``count_chunks_initialized`` is gathered here; ``count_bytes_stored``
    is not implemented yet.

    Returns
    -------
    ArrayInfo
    """
    # NOTE(review): a reviewer suggested leaving this method out of the public
    # API until it is fully implemented.
    # TODO: get the size of the object from the store.
    extra = {
        # NOTE(review): `nchunks_initialized` is referenced without being called
        # or awaited. If it is a (coroutine) method rather than a property this
        # stores the bound method instead of an int - TODO confirm.
        "count_chunks_initialized": self.nchunks_initialized,  # this should be async?
        # count_bytes_stored isn't yet implemented.
    }
    return self._info(extra=extra)
|
||
raise NotImplementedError | ||
|
||
def _info(self, extra: dict[str, int] | None = None) -> ArrayInfo:
    """
    Build the ``ArrayInfo`` for this array from its metadata.

    Parameters
    ----------
    extra : dict, optional
        Additional ``ArrayInfo`` fields (for example
        ``count_chunks_initialized``) to include alongside the fields derived
        from the metadata.

    Returns
    -------
    ArrayInfo

    Raises
    ------
    NotImplementedError
        If the array is not zarr format 2 and its chunk grid is not a
        ``RegularChunkGrid``.
    """
    kwargs: dict[str, Any] = {}
    if self.metadata.zarr_format == 2:
        # Narrow the metadata type for the format-2 specific attributes below.
        assert isinstance(self.metadata, ArrayV2Metadata)
        if self.metadata.compressor is not None:
            kwargs["compressor"] = str(self.metadata.compressor)
        if self.metadata.filters is not None:
            kwargs["filters"] = str(self.metadata.filters)
        kwargs["data_type"] = str(self.metadata.dtype)
        kwargs["chunk_shape"] = self.metadata.chunks
    else:
        kwargs["codecs"] = str(self.metadata.codecs)
        kwargs["data_type"] = str(self.metadata.data_type)
        # just regular?
        chunk_grid = self.metadata.chunk_grid
        if isinstance(chunk_grid, RegularChunkGrid):
            kwargs["chunk_shape"] = chunk_grid.chunk_shape
        else:
            # The message must be an f-string, otherwise the placeholder is
            # emitted literally instead of naming the offending type.
            raise NotImplementedError(
                f"'info' is not yet implemented for chunk grids of type {type(chunk_grid)}"
            )

    # Forward caller-supplied dynamic fields instead of silently dropping them.
    if extra is not None:
        kwargs.update(extra)

    return ArrayInfo(
        zarr_format=self.metadata.zarr_format,
        shape=self.shape,
        order=self.order,
        read_only=self.store_path.store.mode.readonly,
        store_type=type(self.store_path.store).__name__,
        count_bytes=self.dtype.itemsize * self.size,
        **kwargs,
    )
|
||
|
||
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed | ||
@dataclass(frozen=False) | ||
|
@@ -2900,10 +2957,25 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Array: | |
def __repr__(self) -> str:
    """Return a short tag showing the store path, shape and dtype of the array."""
    location, shape, dtype = self.store_path, self.shape, self.dtype
    return f"<Array {location} shape={shape} dtype={dtype}>"
|
||
def info(self) -> None: | ||
return sync( | ||
self._async_array.info(), | ||
) | ||
@property
def info(self) -> ArrayInfo:
    """
    Return the statically known information for an array.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.
    """
    return self._async_array.info
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add docstrings here and in |
||
|
||
def info_complete(self) -> ArrayInfo:
    """
    Run the wrapped async array's ``info_complete`` through ``sync``.

    Returns
    -------
    ArrayInfo
    """
    pending = self._async_array.info_complete()
    return sync(pending)
|
||
|
||
def nchunks_initialized( | ||
|
Uh oh!
There was an error while loading. Please reload this page.