Skip to content

Commit 359e117

Browse files
authored
Merge pull request #455 from Blosc/c2array
Added functionality to C2array
2 parents d24df1e + 185f4a2 commit 359e117

File tree

2 files changed

+133
-6
lines changed

2 files changed

+133
-6
lines changed

src/blosc2/c2array.py

Lines changed: 126 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import requests
2020

2121
import blosc2
22+
from blosc2.info import InfoReporter
2223

2324
_subscriber_data = {
2425
"urlbase": os.environ.get("BLOSC_C2URLBASE"),
@@ -148,17 +149,22 @@ def subscribe(root, urlbase, auth_token):
148149
return _xpost(url, auth_token=auth_token)
149150

150151

151-
def fetch_data(path, urlbase, params, auth_token=None):
152+
def fetch_data(path, urlbase, params, auth_token=None, as_blosc2=False):
152153
url = _sub_url(urlbase, f"api/fetch/{path}")
153154
response = _xget(url, params=params, auth_token=auth_token)
154155
data = response.content
156+
# Try different deserialization methods
155157
try:
156158
data = blosc2.ndarray_from_cframe(data)
157-
data = data[:] if data.ndim == 1 else data[()]
158159
except RuntimeError:
159160
data = blosc2.schunk_from_cframe(data)
160-
data = data[:]
161-
return data
161+
if as_blosc2:
162+
return data
163+
if hasattr(data, "ndim"): # if b2nd or b2frame
164+
# catch 0d case where [:] fails
165+
return data[()] if data.ndim == 0 else data[:]
166+
else:
167+
return data[:]
162168

163169

164170
def slice_to_string(slice_):
@@ -243,7 +249,7 @@ def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | N
243249

244250
def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray:
245251
"""
246-
Get a slice of the array.
252+
Get a slice of the array (returning a NumPy array).
247253
248254
Parameters
249255
----------
@@ -269,7 +275,40 @@ def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray:
269275
[81, 82, 83]], dtype=uint16)
270276
"""
271277
slice_ = slice_to_string(slice_)
272-
return fetch_data(self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token)
278+
return fetch_data(
279+
self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=False
280+
)
281+
282+
def slice(self, slice_: int | slice | Sequence[slice]) -> blosc2.NDArray:
283+
"""
284+
Get a slice of the array (returning a blosc2 NDArray).
285+
286+
Parameters
287+
----------
288+
slice_ : int, slice, tuple of ints and slices, or None
289+
The slice to fetch.
290+
291+
Returns
292+
-------
293+
out: blosc2.NDArray
294+
A blosc2.NDArray containing the data slice.
295+
296+
Examples
297+
--------
298+
>>> import blosc2
299+
>>> urlbase = "https://cat2.cloud/demo"
300+
>>> path = "@public/examples/dir1/ds-2d.b2nd"
301+
>>> remote_array = blosc2.C2Array(path, urlbase=urlbase)
302+
>>> data_slice = remote_array.slice((slice(3,5), slice(1,4)))
303+
>>> data_slice.shape
304+
(2, 3)
305+
>>> type(data_slice)
306+
blosc2.ndarray.NDArray
307+
"""
308+
slice_ = slice_to_string(slice_)
309+
return fetch_data(
310+
self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=True
311+
)
273312

274313
def __len__(self) -> int:
275314
"""Returns the length of the first dimension of the array.
@@ -339,6 +378,87 @@ def cparams(self) -> blosc2.CParams:
339378
"""The compression parameters of the remote array"""
340379
return self._cparams
341380

381+
@property
382+
def nbytes(self) -> int:
383+
"""The number of bytes of the remote array"""
384+
return self.meta["schunk"]["nbytes"]
385+
386+
@property
387+
def cbytes(self) -> int:
388+
"""The number of compressed bytes of the remote array"""
389+
return self.meta["schunk"]["cbytes"]
390+
391+
@property
392+
def cratio(self) -> float:
393+
"""The compression ratio of the remote array"""
394+
return self.meta["schunk"]["cratio"]
395+
396+
# TODO: Add these to SChunk model in srv_utils and then access them here
397+
# @property
398+
# def dparams(self) -> float:
399+
# """The dparams of the remote array"""
400+
# return
401+
#
402+
# @property
403+
# def meta(self) -> float:
404+
# """The meta of the remote array"""
405+
# return
406+
407+
# TODO: This seems to cause problems for proxy sources (see tests/ndarray/test_proxy_c2array.py::test_open)
408+
# @property
409+
# def urlpath(self) -> str:
410+
# """The URL path of the remote array"""
411+
# return self.meta["schunk"]["urlpath"]
412+
413+
@property
414+
def vlmeta(self) -> dict:
415+
"""The variable-length metadata of the remote array"""
416+
return self.meta["schunk"]["vlmeta"]
417+
418+
@property
419+
def info(self) -> InfoReporter:
420+
"""
421+
Print information about this remote array.
422+
"""
423+
return InfoReporter(self)
424+
425+
@property
426+
def info_items(self) -> list:
427+
"""A list of tuples with the information about the remote array.
428+
Each tuple contains the name of the attribute and its value.
429+
"""
430+
items = []
431+
items += [("type", f"{self.__class__.__name__}")]
432+
items += [("shape", self.shape)]
433+
items += [("chunks", self.chunks)]
434+
items += [("blocks", self.blocks)]
435+
items += [("dtype", self.dtype)]
436+
items += [("nbytes", self.nbytes)]
437+
items += [("cbytes", self.cbytes)]
438+
items += [("cratio", f"{self.cratio:.2f}")]
439+
items += [("cparams", self.cparams)]
440+
# items += [("dparams", self.dparams)]
441+
return items
442+
443+
# TODO: Access chunksize, size, ext_chunks, etc.
444+
# @property
445+
# def size(self) -> int:
446+
# """The size (in bytes) for this container."""
447+
# return self.cbytes
448+
# @property
449+
# def chunksize(self) -> int:
450+
# """NOT the same as `SChunk.chunksize <blosc2.schunk.SChunk.chunksize>`
451+
# in case :attr:`chunks` is not multiple in
452+
# each dimension of :attr:`blocks` (or equivalently, if :attr:`chunks` is
453+
# not the same as :attr:`ext_chunks`).
454+
# """
455+
# return
456+
457+
@property
458+
def blocksize(self) -> int:
459+
"""The block size (in bytes) for the remote container."""
460+
return self.meta["schunk"]["blocksize"]
461+
342462

343463
class URLPath:
344464
def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None):

tests/test_open_c2array.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ def test_open_c2array(c2sub_context):
3636
a_open = blosc2.open(urlpath, mode="r")
3737
np.testing.assert_allclose(a1[:], a_open[:])
3838

39+
## Test slicing
40+
np.testing.assert_allclose(a1[:10], a_open[:10])
41+
np.testing.assert_allclose(a1.slice(slice(1, 10, 1))[:], a_open.slice(slice(1, 10, 1))[:])
42+
43+
## Test metadata
44+
assert a1.cratio == a_open.cratio
45+
3946
with pytest.raises(NotImplementedError):
4047
_ = blosc2.open(urlpath)
4148

0 commit comments

Comments
 (0)