Skip to content

Remove deprecated blosc code #712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ repos:
hooks:
- id: mypy
args: [--config-file, pyproject.toml]
additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.0.0rc1']
additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3']
24 changes: 22 additions & 2 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,30 @@ Unreleased
Enhancements
~~~~~~~~~~~~

* Add support for the Linux AArch64 architecture, and bump the minimum
* Add support for the Linux aarch64 architecture, and bump the minimum
macOS deployment target for x86_64 to 10.13.
By :user:`Agriya Khetarpal <agriyakhetarpal>`, :issue:`288`.

Removals
~~~~~~~~

The following ``blosc`` functions are removed, with no replacement.
This is because they were not intended to be public API.

- ``numcodecs.blosc.init``
- ``numcodecs.blosc.destroy``
- ``numcodecs.blosc.compname_to_compcode``
- ``numcodecs.blosc.cbuffer_sizes``
- ``numcodecs.blosc.cbuffer_metainfo``

In addition, ``numcodecs.blosc.decompress_partial`` is removed, as it
has always been experimental and there is no equivalent in the official
blosc Python package.
By :user:`David Stansby <dstansby>`, :issue:`712`.

0.15.1
------

Improvements
~~~~~~~~~~~~
* Raise a custom `UnknownCodecError` when trying to retrieve an unavailable codec.
Expand Down Expand Up @@ -70,7 +90,7 @@ This is because they are not intended to be public API.
In addition, ``numcodecs.blosc.decompress_partial`` is deprecated, as it
has always been experimental and there is no equivalent in the official
blosc Python package.
By :user:`David Stansby <dstansby>`, :issue`619`
By :user:`David Stansby <dstansby>`, :issue:`619`

Fixes
~~~~~
Expand Down
2 changes: 1 addition & 1 deletion numcodecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
ncores = 1
blosc._init()
blosc.set_nthreads(min(8, ncores))
atexit.register(blosc.destroy)
atexit.register(blosc._destroy)

from numcodecs import zstd as zstd
from numcodecs.zstd import Zstd
Expand Down
107 changes: 0 additions & 107 deletions numcodecs/blosc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import threading
import multiprocessing
import os
from deprecated import deprecated


from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize
Expand Down Expand Up @@ -44,7 +43,6 @@ cdef extern from "blosc.h":
void* src, void* dest, size_t destsize) nogil
int blosc_decompress(void *src, void *dest, size_t destsize) nogil
int blosc_getitem(void* src, int start, int nitems, void* dest)
int blosc_compname_to_compcode(const char* compname)
int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize, size_t nbytes,
const void* src, void* dest, size_t destsize,
const char* compressor, size_t blocksize,
Expand Down Expand Up @@ -99,28 +97,12 @@ def _init():
"""Initialize the Blosc library environment."""
blosc_init()

init = deprecated(_init)


def _destroy():
    """Destroy the Blosc library environment.

    Thin wrapper around the C-level ``blosc_destroy``. Registered with
    ``atexit`` by ``numcodecs/__init__.py`` so Blosc state is torn down
    at interpreter exit.
    """
    blosc_destroy()


destroy = deprecated(_destroy)


def _compname_to_compcode(cname):
    """Return the compressor code associated with the compressor name. If the compressor
    name is not recognized, or there is not support for it in this build, -1 is returned
    instead."""
    # the C API expects a byte string; compressor names are plain ASCII
    if isinstance(cname, str):
        cname = cname.encode('ascii')
    return blosc_compname_to_compcode(cname)

compname_to_compcode = deprecated(_compname_to_compcode)


def list_compressors():
"""Get a list of compressors supported in the current build."""
s = blosc_list_compressors()
Expand Down Expand Up @@ -166,7 +148,6 @@ def _cbuffer_sizes(source):

return nbytes, cbytes, blocksize

cbuffer_sizes = deprecated(_cbuffer_sizes)

def cbuffer_complib(source):
"""Return the name of the compression library used to compress `source`."""
Expand Down Expand Up @@ -222,13 +203,10 @@ def _cbuffer_metainfo(source):

return typesize, shuffle, memcpyed

cbuffer_metainfo = deprecated(_cbuffer_metainfo)

def _err_bad_cname(cname):
    """Raise ``ValueError`` for an unrecognized/unsupported compressor name,
    listing the compressors available in the current build."""
    raise ValueError('bad compressor or compressor not supported: %r; expected one of '
                     '%s' % (cname, list_compressors()))

err_bad_cname = deprecated(_err_bad_cname)

def compress(source, char* cname, int clevel, int shuffle=SHUFFLE,
int blocksize=AUTOBLOCKS, typesize=None):
Expand Down Expand Up @@ -423,86 +401,6 @@ def decompress(source, dest=None):
return dest


def _decompress_partial(source, start, nitems, dest=None):
    """**Experimental**
    Decompress data of only a part of a buffer.

    Parameters
    ----------
    source : bytes-like
        Compressed data, including blosc header. Can be any object supporting the buffer
        protocol.
    start: int,
        Offset in item where we want to start decoding
    nitems: int
        Number of items we want to decode
    dest : array-like, optional
        Object to decompress into.


    Returns
    -------
    dest : bytes
        Object containing decompressed data.

    Raises
    ------
    ValueError
        If ``dest`` is too small to hold ``nitems`` items.
    RuntimeError
        If ``blosc_getitem`` reports a decompression error.
    """
    cdef:
        int ret
        int encoding_size
        int nitems_bytes
        int start_bytes
        memoryview source_mv
        const Py_buffer* source_pb
        const char* source_ptr
        memoryview dest_mv
        Py_buffer* dest_pb
        char* dest_ptr
        size_t dest_nbytes

    # obtain source memoryview
    source_mv = ensure_continguous_memoryview(source)
    source_pb = PyMemoryView_GET_BUFFER(source_mv)

    # setup source pointer
    source_ptr = <const char*>source_pb.buf

    # get encoding size from source buffer header
    # NOTE(review): reads header byte 3 via indexing, so `source` must be
    # subscriptable; presumably this byte holds the blosc typesize — confirm
    # against the blosc header layout.
    encoding_size = source[3]

    # convert variables to handle type and encoding sizes
    nitems_bytes = nitems * encoding_size
    start_bytes = (start * encoding_size)

    # setup destination buffer
    if dest is None:
        # allocate memory
        dest_1d = dest = PyBytes_FromStringAndSize(NULL, nitems_bytes)
    else:
        dest_1d = ensure_contiguous_ndarray(dest)

    # obtain dest memoryview
    dest_mv = memoryview(dest_1d)
    dest_pb = PyMemoryView_GET_BUFFER(dest_mv)
    dest_ptr = <char*>dest_pb.buf
    dest_nbytes = dest_pb.len

    # try decompression
    try:
        if dest_nbytes < nitems_bytes:
            raise ValueError('destination buffer too small; expected at least %s, '
                             'got %s' % (nitems_bytes, dest_nbytes))
        ret = blosc_getitem(source_ptr, start, nitems, dest_ptr)
    finally:
        # no cleanup required; kept to preserve the original control flow
        pass

    # ret refers to the number of bytes returned from blosc_getitem.
    if ret <= 0:
        # bug fix: the format string was previously passed `ret` as a second
        # exception argument instead of interpolating it into the message
        raise RuntimeError('error during blosc partial decompression: %d' % ret)

    return dest

decompress_partial = deprecated(_decompress_partial)

# set the value of this variable to True or False to override the
# default adaptive behaviour
use_threads = None
Expand Down Expand Up @@ -601,11 +499,6 @@ class Blosc(Codec):
buf = ensure_contiguous_ndarray(buf, self.max_buffer_size)
return decompress(buf, out)

    def decode_partial(self, buf, int start, int nitems, out=None):
        '''**Experimental**

        Decode only ``nitems`` items of ``buf`` beginning at item ``start``,
        delegating to the module-level ``_decompress_partial``.
        '''
        # bound input size before handing the buffer to the C layer
        buf = ensure_contiguous_ndarray(buf, self.max_buffer_size)
        return _decompress_partial(buf, start, nitems, dest=out)

def __repr__(self):
r = '%s(cname=%r, clevel=%r, shuffle=%s, blocksize=%s)' % \
(type(self).__name__,
Expand Down
68 changes: 0 additions & 68 deletions numcodecs/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,74 +115,6 @@ def check_encode_decode(arr, codec, precision=None):
compare_arrays(arr, out, precision=precision)


def check_encode_decode_partial(arr, codec, precision=None):
    """Round-trip ``arr`` through ``codec`` and verify partial decoding.

    Encodes ``arr`` (and several buffer-protocol views of its bytes) and
    checks that ``codec.decode_partial`` recovers the slice
    ``arr[start:start + nitems]`` for a fixed start/length, optionally
    comparing with limited ``precision`` for lossy codecs.
    """
    # N.B., watch out here with blosc compressor, if the itemsize of
    # the source buffer is different then the results of encoding
    # (i.e., compression) may be different. Hence we *do not* require that
    # the results of encoding be identical for all possible inputs, rather
    # we just require that the results of the encode/decode round-trip can
    # be compared to the original array.

    itemsize = arr.itemsize
    start, nitems = 5, 10
    compare_arr = arr[start : start + nitems]

    # test encoding of numpy array
    enc = codec.encode(arr)
    dec = codec.decode_partial(enc, start, nitems)
    compare_arrays(compare_arr, dec, precision=precision)

    # reusable output buffer for the out= variants below
    out = np.empty_like(compare_arr)

    # test partial decode of encoded bytes
    buf = arr.tobytes(order='A')
    enc = codec.encode(buf)
    dec = codec.decode_partial(enc, start * itemsize, nitems * itemsize, out=out)
    compare_arrays(compare_arr, dec, precision=precision)

    # test partial decode of encoded bytearray
    buf = bytearray(arr.tobytes(order='A'))
    enc = codec.encode(buf)
    dec = codec.decode_partial(enc, start * itemsize, nitems * itemsize, out=out)
    compare_arrays(compare_arr, dec, precision=precision)

    # test partial decode of encoded array.array
    buf = array.array('b', arr.tobytes(order='A'))
    enc = codec.encode(buf)
    dec = codec.decode_partial(enc, start * itemsize, nitems * itemsize, out=out)
    compare_arrays(compare_arr, dec, precision=precision)

    # decoding should support any object exporting the buffer protocol
    enc_bytes = ensure_bytes(enc)

    # test decoding of raw bytes into numpy array
    dec = codec.decode_partial(enc_bytes, start * itemsize, nitems * itemsize, out=out)
    compare_arrays(compare_arr, dec, precision=precision)

    # test partial decoding of bytearray
    dec = codec.decode_partial(bytearray(enc_bytes), start * itemsize, nitems * itemsize, out=out)
    compare_arrays(compare_arr, dec, precision=precision)

    # test partial decoding of array.array
    buf = array.array('b', enc_bytes)
    dec = codec.decode_partial(buf, start * itemsize, nitems * itemsize, out=out)
    compare_arrays(compare_arr, dec, precision=precision)

    # test decoding of numpy array into numpy array
    buf = np.frombuffer(enc_bytes, dtype='u1')
    dec = codec.decode_partial(buf, start * itemsize, nitems * itemsize, out=out)
    compare_arrays(compare_arr, dec, precision=precision)

    # test decoding directly into bytearray
    out = bytearray(compare_arr.nbytes)
    codec.decode_partial(enc_bytes, start * itemsize, nitems * itemsize, out=out)
    # noinspection PyTypeChecker
    compare_arrays(compare_arr, out, precision=precision)


def assert_array_items_equal(res, arr):
assert isinstance(res, np.ndarray)
res = res.reshape(-1, order='A')
Expand Down
14 changes: 0 additions & 14 deletions numcodecs/tests/test_blosc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
check_backwards_compatibility,
check_config,
check_encode_decode,
check_encode_decode_partial,
check_err_decode_object_buffer,
check_err_encode_object_buffer,
check_max_buffer_size,
Expand Down Expand Up @@ -75,19 +74,6 @@ def test_encode_decode(array, codec):
check_encode_decode(array, codec)


@pytest.mark.parametrize('codec', codecs)
@pytest.mark.parametrize(
    'array',
    [
        # partial decoding is only expected to work for 1-D arrays;
        # multi-dimensional inputs are marked as expected failures
        pytest.param(x) if len(x.shape) == 1 else pytest.param(x, marks=[pytest.mark.xfail])
        for x in arrays
    ],
)
def test_partial_decode(codec, array):
    """Exercise the experimental partial-decode round trip for every codec/array pair."""
    _skip_null(codec)
    check_encode_decode_partial(array, codec)


def test_config():
codec = Blosc(cname='zstd', clevel=3, shuffle=1)
check_config(codec)
Expand Down
4 changes: 2 additions & 2 deletions numcodecs/zarr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def __init__(self, **codec_config: dict[str, JSON]) -> None:

def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
if astype := self.codec_config.get("astype"):
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[arg-type]
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload]
return chunk_spec


Expand All @@ -304,7 +304,7 @@ def __init__(self, **codec_config: JSON) -> None:

def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
if astype := self.codec_config.get("astype"):
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[arg-type]
return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload]
return chunk_spec

def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ description = """
A Python package providing buffer compression and transformation codecs \
for use in data storage and communication applications."""
readme = "README.rst"
dependencies = ["numpy>=1.24", "deprecated", "typing_extensions"]
dependencies = ["numpy>=1.24", "typing_extensions"]
requires-python = ">=3.11"
dynamic = [
"version",
Expand Down
Loading