From d193b7f0a380ee14644871c06b92827d09d0480e Mon Sep 17 00:00:00 2001 From: David Stansby Date: Mon, 7 Apr 2025 15:00:56 +0100 Subject: [PATCH 1/2] Remove deprecated blosc code --- docs/release.rst | 24 +++++++- numcodecs/__init__.py | 2 +- numcodecs/blosc.pyx | 107 ---------------------------------- numcodecs/tests/common.py | 68 --------------------- numcodecs/tests/test_blosc.py | 14 ----- pyproject.toml | 2 +- 6 files changed, 24 insertions(+), 193 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index e865a9e0..964e1066 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -20,10 +20,30 @@ Unreleased Enhancements ~~~~~~~~~~~~ -* Add support for the Linux AArch64 architecture, and bump the minimum +* Add support for the Linux aarch64 architecture, and bump the minimum macOS deployment target for x86_64 to 10.13. By :user:`Agriya Khetarpal `, :issue:`288`. +Removals +~~~~~~~~ + +The following ``blosc`` functions are removed, with no replacement. +This is because they were not intended to be public API. + +- ``numcodecs.blosc.init`` +- ``numcodecs.blosc.destroy`` +- ``numcodecs.blosc.compname_to_compcode`` +- ``numcodecs.blosc.cbuffer_sizes`` +- ``numcodecs.blosc.cbuffer_metainfo`` + +In addition, ``numcodecs.blosc.decompress_partial`` is removed as it +has always been experimental and there is no equivalent in the official +blosc Python package. +By :user:`David Stansby `, :issue:`712` + +0.15.1 +------ + Improvements ~~~~~~~~~~~~ * Raise a custom `UnknownCodecError` when trying to retrieve an unavailable codec. @@ -70,7 +90,7 @@ This is because they are not intended to be public API. In addition, ``numcodecs.blosc.decompress_partial`` is deprecated as has always been experimental and there is no equivalent in the official blsoc Python package. 
-By :user:`David Stansby `, :issue`619` +By :user:`David Stansby `, :issue:`619` Fixes ~~~~~ diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index b4b983ce..e3626b1b 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -51,7 +51,7 @@ ncores = 1 blosc._init() blosc.set_nthreads(min(8, ncores)) -atexit.register(blosc.destroy) +atexit.register(blosc._destroy) from numcodecs import zstd as zstd from numcodecs.zstd import Zstd diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 17e6b4d8..6ec72e21 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -6,7 +6,6 @@ import threading import multiprocessing import os -from deprecated import deprecated from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize @@ -44,7 +43,6 @@ cdef extern from "blosc.h": void* src, void* dest, size_t destsize) nogil int blosc_decompress(void *src, void *dest, size_t destsize) nogil int blosc_getitem(void* src, int start, int nitems, void* dest) - int blosc_compname_to_compcode(const char* compname) int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize, size_t nbytes, const void* src, void* dest, size_t destsize, const char* compressor, size_t blocksize, @@ -99,28 +97,12 @@ def _init(): """Initialize the Blosc library environment.""" blosc_init() -init = deprecated(_init) - def _destroy(): """Destroy the Blosc library environment.""" blosc_destroy() -destroy = deprecated(_destroy) - - -def _compname_to_compcode(cname): - """Return the compressor code associated with the compressor name. 
If the compressor - name is not recognized, or there is not support for it in this build, -1 is returned - instead.""" - if isinstance(cname, str): - cname = cname.encode('ascii') - return blosc_compname_to_compcode(cname) - -compname_to_compcode = deprecated(_compname_to_compcode) - - def list_compressors(): """Get a list of compressors supported in the current build.""" s = blosc_list_compressors() @@ -166,7 +148,6 @@ def _cbuffer_sizes(source): return nbytes, cbytes, blocksize -cbuffer_sizes = deprecated(_cbuffer_sizes) def cbuffer_complib(source): """Return the name of the compression library used to compress `source`.""" @@ -222,13 +203,10 @@ def _cbuffer_metainfo(source): return typesize, shuffle, memcpyed -cbuffer_metainfo = deprecated(_cbuffer_metainfo) - def _err_bad_cname(cname): raise ValueError('bad compressor or compressor not supported: %r; expected one of ' '%s' % (cname, list_compressors())) -err_bad_cname = deprecated(_err_bad_cname) def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, int blocksize=AUTOBLOCKS, typesize=None): @@ -423,86 +401,6 @@ def decompress(source, dest=None): return dest -def _decompress_partial(source, start, nitems, dest=None): - """**Experimental** - Decompress data of only a part of a buffer. - - Parameters - ---------- - source : bytes-like - Compressed data, including blosc header. Can be any object supporting the buffer - protocol. - start: int, - Offset in item where we want to start decoding - nitems: int - Number of items we want to decode - dest : array-like, optional - Object to decompress into. - - - Returns - ------- - dest : bytes - Object containing decompressed data. 
- - """ - cdef: - int ret - int encoding_size - int nitems_bytes - int start_bytes - memoryview source_mv - const Py_buffer* source_pb - const char* source_ptr - memoryview dest_mv - Py_buffer* dest_pb - char* dest_ptr - size_t dest_nbytes - - # obtain source memoryview - source_mv = ensure_continguous_memoryview(source) - source_pb = PyMemoryView_GET_BUFFER(source_mv) - - # setup source pointer - source_ptr = source_pb.buf - - # get encoding size from source buffer header - encoding_size = source[3] - - # convert variables to handle type and encoding sizes - nitems_bytes = nitems * encoding_size - start_bytes = (start * encoding_size) - - # setup destination buffer - if dest is None: - # allocate memory - dest_1d = dest = PyBytes_FromStringAndSize(NULL, nitems_bytes) - else: - dest_1d = ensure_contiguous_ndarray(dest) - - # obtain dest memoryview - dest_mv = memoryview(dest_1d) - dest_pb = PyMemoryView_GET_BUFFER(dest_mv) - dest_ptr = dest_pb.buf - dest_nbytes = dest_pb.len - - # try decompression - try: - if dest_nbytes < nitems_bytes: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (nitems_bytes, dest_nbytes)) - ret = blosc_getitem(source_ptr, start, nitems, dest_ptr) - finally: - pass - - # ret refers to the number of bytes returned from blosc_getitem. 
- if ret <= 0: - raise RuntimeError('error during blosc partial decompression: %d', ret) - - return dest - -decompress_partial = deprecated(_decompress_partial) - # set the value of this variable to True or False to override the # default adaptive behaviour use_threads = None @@ -601,11 +499,6 @@ class Blosc(Codec): buf = ensure_contiguous_ndarray(buf, self.max_buffer_size) return decompress(buf, out) - def decode_partial(self, buf, int start, int nitems, out=None): - '''**Experimental**''' - buf = ensure_contiguous_ndarray(buf, self.max_buffer_size) - return _decompress_partial(buf, start, nitems, dest=out) - def __repr__(self): r = '%s(cname=%r, clevel=%r, shuffle=%s, blocksize=%s)' % \ (type(self).__name__, diff --git a/numcodecs/tests/common.py b/numcodecs/tests/common.py index bb7c4780..12cccb20 100644 --- a/numcodecs/tests/common.py +++ b/numcodecs/tests/common.py @@ -115,74 +115,6 @@ def check_encode_decode(arr, codec, precision=None): compare_arrays(arr, out, precision=precision) -def check_encode_decode_partial(arr, codec, precision=None): - # N.B., watch out here with blosc compressor, if the itemsize of - # the source buffer is different then the results of encoding - # (i.e., compression) may be different. Hence we *do not* require that - # the results of encoding be identical for all possible inputs, rather - # we just require that the results of the encode/decode round-trip can - # be compared to the original array. 
- - itemsize = arr.itemsize - start, nitems = 5, 10 - compare_arr = arr[start : start + nitems] - # test encoding of numpy array - enc = codec.encode(arr) - dec = codec.decode_partial(enc, start, nitems) - compare_arrays(compare_arr, dec, precision=precision) - - # out = np.empty_like(compare_arr) - out = np.empty_like(compare_arr) - print(len(out)) - - # test partial decode of encoded bytes - buf = arr.tobytes(order='A') - enc = codec.encode(buf) - dec = codec.decode_partial(enc, start * itemsize, nitems * itemsize, out=out) - compare_arrays(compare_arr, dec, precision=precision) - - # test partial decode of encoded bytearray - buf = bytearray(arr.tobytes(order='A')) - enc = codec.encode(buf) - dec = codec.decode_partial(enc, start * itemsize, nitems * itemsize, out=out) - compare_arrays(compare_arr, dec, precision=precision) - - # test partial decode of encoded array.array - buf = array.array('b', arr.tobytes(order='A')) - enc = codec.encode(buf) - dec = codec.decode_partial(enc, start * itemsize, nitems * itemsize, out=out) - compare_arrays(compare_arr, dec, precision=precision) - - # # decoding should support any object exporting the buffer protocol, - - # # setup - enc_bytes = ensure_bytes(enc) - - # test decoding of raw bytes into numpy array - dec = codec.decode_partial(enc_bytes, start * itemsize, nitems * itemsize, out=out) - compare_arrays(compare_arr, dec, precision=precision) - - # test partial decoding of bytearray - dec = codec.decode_partial(bytearray(enc_bytes), start * itemsize, nitems * itemsize, out=out) - compare_arrays(compare_arr, dec, precision=precision) - - # test partial decoding of array.array - buf = array.array('b', enc_bytes) - dec = codec.decode_partial(buf, start * itemsize, nitems * itemsize, out=out) - compare_arrays(compare_arr, dec, precision=precision) - - # test decoding of numpy array into numpy array - buf = np.frombuffer(enc_bytes, dtype='u1') - dec = codec.decode_partial(buf, start * itemsize, nitems * itemsize, out=out) - 
compare_arrays(compare_arr, dec, precision=precision) - - # test decoding directly into bytearray - out = bytearray(compare_arr.nbytes) - codec.decode_partial(enc_bytes, start * itemsize, nitems * itemsize, out=out) - # noinspection PyTypeChecker - compare_arrays(compare_arr, out, precision=precision) - - def assert_array_items_equal(res, arr): assert isinstance(res, np.ndarray) res = res.reshape(-1, order='A') diff --git a/numcodecs/tests/test_blosc.py b/numcodecs/tests/test_blosc.py index 46d3e3a4..e5a40df6 100644 --- a/numcodecs/tests/test_blosc.py +++ b/numcodecs/tests/test_blosc.py @@ -15,7 +15,6 @@ check_backwards_compatibility, check_config, check_encode_decode, - check_encode_decode_partial, check_err_decode_object_buffer, check_err_encode_object_buffer, check_max_buffer_size, @@ -75,19 +74,6 @@ def test_encode_decode(array, codec): check_encode_decode(array, codec) -@pytest.mark.parametrize('codec', codecs) -@pytest.mark.parametrize( - 'array', - [ - pytest.param(x) if len(x.shape) == 1 else pytest.param(x, marks=[pytest.mark.xfail]) - for x in arrays - ], -) -def test_partial_decode(codec, array): - _skip_null(codec) - check_encode_decode_partial(array, codec) - - def test_config(): codec = Blosc(cname='zstd', clevel=3, shuffle=1) check_config(codec) diff --git a/pyproject.toml b/pyproject.toml index a12c1d1f..7ede5d9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ description = """ A Python package providing buffer compression and transformation codecs \ for use in data storage and communication applications.""" readme = "README.rst" -dependencies = ["numpy>=1.24", "deprecated", "typing_extensions"] +dependencies = ["numpy>=1.24", "typing_extensions"] requires-python = ">=3.11" dynamic = [ "version", From 42f9051031050d30212c247b4ebb0d1d28eb5e08 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Mon, 7 Apr 2025 15:07:31 +0100 Subject: [PATCH 2/2] mypy fixes --- .pre-commit-config.yaml | 2 +- numcodecs/zarr3.py | 4 ++-- 2 files 
changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e712e89..3b583279 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,4 +30,4 @@ repos: hooks: - id: mypy args: [--config-file, pyproject.toml] - additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.0.0rc1'] + additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3'] diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index f1743ffb..43684c3d 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -286,7 +286,7 @@ def __init__(self, **codec_config: dict[str, JSON]) -> None: def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): - return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[arg-type] + return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] return chunk_spec @@ -304,7 +304,7 @@ def __init__(self, **codec_config: JSON) -> None: def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): - return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[arg-type] + return replace(chunk_spec, dtype=np.dtype(astype)) # type: ignore[call-overload] return chunk_spec def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: