Skip to content

Commit bc61f5c

Browse files
authored
Merge pull request #81 from jeromekelleher/fix-blosc-errors
Fixing blosc encode error handling
2 parents a82fea0 + d5a5267 commit bc61f5c

16 files changed

+2709
-2072
lines changed

docs/release.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ Release notes
4040
some codecs, and also simplifies the implementation of some codecs, improving
4141
code readability and maintainability. By :user:`John Kirkham <jakirkham>` and
4242
:user:`Alistair Miles <alimanfoo>`; :issue:`119`, :issue:`121`, :issue:`128`.
43+
44+
* Improvements to handling of errors in the :class:`numcodecs.blosc.Blosc` and
45+
:class:`numcodecs.lz4.LZ4` codecs when the maximum allowed size of an input
46+
buffer is exceeded. By :user:`Jerome Kelleher <jeromekelleher>`; :issue:`80`,
47+
:issue:`81`.
4348

4449

4550
.. _release_0.5.5:

numcodecs/blosc.c

Lines changed: 687 additions & 503 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

numcodecs/blosc.pyx

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING
1616

1717
from .compat_ext cimport Buffer
1818
from .compat_ext import Buffer
19-
from .compat import PY2, text_type
19+
from .compat import PY2, text_type, ensure_contiguous_ndarray
2020
from .abc import Codec
2121

2222

@@ -248,7 +248,8 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE,
248248
char *source_ptr
249249
char *dest_ptr
250250
Buffer source_buffer
251-
size_t nbytes, cbytes, itemsize
251+
size_t nbytes, itemsize
252+
int cbytes
252253
bytes dest
253254

254255
# check valid cname early
@@ -365,7 +366,8 @@ def decompress(source, dest=None):
365366
dest_ptr = PyBytes_AS_STRING(dest)
366367
dest_nbytes = nbytes
367368
else:
368-
dest_buffer = Buffer(dest, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
369+
arr = ensure_contiguous_ndarray(dest)
370+
dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
369371
dest_ptr = dest_buffer.ptr
370372
dest_nbytes = dest_buffer.nbytes
371373

@@ -472,6 +474,7 @@ class Blosc(Codec):
472474
SHUFFLE = SHUFFLE
473475
BITSHUFFLE = BITSHUFFLE
474476
AUTOSHUFFLE = AUTOSHUFFLE
477+
max_buffer_size = 2**31 - 1
475478

476479
def __init__(self, cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=AUTOBLOCKS):
477480
self.cname = cname
@@ -484,9 +487,11 @@ class Blosc(Codec):
484487
self.blocksize = blocksize
485488

486489
def encode(self, buf):
490+
buf = ensure_contiguous_ndarray(buf, self.max_buffer_size)
487491
return compress(buf, self._cname_bytes, self.clevel, self.shuffle, self.blocksize)
488492

489493
def decode(self, buf, out=None):
494+
buf = ensure_contiguous_ndarray(buf, self.max_buffer_size)
490495
return decompress(buf, out)
491496

492497
def __repr__(self):

numcodecs/compat.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,20 @@ def ensure_ndarray(buf):
9090
return arr
9191

9292

93-
def ensure_contiguous_ndarray(buf):
93+
def ensure_contiguous_ndarray(buf, max_buffer_size=None):
9494
"""Convenience function to coerce `buf` to a numpy array, if it is not already a
9595
numpy array. Also ensures that the returned value exports fully contiguous memory,
96-
and supports the new-style buffer interface.
96+
and supports the new-style buffer interface. If the optional max_buffer_size is
97+
provided, raise a ValueError if the number of bytes consumed by the returned
98+
array exceeds this value.
9799
98100
Parameters
99101
----------
100102
buf : array-like or bytes-like
101103
A numpy array or any object exporting a buffer interface.
104+
max_buffer_size : int, optional
105+
If specified, the largest allowable value of arr.nbytes, where arr
106+
is the returned array.
102107
103108
Returns
104109
-------
@@ -132,6 +137,10 @@ def ensure_contiguous_ndarray(buf):
132137
else:
133138
raise ValueError('an array with contiguous memory is required')
134139

140+
if max_buffer_size is not None and arr.nbytes > max_buffer_size:
141+
msg = "Codec does not support buffers of > {} bytes".format(max_buffer_size)
142+
raise ValueError(msg)
143+
135144
return arr
136145

137146

0 commit comments

Comments
 (0)