Skip to content

Commit 923df20

Browse files
committed
Switch Buffers to memoryviews
When `Buffer` was originally written, Python's Buffer Protocol support was inconsistent across Python versions (specifically on Python 2.7). Since Python 2.7 reached EOL and was dropped from Numcodecs, Buffer Protocol support has become more consistent. At this stage the `memoryview` object, which Cython also supports, does everything that `Buffer` did for us. Plus it is a Python builtin, so no extra helper type is needed. It also behaves similarly to `Buffer` in many ways. Given this, switch the code over to `memoryview`s internally and drop `Buffer`.
1 parent 88660de commit 923df20

File tree

7 files changed

+315
-356
lines changed

7 files changed

+315
-356
lines changed

numcodecs/blosc.pyx

Lines changed: 129 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@ import multiprocessing
88
import os
99

1010

11-
from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE
11+
from cpython.buffer cimport PyBuffer_IsContiguous
1212
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING
13+
from cpython.memoryview cimport PyMemoryView_GET_BUFFER
1314

1415

15-
from .compat_ext cimport Buffer
16-
from .compat_ext import Buffer
1716
from .compat import ensure_contiguous_ndarray
1817
from .abc import Codec
1918

@@ -146,34 +145,36 @@ def cbuffer_sizes(source):
146145
147146
"""
148147
cdef:
149-
Buffer buffer
148+
memoryview source_mv
149+
const Py_buffer* source_pb
150150
size_t nbytes, cbytes, blocksize
151151

152-
# obtain buffer
153-
buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
152+
# obtain source memoryview
153+
source_mv = memoryview(source)
154+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
155+
if not PyBuffer_IsContiguous(source_pb, b'A'):
156+
raise BufferError("`source` must contain contiguous memory")
154157

155158
# determine buffer size
156-
blosc_cbuffer_sizes(buffer.ptr, &nbytes, &cbytes, &blocksize)
157-
158-
# release buffers
159-
buffer.release()
159+
blosc_cbuffer_sizes(source_pb.buf, &nbytes, &cbytes, &blocksize)
160160

161161
return nbytes, cbytes, blocksize
162162

163163

164164
def cbuffer_complib(source):
165165
"""Return the name of the compression library used to compress `source`."""
166166
cdef:
167-
Buffer buffer
167+
memoryview source_mv
168+
const Py_buffer* source_pb
168169

169-
# obtain buffer
170-
buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
170+
# obtain source memoryview
171+
source_mv = memoryview(source)
172+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
173+
if not PyBuffer_IsContiguous(source_pb, b'A'):
174+
raise BufferError("`source` must contain contiguous memory")
171175

172176
# determine buffer size
173-
complib = blosc_cbuffer_complib(buffer.ptr)
174-
175-
# release buffers
176-
buffer.release()
177+
complib = blosc_cbuffer_complib(source_pb.buf)
177178

178179
complib = complib.decode('ascii')
179180

@@ -193,18 +194,19 @@ def cbuffer_metainfo(source):
193194
194195
"""
195196
cdef:
196-
Buffer buffer
197+
memoryview source_mv
198+
const Py_buffer* source_pb
197199
size_t typesize
198200
int flags
199201

200-
# obtain buffer
201-
buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
202+
# obtain source memoryview
203+
source_mv = memoryview(source)
204+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
205+
if not PyBuffer_IsContiguous(source_pb, b'A'):
206+
raise BufferError("`source` must contain contiguous memory")
202207

203208
# determine buffer size
204-
blosc_cbuffer_metainfo(buffer.ptr, &typesize, &flags)
205-
206-
# release buffers
207-
buffer.release()
209+
blosc_cbuffer_metainfo(source_pb.buf, &typesize, &flags)
208210

209211
# decompose flags
210212
if flags & BLOSC_DOSHUFFLE:
@@ -252,9 +254,10 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE,
252254
"""
253255

254256
cdef:
255-
char *source_ptr
256-
char *dest_ptr
257-
Buffer source_buffer
257+
memoryview source_mv
258+
const Py_buffer* source_pb
259+
const char* source_ptr
260+
char* dest_ptr
258261
size_t nbytes, itemsize
259262
int cbytes
260263
bytes dest
@@ -264,11 +267,16 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE,
264267
if cname_str not in list_compressors():
265268
err_bad_cname(cname_str)
266269

267-
# setup source buffer
268-
source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
269-
source_ptr = source_buffer.ptr
270-
nbytes = source_buffer.nbytes
271-
itemsize = source_buffer.itemsize
270+
# obtain source memoryview
271+
source_mv = memoryview(source)
272+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
273+
if not PyBuffer_IsContiguous(source_pb, b'A'):
274+
raise BufferError("`source` must contain contiguous memory")
275+
276+
# extract metadata
277+
source_ptr = <const char*>source_pb.buf
278+
nbytes = source_pb.len
279+
itemsize = source_pb.itemsize
272280

273281
# determine shuffle
274282
if shuffle == AUTOSHUFFLE:
@@ -280,46 +288,40 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE,
280288
raise ValueError('invalid shuffle argument; expected -1, 0, 1 or 2, found %r' %
281289
shuffle)
282290

283-
try:
284-
285-
# setup destination
286-
dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD)
287-
dest_ptr = PyBytes_AS_STRING(dest)
288-
289-
# perform compression
290-
if _get_use_threads():
291-
# allow blosc to use threads internally
291+
# setup destination
292+
dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD)
293+
dest_ptr = PyBytes_AS_STRING(dest)
292294

293-
# N.B., we are using blosc's global context, and so we need to use a lock
294-
# to ensure no-one else can modify the global context while we're setting it
295-
# up and using it.
296-
with get_mutex():
295+
# perform compression
296+
if _get_use_threads():
297+
# allow blosc to use threads internally
297298

298-
# set compressor
299-
compressor_set = blosc_set_compressor(cname)
300-
if compressor_set < 0:
301-
# shouldn't happen if we checked against list of compressors
302-
# already, but just in case
303-
err_bad_cname(cname_str)
299+
# N.B., we are using blosc's global context, and so we need to use a lock
300+
# to ensure no-one else can modify the global context while we're setting it
301+
# up and using it.
302+
with get_mutex():
304303

305-
# set blocksize
306-
blosc_set_blocksize(blocksize)
304+
# set compressor
305+
compressor_set = blosc_set_compressor(cname)
306+
if compressor_set < 0:
307+
# shouldn't happen if we checked against list of compressors
308+
# already, but just in case
309+
err_bad_cname(cname_str)
307310

308-
# perform compression
309-
with nogil:
310-
cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr,
311-
dest_ptr, nbytes + BLOSC_MAX_OVERHEAD)
311+
# set blocksize
312+
blosc_set_blocksize(blocksize)
312313

313-
else:
314+
# perform compression
314315
with nogil:
315-
cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr,
316-
dest_ptr, nbytes + BLOSC_MAX_OVERHEAD,
317-
cname, blocksize, 1)
316+
cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr,
317+
dest_ptr, nbytes + BLOSC_MAX_OVERHEAD)
318318

319-
finally:
319+
else:
320+
with nogil:
321+
cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr,
322+
dest_ptr, nbytes + BLOSC_MAX_OVERHEAD,
323+
cname, blocksize, 1)
320324

321-
# release buffers
322-
source_buffer.release()
323325

324326
# check compression was successful
325327
if cbytes <= 0:
@@ -350,15 +352,22 @@ def decompress(source, dest=None):
350352
"""
351353
cdef:
352354
int ret
353-
char *source_ptr
354-
char *dest_ptr
355-
Buffer source_buffer
356-
Buffer dest_buffer = None
355+
memoryview source_mv
356+
const Py_buffer* source_pb
357+
const char* source_ptr
358+
memoryview dest_mv
359+
Py_buffer* dest_pb
360+
char* dest_ptr
357361
size_t nbytes, cbytes, blocksize
358362

359-
# setup source buffer
360-
source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
361-
source_ptr = source_buffer.ptr
363+
# obtain source memoryview
364+
source_mv = memoryview(source)
365+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
366+
if not PyBuffer_IsContiguous(source_pb, b'A'):
367+
raise BufferError("`source` must contain contiguous memory")
368+
369+
# get source pointer
370+
source_ptr = <const char*>source_pb.buf
362371

363372
# determine buffer size
364373
blosc_cbuffer_sizes(source_ptr, &nbytes, &cbytes, &blocksize)
@@ -367,36 +376,28 @@ def decompress(source, dest=None):
367376
if dest is None:
368377
# allocate memory
369378
dest = PyBytes_FromStringAndSize(NULL, nbytes)
370-
dest_ptr = PyBytes_AS_STRING(dest)
371-
dest_nbytes = nbytes
372379
else:
373-
arr = ensure_contiguous_ndarray(dest)
374-
dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
375-
dest_ptr = dest_buffer.ptr
376-
dest_nbytes = dest_buffer.nbytes
377-
378-
try:
379-
380-
# guard condition
381-
if dest_nbytes < nbytes:
382-
raise ValueError('destination buffer too small; expected at least %s, '
383-
'got %s' % (nbytes, dest_nbytes))
384-
385-
# perform decompression
386-
if _get_use_threads():
387-
# allow blosc to use threads internally
388-
with nogil:
389-
ret = blosc_decompress(source_ptr, dest_ptr, nbytes)
390-
else:
391-
with nogil:
392-
ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1)
393-
394-
finally:
395-
396-
# release buffers
397-
source_buffer.release()
398-
if dest_buffer is not None:
399-
dest_buffer.release()
380+
dest = ensure_contiguous_ndarray(dest)
381+
382+
# obtain dest memoryview
383+
dest_mv = memoryview(dest)
384+
dest_pb = PyMemoryView_GET_BUFFER(dest_mv)
385+
dest_ptr = <char*>dest_pb.buf
386+
dest_nbytes = dest_pb.len
387+
388+
# guard condition
389+
if dest_nbytes < nbytes:
390+
raise ValueError('destination buffer too small; expected at least %s, '
391+
'got %s' % (nbytes, dest_nbytes))
392+
393+
# perform decompression
394+
if _get_use_threads():
395+
# allow blosc to use threads internally
396+
with nogil:
397+
ret = blosc_decompress(source_ptr, dest_ptr, nbytes)
398+
else:
399+
with nogil:
400+
ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1)
400401

401402
# handle errors
402403
if ret <= 0:
@@ -433,14 +434,22 @@ def decompress_partial(source, start, nitems, dest=None):
433434
int encoding_size
434435
int nitems_bytes
435436
int start_bytes
436-
char *source_ptr
437-
char *dest_ptr
438-
Buffer source_buffer
439-
Buffer dest_buffer = None
440-
441-
# setup source buffer
442-
source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
443-
source_ptr = source_buffer.ptr
437+
const char* source_ptr
438+
memoryview source_mv
439+
const Py_buffer* source_pb
440+
memoryview dest_mv
441+
Py_buffer* dest_pb
442+
char* dest_ptr
443+
size_t dest_nbytes
444+
445+
# obtain source memoryview
446+
source_mv = memoryview(source)
447+
source_pb = PyMemoryView_GET_BUFFER(source_mv)
448+
if not PyBuffer_IsContiguous(source_pb, b"A"):
449+
raise BufferError("`source` must contain contiguous memory")
450+
451+
# setup source pointer
452+
source_ptr = <const char*>source_pb.buf
444453

445454
# get encoding size from source buffer header
446455
encoding_size = source[3]
@@ -451,26 +460,22 @@ def decompress_partial(source, start, nitems, dest=None):
451460

452461
# setup destination buffer
453462
if dest is None:
463+
# allocate memory
454464
dest = PyBytes_FromStringAndSize(NULL, nitems_bytes)
455-
dest_ptr = PyBytes_AS_STRING(dest)
456-
dest_nbytes = nitems_bytes
457465
else:
458-
arr = ensure_contiguous_ndarray(dest)
459-
dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
460-
dest_ptr = dest_buffer.ptr
461-
dest_nbytes = dest_buffer.nbytes
466+
dest = ensure_contiguous_ndarray(dest)
467+
468+
# obtain dest memoryview
469+
dest_mv = memoryview(dest)
470+
dest_pb = PyMemoryView_GET_BUFFER(dest_mv)
471+
dest_ptr = <char*>dest_pb.buf
472+
dest_nbytes = dest_pb.len
462473

463474
# try decompression
464-
try:
465-
if dest_nbytes < nitems_bytes:
466-
raise ValueError('destination buffer too small; expected at least %s, '
467-
'got %s' % (nitems_bytes, dest_nbytes))
468-
ret = blosc_getitem(source_ptr, start, nitems, dest_ptr)
469-
470-
finally:
471-
source_buffer.release()
472-
if dest_buffer is not None:
473-
dest_buffer.release()
475+
if dest_nbytes < nitems_bytes:
476+
raise ValueError('destination buffer too small; expected at least %s, '
477+
'got %s' % (nitems_bytes, dest_nbytes))
478+
ret = blosc_getitem(source_ptr, start, nitems, dest_ptr)
474479

475480
# ret refers to the number of bytes returned from blosc_getitem.
476481
if ret <= 0:

numcodecs/compat_ext.pxd

Lines changed: 0 additions & 12 deletions
This file was deleted.

0 commit comments

Comments
 (0)