Skip to content

Commit ead30b9

Browse files
committed
refactor utility functions
1 parent 457788c commit ead30b9

14 files changed

+73
-112
lines changed

numcodecs/astype.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55

66
from .abc import Codec
7-
from .compat import buffer_copy, ndarray_from_buffer
7+
from .compat import memory_copy, ensure_ndarray_from_memory
88

99

1010
class AsType(Codec):
@@ -50,7 +50,7 @@ def __init__(self, encode_dtype, decode_dtype):
5050
def encode(self, buf):
5151

5252
# view input data as 1D array
53-
arr = ndarray_from_buffer(buf, self.decode_dtype)
53+
arr = ensure_ndarray_from_memory(buf).view(self.decode_dtype)
5454

5555
# convert and copy
5656
enc = arr.astype(self.encode_dtype)
@@ -60,13 +60,13 @@ def encode(self, buf):
6060
def decode(self, buf, out=None):
6161

6262
# view encoded data as 1D array
63-
enc = ndarray_from_buffer(buf, self.encode_dtype)
63+
enc = ensure_ndarray_from_memory(buf).view(self.encode_dtype)
6464

6565
# convert and copy
6666
dec = enc.astype(self.decode_dtype)
6767

6868
# handle output
69-
out = buffer_copy(dec, out)
69+
out = memory_copy(dec, out)
7070

7171
return out
7272

numcodecs/bz2.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,10 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, print_function, division
33
import bz2 as _bz2
4-
import array
5-
6-
7-
import numpy as np
84

95

106
from numcodecs.abc import Codec
11-
from numcodecs.compat import buffer_copy, ensure_memoryview
7+
from numcodecs.compat import memory_copy, ensure_memoryview
128

139

1410
class BZ2(Codec):
@@ -46,4 +42,4 @@ def decode(self, buf, out=None):
4642
# handle destination - Python standard library bz2 module does not
4743
# support direct decompression into buffer, so we have to copy into
4844
# out if given
49-
return buffer_copy(dec, out)
45+
return memory_copy(dec, out)

numcodecs/categorize.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44

55
from .abc import Codec
6-
from .compat import ndarray_from_buffer, buffer_copy, ensure_text
6+
from .compat import ensure_ndarray_from_memory, memory_copy, ensure_text
77

88

99
import numpy as np
@@ -53,7 +53,10 @@ def __init__(self, labels, dtype, astype='u1'):
5353
def encode(self, buf):
5454

5555
# view input as ndarray
56-
arr = ndarray_from_buffer(buf, self.dtype)
56+
if self.dtype == object:
57+
arr = np.asanyarray(buf, dtype=object).reshape(-1, order='A')
58+
else:
59+
arr = ensure_ndarray_from_memory(buf).view(self.dtype)
5760

5861
# setup output array
5962
enc = np.zeros_like(arr, dtype=self.astype)
@@ -67,7 +70,7 @@ def encode(self, buf):
6770
def decode(self, buf, out=None):
6871

6972
# view encoded data as ndarray
70-
enc = ndarray_from_buffer(buf, self.astype)
73+
enc = ensure_ndarray_from_memory(buf).view(self.astype)
7174

7275
# setup output
7376
if isinstance(out, np.ndarray):
@@ -84,7 +87,7 @@ def decode(self, buf, out=None):
8487

8588
# handle output
8689
if copy_needed:
87-
dec = buffer_copy(dec, out)
90+
dec = memory_copy(dec, out)
8891

8992
return dec
9093

numcodecs/checksum32.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,30 +7,28 @@
77

88

99
from .abc import Codec
10-
from .compat import ndarray_from_buffer, buffer_copy
10+
from .compat import ensure_ndarray_from_memory, memory_copy
1111

1212

1313
class Checksum32(Codec):
1414

1515
checksum = None
1616

1717
def encode(self, buf):
18-
if isinstance(buf, np.ndarray) and buf.dtype == object:
19-
raise ValueError('cannot encode object array')
20-
arr = ndarray_from_buffer(buf, dtype='u1')
18+
arr = ensure_ndarray_from_memory(buf).view('u1')
2119
checksum = self.checksum(arr) & 0xffffffff
2220
enc = np.empty(arr.nbytes + 4, dtype='u1')
2321
enc[:4].view('<u4')[0] = checksum
2422
enc[4:] = arr
2523
return enc
2624

2725
def decode(self, buf, out=None):
28-
arr = ndarray_from_buffer(buf, dtype='u1')
26+
arr = ensure_ndarray_from_memory(buf).view('u1')
2927
expect = arr[:4].view('<u4')[0]
3028
checksum = self.checksum(arr[4:]) & 0xffffffff
3129
if expect != checksum:
3230
raise RuntimeError('checksum failed')
33-
return buffer_copy(arr[4:], out)
31+
return memory_copy(arr[4:], out)
3432

3533

3634
class CRC32(Checksum32):

numcodecs/compat.py

Lines changed: 17 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -25,68 +25,31 @@
2525
from functools import reduce
2626

2727

28-
def buffer_copy(buf, out=None):
28+
def memory_copy(buf, out=None):
2929
"""Copy the contents of the memory buffer from `buf` to `out`."""
3030

3131
if out is None:
3232
# no-op
3333
return buf
3434

35-
# handle ndarray destination
36-
if isinstance(out, np.ndarray):
35+
# obtain ndarrays, casting to the same data type
36+
buf = ensure_ndarray_from_memory(buf).view('u1')
37+
out = ensure_ndarray_from_memory(out).view('u1')
3738

38-
# view source as destination dtype
39-
if isinstance(buf, np.ndarray):
40-
buf = buf.view(dtype=out.dtype).reshape(-1, order='A')
41-
else:
42-
buf = np.frombuffer(buf, dtype=out.dtype)
43-
44-
# ensure shapes are compatible
45-
if buf.shape != out.shape:
46-
if out.flags.f_contiguous:
47-
order = 'F'
48-
else:
49-
order = 'C'
50-
buf = buf.reshape(out.shape, order=order)
51-
52-
# copy via numpy
53-
np.copyto(out, buf)
54-
55-
# handle generic buffer destination
56-
else:
57-
58-
# obtain memoryview of destination
59-
dest = memoryview(out)
60-
61-
# ensure source is 1D
62-
if isinstance(buf, np.ndarray):
63-
buf = buf.reshape(-1, order='A')
64-
# try to match itemsize
65-
dtype = 'u%s' % dest.itemsize
66-
buf = buf.view(dtype=dtype)
67-
68-
# try to copy via memoryview
69-
dest[:] = buf
39+
# copy memory
40+
np.copyto(out, buf)
7041

7142
return out
7243

7344

74-
def ndarray_from_buffer(buf, dtype):
75-
if isinstance(buf, np.ndarray):
76-
arr = buf.reshape(-1, order='A').view(dtype)
77-
else:
78-
arr = np.frombuffer(buf, dtype=dtype)
79-
return arr
80-
81-
8245
def ensure_text(l, encoding='utf-8'):
8346
if isinstance(l, text_type):
8447
return l
8548
else: # pragma: py3 no cover
8649
return text_type(l, encoding=encoding)
8750

8851

89-
def ensure_ndarray_from_memory(o):
52+
def ensure_ndarray_from_memory(o, flatten=True):
9053
"""Convenience function to obtain a numpy ndarray using memory exposed by object `o`,
9154
ensuring that no memory copies are made, and that `o` is not an object array.
9255
@@ -96,6 +59,8 @@ def ensure_ndarray_from_memory(o):
9659
Any object exposing a memory buffer. On Python 3 this must be an object exposing
9760
the new-style buffer interface. On Python 2 this can also be an object exposing
9861
the old-style buffer interface.
62+
flatten : bool, optional
63+
If True, flatten any multi-dimensional inputs into a one-dimensional array.
9964
10065
Returns
10166
-------
@@ -106,7 +71,7 @@ def ensure_ndarray_from_memory(o):
10671
if not isinstance(o, np.ndarray):
10772

10873
# first try to obtain a memoryview or buffer, needed to ensure that we don't
109-
# accidentally copy memory when going via np.array()
74+
# subsequently copy memory when going via np.array()
11075

11176
if PY2: # pragma: py3 no cover
11277
# accept objects exposing either old-style or new-style buffer interface
@@ -120,13 +85,18 @@ def ensure_ndarray_from_memory(o):
12085

12186
# N.B., this is not documented, but np.array() will accept an object exposing
12287
# a buffer interface, and will take a view of the memory rather than making a
123-
# copy, preserving type information
88+
# copy, preserving type information where present
12489
o = np.array(o, copy=False)
12590

12691
# check for object arrays
12792
if o.dtype == object:
12893
raise ValueError('object arrays are not supported')
12994

95+
if flatten:
96+
97+
# flatten the array to 1 dimension
98+
o = o.reshape(-1, order='A')
99+
130100
return o
131101

132102

@@ -149,17 +119,12 @@ def ensure_memoryview(o, flatten=True):
149119
"""
150120

151121
# go via numpy, for convenience
152-
o = ensure_ndarray_from_memory(o)
122+
o = ensure_ndarray_from_memory(o, flatten=flatten)
153123

154124
# check for datetime or timedelta ndarray, cannot take a memoryview of those
155125
if o.dtype.kind in 'Mm':
156126
o = o.view(np.int64)
157127

158-
if flatten:
159-
160-
# flatten the array
161-
o = o.reshape(-1, order='A')
162-
163128
# expose as memoryview
164129
o = memoryview(o)
165130

@@ -203,9 +168,6 @@ def ensure_buffer(o):
203168
# go via numpy, for convenience
204169
o = ensure_ndarray_from_memory(o)
205170

206-
# N.B., no need to flatten multi-dimensional arrays, as the old-style buffer
207-
# interface just exposes the flat memory
208-
209171
# expose as buffer
210172
o = buffer(o)
211173

numcodecs/delta.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77

88
from .abc import Codec
9-
from .compat import ndarray_from_buffer, buffer_copy
9+
from .compat import ensure_ndarray_from_memory, memory_copy
1010

1111

1212
class Delta(Codec):
@@ -57,7 +57,7 @@ def __init__(self, dtype, astype=None):
5757
def encode(self, buf):
5858

5959
# view input data as 1D array
60-
arr = ndarray_from_buffer(buf, self.dtype)
60+
arr = ensure_ndarray_from_memory(buf).reshape(-1, order='A').view(self.dtype)
6161

6262
# setup encoded output
6363
enc = np.empty_like(arr, dtype=self.astype)
@@ -73,7 +73,7 @@ def encode(self, buf):
7373
def decode(self, buf, out=None):
7474

7575
# view encoded data as 1D array
76-
enc = ndarray_from_buffer(buf, self.astype)
76+
enc = ensure_ndarray_from_memory(buf).reshape(-1, order='A').view(self.astype)
7777

7878
# setup decoded output
7979
if isinstance(out, np.ndarray):
@@ -89,7 +89,7 @@ def decode(self, buf, out=None):
8989

9090
# handle output
9191
if copy_needed:
92-
out = buffer_copy(dec, out)
92+
out = memory_copy(dec, out)
9393

9494
return out
9595

numcodecs/fixedscaleoffset.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77

88
from .abc import Codec
9-
from .compat import ndarray_from_buffer, buffer_copy
9+
from .compat import ensure_ndarray_from_memory, memory_copy
1010

1111

1212
class FixedScaleOffset(Codec):
@@ -88,7 +88,7 @@ def __init__(self, offset, scale, dtype, astype=None):
8888
def encode(self, buf):
8989

9090
# interpret buffer as 1D array
91-
arr = ndarray_from_buffer(buf, self.dtype)
91+
arr = ensure_ndarray_from_memory(buf).view(self.dtype)
9292

9393
# compute scale offset
9494
enc = (arr - self.offset) * self.scale
@@ -104,7 +104,7 @@ def encode(self, buf):
104104
def decode(self, buf, out=None):
105105

106106
# interpret buffer as 1D array
107-
enc = ndarray_from_buffer(buf, self.astype)
107+
enc = ensure_ndarray_from_memory(buf).view(self.astype)
108108

109109
# decode scale offset
110110
dec = (enc / self.scale) + self.offset
@@ -113,7 +113,7 @@ def decode(self, buf, out=None):
113113
dec = dec.astype(self.dtype, copy=False)
114114

115115
# handle output
116-
return buffer_copy(dec, out)
116+
return memory_copy(dec, out)
117117

118118
def get_config(self):
119119
# override to handle encoding dtypes

numcodecs/gzip.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
from .abc import Codec
8-
from .compat import buffer_copy, ensure_memoryview
8+
from .compat import memory_copy, ensure_memoryview
99

1010

1111
class GZip(Codec):
@@ -52,4 +52,4 @@ def decode(self, buf, out=None):
5252
# handle destination - Python standard library zlib module does not
5353
# support direct decompression into buffer, so we have to copy into
5454
# out if given
55-
return buffer_copy(decompressed, out)
55+
return memory_copy(decompressed, out)

numcodecs/lzma.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,8 @@
1414

1515
if _lzma:
1616

17-
import numpy as np
1817
from .abc import Codec
19-
from .compat import buffer_copy, ensure_memoryview
18+
from .compat import memory_copy, ensure_memoryview
2019

2120
# noinspection PyShadowingBuiltins
2221
class LZMA(Codec):
@@ -64,7 +63,7 @@ def decode(self, buf, out=None):
6463
dec = _lzma.decompress(buf, format=self.format, filters=self.filters)
6564

6665
# handle destination
67-
return buffer_copy(dec, out)
66+
return memory_copy(dec, out)
6867

6968
def __repr__(self):
7069
r = '%s(format=%r, check=%r, preset=%r, filters=%r)' % \

0 commit comments

Comments
 (0)