Skip to content

Commit 687562c

Browse files
committed
Optimised fast path for 1d arrays
1 parent 0c9af6a commit 687562c

File tree

3 files changed

+93
-39
lines changed

3 files changed

+93
-39
lines changed

bench/ndarray/fancy_index.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
plt.rc('font',**{'serif':['cm']})
2525
plt.style.use('seaborn-v0_8-paper')
2626

27-
NUMPY_BLOSC = True # activate NUMPY and BLOSC tests
27+
NUMPY_BLOSC = False # activate NUMPY and BLOSC tests
2828
NUMPY_BLOSC_ZARR = False # activate NUMPY, BLOSC and Zarr tests
2929
# default if both are false is to run tests for Numpy, Blosc, Zarr and HDF5
3030

@@ -46,7 +46,7 @@ def genarray(r, ndims=1, verbose=True):
4646
return arr
4747

4848

49-
sizes = np.int64(np.array([1, 2, 4, 8, 16, 24]))
49+
sizes = np.int64(np.array([1, 2, 4, 8, 16, 24, 32]))
5050
rng = np.random.default_rng()
5151
blosctimes = []
5252
nptimes = []
@@ -72,14 +72,14 @@ def genarray(r, ndims=1, verbose=True):
7272
except:
7373
for d in sizes:
7474
arr = genarray(d, ndims=2)
75-
idx = rng.integers(low=0, high=arr.shape[0], size=(arr.shape[0],))
75+
idx = rng.integers(low=0, high=arr.shape[0], size=(arr.shape[0]//4,))
7676
row = np.sort(np.unique(idx))
77-
col = np.sort(np.unique(rng.integers(low=0, high=arr.shape[0], size=(arr.shape[0],))))
77+
col = np.sort(np.unique(rng.integers(low=0, high=arr.shape[0], size=(arr.shape[0]//4,))))
7878
mask = rng.integers(low=0, high=2, size=(arr.shape[0],)) == 1
7979

80-
8180
## Test fancy indexing for different use cases
8281
m, M = np.min(idx), np.max(idx)
82+
res = arr[:][row]
8383
def timer(arr, row=row, col=col):
8484
time_list = []
8585
if NUMPY_BLOSC or NUMPY_BLOSC_ZARR:
@@ -112,14 +112,14 @@ def timer(arr, row=row, col=col):
112112
arr=arr[:]
113113
nptimes += [timer(arr, row=idx, col=idx)]
114114
if NUMPY_BLOSC_ZARR:
115-
z_test = zarr.zeros(shape=arr.shape, dtype=arr.dtype)
115+
z_test = zarr.create_array(store='data/example.zarr', shape=arr.shape, dtype=arr.dtype, overwrite=True)
116116
z_test[:] = arr
117117
zarrtimes += [timer(z_test, row=idx, col=idx)]
118118
else:
119119
blosctimes += [timer(arr)]
120120
arr=arr[:]
121121
nptimes += [timer(arr)]
122-
z_test = zarr.zeros(shape=arr.shape, dtype=arr.dtype)
122+
z_test = zarr.create_array(store='data/example.zarr', shape=arr.shape, dtype=arr.dtype, overwrite=True)
123123
z_test[:] = arr
124124
zarrtimes += [timer(z_test)]
125125
with h5py.File('my_hdf5_file.h5', 'w') as f:
@@ -152,7 +152,6 @@ def timer(arr, row=row, col=col):
152152
plt.xticks(x-width, np.round(sizes, 2))
153153
plt.ylabel("Time (s)")
154154
plt.title('Fancy indexing performance comparison')
155-
# plt.ylim([0,10])
156155
plt.gca().set_yscale('log')
157156
plt.savefig(f'plots/fancyIdx{labs}.png', format="png")
158157
plt.show()

src/blosc2/ndarray.py

Lines changed: 74 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,10 +1427,10 @@ def oindex(self) -> OIndex:
14271427
"""Shortcut for orthogonal (outer) indexing, see :func:`get_oselection_numpy`"""
14281428
return OIndex(self)
14291429

1430-
@property
1431-
def vindex(self) -> VIndex:
1432-
"""Shortcut for vectorised indexing. Not yet supported."""
1433-
return VIndex(self)
1430+
# @property
1431+
# def vindex(self) -> VIndex:
1432+
# """Shortcut for vectorised indexing. Not yet supported."""
1433+
# return VIndex(self)
14341434

14351435
@property
14361436
def T(self):
@@ -1441,24 +1441,64 @@ def T(self):
14411441

14421442
def get_fselection_numpy(self, key):
14431443
# TODO: Make this faster for broadcasted keys
1444+
if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
1445+
return self[:][key] # load into memory for smallish arrays
14441446
shape = self.shape
14451447
chunks = self.chunks
14461448
_slice = ndindex.ndindex(key).expand(shape)
1447-
chunk_size = ndindex.ChunkSize(chunks)
1448-
1449-
# repeated indices are grouped together
1450-
intersecting_chunks = chunk_size.as_subchunks(_slice, shape) # if _slice is (), returns all chunks
14511449
out_shape = _slice.newshape(shape)
1452-
14531450
out = np.empty(out_shape, dtype=self.dtype)
1454-
for c in intersecting_chunks:
1455-
sub_idx = _slice.as_subindex(c).raw
1456-
sel_idx = c.as_subindex(_slice)
1457-
new_shape = sel_idx.newshape(out_shape)
1458-
start, stop, step = get_ndarray_start_stop(self.ndim, c.raw, shape)
1459-
chunk = np.empty(tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype)
1460-
super().get_slice_numpy(chunk, (start, stop))
1461-
out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape)
1451+
1452+
if self.ndim == 1:
1453+
# Fast path so we can avoid the costly lines in slow path
1454+
# (sub_idx = _slice.as_subindex(c).raw, sel_idx = c.as_subindex(_slice))
1455+
key = np.array(key)
1456+
if np.any(np.diff(key) < 0):
1457+
idx_order = np.argsort(key)
1458+
sorted_idxs = key[idx_order]
1459+
else:
1460+
idx_order = None
1461+
sorted_idxs = key
1462+
chunk_nitems = np.bincount(
1463+
sorted_idxs // chunks[0], minlength=self.schunk.nchunks
1464+
) # number of items per chunk
1465+
chunk_nitems_cumsum = np.cumsum(chunk_nitems)
1466+
for chunk_i, c in enumerate(chunk_nitems):
1467+
if c == 0:
1468+
continue # no items in chunk
1469+
start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1]
1470+
stop = chunk_nitems_cumsum[chunk_i]
1471+
selection = sorted_idxs[start:stop]
1472+
out_selection = idx_order[start:stop] if idx_order is not None else slice(start, stop)
1473+
to_be_loaded = np.empty((selection[-1] - selection[0] + 1,), dtype=self.dtype)
1474+
# if len(selection) < 10:
1475+
# # TODO: optimise for case of few elements and go index-by-index
1476+
# selector = out_selection.start + np.arange(out_selection.stop) if isinstance(out_selection, slice) else out_selection
1477+
# for j, idx in enumerate(sorted_idxs[start:stop]):
1478+
# out[out_selection[j]] = self[idx]
1479+
# else:
1480+
super().get_slice_numpy(
1481+
to_be_loaded, ((selection[0],), (selection[-1] + 1,))
1482+
) # get relevant section of chunk
1483+
loc_idx = selection - selection[0]
1484+
out[out_selection] = to_be_loaded[loc_idx]
1485+
1486+
else:
1487+
chunk_size = ndindex.ChunkSize(chunks)
1488+
# repeated indices are grouped together
1489+
intersecting_chunks = chunk_size.as_subchunks(
1490+
_slice, shape
1491+
) # if _slice is (), returns all chunks
1492+
for c in intersecting_chunks:
1493+
sub_idx = _slice.as_subindex(c).raw
1494+
sel_idx = c.as_subindex(_slice)
1495+
new_shape = sel_idx.newshape(out_shape)
1496+
start, stop, step = get_ndarray_start_stop(self.ndim, c.raw, shape)
1497+
chunk = np.empty(
1498+
tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype
1499+
)
1500+
super().get_slice_numpy(chunk, (start, stop))
1501+
out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape)
14621502

14631503
return out
14641504

@@ -1474,7 +1514,8 @@ def get_oselection_numpy(self, key):
14741514

14751515
def set_oselection_numpy(self, key, arr: np.ndarray):
14761516
"""
1477-
Select independently from self along axes specified in key and set to entries in arr. Key must be same length as self shape.
1517+
Select independently from self along axes specified in key and set to entries in arr.
1518+
Key must be same length as self shape.
14781519
See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing.
14791520
"""
14801521
return super().set_oindex_numpy(key, arr)
@@ -1483,7 +1524,10 @@ def __getitem__( # noqa: C901
14831524
self,
14841525
key: int | slice | Sequence[slice | int] | np.ndarray[np.bool_] | NDArray | blosc2.LazyExpr | str,
14851526
) -> np.ndarray | blosc2.LazyExpr:
1486-
"""Retrieve a (multidimensional) slice as specified by the key. Matches NumPy fancy indexing behaviour.
1527+
"""
1528+
Retrieve a (multidimensional) slice as specified by the key.
1529+
1530+
Note that this __getitem__ matches NumPy fancy indexing behaviour.
14871531
14881532
Parameters
14891533
----------
@@ -1547,10 +1591,9 @@ def __getitem__( # noqa: C901
15471591
key = key[:]
15481592
# This is a fast path for the case where the key is a short list of 1-d indices
15491593
# For the rest, use an array of booleans.
1550-
if key.dtype == np.int64 and key.ndim == 1 and self.ndim == 1:
1551-
return extract_values(self, key)
1552-
else:
1553-
return self.get_fselection_numpy(key) # fancy index
1594+
# if key.dtype == np.int64 and key.ndim == 1 and self.ndim == 1:
1595+
# return extract_values(self, key)
1596+
return self.get_fselection_numpy(key) # fancy index
15541597
else:
15551598
# The more general case (this is quite slow)
15561599
# If the key is a LazyExpr, decorate with ``where`` and return it
@@ -4400,13 +4443,13 @@ def __setitem__(self, selection, input) -> np.ndarray:
44004443
return self.array.set_oselection_numpy(selection, input)
44014444

44024445

4403-
class VIndex:
4404-
def __init__(self, array: NDArray):
4405-
self.array = array
4446+
# class VIndex:
4447+
# def __init__(self, array: NDArray):
4448+
# self.array = array
44064449

4407-
# TODO: all this
4408-
def __getitem__(self, selection) -> np.ndarray:
4409-
return NotImplementedError
4450+
# # TODO: all this
4451+
# def __getitem__(self, selection) -> np.ndarray:
4452+
# return NotImplementedError
44104453

4411-
def __setitem__(self, selection, input) -> np.ndarray:
4412-
return NotImplementedError
4454+
# def __setitem__(self, selection, input) -> np.ndarray:
4455+
# return NotImplementedError

tests/ndarray/test_ndarray.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,18 @@ def test_oindex():
293293

294294

295295
def test_findex():
296+
# Test 1d fast path
297+
ndim = 1
298+
d = 100
299+
shape = (d,) * ndim
300+
arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype="i4")
301+
rng = np.random.default_rng()
302+
idx = rng.integers(low=0, high=d, size=(d // 4,))
303+
nparr = arr[:]
304+
b = arr[idx]
305+
n = nparr[idx]
306+
np.testing.assert_allclose(b, n)
307+
296308
ndim = 3
297309
d = 100
298310
shape = (d,) * ndim

0 commit comments

Comments
 (0)