Skip to content

Commit db6d355

Browse files
committed
Fast path generalized to slices aligned to chunks
1 parent 8b5f589 commit db6d355

File tree

4 files changed

+146
-78
lines changed

4 files changed

+146
-78
lines changed

bench/ndarray/aligned_chunks.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Benchmark for comparing speeds of NDArray.slice() when using
# different slices containing consecutive and non-consecutive chunks,
# as well as aligned and unaligned.

import math
from time import perf_counter

import blosc2
import numpy as np

# Dimensions and type properties for the arrays
shape = (50, 100, 300)
chunks = (5, 25, 50)
blocks = (1, 5, 10)
dtype = np.dtype(np.int32)

# Set to True to compare every NDArray.slice() result against plain
# indexing (arr[s]).  Verification happens outside the per-slice timings,
# but it does inflate the reported total time of each group.
VERIFY = False

# Non-consecutive slices
nc_slices = [
    (slice(0, 50), slice(0, 100), slice(0, 300 - 1)),
    (slice(0, 10), slice(0, 100 - 1), slice(0, 300)),
    (slice(0, 5 - 1), slice(0, 25), slice(0, 300)),
    (slice(0, 5), slice(0, 25), slice(0, 50 - 1)),
]
# Consecutive slices
c_slices = [
    (slice(0, 50), slice(0, 100), slice(0, 300)),
    (slice(0, 10), slice(0, 100), slice(0, 300)),
    (slice(0, 5), slice(0, 25), slice(0, 300)),
    (slice(0, 5), slice(0, 25), slice(0, 50)),
]
# Non-aligned slices
na_slices = [
    (slice(10, 50 - 1), slice(25, 100), slice(50, 300)),
    (slice(10, 40), slice(25, 75 - 1), slice(100, 200)),
    (slice(20, 35), slice(50, 75), slice(100, 300 - 1)),
    (slice(20 + 1, 25), slice(25, 50), slice(50, 100)),
]
# Aligned slices
a_slices = [
    (slice(10, 50), slice(25, 100), slice(50, 300)),
    (slice(10, 40), slice(25, 75), slice(100, 200)),
    (slice(20, 35), slice(50, 75), slice(100, 300)),
    (slice(20, 25), slice(25, 50), slice(50, 100)),
]


def _time_slices(array, slices, verify=False):
    """Time ``array.slice(s)`` for every slice tuple in *slices*.

    Parameters
    ----------
    array
        Object exposing ``slice(key)`` (here, the array built by
        ``blosc2.arange``).
    slices
        Iterable of slice tuples, one entry per measurement.
    verify : bool, default=False
        If True, check each result against ``array[s]`` with
        ``np.testing.assert_array_equal``.

    Returns
    -------
    tuple[list[float], float]
        The per-slice elapsed times (seconds) and the wall time of the
        whole loop (seconds).
    """
    times = []
    t0 = perf_counter()
    for s in slices:
        t1 = perf_counter()
        sliced = array.slice(s)
        times.append(perf_counter() - t1)
        if verify:
            np.testing.assert_array_equal(sliced[:], array[s])
    return times, perf_counter() - t0


def _speedups(slow_times, fast_times):
    """Return ``slow / fast`` for each pair of timings.

    A zero ``fast`` timing yields ``math.inf`` instead of raising
    ZeroDivisionError, so a too-fast measurement cannot abort the run.
    """
    return [
        slow / fast if fast > 0 else math.inf
        for slow, fast in zip(slow_times, fast_times, strict=True)
    ]


print("Creating array with shape:", shape)
t0 = perf_counter()
arr = blosc2.arange(math.prod(shape), dtype=dtype, shape=shape, chunks=chunks, blocks=blocks)
print(f"Time to create array: {perf_counter() - t0 : .5f}")

print("Timing non-consecutive slices...")
nc_times, elapsed = _time_slices(arr, nc_slices, verify=VERIFY)
print(f"Time to get non-consecutive slices: {elapsed : .5f}")

print("Timing consecutive slices...")
c_times, elapsed = _time_slices(arr, c_slices, verify=VERIFY)
c_speedup = _speedups(nc_times, c_times)
print(f"Time to get consecutive slices: {elapsed : .5f}")
print("Speedups for consecutive slices: ", [f"{s:.2f}x" for s in c_speedup])

print("Timing non-aligned slices...")
na_times, elapsed = _time_slices(arr, na_slices, verify=VERIFY)
print(f"Time to get non-aligned slices: {elapsed : .5f}")

print("Timing aligned slices...")
a_times, elapsed = _time_slices(arr, a_slices, verify=VERIFY)
a_speedup = _speedups(na_times, a_times)
print(f"Time to get aligned slices: {elapsed : .5f}")
print("Speedups for aligned slices: ", [f"{s:.2f}x" for s in a_speedup])

bench/ndarray/consecutive_chunks.py

Lines changed: 0 additions & 56 deletions
This file was deleted.

src/blosc2/ndarray.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from itertools import product
1717
from typing import TYPE_CHECKING, Any, NamedTuple
1818

19+
from dask.array.routines import aligned_coarsen_chunks
1920
from numpy.exceptions import ComplexWarning
2021

2122
if TYPE_CHECKING:
@@ -1030,11 +1031,11 @@ def extract_values(arr, indices: np.ndarray[np.int_], max_cache_size: int = 10)
10301031
return extracted_values
10311032

10321033

1033-
def detect_consecutive_chunks( # noqa: C901
1034-
key: Sequence[slice], shape: Sequence[int], chunks: Sequence[int]
1034+
def detect_aligned_chunks(
1035+
key: Sequence[slice], shape: Sequence[int], chunks: Sequence[int], consecutive: bool = False
10351036
) -> list[int]:
10361037
"""
1037-
Detect whether a multidimensional slice matches a sequence of consecutive chunk boundaries.
1038+
Detect whether a multidimensional slice is aligned with chunk boundaries.
10381039
10391040
Parameters
10401041
----------
@@ -1044,27 +1045,28 @@ def detect_consecutive_chunks( # noqa: C901
10441045
Shape of the NDArray.
10451046
chunks : Sequence of int
10461047
Chunk shape of the NDArray.
1048+
consecutive : bool, default=False
1049+
If True, check if the chunks are consecutive in storage order.
1050+
If False, only check for chunk boundary alignment.
10471051
10481052
Returns
10491053
-------
10501054
list[int]
1051-
Index of the chunk (in C-order) if the slice matches exactly with a single chunk,
1052-
a list of chunk indices if the slice matches a consecutive sequence of chunks.
1053-
If it doesn't match any chunk(s) properly, return an empty list.
1055+
List of chunk indices (in C-order) that the slice overlaps with.
1056+
If the slice isn't aligned with chunk boundaries, returns an empty list.
1057+
If consecutive=True and chunks aren't consecutive, returns an empty list.
10541058
"""
10551059
if len(key) != len(shape):
10561060
return []
10571061

1058-
# Check that slice boundaries are exact multiple of chunk boundaries.
1059-
# We want to do that so we don't copy data, and hence, waste space,
1060-
# unnecessarily into destination.
1062+
# Check that slice boundaries are exact multiple of chunk boundaries
10611063
for i, s in enumerate(key):
10621064
if s.start is not None and s.start % chunks[i] != 0:
10631065
return []
10641066
if s.stop is not None and s.stop % chunks[i] != 0:
10651067
return []
10661068

1067-
# Parse the slice boundaries and check for alignment
1069+
# Parse the slice boundaries
10681070
start_indices = []
10691071
end_indices = []
10701072
n_chunks = []
@@ -1088,6 +1090,7 @@ def detect_consecutive_chunks( # noqa: C901
10881090
end_indices.append(end_idx)
10891091
n_chunks.append(math.ceil(shape[i] / chunk_size))
10901092

1093+
# Get all chunk combinations in the slice
10911094
indices = [range(start, end) for start, end in zip(start_indices, end_indices, strict=False)]
10921095
result = []
10931096

@@ -1100,17 +1103,18 @@ def detect_consecutive_chunks( # noqa: C901
11001103

11011104
result.append(flat_index)
11021105

1103-
if not result:
1104-
return []
1105-
1106-
# The product() of ranges might not naturally produce indices in ascending order
1107-
result.sort()
1108-
is_consecutive = builtins.all(result[i] == result[i - 1] + 1 for i in range(1, len(result)))
1106+
# Check if chunks are consecutive if requested
1107+
if consecutive and result:
1108+
sorted_result = sorted(result)
1109+
if sorted_result[-1] - sorted_result[0] + 1 != len(sorted_result):
1110+
return []
11091111

1110-
if not is_consecutive:
1111-
return []
1112+
# The array of indices must be consecutive
1113+
for i in range(len(sorted_result) - 1):
1114+
if sorted_result[i + 1] - sorted_result[i] != 1:
1115+
return []
11121116

1113-
return result
1117+
return sorted(result)
11141118

11151119

11161120
class NDOuterIterator:
@@ -1933,8 +1937,9 @@ def slice(self, key: int | slice | Sequence[slice], **kwargs: Any) -> NDArray:
19331937

19341938
# Fast path for slices aligned with chunk boundaries
19351939
if step == (1,) * self.ndim:
1936-
consecutive_chunks = detect_consecutive_chunks(key, self.shape, self.chunks)
1937-
if consecutive_chunks:
1940+
aligned_chunks = detect_aligned_chunks(key, self.shape, self.chunks, consecutive=False)
1941+
if aligned_chunks:
1942+
# print("Aligned chunks detected", aligned_chunks)
19381943
# Create a new ndarray for the key slice
19391944
new_shape = [
19401945
sp - st for sp, st in zip([k.stop for k in key], [k.start for k in key], strict=False)
@@ -1948,7 +1953,7 @@ def slice(self, key: int | slice | Sequence[slice], **kwargs: Any) -> NDArray:
19481953
)
19491954
# Get the chunks from the original array and update the new array
19501955
# No need for chunks to decompress and compress again
1951-
for order, nchunk in enumerate(consecutive_chunks):
1956+
for order, nchunk in enumerate(aligned_chunks):
19521957
chunk = self.schunk.get_chunk(nchunk)
19531958
newarr.schunk.update_chunk(order, chunk)
19541959
return newarr

tests/ndarray/test_slice.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,21 @@
2121
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 100), slice(0, 300)), np.int32),
2222
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 200)), np.int32),
2323
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 50)), np.int32),
24+
# Aligned slices
25+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 50), slice(25, 100), slice(50, 300)), np.int32),
26+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 40), slice(25, 75), slice(100, 200)), np.int32),
27+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 35), slice(50, 75), slice(100, 300)), np.int32),
28+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 25), slice(25, 50), slice(50, 100)), np.int32),
2429
# Non-consecutive slices
2530
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 10), slice(0, 100), slice(0, 300 - 1)), np.int32),
2631
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 100 - 1), slice(0, 300)), np.int32),
2732
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5 - 1), slice(0, 25), slice(0, 200)), np.int32),
2833
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 50 - 1)), np.int32),
34+
# Non-aligned slices
35+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 50 - 1), slice(25, 100), slice(50, 300)), np.int32),
36+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 40), slice(25, 75 - 1), slice(100, 200)), np.int32),
37+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 35), slice(50, 75), slice(100, 300 - 1)), np.int32),
38+
((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20 + 1, 25), slice(25, 50), slice(50, 100)), np.int32),
2939
]
3040

3141

0 commit comments

Comments
 (0)