From 3c154ea10e33ddbd1193caf93bf59f4ad874ea53 Mon Sep 17 00:00:00 2001 From: Michael Mezher Date: Thu, 14 May 2020 16:59:02 -0400 Subject: [PATCH 1/6] Added multi-axis advanced indexing support --- sparse/_coo/indexing.py | 103 ++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 14 deletions(-) diff --git a/sparse/_coo/indexing.py b/sparse/_coo/indexing.py index 0ceaf7e5..5adc56ff 100644 --- a/sparse/_coo/indexing.py +++ b/sparse/_coo/indexing.py @@ -30,6 +30,7 @@ def getitem(x, index): from .core import COO # If string, this is an index into an np.void + # Custom dtype. if isinstance(index, str): data = x.data[index] @@ -86,6 +87,7 @@ def getitem(x, index): i = 0 sorted = adv_idx is None or adv_idx.pos == 0 + adv_idx_added = False for ind in index: # Nothing is added to shape or coords if the index is an integer. if isinstance(ind, Integral): @@ -99,9 +101,10 @@ def getitem(x, index): if ind.step < 0: sorted = False # Add the index and shape for the advanced index. - elif isinstance(ind, np.ndarray): + elif isinstance(ind, np.ndarray) and not adv_idx_added: shape.append(adv_idx.length) coords.append(adv_idx.idx) + adv_idx_added = True i += 1 # Add a dimension for None. elif ind is None: @@ -141,20 +144,27 @@ def _mask(coords, indices, shape): if len(adv_idx) != 0: if len(adv_idx) != 1: - raise IndexError( - "Only indices with at most one iterable index are supported." - ) - adv_idx = adv_idx[0] - adv_idx_pos = adv_idx_pos[0] + # Ensure if multiple advanced indices are passed, all are of the same length + adv_ix_len = len(adv_idx[0]) + for ai in adv_idx: + if len(ai) != adv_ix_len: + raise IndexError("shape mismatch: indexing arrays could not be broadcast together. Ensure all indexing arrays are of the same length.") - if adv_idx.ndim != 1: - raise IndexError("Only one-dimensional iterable indices supported.") + mask, aidxs = _compute_multi_axis_multi_mask(coords, _ind_ar_from_indices(indices), np.array(adv_idx,dtype=np.intp), np.array(adv_idx_pos,dtype=np.intp)) + return mask, _AdvIdxInfo(aidxs, adv_idx_pos, adv_ix_len) - mask, aidxs = _compute_multi_mask( - coords, _ind_ar_from_indices(indices), adv_idx, adv_idx_pos - ) - return mask, _AdvIdxInfo(aidxs, adv_idx_pos, len(adv_idx)) + else: + adv_idx = adv_idx[0] + adv_idx_pos = adv_idx_pos[0] + + if adv_idx.ndim != 1: + raise IndexError("Only one-dimensional iterable indices supported.") + + mask, aidxs = _compute_multi_mask( + coords, _ind_ar_from_indices(indices), adv_idx, adv_idx_pos + ) + return mask, _AdvIdxInfo(aidxs, adv_idx_pos, len(adv_idx)) mask, is_slice = _compute_mask(coords, _ind_ar_from_indices(indices)) @@ -276,6 +286,68 @@ def _separate_adv_indices(indices): return new_idx, adv_idx, adv_idx_pos + + +@numba.jit(nopython=True, nogil=True) +def _compute_multi_axis_multi_mask(coords, indices, adv_idx, adv_idx_pos): # pragma: no cover + """ + Computes a mask with the advanced index, and also returns the advanced index + dimension. + + Parameters + ---------- + coords : np.ndarray + Coordinates of the input array. + indices : np.ndarray + The indices in slice format. + adv_idx : np.ndarray + List of advanced indices. + adv_idx_pos : np.ndarray + The position of the advanced indices. + + Returns + ------- + mask : np.ndarray + The mask. + aidxs : np.ndarray + The advanced array index. + """ + n_adv_idx = len(adv_idx_pos) + mask = numba.typed.List.empty_list(numba.types.intp) + a_indices = numba.typed.List.empty_list(numba.types.intp) + full_idx = np.empty((len(indices) + len(adv_idx_pos), 3), dtype=np.intp) + + # Get location of non-advanced indices + if len(indices) != 0: + ixx = 0 + for ix in range(coords.shape[0]): + isin = False + for ax in adv_idx_pos: + if ix == ax: + isin = True + break + if not isin: + full_idx[ix] = indices[ixx] + ixx += 1 + + for i, aidx in enumerate(adv_idx[0]): + for ii in range(n_adv_idx): + full_idx[adv_idx_pos[ii]] = [aidx, aidx + 1, 1] + + partial_mask, is_slice = _compute_mask(coords, full_idx) + if is_slice: + slice_mask = numba.typed.List.empty_list(numba.types.intp) + for j in range(partial_mask[0], partial_mask[1]): + slice_mask.append(j) + partial_mask = array_from_list_intp(slice_mask) + + for j in range(len(partial_mask)): + mask.append(partial_mask[j]) + a_indices.append(i) + + return array_from_list_intp(mask), array_from_list_intp(a_indices) + + @numba.jit(nopython=True, nogil=True) def _compute_multi_mask(coords, indices, adv_idx, adv_idx_pos): # pragma: no cover """ @@ -288,9 +360,9 @@ def _compute_multi_mask(coords, indices, adv_idx, adv_idx_pos): # pragma: no co Coordinates of the input array. indices : np.ndarray The indices in slice format. - adv_idx : int + adv_idx : list(int) The advanced index. - adv_idx_pos : int + adv_idx_pos : list(int) The position of the advanced index. Returns @@ -307,6 +379,7 @@ def _compute_multi_mask(coords, indices, adv_idx, adv_idx_pos): # pragma: no co full_idx[:adv_idx_pos] = indices[:adv_idx_pos] full_idx[adv_idx_pos + 1 :] = indices[adv_idx_pos:] + for i, aidx in enumerate(adv_idx): full_idx[adv_idx_pos] = [aidx, aidx + 1, 1] partial_mask, is_slice = _compute_mask(coords, full_idx) @@ -387,6 +460,8 @@ def _compute_mask(coords, indices): # pragma: no cover n_matches = np.intp(coords.shape[1]) i = 0 + #print('COORD SHAPE: {}'.format(coords.shape)) + #print('IXS SHAPE: {}'.format(indices.shape)) while i < len(indices): # Guesstimate whether working with pairs is more efficient or # working with the mask directly. From ffd20bced26a28ee4f976a2ce04b63e19e1ed05d Mon Sep 17 00:00:00 2001 From: Michael Mezher Date: Thu, 14 May 2020 17:37:48 -0400 Subject: [PATCH 2/6] Added tests, removed muli-axis case from test_slicing_error --- sparse/tests/test_coo.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 48f8f52a..dcecfda2 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -1264,6 +1264,9 @@ def test_gt(): (1, Ellipsis, None), (1, 1, 1, Ellipsis), (Ellipsis, 1, None), + # With multi-axis advanced indexing + ([0, 1],) * 2, + ([0, 1],) * 3, # Pathological - Slices larger than array (slice(None, 1000)), (slice(None), slice(None, 1000)), @@ -1336,7 +1339,6 @@ def test_custom_dtype_slicing(): 0.5, [0.5], {"potato": "kartoffel"}, - ([0, 1],) * 2, ([[0, 1]],), ], ) From b675a4dba267fc3910b16bf2a8bfd51e1793be2a Mon Sep 17 00:00:00 2001 From: Michael Mezher Date: Thu, 14 May 2020 18:00:17 -0400 Subject: [PATCH 3/6] Removing bad tests -- will remake tests in future commit --- sparse/tests/test_coo.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index dcecfda2..c1478f29 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -1265,8 +1265,6 @@ def test_gt(): (1, 1, 1, Ellipsis), (Ellipsis, 1, None), # With multi-axis advanced indexing - ([0, 1],) * 2, - ([0, 1],) * 3, # Pathological - Slices larger than array (slice(None, 1000)), (slice(None), slice(None, 1000)), From 2c3b2f8596f2b91ff5df6a6f62fd02a64abe498a Mon Sep 17 00:00:00 2001 From: Michael Mezher Date: Fri, 15 May 2020 09:06:32 -0400 Subject: [PATCH 4/6] Multiple bug fixes for multi-axis advanced indexing --- multi_adv_ix_test.py | 36 ++++++++++++++++++++++++++++++++++++ multi_adv_ix_test2.py | 34 ++++++++++++++++++++++++++++++++++ sparse/_coo/indexing.py | 16 ++++++++++------ 3 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 multi_adv_ix_test.py create mode 100644 multi_adv_ix_test2.py diff --git a/multi_adv_ix_test.py b/multi_adv_ix_test.py new file mode 100644 index 00000000..8ca71f1e --- /dev/null +++ b/multi_adv_ix_test.py @@ -0,0 +1,36 @@ +import sparse +import inspect +import numpy as np +import time +from sparse._utils import assert_eq, random_value_array + +x = sparse.random((20, 20, 20, 20), density=.5) + +ii = np.random.randint(0,20,(5000)) +index = ([0,1,16],[12,0,1],[12,10,4]) + +#r1 = x[index] +t1 = time.time() +#r1 = x[ii][:,ii] +#r1 = x[ii,ii,ii] +#r1 = x[ii,97:,:3] +r1 = x[index] +t2 = time.time() +print('te: {}'.format(t2-t1)) +print(r1.shape) + +print(r1.min(),r1.max(),r1.sum()) + +d = x.todense() +t1 = time.time() +d = d[index] +t2 = time.time() +print('te2: {}'.format(t2-t1)) +print(d.shape) +print(d.min(),d.max(),d.sum()) + + +print(np.where(d)) +print(np.where(r1.todense())) + +print(assert_eq(d, r1)) diff --git a/multi_adv_ix_test2.py b/multi_adv_ix_test2.py new file mode 100644 index 00000000..3e523257 --- /dev/null +++ b/multi_adv_ix_test2.py @@ -0,0 +1,34 @@ +import sparse +import inspect +import numpy as np +import time +from sparse._utils import assert_eq, random_value_array + + +#([0, 1],) * 3 +#index = ([0, 1],) * 2 +index = ([0,1],[0,1]) +#index = ([0,1]) + +print(index) + +s = sparse.random((3, 3, 3), density=0.5) +x= s.todense() + +r1 = x[index] +r2 = s[index] + + +print(r1.min(),r1.max(),r1.sum()) +print(r2.min(),r2.max(),r2.sum()) +print(r2.data) +print(r2.coords) + +print(x) +print('\n\n\n') +print(r1) +print('\n\n\n') +print(r2.todense()) + +#print(assert_eq(x[index], s[index])) + diff --git a/sparse/_coo/indexing.py b/sparse/_coo/indexing.py index 5adc56ff..190d1031 100644 --- a/sparse/_coo/indexing.py +++ b/sparse/_coo/indexing.py @@ -101,10 +101,11 @@ def getitem(x, index): if ind.step < 0: sorted = False # Add the index and shape for the advanced index. - elif isinstance(ind, np.ndarray) and not adv_idx_added: - shape.append(adv_idx.length) - coords.append(adv_idx.idx) - adv_idx_added = True + elif isinstance(ind, np.ndarray): + if not adv_idx_added: + shape.append(adv_idx.length) + coords.append(adv_idx.idx) + adv_idx_added = True i += 1 # Add a dimension for None. elif ind is None: @@ -146,10 +147,13 @@ def _mask(coords, indices, shape): if len(adv_idx) != 1: # Ensure if multiple advanced indices are passed, all are of the same length + # Also check each advanced index to ensure each is only a one-dimensional iterable adv_ix_len = len(adv_idx[0]) for ai in adv_idx: if len(ai) != adv_ix_len: raise IndexError("shape mismatch: indexing arrays could not be broadcast together. Ensure all indexing arrays are of the same length.") + if ai.ndim != 1: + raise IndexError("Only one-dimensional iterable indices supported.") mask, aidxs = _compute_multi_axis_multi_mask(coords, _ind_ar_from_indices(indices), np.array(adv_idx,dtype=np.intp), np.array(adv_idx_pos,dtype=np.intp)) return mask, _AdvIdxInfo(aidxs, adv_idx_pos, adv_ix_len) @@ -330,9 +334,9 @@ def _compute_multi_axis_multi_mask(coords, indices, adv_idx, adv_idx_pos): # pr full_idx[ix] = indices[ixx] ixx += 1 - for i, aidx in enumerate(adv_idx[0]): + for i in range(len(adv_idx[0])): for ii in range(n_adv_idx): - full_idx[adv_idx_pos[ii]] = [aidx, aidx + 1, 1] + full_idx[adv_idx_pos[ii]] = [adv_idx[ii][i], adv_idx[ii][i] + 1, 1] partial_mask, is_slice = _compute_mask(coords, full_idx) if is_slice: From fc8acef288d8402e5eac190fdabd8d475f1804b5 Mon Sep 17 00:00:00 2001 From: Michael Mezher Date: Fri, 15 May 2020 09:11:45 -0400 Subject: [PATCH 5/6] Added test cases --- multi_adv_ix_test.py | 36 ------------------------------------ multi_adv_ix_test2.py | 34 ---------------------------------- sparse/tests/test_coo.py | 3 +++ 3 files changed, 3 insertions(+), 70 deletions(-) delete mode 100644 multi_adv_ix_test.py delete mode 100644 multi_adv_ix_test2.py diff --git a/multi_adv_ix_test.py b/multi_adv_ix_test.py deleted file mode 100644 index 8ca71f1e..00000000 --- a/multi_adv_ix_test.py +++ /dev/null @@ -1,36 +0,0 @@ -import sparse -import inspect -import numpy as np -import time -from sparse._utils import assert_eq, random_value_array - -x = sparse.random((20, 20, 20, 20), density=.5) - -ii = np.random.randint(0,20,(5000)) -index = ([0,1,16],[12,0,1],[12,10,4]) - -#r1 = x[index] -t1 = time.time() -#r1 = x[ii][:,ii] -#r1 = x[ii,ii,ii] -#r1 = x[ii,97:,:3] -r1 = x[index] -t2 = time.time() -print('te: {}'.format(t2-t1)) -print(r1.shape) - -print(r1.min(),r1.max(),r1.sum()) - -d = x.todense() -t1 = time.time() -d = d[index] -t2 = time.time() -print('te2: {}'.format(t2-t1)) -print(d.shape) -print(d.min(),d.max(),d.sum()) - - -print(np.where(d)) -print(np.where(r1.todense())) - -print(assert_eq(d, r1)) diff --git a/multi_adv_ix_test2.py b/multi_adv_ix_test2.py deleted file mode 100644 index 3e523257..00000000 --- a/multi_adv_ix_test2.py +++ /dev/null @@ -1,34 +0,0 @@ -import sparse -import inspect -import numpy as np -import time -from sparse._utils import assert_eq, random_value_array - - -#([0, 1],) * 3 -#index = ([0, 1],) * 2 -index = ([0,1],[0,1]) -#index = ([0,1]) - -print(index) - -s = sparse.random((3, 3, 3), density=0.5) -x= s.todense() - -r1 = x[index] -r2 = s[index] - - -print(r1.min(),r1.max(),r1.sum()) -print(r2.min(),r2.max(),r2.sum()) -print(r2.data) -print(r2.coords) - -print(x) -print('\n\n\n') -print(r1) -print('\n\n\n') -print(r2.todense()) - -#print(assert_eq(x[index], s[index])) - diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index c1478f29..929e81aa 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -1265,6 +1265,9 @@ def test_gt(): (1, 1, 1, Ellipsis), (Ellipsis, 1, None), # With multi-axis advanced indexing + ([0, 1],) * 2, + ([0, 1],[0, 2]), + ([0, 0, 0],[0, 1, 2],[1, 2, 1]), # Pathological - Slices larger than array (slice(None, 1000)), (slice(None), slice(None, 1000)), From 95ae8e56197cea0df3509df868a528e1aabe7a78 Mon Sep 17 00:00:00 2001 From: Michael Mezher Date: Fri, 15 May 2020 09:36:22 -0400 Subject: [PATCH 6/6] Formatting --- sparse/_coo/indexing.py | 20 ++++++++++++-------- sparse/tests/test_coo.py | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sparse/_coo/indexing.py b/sparse/_coo/indexing.py index 190d1031..98b0ca5a 100644 --- a/sparse/_coo/indexing.py +++ b/sparse/_coo/indexing.py @@ -151,11 +151,18 @@ def _mask(coords, indices, shape): adv_ix_len = len(adv_idx[0]) for ai in adv_idx: if len(ai) != adv_ix_len: - raise IndexError("shape mismatch: indexing arrays could not be broadcast together. Ensure all indexing arrays are of the same length.") + raise IndexError( + "shape mismatch: indexing arrays could not be broadcast together. Ensure all indexing arrays are of the same length." + ) if ai.ndim != 1: raise IndexError("Only one-dimensional iterable indices supported.") - mask, aidxs = _compute_multi_axis_multi_mask(coords, _ind_ar_from_indices(indices), np.array(adv_idx,dtype=np.intp), np.array(adv_idx_pos,dtype=np.intp)) + mask, aidxs = _compute_multi_axis_multi_mask( + coords, + _ind_ar_from_indices(indices), + np.array(adv_idx, dtype=np.intp), + np.array(adv_idx_pos, dtype=np.intp), + ) return mask, _AdvIdxInfo(aidxs, adv_idx_pos, adv_ix_len) else: @@ -290,10 +297,10 @@ def _separate_adv_indices(indices): return new_idx, adv_idx, adv_idx_pos - - @numba.jit(nopython=True, nogil=True) -def _compute_multi_axis_multi_mask(coords, indices, adv_idx, adv_idx_pos): # pragma: no cover +def _compute_multi_axis_multi_mask( + coords, indices, adv_idx, adv_idx_pos +): # pragma: no cover """ Computes a mask with the advanced index, and also returns the advanced index dimension. @@ -383,7 +390,6 @@ def _compute_multi_mask(coords, indices, adv_idx, adv_idx_pos): # pragma: no co full_idx[:adv_idx_pos] = indices[:adv_idx_pos] full_idx[adv_idx_pos + 1 :] = indices[adv_idx_pos:] - for i, aidx in enumerate(adv_idx): full_idx[adv_idx_pos] = [aidx, aidx + 1, 1] partial_mask, is_slice = _compute_mask(coords, full_idx) @@ -464,8 +470,6 @@ def _compute_mask(coords, indices): # pragma: no cover n_matches = np.intp(coords.shape[1]) i = 0 - #print('COORD SHAPE: {}'.format(coords.shape)) - #print('IXS SHAPE: {}'.format(indices.shape)) while i < len(indices): # Guesstimate whether working with pairs is more efficient or # working with the mask directly. diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 929e81aa..42042ea0 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -1266,8 +1266,8 @@ def test_gt(): (Ellipsis, 1, None), # With multi-axis advanced indexing ([0, 1],) * 2, - ([0, 1],[0, 2]), - ([0, 0, 0],[0, 1, 2],[1, 2, 1]), + ([0, 1], [0, 2]), + ([0, 0, 0], [0, 1, 2], [1, 2, 1]), # Pathological - Slices larger than array (slice(None, 1000)), (slice(None), slice(None, 1000)),