diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index a4e20b7131..5eaafefbf4 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -48,7 +48,7 @@ jobs: conda activate zarr-env mkdir ~/blob_emulator azurite -l ~/blob_emulator --debug debug.log 2>&1 > stdouterr.log & - pytest + pytest -sv --timeout=300 env: ZARR_TEST_ABS: 1 - name: Conda info diff --git a/docs/release.rst b/docs/release.rst index 6c814cbedb..0810946ee6 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,6 +6,12 @@ Release notes Unreleased ---------- +Enhancements +~~~~~~~~~~~~ + +* array indexing with [] (getitem and setitem) now supports fancy indexing. + By :user:`Juan Nunez-Iglesias `; :issue:`725`. + .. _release_2.10.2: 2.10.2 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index a3421608cc..68673f1295 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -509,7 +509,7 @@ e.g.:: [10, 11, 12, -2, 14]]) For convenience, coordinate indexing is also available via the ``vindex`` -property, e.g.:: +property, as well as the square bracket operator, e.g.:: >>> z.vindex[[0, 2], [1, 3]] array([-1, -2]) @@ -518,6 +518,16 @@ property, e.g.:: array([[ 0, -3, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, -4, 14]]) + >>> z[[0, 2], [1, 3]] + array([-3, -4]) + +When the indexing arrays have different shapes, they are broadcast together. +That is, the following two calls are equivalent:: + + >>> z[1, [1, 3]] + array([5, 7]) + >>> z[[1, 1], [1, 3]] + array([5, 7]) Indexing with a mask array ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 10f5990837..4ac0c6c294 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,6 +17,7 @@ coverage flake8==3.9.2 pytest-cov==2.12.1 pytest-doctestplus==0.11.0 +pytest-timeout==1.4.2 h5py==3.4.0 fsspec[s3]==2021.10.0 moto[server]>=1.3.14 diff --git a/zarr/core.py b/zarr/core.py index c1a1c341ff..f53c2b9b05 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -25,6 +25,7 @@ ensure_tuple, err_too_many_indices, is_contiguous_selection, + is_pure_fancy_indexing, is_scalar, pop_fields, ) @@ -351,7 +352,7 @@ def attrs(self): @property def ndim(self): """Number of dimensions.""" - return len(self.shape) + return len(self._shape) @property def _size(self): @@ -658,8 +659,20 @@ def __getitem__(self, selection): Slices with step > 1 are supported, but slices with negative step are not. Currently the implementation for __getitem__ is provided by - :func:`get_basic_selection`. For advanced ("fancy") indexing, see the methods - listed under See Also. + :func:`vindex` if the indexing is pure fancy indexing (ie a + broadcast-compatible tuple of integer array indices), or by + :func:`set_basic_selection` otherwise. + + Effectively, this means that the following indexing modes are supported: + + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) + + For specific indexing options including outer indexing, see the + methods listed under See Also. See Also -------- @@ -668,9 +681,12 @@ def __getitem__(self, selection): set_orthogonal_selection, vindex, oindex, __setitem__ """ - - fields, selection = pop_fields(selection) - return self.get_basic_selection(selection, fields=fields) + fields, pure_selection = pop_fields(selection) + if is_pure_fancy_indexing(pure_selection, self.ndim): + result = self.vindex[selection] + else: + result = self.get_basic_selection(pure_selection, fields=fields) + return result def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): """Retrieve data for an item or region of the array. @@ -1208,8 +1224,19 @@ def __setitem__(self, selection, value): Slices with step > 1 are supported, but slices with negative step are not. Currently the implementation for __setitem__ is provided by - :func:`set_basic_selection`, which means that only integers and slices are - supported within the selection. For advanced ("fancy") indexing, see the + :func:`vindex` if the indexing is pure fancy indexing (ie a + broadcast-compatible tuple of integer array indices), or by + :func:`set_basic_selection` otherwise. + + Effectively, this means that the following indexing modes are supported: + + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) + + For specific indexing options including outer indexing, see the methods listed under See Also. See Also @@ -1219,9 +1246,11 @@ def __setitem__(self, selection, value): set_orthogonal_selection, vindex, oindex, __getitem__ """ - - fields, selection = pop_fields(selection) - self.set_basic_selection(selection, value, fields=fields) + fields, pure_selection = pop_fields(selection) + if is_pure_fancy_indexing(pure_selection, self.ndim): + self.vindex[selection] = value + else: + self.set_basic_selection(pure_selection, value, fields=fields) def set_basic_selection(self, selection, value, fields=None): """Modify data for an item or region of the array. diff --git a/zarr/indexing.py b/zarr/indexing.py index e58e7ba339..2e9f7c8c03 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -16,9 +16,23 @@ def is_integer(x): + """True if x is an integer (both pure Python or NumPy). + + Note that Python's bool is considered an integer too. + """ return isinstance(x, numbers.Integral) +def is_integer_list(x): + """True if x is a list of integers. + + This function assumes ie *does not check* that all elements of the list + have the same type. Mixed type lists will result in other errors that will + bubble up anyway. + """ + return isinstance(x, list) and len(x) > 0 and is_integer(x[0]) + + def is_integer_array(x, ndim=None): t = hasattr(x, 'shape') and hasattr(x, 'dtype') and x.dtype.kind in 'ui' if ndim is not None: @@ -41,6 +55,49 @@ def is_scalar(value, dtype): return False +def is_pure_fancy_indexing(selection, ndim): + """Check whether a selection contains only scalars or integer array-likes. + + Parameters + ---------- + selection : tuple, slice, or scalar + A valid selection value for indexing into arrays. + + Returns + ------- + is_pure : bool + True if the selection is a pure fancy indexing expression (ie not mixed + with boolean or slices). + """ + if ndim == 1: + if is_integer_list(selection) or is_integer_array(selection): + return True + # if not, we go through the normal path below, because a 1-tuple + # of integers is also allowed. + no_slicing = ( + isinstance(selection, tuple) + and len(selection) == ndim + and not ( + any(isinstance(elem, slice) or elem is Ellipsis + for elem in selection) + ) + ) + return ( + no_slicing and + all( + is_integer(elem) + or is_integer_list(elem) + or is_integer_array(elem) + for elem in selection + ) and + any( + is_integer_list(elem) + or is_integer_array(elem) + for elem in selection + ) + ) + + def normalize_integer_selection(dim_sel, dim_len): # normalize type to int @@ -833,10 +890,14 @@ def make_slice_selection(selection): ls = [] for dim_selection in selection: if is_integer(dim_selection): - ls.append(slice(dim_selection, dim_selection + 1, 1)) + ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) elif isinstance(dim_selection, np.ndarray): if len(dim_selection) == 1: - ls.append(slice(dim_selection[0], dim_selection[0] + 1, 1)) + ls.append( + slice( + int(dim_selection[0]), int(dim_selection[0]) + 1, 1 + ) + ) else: raise ArrayIndexError() else: diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 8c534f8e4a..a58a309534 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -4,6 +4,7 @@ import zarr from zarr.indexing import ( + make_slice_selection, normalize_integer_selection, oindex, oindex_set, @@ -198,15 +199,15 @@ def test_get_basic_selection_1d(): for selection in basic_selections_1d: _test_get_basic_selection(a, z, selection) - bad_selections = basic_selections_1d_bad + [ - [0, 1], # fancy indexing - ] - for selection in bad_selections: + for selection in basic_selections_1d_bad: with pytest.raises(IndexError): z.get_basic_selection(selection) with pytest.raises(IndexError): z[selection] + with pytest.raises(IndexError): + z.get_basic_selection([1, 0]) + basic_selections_2d = [ # single row @@ -274,7 +275,6 @@ def test_get_basic_selection_2d(): bad_selections = basic_selections_2d_bad + [ # integer arrays [0, 1], - ([0, 1], [0, 1]), (slice(None), [0, 1]), ] for selection in bad_selections: @@ -282,6 +282,68 @@ def test_get_basic_selection_2d(): z.get_basic_selection(selection) with pytest.raises(IndexError): z[selection] + # check fallback on fancy indexing + fancy_selection = ([0, 1], [0, 1]) + np.testing.assert_array_equal(z[fancy_selection], [0, 11]) + + +def test_fancy_indexing_fallback_on_get_setitem(): + z = zarr.zeros((20, 20)) + z[[1, 2, 3], [1, 2, 3]] = 1 + np.testing.assert_array_equal( + z[:4, :4], + [ + [0, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1], + ], + ) + np.testing.assert_array_equal( + z[[1, 2, 3], [1, 2, 3]], 1 + ) + # test broadcasting + np.testing.assert_array_equal( + z[1, [1, 2, 3]], [1, 0, 0] + ) + # test 1D fancy indexing + z2 = zarr.zeros(5) + z2[[1, 2, 3]] = 1 + np.testing.assert_array_equal( + z2, [0, 1, 1, 1, 0] + ) + + +def test_fancy_indexing_doesnt_mix_with_slicing(): + z = zarr.zeros((20, 20)) + with pytest.raises(IndexError): + z[[1, 2, 3], :] = 2 + with pytest.raises(IndexError): + np.testing.assert_array_equal( + z[[1, 2, 3], :], 0 + ) + + +def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): + z2 = zarr.zeros((5, 5, 5)) + with pytest.raises(IndexError): + z2[[1, 2, 3], [1, 2, 3]] = 2 + with pytest.raises(IndexError): + np.testing.assert_array_equal( + z2[[1, 2, 3], [1, 2, 3]], 0 + ) + with pytest.raises(IndexError): + z2[[1, 2, 3]] = 2 + with pytest.raises(IndexError): + np.testing.assert_array_equal( + z2[[1, 2, 3]], 0 + ) + with pytest.raises(IndexError): + z2[..., [1, 2, 3]] = 2 + with pytest.raises(IndexError): + np.testing.assert_array_equal( + z2[..., [1, 2, 3]], 0 + ) def test_set_basic_selection_0d(): @@ -1373,3 +1435,10 @@ def test_PartialChunkIterator(selection, arr, expected): PCI = PartialChunkIterator(selection, arr.shape) results = list(PCI) assert results == expected + + +def test_slice_selection_uints(): + arr = np.arange(24).reshape((4, 6)) + idx = np.uint64(3) + slice_sel = make_slice_selection((idx,)) + assert arr[slice_sel].shape == (1, 6)