From 39830bdba9b2f5ab5366686954fcddc1e92d86e5 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 27 Feb 2024 16:38:17 -0800 Subject: [PATCH 01/20] refactor __getitem__() by removing vectorized and orthogonal indexing logic from it --- xarray/core/indexing.py | 36 +++++++++++++-------------- xarray/tests/test_indexing.py | 47 +++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 26 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 62889e03861..2d0eb9fb621 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -491,6 +491,13 @@ def _oindex_get(self, key): def _vindex_get(self, key): raise NotImplementedError("This method should be overridden") + def _check_and_raise_if_non_basic_indexer(self, key): + if isinstance(key, (VectorizedIndexer, OuterIndexer)): + raise TypeError( + "Vectorized indexing with vectorized or outer indexers is not supported. " + "Please use .vindex and .oindex properties to index the array." + ) + @property def oindex(self): return IndexCallable(self._oindex_get) @@ -597,9 +604,7 @@ def _vindex_get(self, indexer): return array[indexer] def __getitem__(self, indexer): - if isinstance(indexer, VectorizedIndexer): - array = LazilyVectorizedIndexedArray(self.array, self.key) - return array[indexer] + self._check_and_raise_if_non_basic_indexer(indexer) return type(self)(self.array, self._updated_key(indexer)) def __setitem__(self, key, value): @@ -662,6 +667,8 @@ def _vindex_get(self, indexer): return type(self)(self.array, self._updated_key(indexer)) def __getitem__(self, indexer): + + self._check_and_raise_if_non_basic_indexer(indexer) # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer.tuple): key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) @@ -712,6 +719,7 @@ def _vindex_get(self, key): return type(self)(_wrap_numpy_scalars(self.array[key])) def __getitem__(self, key): + self._check_and_raise_if_non_basic_indexer(key) return type(self)(_wrap_numpy_scalars(self.array[key])) def transpose(self, order): @@ -751,6 +759,7 @@ def _vindex_get(self, key): return type(self)(_wrap_numpy_scalars(self.array[key])) def __getitem__(self, key): + self._check_and_raise_if_non_basic_indexer(key) return type(self)(_wrap_numpy_scalars(self.array[key])) def transpose(self, order): @@ -1395,6 +1404,7 @@ def _vindex_get(self, key): return array[key.tuple] def __getitem__(self, key): + self._check_and_raise_if_non_basic_indexer(key) array, key = self._indexing_array_and_key(key) return array[key] @@ -1450,15 +1460,8 @@ def _vindex_get(self, key): raise TypeError("Vectorized indexing is not supported") def __getitem__(self, key): - if isinstance(key, BasicIndexer): - return self.array[key.tuple] - elif isinstance(key, OuterIndexer): - return self.oindex[key] - else: - if isinstance(key, VectorizedIndexer): - raise TypeError("Vectorized indexing is not supported") - else: - raise TypeError(f"Unrecognized indexer: {key}") + self._check_and_raise_if_non_basic_indexer(key) + return self.array[key.tuple] def __setitem__(self, key, value): if isinstance(key, (BasicIndexer, OuterIndexer)): @@ -1499,13 +1502,8 @@ def _vindex_get(self, key): return self.array.vindex[key.tuple] def __getitem__(self, key): - if isinstance(key, BasicIndexer): - return self.array[key.tuple] - elif isinstance(key, VectorizedIndexer): - return self.vindex[key] - else: - assert isinstance(key, OuterIndexer) - return self.oindex[key] + self._check_and_raise_if_non_basic_indexer(key) + return self.array[key.tuple] def __setitem__(self, key, value): if isinstance(key, BasicIndexer): diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 10192b6587a..996dc594f5b 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -328,6 +328,7 @@ def test_lazily_indexed_array(self) -> None: ([0, 3, 5], arr[:2]), ] for i, j in indexers: + expected_b = v[i][j] actual = v_lazy[i][j] assert expected_b.shape == actual.shape @@ -360,8 +361,15 @@ def test_vectorized_lazily_indexed_array(self) -> None: def check_indexing(v_eager, v_lazy, indexers): for indexer in indexers: - actual = v_lazy[indexer] - expected = v_eager[indexer] + if isinstance(indexer, indexing.VectorizedIndexer): + actual = v_lazy.vindex[indexer] + expected = v_eager.vindex[indexer] + elif isinstance(indexer, indexing.OuterIndexer): + actual = v_lazy.oindex[indexer] + expected = v_eager.oindex[indexer] + else: + actual = v_lazy[indexer] + expected = v_eager[indexer] assert expected.shape == actual.shape assert isinstance( actual._data, @@ -556,7 +564,9 @@ def test_arrayize_vectorized_indexer(self) -> None: vindex_array = indexing._arrayize_vectorized_indexer( vindex, self.data.shape ) - np.testing.assert_array_equal(self.data[vindex], self.data[vindex_array]) + np.testing.assert_array_equal( + self.data.vindex[vindex], self.data.vindex[vindex_array] + ) actual = indexing._arrayize_vectorized_indexer( indexing.VectorizedIndexer((slice(None),)), shape=(5,) @@ -667,16 +677,39 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: indexer = get_indexers(shape, indexer_mode) backend_ind, np_ind = indexing.decompose_indexer(indexer, shape, indexing_support) + indexing_adapter = indexing.NumpyIndexingAdapter(data) + + # Dispatch to appropriate indexing method + if indexer_mode.startswith("vectorized"): + expected = indexing_adapter.vindex[indexer] + + elif indexer_mode.startswith("outer"): + expected = indexing_adapter.oindex[indexer] + + else: + expected = indexing_adapter[indexer] # Basic indexing + + if isinstance(backend_ind, indexing.VectorizedIndexer): + array = indexing_adapter.vindex[backend_ind] + elif isinstance(backend_ind, indexing.OuterIndexer): + array = indexing_adapter.oindex[backend_ind] + else: + array = indexing_adapter[backend_ind] - expected = indexing.NumpyIndexingAdapter(data)[indexer] - array = indexing.NumpyIndexingAdapter(data)[backend_ind] if len(np_ind.tuple) > 0: - array = indexing.NumpyIndexingAdapter(array)[np_ind] + array_indexing_adapter = indexing.NumpyIndexingAdapter(array) + if isinstance(np_ind, indexing.VectorizedIndexer): + array = array_indexing_adapter.vindex[np_ind] + elif isinstance(np_ind, indexing.OuterIndexer): + array = array_indexing_adapter.oindex[np_ind] + else: + array = array_indexing_adapter[np_ind] np.testing.assert_array_equal(expected, array) if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) - array = indexing.NumpyIndexingAdapter(data)[combined_ind] + # combined_ind is a VectorizedIndexer + array = indexing_adapter.vindex[combined_ind] np.testing.assert_array_equal(expected, array) From 9f758eda52a66c9a591639034349702ce1f363c0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Feb 2024 18:35:18 -0800 Subject: [PATCH 02/20] implement explicit routing of vectorized and outer indexers --- xarray/core/indexing.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 2d0eb9fb621..06ee8990e66 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -524,7 +524,15 @@ def get_duck_array(self): def __getitem__(self, key): key = expanded_indexer(key, self.ndim) - result = self.array[self.indexer_cls(key)] + indexer = self.indexer_cls(key) + + if isinstance(indexer, OuterIndexer): + result = self.array.oindex[indexer] + elif isinstance(indexer, VectorizedIndexer): + result = self.array.vindex[indexer] + else: + result = self.array[indexer] + if isinstance(result, ExplicitlyIndexed): return type(self)(result, self.indexer_cls) else: @@ -601,7 +609,7 @@ def _oindex_get(self, indexer): def _vindex_get(self, indexer): array = LazilyVectorizedIndexedArray(self.array, self.key) - return array[indexer] + return array.vindex[indexer] def __getitem__(self, indexer): self._check_and_raise_if_non_basic_indexer(indexer) @@ -667,7 +675,6 @@ def _vindex_get(self, indexer): return type(self)(self.array, self._updated_key(indexer)) def __getitem__(self, indexer): - self._check_and_raise_if_non_basic_indexer(indexer) # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer.tuple): @@ -713,10 +720,10 @@ def get_duck_array(self): return self.array.get_duck_array() def _oindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array[key])) + return type(self)(_wrap_numpy_scalars(self.array.oindex[key])) def _vindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array[key])) + return type(self)(_wrap_numpy_scalars(self.array.vindex[key])) def __getitem__(self, key): self._check_and_raise_if_non_basic_indexer(key) @@ -753,10 +760,10 @@ def get_duck_array(self): return self.array.get_duck_array() def _oindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array[key])) + return type(self)(_wrap_numpy_scalars(self.array.oindex[key])) def _vindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array[key])) + return type(self)(_wrap_numpy_scalars(self.array.vindex[key])) def __getitem__(self, key): self._check_and_raise_if_non_basic_indexer(key) @@ -921,7 +928,13 @@ def explicit_indexing_adapter( result = raw_indexing_method(raw_key.tuple) if numpy_indices.tuple: # index the loaded np.ndarray - result = NumpyIndexingAdapter(result)[numpy_indices] + indexable = NumpyIndexingAdapter(result) + if isinstance(numpy_indices, VectorizedIndexer): + result = indexable.vindex[numpy_indices] + elif isinstance(numpy_indices, OuterIndexer): + result = indexable.oindex[numpy_indices] + else: + result = indexable[numpy_indices] return result @@ -1601,7 +1614,13 @@ def __getitem__( (key,) = key if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional - return NumpyIndexingAdapter(np.asarray(self))[indexer] + indexable = NumpyIndexingAdapter(np.asarray(self)) + if isinstance(indexer, VectorizedIndexer): + return indexable.vindex[indexer] + elif isinstance(indexer, OuterIndexer): + return indexable.oindex[indexer] + else: + return indexable[indexer] result = self.array[key] From 283e733fb2a50dbe5e7c0793240f5373043ab4eb Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Feb 2024 18:38:18 -0800 Subject: [PATCH 03/20] Add VectorizedIndexer and OuterIndexer to ScipyArrayWrapper's __getitem__ method --- xarray/backends/scipy_.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 1ecc70cf376..8448e1db9aa 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -23,7 +23,7 @@ is_valid_nc3_name, ) from xarray.backends.store import StoreBackendEntrypoint -from xarray.core.indexing import NumpyIndexingAdapter +from xarray.core.indexing import NumpyIndexingAdapter, OuterIndexer, VectorizedIndexer from xarray.core.utils import ( Frozen, FrozenDict, @@ -64,7 +64,13 @@ def get_variable(self, needs_lock=True): return ds.variables[self.variable_name] def __getitem__(self, key): - data = NumpyIndexingAdapter(self.get_variable().data)[key] + indexable = NumpyIndexingAdapter(self.get_variable().data) + if isinstance(key, OuterIndexer): + data = indexable.oindex[key] + elif isinstance(key, VectorizedIndexer): + data = indexable.vindex[key] + else: + data = indexable[key] # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. From 7b7451e021af978cc69e7b03dbb9eb0c404dca7d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Feb 2024 18:45:07 -0800 Subject: [PATCH 04/20] Refactor indexing in LazilyIndexedArray and LazilyVectorizedIndexedArray --- xarray/core/indexing.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 06ee8990e66..05db154c91b 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -592,7 +592,12 @@ def shape(self) -> tuple[int, ...]: return tuple(shape) def get_duck_array(self): - array = self.array[self.key] + if isinstance(self.key, OuterIndexer): + array = self.array.oindex[self.key] + elif isinstance(self.key, VectorizedIndexer): + array = self.array.vindex[self.key] + else: + array = self.array[self.key] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) @@ -656,7 +661,12 @@ def shape(self) -> tuple[int, ...]: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - array = self.array[self.key] + if isinstance(self.key, OuterIndexer): + array = self.array.oindex[self.key] + elif isinstance(self.key, VectorizedIndexer): + array = self.array.vindex[self.key] + else: + array = self.array[self.key] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) From f84a3d657df0c0b5bf44e404a2de61188a0b9cae Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Feb 2024 19:02:16 -0800 Subject: [PATCH 05/20] Add vindex and oindex methods to StackedBytesArray --- xarray/coding/strings.py | 6 ++++++ xarray/tests/test_coding_strings.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 2c4501dfb0f..b3b9d8d1041 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -238,6 +238,12 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" + def _vindex_get(self, key): + return _numpy_char_to_bytes(self.array.vindex[key]) + + def _oindex_get(self, key): + return _numpy_char_to_bytes(self.array.oindex[key]) + def __getitem__(self, key): # require slicing the last dimension completely key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index f1eca00f9a1..51f63ea72dd 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -181,7 +181,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: V = IndexerMaker(indexing.VectorizedIndexer) indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked[indexer] + actual = stacked.vindex[indexer] assert_array_equal(actual, expected) From d9dc5df1cdd4797a80807f9c10211f7b313f9d43 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Feb 2024 19:25:47 -0800 Subject: [PATCH 06/20] handle explicitlyindexed arrays --- xarray/core/indexing.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 05db154c91b..ab80bd7ef22 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -592,12 +592,18 @@ def shape(self) -> tuple[int, ...]: return tuple(shape) def get_duck_array(self): - if isinstance(self.key, OuterIndexer): + # TODO: Remove explicitlyIndexed special case when we implement fall back .oindex, .vindex properties on BackendArray base class + if not isinstance(self.array, ExplicitlyIndexed) and isinstance( + self.key, OuterIndexer + ): array = self.array.oindex[self.key] - elif isinstance(self.key, VectorizedIndexer): + elif not isinstance(self.array, ExplicitlyIndexed) and isinstance( + self.key, VectorizedIndexer + ): array = self.array.vindex[self.key] else: array = self.array[self.key] + # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) @@ -661,9 +667,14 @@ def shape(self) -> tuple[int, ...]: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - if isinstance(self.key, OuterIndexer): + # TODO: Remove explicitlyIndexed special case when we implement fall back .oindex, .vindex properties on BackendArray base class + if not isinstance(self.array, ExplicitlyIndexed) and isinstance( + self.key, OuterIndexer + ): array = self.array.oindex[self.key] - elif isinstance(self.key, VectorizedIndexer): + elif not isinstance(self.array, ExplicitlyIndexed) and isinstance( + self.key, VectorizedIndexer + ): array = self.array.vindex[self.key] else: array = self.array[self.key] From 9faa280ebb582b0ce3293c395fa231a62420c84d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Feb 2024 19:34:51 -0800 Subject: [PATCH 07/20] Refactor LazilyIndexedArray and LazilyVectorizedIndexedArray classes --- xarray/core/indexing.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index ab80bd7ef22..54e3f2bc944 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -593,14 +593,13 @@ def shape(self) -> tuple[int, ...]: def get_duck_array(self): # TODO: Remove explicitlyIndexed special case when we implement fall back .oindex, .vindex properties on BackendArray base class - if not isinstance(self.array, ExplicitlyIndexed) and isinstance( - self.key, OuterIndexer - ): - array = self.array.oindex[self.key] - elif not isinstance(self.array, ExplicitlyIndexed) and isinstance( - self.key, VectorizedIndexer - ): - array = self.array.vindex[self.key] + if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + if isinstance(self.key, VectorizedIndexer): + array = self.array.vindex[self.key] + elif isinstance(self.key, OuterIndexer): + array = self.array.oindex[self.key] + else: + array = self.array[self.key] else: array = self.array[self.key] @@ -668,14 +667,13 @@ def shape(self) -> tuple[int, ...]: def get_duck_array(self): # TODO: Remove explicitlyIndexed special case when we implement fall back .oindex, .vindex properties on BackendArray base class - if not isinstance(self.array, ExplicitlyIndexed) and isinstance( - self.key, OuterIndexer - ): - array = self.array.oindex[self.key] - elif not isinstance(self.array, ExplicitlyIndexed) and isinstance( - self.key, VectorizedIndexer - ): - array = self.array.vindex[self.key] + if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + if isinstance(self.key, VectorizedIndexer): + array = self.array.vindex[self.key] + elif isinstance(self.key, OuterIndexer): + array = self.array.oindex[self.key] + else: + array = self.array[self.key] else: array = self.array[self.key] # self.array[self.key] is now a numpy array when From 928296fc1e4ac7d026c9c822f7a7172db2040f07 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 28 Feb 2024 19:35:56 -0800 Subject: [PATCH 08/20] Remove TODO comments in indexing.py --- xarray/core/indexing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 54e3f2bc944..745ebe8780e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -592,7 +592,6 @@ def shape(self) -> tuple[int, ...]: return tuple(shape) def get_duck_array(self): - # TODO: Remove explicitlyIndexed special case when we implement fall back .oindex, .vindex properties on BackendArray base class if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): if isinstance(self.key, VectorizedIndexer): array = self.array.vindex[self.key] @@ -666,7 +665,7 @@ def shape(self) -> tuple[int, ...]: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - # TODO: Remove explicitlyIndexed special case when we implement fall back .oindex, .vindex properties on BackendArray base class + if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): if isinstance(self.key, VectorizedIndexer): array = self.array.vindex[self.key] From 2bd21dcb996424bf9a3a8bb40171ac0d47ade1aa Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Feb 2024 20:29:00 -0800 Subject: [PATCH 09/20] use indexing.explicit_indexing_adapter() in scipy backend --- xarray/backends/scipy_.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 8448e1db9aa..3af8e09d0cc 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -23,7 +23,7 @@ is_valid_nc3_name, ) from xarray.backends.store import StoreBackendEntrypoint -from xarray.core.indexing import NumpyIndexingAdapter, OuterIndexer, VectorizedIndexer +from xarray.core import indexing from xarray.core.utils import ( Frozen, FrozenDict, @@ -63,14 +63,15 @@ def get_variable(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) return ds.variables[self.variable_name] + def _getitem(self, key): + with self.datastore.lock: + data = self.get_variable(needs_lock=False).data + return data[key] + def __getitem__(self, key): - indexable = NumpyIndexingAdapter(self.get_variable().data) - if isinstance(key, OuterIndexer): - data = indexable.oindex[key] - elif isinstance(key, VectorizedIndexer): - data = indexable.vindex[key] - else: - data = indexable[key] + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. From 7af526d0f30b1332d9265e809e756b91dbfce12a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Feb 2024 20:38:06 -0800 Subject: [PATCH 10/20] update docstring --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 745ebe8780e..009e506326f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1027,10 +1027,10 @@ def _decompose_vectorized_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3]))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array)[backend_indexer] + ... array = NumpyIndexingAdapter(array).oindex[backend_indexer] >>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # vectorized indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array)[np_indexer] + ... NumpyIndexingAdapter(array).vindex[np_indexer] array([ 2, 21, 8]) """ assert isinstance(indexer, VectorizedIndexer) From 96324a999b27462c883ff8f12a08749c85722b85 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 29 Feb 2024 20:43:54 -0800 Subject: [PATCH 11/20] fix docstring --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 009e506326f..8d78ed14b31 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1112,7 +1112,7 @@ def _decompose_outer_indexer( ... array = NumpyIndexingAdapter(array)[backend_indexer] >>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # outer indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array)[np_indexer] + ... NumpyIndexingAdapter(array).oindex[np_indexer] array([[ 2, 3, 2], [14, 15, 14], [ 8, 9, 8]]) From ead0b9043f87e4e4660f9c56673ea51844585b31 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 1 Mar 2024 12:06:45 -0800 Subject: [PATCH 12/20] Add _oindex_get and _vindex_get methods to NativeEndiannessArray and BoolTypeArray --- xarray/coding/variables.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index b5e4167f2b2..05541201d34 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -109,6 +109,12 @@ def __init__(self, array) -> None: def dtype(self) -> np.dtype: return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize)) + def _oindex_get(self, key): + return np.asarray(self.array.oindex[key], dtype=self.dtype) + + def _vindex_get(self, key): + return np.asarray(self.array.vindex[key], dtype=self.dtype) + def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) @@ -141,6 +147,12 @@ def __init__(self, array) -> None: def dtype(self) -> np.dtype: return np.dtype("bool") + def _oindex_get(self, key): + return np.asarray(self.array.oindex[key], dtype=self.dtype) + + def _vindex_get(self, key): + return np.asarray(self.array.vindex[key], dtype=self.dtype) + def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) From 5a78ba5443c77dbdf7cb94fc69073331d669a7cc Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 6 Mar 2024 18:34:11 -0800 Subject: [PATCH 13/20] Update indexing support in ScipyArrayWrapper --- xarray/backends/scipy_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 3af8e09d0cc..154d82bb871 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -70,7 +70,7 @@ def _getitem(self, key): def __getitem__(self, key): data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even From bec65a799e8b08ff83ce662174f163a629eeea86 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:34:45 -0800 Subject: [PATCH 14/20] Update xarray/tests/test_indexing.py Co-authored-by: Deepak Cherian --- xarray/tests/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 996dc594f5b..a1ea78c68c0 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -708,7 +708,7 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) - # combined_ind is a VectorizedIndexer + assert isinstance(combined_ind, VectorizedIndexer) array = indexing_adapter.vindex[combined_ind] np.testing.assert_array_equal(expected, array) From a578fa66647428b3c78553a450c52835f86fdaa9 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 6 Mar 2024 18:37:13 -0800 Subject: [PATCH 15/20] Fix assert statement in test_decompose_indexers --- xarray/tests/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index a1ea78c68c0..c3989bbf23e 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -708,7 +708,7 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) - assert isinstance(combined_ind, VectorizedIndexer) + assert isinstance(combined_ind, indexing.VectorizedIndexer) array = indexing_adapter.vindex[combined_ind] np.testing.assert_array_equal(expected, array) From 6b6163fc09de0b73c6abd8ec28fc3ac3b788a586 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 6 Mar 2024 19:10:45 -0800 Subject: [PATCH 16/20] add comments to clarifying why the else branch is needed --- xarray/core/indexing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8d78ed14b31..4e2c4485774 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -600,6 +600,8 @@ def get_duck_array(self): else: array = self.array[self.key] else: + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray so use its __getitem__ array = self.array[self.key] # self.array[self.key] is now a numpy array when @@ -673,6 +675,9 @@ def get_duck_array(self): array = self.array.oindex[self.key] else: array = self.array[self.key] + + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray so use its __getitem__ else: array = self.array[self.key] # self.array[self.key] is now a numpy array when From d806772afea0debb9088c8b26493e726ca1cec29 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 7 Mar 2024 09:21:17 -0800 Subject: [PATCH 17/20] Add _oindex_get and _vindex_get methods to _ElementwiseFunctionArray --- xarray/coding/variables.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 05541201d34..23c58eb85a2 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -68,6 +68,12 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): def dtype(self) -> np.dtype: return np.dtype(self._dtype) + def _oindex_get(self, key): + return type(self)(self.array.oindex[key], self.func, self.dtype) + + def _vindex_get(self, key): + return type(self)(self.array.vindex[key], self.func, self.dtype) + def __getitem__(self, key): return type(self)(self.array[key], self.func, self.dtype) From cc10b80e0869b7286b074a574ad45fdaf52d4f35 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 11 Mar 2024 11:13:50 -0700 Subject: [PATCH 18/20] update whats-new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index eb5dcbda36f..e3f170a8810 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,6 +29,9 @@ New Features - Add the ``.vindex`` property to Explicitly Indexed Arrays for vectorized indexing functionality. (:issue:`8238`, :pull:`8780`) By `Anderson Banihirwe `_. +- Expand use of ``.oindex`` and ``.vindex`` properties. (:pull: `8790`) + By `Anderson Banihirwe `_ and `Deepak Cherian `_. + Breaking changes ~~~~~~~~~~~~~~~~ From 97617af8c2fd6b7ea345d1f3c93191b637177094 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 14 Mar 2024 14:06:54 -0700 Subject: [PATCH 19/20] Refactor apply_indexer function in indexing.py and variable.py for code reuse --- xarray/core/indexing.py | 24 ++++++++++++------------ xarray/core/variable.py | 17 ++++------------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 4e2c4485774..fc509ab6ddd 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -952,15 +952,20 @@ def explicit_indexing_adapter( if numpy_indices.tuple: # index the loaded np.ndarray indexable = NumpyIndexingAdapter(result) - if isinstance(numpy_indices, VectorizedIndexer): - result = indexable.vindex[numpy_indices] - elif isinstance(numpy_indices, OuterIndexer): - result = indexable.oindex[numpy_indices] - else: - result = indexable[numpy_indices] + result = apply_indexer(indexable, numpy_indices) return result +def apply_indexer(indexable, indexer): + """Apply an indexer to an indexable object.""" + if isinstance(indexer, VectorizedIndexer): + return indexable.vindex[indexer] + elif isinstance(indexer, OuterIndexer): + return indexable.oindex[indexer] + else: + return indexable[indexer] + + def decompose_indexer( indexer: ExplicitIndexer, shape: tuple[int, ...], indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: @@ -1638,12 +1643,7 @@ def __getitem__( if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) - if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer] - elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer] - else: - return indexable[indexer] + return apply_indexer(indexable, indexer) result = self.array[key] diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cac4d3049e2..a03e93ac699 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -761,12 +761,8 @@ def __getitem__(self, key) -> Self: dims, indexer, new_order = self._broadcast_indexes(key) indexable = as_indexable(self._data) - if isinstance(indexer, OuterIndexer): - data = indexable.oindex[indexer] - elif isinstance(indexer, VectorizedIndexer): - data = indexable.vindex[indexer] - else: - data = indexable[indexer] + data = indexing.apply_indexer(indexable, indexer) + if new_order: data = np.moveaxis(data, range(len(new_order)), new_order) return self._finalize_indexing_result(dims, data) @@ -791,6 +787,7 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA): dims, indexer, new_order = self._broadcast_indexes(key) if self.size: + if is_duck_dask_array(self._data): # dask's indexing is faster this way; also vindex does not # support negative indices yet: @@ -800,14 +797,8 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA): actual_indexer = indexer indexable = as_indexable(self._data) + data = indexing.apply_indexer(indexable, actual_indexer) - if isinstance(indexer, OuterIndexer): - data = indexable.oindex[indexer] - - elif isinstance(indexer, VectorizedIndexer): - data = indexable.vindex[indexer] - else: - data = indexable[actual_indexer] mask = indexing.create_mask(indexer, self.shape, data) # we need to invert the mask in order to pass data first. This helps # pint to choose the correct unit From 0ec56cd7fec939ab32d3216abc54e66ce50c0fc3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 14 Mar 2024 21:38:24 -0600 Subject: [PATCH 20/20] cleanup --- xarray/core/indexing.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index fc509ab6ddd..ea8ae44bb4d 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -526,12 +526,7 @@ def __getitem__(self, key): key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(key) - if isinstance(indexer, OuterIndexer): - result = self.array.oindex[indexer] - elif isinstance(indexer, VectorizedIndexer): - result = self.array.vindex[indexer] - else: - result = self.array[indexer] + result = apply_indexer(self.array, indexer) if isinstance(result, ExplicitlyIndexed): return type(self)(result, self.indexer_cls) @@ -593,12 +588,7 @@ def shape(self) -> tuple[int, ...]: def get_duck_array(self): if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - if isinstance(self.key, VectorizedIndexer): - array = self.array.vindex[self.key] - elif isinstance(self.key, OuterIndexer): - array = self.array.oindex[self.key] - else: - array = self.array[self.key] + array = apply_indexer(self.array, self.key) else: # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray so use its __getitem__ @@ -669,16 +659,10 @@ def shape(self) -> tuple[int, ...]: def get_duck_array(self): if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - if isinstance(self.key, VectorizedIndexer): - array = self.array.vindex[self.key] - elif isinstance(self.key, OuterIndexer): - array = self.array.oindex[self.key] - else: - array = self.array[self.key] - - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ + array = apply_indexer(self.array, self.key) else: + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray so use its __getitem__ array = self.array[self.key] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass