@@ -1427,10 +1427,10 @@ def oindex(self) -> OIndex:
14271427 """Shortcut for orthogonal (outer) indexing, see :func:`get_oselection_numpy`"""
14281428 return OIndex (self )
14291429
1430- @property
1431- def vindex (self ) -> VIndex :
1432- """Shortcut for vectorised indexing. Not yet supported."""
1433- return VIndex (self )
1430+ # @property
1431+ # def vindex(self) -> VIndex:
1432+ # """Shortcut for vectorised indexing. Not yet supported."""
1433+ # return VIndex(self)
14341434
14351435 @property
14361436 def T (self ):
@@ -1441,24 +1441,64 @@ def T(self):
14411441
14421442 def get_fselection_numpy (self , key ):
14431443 # TODO: Make this faster for broadcasted keys
1444+ if math .prod (self .shape ) * self .dtype .itemsize < blosc2 .MAX_FAST_PATH_SIZE :
1445+ return self [:][key ] # load into memory for smallish arrays
14441446 shape = self .shape
14451447 chunks = self .chunks
14461448 _slice = ndindex .ndindex (key ).expand (shape )
1447- chunk_size = ndindex .ChunkSize (chunks )
1448-
1449- # repeated indices are grouped together
1450- intersecting_chunks = chunk_size .as_subchunks (_slice , shape ) # if _slice is (), returns all chunks
14511449 out_shape = _slice .newshape (shape )
1452-
14531450 out = np .empty (out_shape , dtype = self .dtype )
1454- for c in intersecting_chunks :
1455- sub_idx = _slice .as_subindex (c ).raw
1456- sel_idx = c .as_subindex (_slice )
1457- new_shape = sel_idx .newshape (out_shape )
1458- start , stop , step = get_ndarray_start_stop (self .ndim , c .raw , shape )
1459- chunk = np .empty (tuple (sp - st for st , sp in zip (start , stop , strict = True )), dtype = self .dtype )
1460- super ().get_slice_numpy (chunk , (start , stop ))
1461- out [sel_idx .raw ] = chunk [sub_idx ].reshape (new_shape )
1451+
1452+ if self .ndim == 1 :
1453+ # Fast path so we can avoid the costly lines in slow path
1454+ # (sub_idx = _slice.as_subindex(c).raw, sel_idx = c.as_subindex(_slice))
1455+ key = np .array (key )
1456+ if np .any (np .diff (key ) < 0 ):
1457+ idx_order = np .argsort (key )
1458+ sorted_idxs = key [idx_order ]
1459+ else :
1460+ idx_order = None
1461+ sorted_idxs = key
1462+ chunk_nitems = np .bincount (
1463+ sorted_idxs // chunks [0 ], minlength = self .schunk .nchunks
1464+ ) # number of items per chunk
1465+ chunk_nitems_cumsum = np .cumsum (chunk_nitems )
1466+ for chunk_i , c in enumerate (chunk_nitems ):
1467+ if c == 0 :
1468+ continue # no items in chunk
1469+ start = 0 if chunk_i == 0 else chunk_nitems_cumsum [chunk_i - 1 ]
1470+ stop = chunk_nitems_cumsum [chunk_i ]
1471+ selection = sorted_idxs [start :stop ]
1472+ out_selection = idx_order [start :stop ] if idx_order is not None else slice (start , stop )
1473+ to_be_loaded = np .empty ((selection [- 1 ] - selection [0 ] + 1 ,), dtype = self .dtype )
1474+ # if len(selection) < 10:
1475+ # # TODO: optimise for case of few elements and go index-by-index
1476+ # selector = out_selection.start + np.arange(out_selection.stop) if isinstance(out_selection, slice) else out_selection
1477+ # for j, idx in enumerate(sorted_idxs[start:stop]):
1478+ # out[out_selection[j]] = self[idx]
1479+ # else:
1480+ super ().get_slice_numpy (
1481+ to_be_loaded , ((selection [0 ],), (selection [- 1 ] + 1 ,))
1482+ ) # get relevant section of chunk
1483+ loc_idx = selection - selection [0 ]
1484+ out [out_selection ] = to_be_loaded [loc_idx ]
1485+
1486+ else :
1487+ chunk_size = ndindex .ChunkSize (chunks )
1488+ # repeated indices are grouped together
1489+ intersecting_chunks = chunk_size .as_subchunks (
1490+ _slice , shape
1491+ ) # if _slice is (), returns all chunks
1492+ for c in intersecting_chunks :
1493+ sub_idx = _slice .as_subindex (c ).raw
1494+ sel_idx = c .as_subindex (_slice )
1495+ new_shape = sel_idx .newshape (out_shape )
1496+ start , stop , step = get_ndarray_start_stop (self .ndim , c .raw , shape )
1497+ chunk = np .empty (
1498+ tuple (sp - st for st , sp in zip (start , stop , strict = True )), dtype = self .dtype
1499+ )
1500+ super ().get_slice_numpy (chunk , (start , stop ))
1501+ out [sel_idx .raw ] = chunk [sub_idx ].reshape (new_shape )
14621502
14631503 return out
14641504
@@ -1474,7 +1514,8 @@ def get_oselection_numpy(self, key):
14741514
14751515 def set_oselection_numpy (self , key , arr : np .ndarray ):
14761516 """
1477- Select independently from self along axes specified in key and set to entries in arr. Key must be same length as self shape.
1517+ Select independently from self along axes specified in key and set to entries in arr.
1518+ Key must be same length as self shape.
14781519 See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing.
14791520 """
14801521 return super ().set_oindex_numpy (key , arr )
@@ -1483,7 +1524,10 @@ def __getitem__( # noqa: C901
14831524 self ,
14841525 key : int | slice | Sequence [slice | int ] | np .ndarray [np .bool_ ] | NDArray | blosc2 .LazyExpr | str ,
14851526 ) -> np .ndarray | blosc2 .LazyExpr :
1486- """Retrieve a (multidimensional) slice as specified by the key. Matches NumPy fancy indexing behaviour.
1527+ """
1528+ Retrieve a (multidimensional) slice as specified by the key.
1529+
1530+ Note that this __getitem__ matches NumPy fancy indexing behaviour.
14871531
14881532 Parameters
14891533 ----------
@@ -1547,10 +1591,9 @@ def __getitem__( # noqa: C901
15471591 key = key [:]
15481592 # This is a fast path for the case where the key is a short list of 1-d indices
15491593 # For the rest, use an array of booleans.
1550- if key .dtype == np .int64 and key .ndim == 1 and self .ndim == 1 :
1551- return extract_values (self , key )
1552- else :
1553- return self .get_fselection_numpy (key ) # fancy index
1594+ # if key.dtype == np.int64 and key.ndim == 1 and self.ndim == 1:
1595+ # return extract_values(self, key)
1596+ return self .get_fselection_numpy (key ) # fancy index
15541597 else :
15551598 # The more general case (this is quite slow)
15561599 # If the key is a LazyExpr, decorate with ``where`` and return it
@@ -4400,13 +4443,13 @@ def __setitem__(self, selection, input) -> np.ndarray:
44004443 return self .array .set_oselection_numpy (selection , input )
44014444
44024445
4403- class VIndex :
4404- def __init__ (self , array : NDArray ):
4405- self .array = array
4446+ # class VIndex:
4447+ # def __init__(self, array: NDArray):
4448+ # self.array = array
44064449
4407- # TODO: all this
4408- def __getitem__ (self , selection ) -> np .ndarray :
4409- return NotImplementedError
4450+ # # TODO: all this
4451+ # def __getitem__(self, selection) -> np.ndarray:
4452+ # return NotImplementedError
44104453
4411- def __setitem__ (self , selection , input ) -> np .ndarray :
4412- return NotImplementedError
4454+ # def __setitem__(self, selection, input) -> np.ndarray:
4455+ # return NotImplementedError
0 commit comments