diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 33266c3bc..93e1973ee 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -53,10 +53,10 @@ def ne_evaluate(expression, local_dict=None, **kwargs): k: v for k, v in dict(sys._getframe(_frame_depth).f_locals).items() if ( - (hasattr(v, "shape") or np.isscalar(v)) - and - # Do not overwrite the local_dict with the expression variables - not (k in local_dict or k in ("_where_x", "_where_y")) + (hasattr(v, "shape") or np.isscalar(v)) + and + # Do not overwrite the local_dict with the expression variables + not (k in local_dict or k in ("_where_x", "_where_y")) ) } if blosc2.IS_WASM: @@ -472,7 +472,7 @@ def convert_inputs(inputs): inputs_ = [] for obj in inputs: if not isinstance( - obj, np.ndarray | blosc2.NDArray | blosc2.NDField | blosc2.C2Array + obj, np.ndarray | blosc2.NDArray | blosc2.NDField | blosc2.C2Array ) and not np.isscalar(obj): try: obj = np.asarray(obj) @@ -638,10 +638,10 @@ def __init__(self): def visit_Call(self, node): # Check if the call is a numpy type-casting call if ( - isinstance(node.func, ast.Attribute) - and isinstance(node.func.value, ast.Name) - and node.func.value.id in ["np", "numpy"] - and isinstance(node.args[0], ast.Constant) + isinstance(node.func, ast.Attribute) + and isinstance(node.func.value, ast.Name) + and node.func.value.id in ["np", "numpy"] + and isinstance(node.args[0], ast.Constant) ): # Create a new temporary variable name tmp_var = f"tmp{self.tmp_counter}" @@ -863,7 +863,7 @@ def read_nchunk(arrs, info): def fill_chunk_operands( # noqa: C901 - operands, slice_, chunks_, full_chunk, aligned, nchunk, iter_disk, chunk_operands, reduc=False + operands, slice_, chunks_, full_chunk, aligned, nchunk, iter_disk, chunk_operands, reduc=False ): """Retrieve the chunk operands for evaluating an expression. 
@@ -938,9 +938,9 @@ def fill_chunk_operands( # noqa: C901 # If key is in operands, we can reuse the buffer if ( - key in chunk_operands - and chunks_ == chunk_operands[key].shape - and isinstance(value, blosc2.NDArray) + key in chunk_operands + and chunks_ == chunk_operands[key].shape + and isinstance(value, blosc2.NDArray) ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) continue @@ -955,10 +955,10 @@ def fill_chunk_operands( # noqa: C901 def fast_eval( # noqa: C901 - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], - operands: dict, - getitem: bool, - **kwargs, + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + getitem: bool, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands using a fast path. @@ -1125,11 +1125,11 @@ def compute_start_index(shape, slice_obj): def slices_eval( # noqa: C901 - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], - operands: dict, - getitem: bool, - _slice=None, - **kwargs, + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + getitem: bool, + _slice=None, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands. 
@@ -1259,9 +1259,9 @@ def slices_eval( # noqa: C901 continue # If key is in operands, we can reuse the buffer if ( - key in chunk_operands - and slice_shape == chunk_operands[key].shape - and isinstance(value, blosc2.NDArray) + key in chunk_operands + and slice_shape == chunk_operands[key].shape + and isinstance(value, blosc2.NDArray) ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) continue @@ -1345,9 +1345,9 @@ def slices_eval( # noqa: C901 out[slice_] = result elif len(where) == 1: lenres = len(result) - out[lenout : lenout + lenres] = result + out[lenout: lenout + lenres] = result if _order is not None: - indices_[lenout : lenout + lenres] = chunk_indices + indices_[lenout: lenout + lenres] = chunk_indices lenout += lenres else: raise ValueError("The where condition must be a tuple with one or two elements") @@ -1394,11 +1394,11 @@ def infer_reduction_dtype(dtype, operation): def reduce_slices( # noqa: C901 - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], - operands: dict, - reduce_args, - _slice=None, - **kwargs, + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + reduce_args, + _slice=None, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands. 
@@ -1544,9 +1544,9 @@ def reduce_slices( # noqa: C901 continue # If key is in operands, we can reuse the buffer if ( - key in chunk_operands - and chunks_ == chunk_operands[key].shape - and isinstance(value, blosc2.NDArray) + key in chunk_operands + and chunks_ == chunk_operands[key].shape + and isinstance(value, blosc2.NDArray) ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) continue @@ -1657,7 +1657,7 @@ def convert_none_out(dtype, reduce_op, reduced_shape): def chunked_eval( # noqa: C901 - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, item=None, **kwargs + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, item=None, **kwargs ): """ Evaluate the expression in chunks of operands. @@ -1716,7 +1716,7 @@ def chunked_eval( # noqa: C901 # When using getitem, taking the fast path is always possible return fast_eval(expression, operands, getitem=True, **kwargs) elif (kwargs.get("chunks") is None and kwargs.get("blocks") is None) and ( - out is None or isinstance(out, blosc2.NDArray) + out is None or isinstance(out, blosc2.NDArray) ): # If not, the conditions to use the fast path are a bit more restrictive # e.g. 
the user cannot specify chunks or blocks, or an output that is not @@ -1773,7 +1773,7 @@ def fuse_expressions(expr, new_base, dup_op): break if expr[i + j] == ")": j -= 1 - old_pos = int(expr[i + 1 : i + j + 1]) + old_pos = int(expr[i + 1: i + j + 1]) old_op = f"o{old_pos}" if old_op not in dup_op: if old_pos in prev_pos: @@ -1977,9 +1977,9 @@ def dtype(self): # In some situations, we already know the dtype return self._dtype if ( - hasattr(self, "_dtype_") - and hasattr(self, "_expression_") - and self._expression_ == self.expression + hasattr(self, "_dtype_") + and hasattr(self, "_expression_") + and self._expression_ == self.expression ): # Use the cached dtype return self._dtype_ @@ -2019,9 +2019,9 @@ def shape(self): if hasattr(self, "_shape"): return self._shape if ( - hasattr(self, "_shape_") - and hasattr(self, "_expression_") - and self._expression_ == self.expression + hasattr(self, "_shape_") + and hasattr(self, "_expression_") + and self._expression_ == self.expression ): # Use the cached shape return self._shape_ @@ -2396,14 +2396,14 @@ def find_args(expr): idx = expression.find(f"{constructor}") # Find the arguments of the constructor function try: - args, idx2 = find_args(expression[idx + len(constructor) :]) + args, idx2 = find_args(expression[idx + len(constructor):]) except ValueError as err: raise ValueError(f"Unbalanced parenthesis in expression: {expression}") from err idx2 = idx + len(constructor) + idx2 # Give a chance to a possible .reshape() method - if expression[idx2 : idx2 + len(".reshape(")] == ".reshape(": - args2, idx3 = find_args(expression[idx2 + len("reshape(") :]) + if expression[idx2: idx2 + len(".reshape(")] == ".reshape(": + args2, idx3 = find_args(expression[idx2 + len("reshape("):]) # Remove a possible shape= from the reshape call (due to rewriting the expression # via extract_numpy_scalars(), other variants like .reshape(shape = shape_) work too) args2 = args2.replace("shape=", "") @@ -2440,16 +2440,16 @@ def 
_compute_expr(self, item, kwargs): # noqa: C901 if len(self._where_args) == 1: # We have a single argument where_x = self._where_args["_where_x"] - return where_x[:][lazy_expr] + return (where_x[:][lazy_expr])[item] if len(self._where_args) == 2: # We have two arguments where_x = self._where_args["_where_x"] where_y = self._where_args["_where_y"] - return np.where(lazy_expr, where_x, where_y) + return np.where(lazy_expr, where_x, where_y)[item] if hasattr(self, "_output"): # This is not exactly optimized, but it works for now - self._output[:] = lazy_expr - return lazy_expr + self._output[:] = lazy_expr[item] + return lazy_expr[item] return chunked_eval(lazy_expr.expression, lazy_expr.operands, item, **kwargs) @@ -2534,10 +2534,10 @@ def compute(self, item=None, **kwargs) -> blosc2.NDArray: x = self._where_args["_where_x"] result = x[result] # always a numpy array; TODO: optimize this for _getitem not in kwargs if ( - "_getitem" not in kwargs - and "_output" not in kwargs - and "_reduce_args" not in kwargs - and not isinstance(result, blosc2.NDArray) + "_getitem" not in kwargs + and "_output" not in kwargs + and "_reduce_args" not in kwargs + and not isinstance(result, blosc2.NDArray) ): # Get rid of all the extra kwargs that are not accepted by blosc2.asarray kwargs_not_accepted = {"_where_args", "_indices", "_order", "_ne_args", "dtype"} @@ -2824,7 +2824,7 @@ def compute(self, item=None, **kwargs): _ = kwargs.pop("dparams", None) urlpath = kwargs.get("urlpath") if urlpath is not None and urlpath == aux_kwargs.get( - "urlpath", + "urlpath", ): raise ValueError("Cannot use the same urlpath for LazyArray and eval NDArray") _ = aux_kwargs.pop("urlpath", None) @@ -2875,12 +2875,12 @@ def save(self, **kwargs): def lazyudf( - func: Callable[[tuple, np.ndarray, tuple[int]], None], - inputs: tuple | list | None, - dtype: np.dtype, - shape: tuple | list | None = None, - chunked_eval: bool = True, - **kwargs: Any, + func: Callable[[tuple, np.ndarray, tuple[int]], None], + 
inputs: tuple | list | None, + dtype: np.dtype, + shape: tuple | list | None = None, + chunked_eval: bool = True, + **kwargs: Any, ) -> LazyUDF: """ Get a LazyUDF from a python user-defined function. @@ -2980,14 +2980,14 @@ def seek_operands(names, local_dict=None, global_dict=None, _frame_depth: int = def lazyexpr( - expression: str | bytes | LazyExpr | blosc2.NDArray, - operands: dict | None = None, - out: blosc2.NDArray | np.ndarray = None, - where: tuple | list | None = None, - local_dict: dict | None = None, - global_dict: dict | None = None, - ne_args: dict | None = None, - _frame_depth: int = 2, + expression: str | bytes | LazyExpr | blosc2.NDArray, + operands: dict | None = None, + out: blosc2.NDArray | np.ndarray = None, + where: tuple | list | None = None, + local_dict: dict | None = None, + global_dict: dict | None = None, + ne_args: dict | None = None, + _frame_depth: int = 2, ) -> LazyExpr: """ Get a LazyExpr from an expression. @@ -3125,11 +3125,11 @@ def _open_lazyarray(array): # Mimim numexpr's evaluate function def evaluate( - ex: str, - local_dict: dict | None = None, - global_dict: dict | None = None, - out: np.ndarray | blosc2.NDArray = None, - **kwargs: Any, + ex: str, + local_dict: dict | None = None, + global_dict: dict | None = None, + out: np.ndarray | blosc2.NDArray = None, + **kwargs: Any, ) -> np.ndarray | blosc2.NDArray: """ Evaluate a string expression using the Blosc2 compute engine. 
diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index 81e7ba0f6..9c0383b59 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -186,7 +186,7 @@ def test_proxy_simple_expression(array_fixture): def test_iXXX(array_fixture): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**3 + a2**2 + a3**3 - a4 + 3 + expr = a1 ** 3 + a2 ** 2 + a3 ** 3 - a4 + 3 expr += 5 # __iadd__ expr -= 15 # __isub__ expr *= 2 # __imul__ @@ -1126,7 +1126,7 @@ def test_fill_disk_operands(chunks, blocks, disk, fill_value): b = blosc2.open("b.b2nd") c = blosc2.open("c.b2nd") - expr = ((a**3 + blosc2.sin(c * 2)) < b) & (c > 0) + expr = ((a ** 3 + blosc2.sin(c * 2)) < b) & (c > 0) out = expr.compute() assert out.shape == (N, N) @@ -1256,6 +1256,14 @@ def test_indices(): expr.indices().compute() +def test_reduction_index(): + shape = (20, 20) + a = blosc2.linspace(0, 20, num=np.prod(shape), shape=shape) + expr = blosc2.sum(a, axis=0) + assert expr[:10].shape == (10,) + assert expr[0].shape == () + + def test_sort(): shape = (20,) na = np.arange(shape[0]) @@ -1307,9 +1315,9 @@ def test_only_ndarrays_or_constructors(obj, getitem, item): def test_numpy_funcs(array_fixture, func): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture npfunc = getattr(np, func) - d_blosc2 = npfunc(((a1**3 + blosc2.sin(na2 * 2)) < a3) & (na2 > 0), axis=0) + d_blosc2 = npfunc(((a1 ** 3 + blosc2.sin(na2 * 2)) < a3) & (na2 > 0), axis=0) npfunc = getattr(np, func) - d_numpy = npfunc(((na1**3 + np.sin(na2 * 2)) < na3) & (na2 > 0), axis=0) + d_numpy = npfunc(((na1 ** 3 + np.sin(na2 * 2)) < na3) & (na2 > 0), axis=0) np.testing.assert_equal(d_blosc2, d_numpy) @@ -1346,7 +1354,7 @@ def test_chain_expressions(): b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N)) c = blosc2.linspace(0, 1, N, dtype=dtype, shape=(N,)) - le1 = a**3 + blosc2.sin(a**2) + le1 = a ** 3 + blosc2.sin(a ** 2) le2 = le1 < c le3 = le2 & (b < 0) le1_ = blosc2.lazyexpr("a ** 3 + sin(a ** 
2)", {"a": a}) @@ -1365,7 +1373,6 @@ # le4_ = blosc2.lazyexpr("(le2 & le3)", {"le2": le2_, "le3": le3_}) # assert (le4_[:] == le4[:]).all() - # TODO: Test the chaining of multiple persistent lazy expressions # def test_chain_persistentexpressions(): # N = 1_000