def can_use_numexpr(
    op, size: Optional[int] = None, dtypes=None, scalar=None
) -> bool:
    """
    Initial check whether numexpr can be used with the given size and
    involved data types and/or scalar operand.

    Returns False if it definitely cannot use numexpr, otherwise returns
    True (which doesn't mean we always end up using numexpr).

    Parameters
    ----------
    op : operator
        The binary operator; it must have an entry in ``_op_str_mapping``.
    size : int, optional
        Number of elements involved. If given and smaller than
        ``_MIN_ELEMENTS``, numexpr is ruled out.
    dtypes : iterable, optional
        Dtypes involved in the operation. Entries may be dtype objects
        (anything exposing a ``.name`` attribute) or plain dtype-name
        strings.
    scalar : object, optional
        Optionally a scalar, eg if the right operand is a scalar.

    Returns
    -------
    bool
    """
    if not USE_NUMEXPR:
        return False

    # too few elements: numexpr would only be adding overhead
    if size is not None and size < _MIN_ELEMENTS:
        return False

    # operators without a numexpr string representation are not supported
    if _op_str_mapping.get(op) is None:
        return False

    # a string scalar rules out numexpr (isinstance is False for None)
    if isinstance(scalar, str):
        return False

    if dtypes is not None:
        # Compare by dtype *name*, as ``_can_use_numexpr`` does. Set
        # comparison is hash-based, and a dtype object does not hash like
        # its name string, so comparing raw dtype objects would never
        # match and numexpr would be silently disabled.
        dtype_names = {getattr(dtype, "name", dtype) for dtype in dtypes}
        # allowed are a superset
        return _ALLOWED_DTYPES["evaluate"] >= dtype_names

    # safe fallback if dtypes were not specified
    return True
pandas.core.construction import ( extract_array, sanitize_array, @@ -6832,6 +6833,16 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): right = lib.item_from_zerodim(right) if not is_list_like(right): + + if isinstance(self._mgr, ArrayManager): + use_numexpr = expressions.can_use_numexpr( + func, self.shape[0], None, right + ) + else: + use_numexpr = expressions.can_use_numexpr(func, None, None, right) + + array_op = ops.get_array_op(func, use_numexpr=use_numexpr) + # i.e. scalar, faster than checking np.ndim(right) == 0 with np.errstate(all="ignore"): bm = self._mgr.apply(array_op, right=right) @@ -6844,6 +6855,15 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): # fails in cases with empty columns reached via # _frame_arith_method_with_reindex + if isinstance(self._mgr, ArrayManager): + use_numexpr = expressions.can_use_numexpr( + func, self.shape[0], None, None + ) + else: + use_numexpr = expressions.USE_NUMEXPR + + array_op = ops.get_array_op(func, use_numexpr=use_numexpr) + # TODO operate_blockwise expects a manager of the same type with np.errstate(all="ignore"): bm = self._mgr.operate_blockwise( @@ -6866,6 +6886,17 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): # maybe_align_as_frame ensures we do not have an ndarray here assert not isinstance(right, np.ndarray) + if isinstance(self._mgr, ArrayManager): + use_numexpr = expressions.can_use_numexpr( + func, self.shape[1], (right.dtype,), None + ) + else: + use_numexpr = expressions.can_use_numexpr( + func, None, (right.dtype,), None + ) + + array_op = ops.get_array_op(func, use_numexpr=use_numexpr) + with np.errstate(all="ignore"): arrays = [ array_op(_left, _right) @@ -6876,6 +6907,17 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): assert right.index.equals(self.index) # Handle other cases later right = right._values + if isinstance(self._mgr, ArrayManager): + use_numexpr = 
expressions.can_use_numexpr( + func, self.shape[0], (right.dtype,), None + ) + else: + use_numexpr = expressions.can_use_numexpr( + func, None, (right.dtype,), None + ) + + array_op = ops.get_array_op(func, use_numexpr=use_numexpr) + with np.errstate(all="ignore"): arrays = [array_op(left, right) for left in self._iter_column_arrays()] diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 2ff93b203a001..4deb0cd9d5a7b 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -135,7 +135,7 @@ def _masked_arith_op(x: np.ndarray, y, op): return result -def _na_arithmetic_op(left, right, op, is_cmp: bool = False): +def _na_arithmetic_op(left, right, op, is_cmp: bool = False, use_numexpr=True): """ Return the result of evaluating op on the passed in values. @@ -156,14 +156,8 @@ def _na_arithmetic_op(left, right, op, is_cmp: bool = False): ------ TypeError : invalid operation """ - if isinstance(right, str): - # can never use numexpr - func = op - else: - func = partial(expressions.evaluate, op) - try: - result = func(left, right) + result = expressions.evaluate(op, left, right, use_numexpr=use_numexpr) except TypeError: if is_object_dtype(left) or is_object_dtype(right) and not is_cmp: # For object dtype, fallback to a masked operation (only operating @@ -182,7 +176,7 @@ def _na_arithmetic_op(left, right, op, is_cmp: bool = False): return missing.dispatch_fill_zeros(op, left, right, result) -def arithmetic_op(left: ArrayLike, right: Any, op): +def arithmetic_op(left: ArrayLike, right: Any, op, use_numexpr=True): """ Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ... 
@@ -222,12 +216,12 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # (https://github.com/pandas-dev/pandas/issues/41165) _bool_arith_check(op, left, right) - res_values = _na_arithmetic_op(left, right, op) + res_values = _na_arithmetic_op(left, right, op, use_numexpr=use_numexpr) return res_values -def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: +def comparison_op(left: ArrayLike, right: Any, op, use_numexpr=True) -> ArrayLike: """ Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. @@ -285,7 +279,9 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) else: - res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True) + res_values = _na_arithmetic_op( + lvalues, rvalues, op, is_cmp=True, use_numexpr=use_numexpr + ) return res_values @@ -331,7 +327,7 @@ def na_logical_op(x: np.ndarray, y, op): return result.reshape(x.shape) -def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike: +def logical_op(left: ArrayLike, right: Any, op, use_numexpr=True) -> ArrayLike: """ Evaluate a logical operation `|`, `&`, or `^`. @@ -397,7 +393,7 @@ def fill_bool(x, left=None): return res_values -def get_array_op(op): +def get_array_op(op, use_numexpr=True): """ Return a binary array operation corresponding to the given operator op. @@ -421,9 +417,9 @@ def get_array_op(op): return op if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: - return partial(comparison_op, op=op) + return partial(comparison_op, op=op, use_numexpr=use_numexpr) elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: - return partial(logical_op, op=op) + return partial(logical_op, op=op, use_numexpr=use_numexpr) elif op_name in { "add", "sub", @@ -434,7 +430,7 @@ def get_array_op(op): "divmod", "pow", }: - return partial(arithmetic_op, op=op) + return partial(arithmetic_op, op=op, use_numexpr=use_numexpr) else: raise NotImplementedError(op_name)