diff --git a/onedal/_config.py b/onedal/_config.py index 5f28bb3b6d..e4ba5d3b5a 100644 --- a/onedal/_config.py +++ b/onedal/_config.py @@ -36,17 +36,11 @@ If True, allows to fallback computation to sklearn after onedal backend in case of runtime error on onedal backend computations. Global default: True. -use_raw_input: - If True, uses the raw input data in some SPMD onedal backend computations - without any checks on data consistency or validity. - Note: This option is not recommended for general use. - Global default: False. """ _default_global_config = { "target_offload": "auto", "allow_fallback_to_host": False, "allow_sklearn_after_onedal": True, - "use_raw_input": False, } _threadlocal = threading.local() diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 96ab5b2842..0c6aa75f72 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -15,21 +15,17 @@ # ============================================================================== import inspect -import logging from functools import wraps from operator import xor import numpy as np from sklearn import get_config -from ._config import _get_config from .datatypes import copy_to_dpnp, dlpack_to_numpy from .utils import _sycl_queue_manager as QM from .utils._array_api import _asarray, _get_sycl_namespace, _is_numpy_namespace from .utils._third_party import is_dpnp_ndarray -logger = logging.getLogger("sklearnex") - def supports_queue(func): """Decorator that updates the global queue before function evaluation. @@ -126,26 +122,7 @@ def wrapper_impl(*args, **kwargs): else: self = None - # KNeighbors*.fit can not be used with raw inputs, ignore `use_raw_input=True` - override_raw_input = ( - self - and self.__class__.__name__ in ("KNeighborsClassifier", "KNeighborsRegressor") - and func.__name__ == "fit" - and _get_config()["use_raw_input"] is True - ) - if override_raw_input: - pretty_name = f"{self.__class__.__name__}.{func.__name__}" - logger.warning( - f"Using raw inputs is not supported for {pretty_name}. Ignoring `use_raw_input=True` setting." - ) - if _get_config()["use_raw_input"] is True and not override_raw_input: - if "queue" not in kwargs: - if usm_iface := getattr(args[0], "__sycl_usm_array_interface__", None): - kwargs["queue"] = usm_iface["syclobj"] - else: - kwargs["queue"] = None - return invoke_func(self, *args, **kwargs) - elif len(args) == 0 and len(kwargs) == 0: + if len(args) == 0 and len(kwargs) == 0: # no arguments, there's nothing we can deduce from them -> just call the function return invoke_func(self, *args, **kwargs) diff --git a/sklearnex/_config.py b/sklearnex/_config.py index a62f3cac69..1d8eb2d327 100644 --- a/sklearnex/_config.py +++ b/sklearnex/_config.py @@ -15,7 +15,6 @@ # ============================================================================== import sys -import warnings from contextlib import contextmanager from sklearn import get_config as skl_get_config @@ -54,26 +53,10 @@ {tab} {tab} Global default: ``True``. {tab} -{tab}use_raw_input : bool or None -{tab} If ``True``, uses the raw input data in some SPMD onedal backend computations -{tab} without any checks on data consistency or validity. Note that this can be -{tab} better achieved through usage of :ref:`array API classes ` without -{tab} ``target_offload``. Not recommended for general use. -{tab} -{tab} Global default: ``False``. -{tab} -{tab} .. deprecated:: 2026.0 -{tab} {tab}sklearn_configs : kwargs {tab} Other settings accepted by scikit-learn. See :obj:`sklearn.set_config` for {tab} details. {tab} -{tab}Warnings -{tab}-------- -{tab}Using ``use_raw_input=True`` is not recommended for general use as it -{tab}bypasses data consistency checks, which may lead to unexpected behavior. It is -{tab}recommended to use the newer :ref:`array API ` instead. -{tab} {tab}Note {tab}---- {tab}Usage of ``target_offload`` requires additional dependencies - see @@ -102,7 +85,6 @@ def set_config( target_offload=None, allow_fallback_to_host=None, allow_sklearn_after_onedal=None, - use_raw_input=None, **sklearn_configs, ): # numpydoc ignore=PR01,PR07 """Set global configuration. @@ -125,15 +107,6 @@ def set_config( local_config["allow_fallback_to_host"] = allow_fallback_to_host if allow_sklearn_after_onedal is not None: local_config["allow_sklearn_after_onedal"] = allow_sklearn_after_onedal - if use_raw_input is not None: - if use_raw_input: - warnings.warn( - "The 'use_raw_input' parameter is deprecated and will be removed in version 2026.0. " - "On-device input validation can now be achieved by setting 'array_api_dispatch' to True.", - FutureWarning, - stacklevel=2, - ) - local_config["use_raw_input"] = use_raw_input set_config.__doc__ = set_config.__doc__.replace( diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 6cc105a3c9..25a0d8ffbd 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -80,7 +80,7 @@ def dispatch( Depending on support conditions, oneDAL will be called, otherwise it will fall back to calling scikit-learn. Dispatching to oneDAL can be influenced - by the 'use_raw_input' or 'allow_fallback_to_host' config parameters. + by the 'allow_fallback_to_host' config parameter. Parameters ---------- @@ -112,10 +112,6 @@ def dispatch( object types should match for the sklearn and onedal object methods. """ - if get_config()["use_raw_input"]: - with QM.manage_global_queue(None, *args) as queue: - return branches["onedal"](obj, *args, **kwargs, queue=queue) - # Determine if array_api dispatching is enabled, and if estimator is capable onedal_array_api = _array_api_offload() and get_tags(obj).onedal_array_api sklearn_array_api = _array_api_offload() and get_tags(obj).array_api_support @@ -215,16 +211,8 @@ def wrapper(self, *args, **kwargs) -> Any: ): _, (result,) = _transfer_to_host(result) return result - # Remove check for result __sycl_usm_array_interface__ on deprecation of use_raw_inputs - if ( - usm_iface := getattr(data, "__sycl_usm_array_interface__", None) - ) and not hasattr(result, "__sycl_usm_array_interface__"): - # Skip if result elements are already SYCL arrays - # (e.g. kneighbors tuple from from_table(like=X)) - if isinstance(result, (tuple, list)) and all( - hasattr(r, "__sycl_usm_array_interface__") for r in result - ): - return result + + if usm_iface := getattr(data, "__sycl_usm_array_interface__", None): queue = usm_iface["syclobj"] return copy_to_dpnp(queue, result) diff --git a/sklearnex/basic_statistics/basic_statistics.py b/sklearnex/basic_statistics/basic_statistics.py index 2bc2f4e50f..223f9b2e3f 100644 --- a/sklearnex/basic_statistics/basic_statistics.py +++ b/sklearnex/basic_statistics/basic_statistics.py @@ -21,7 +21,6 @@ from onedal.basic_statistics import BasicStatistics as onedal_BasicStatistics from onedal.utils.validation import _is_csr -from .._config import get_config from .._device_offload import dispatch from .._utils import PatchingConditionsChain from ..base import oneDALEstimator @@ -157,20 +156,19 @@ def _onedal_gpu_supported(self, method_name, *data): return patching_status def _onedal_fit(self, X, sample_weight=None, queue=None): - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X, sample_weight) - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - ensure_2d=False, - accept_sparse="csr", - ) + xp, _ = get_namespace(X, sample_weight) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + ensure_2d=False, + accept_sparse="csr", + ) - if sample_weight is not None: - sample_weight = _check_sample_weight( - sample_weight, X, dtype=[xp.float64, xp.float32] - ) + if sample_weight is not None: + sample_weight = _check_sample_weight( + sample_weight, X, dtype=[xp.float64, xp.float32] + ) onedal_params = { "result_options": self.result_options, diff --git a/sklearnex/basic_statistics/incremental_basic_statistics.py b/sklearnex/basic_statistics/incremental_basic_statistics.py index 0a5795636c..4be3b7007f 100644 --- a/sklearnex/basic_statistics/incremental_basic_statistics.py +++ b/sklearnex/basic_statistics/incremental_basic_statistics.py @@ -23,7 +23,6 @@ IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, ) -from .._config import get_config from .._device_offload import dispatch from .._utils import PatchingConditionsChain, _add_inc_serialization_note from ..base import oneDALEstimator @@ -174,8 +173,7 @@ def _onedal_finalize_fit(self, queue=None): def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=True): first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 - # never check input when using raw input - if check_input and not get_config()["use_raw_input"]: + if check_input: xp, _ = get_namespace(X) X = validate_data( self, @@ -204,14 +202,13 @@ def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=Tru self._need_to_finalize = True def _onedal_fit(self, X, sample_weight=None, queue=None): - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X, sample_weight) - X = validate_data(self, X, dtype=[xp.float64, xp.float32]) + xp, _ = get_namespace(X, sample_weight) + X = validate_data(self, X, dtype=[xp.float64, xp.float32]) - if sample_weight is not None: - sample_weight = _check_sample_weight( - sample_weight, X, dtype=[xp.float64, xp.float32] - ) + if sample_weight is not None: + sample_weight = _check_sample_weight( + sample_weight, X, dtype=[xp.float64, xp.float32] + ) _, n_features = X.shape if self.batch_size is None: diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index 1ac380512d..0f245e1215 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -21,7 +21,6 @@ from onedal.cluster import DBSCAN as onedal_DBSCAN from onedal.utils._array_api import _is_numpy_namespace -from .._config import get_config from .._device_offload import dispatch from .._utils import PatchingConditionsChain from ..base import oneDALEstimator @@ -77,14 +76,11 @@ def __init__( def _onedal_fit(self, X, y, sample_weight=None, queue=None): xp, _ = get_namespace(X, y, sample_weight) - if not get_config()["use_raw_input"]: - X = validate_data( - self, X, accept_sparse="csr", dtype=[xp.float64, xp.float32] + X = validate_data(self, X, accept_sparse="csr", dtype=[xp.float64, xp.float32]) + if sample_weight is not None: + sample_weight = _check_sample_weight( + sample_weight, X, dtype=[xp.float64, xp.float32] ) - if sample_weight is not None: - sample_weight = _check_sample_weight( - sample_weight, X, dtype=[xp.float64, xp.float32] - ) onedal_params = { "eps": self.eps, diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index c1b114fedb..e9660993a8 100644 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -193,29 +193,28 @@ def _onedal_fit(self, X, _, sample_weight, queue=None): xp, _ = get_namespace(X) - if not get_config()["use_raw_input"]: - if _is_arraylike_not_scalar(self.init): - init = validate_data( - self, - self.init, - dtype=[xp.float64, xp.float32], - accept_sparse="csr", - copy=True, - order="C", - reset=False, - ) - self._validate_center_shape(X, init) - self.init = init - - X = validate_data( + if _is_arraylike_not_scalar(self.init): + init = validate_data( self, - X, - accept_sparse="csr", + self.init, dtype=[xp.float64, xp.float32], + accept_sparse="csr", + copy=True, order="C", - copy=self.copy_x, - accept_large_sparse=False, + reset=False, ) + self._validate_center_shape(X, init) + self.init = init + + X = validate_data( + self, + X, + accept_sparse="csr", + dtype=[xp.float64, xp.float32], + order="C", + copy=self.copy_x, + accept_large_sparse=False, + ) # Validate critical parameters to match sklearn's _check_params # behavior, which we bypass in the oneDAL path. This is needed @@ -386,16 +385,13 @@ def predict( def _onedal_predict(self, X, sample_weight=None, queue=None): xp, _ = get_namespace(X) - - if not get_config()["use_raw_input"]: - X = validate_data( - self, - X, - accept_sparse="csr", - reset=False, - dtype=[xp.float64, xp.float32], - ) - + X = validate_data( + self, + X, + accept_sparse="csr", + reset=False, + dtype=[xp.float64, xp.float32], + ) return self._onedal_estimator.predict(X, queue=queue) def _onedal_supported(self, method_name, *data): @@ -456,17 +452,15 @@ def transform(self, X): def _onedal_transform(self, X, queue=None): xp, is_array_api = get_namespace(X) - - if not get_config()["use_raw_input"]: - X = validate_data( - self, - X, - accept_sparse="csr", - reset=False, - dtype=[xp.float64, xp.float32], - order="C", - accept_large_sparse=False, - ) + X = validate_data( + self, + X, + accept_sparse="csr", + reset=False, + dtype=[xp.float64, xp.float32], + order="C", + accept_large_sparse=False, + ) if is_array_api: centers = xp.asarray(self.cluster_centers_) @@ -500,15 +494,13 @@ def score(self, X, y=None, sample_weight=None): def _onedal_score(self, X, y=None, sample_weight=None, queue=None): xp, _ = get_namespace(X) - - if not get_config()["use_raw_input"]: - X = validate_data( - self, - X, - accept_sparse="csr", - reset=False, - dtype=[xp.float64, xp.float32], - ) + X = validate_data( + self, + X, + accept_sparse="csr", + reset=False, + dtype=[xp.float64, xp.float32], + ) if not sklearn_check_version("1.5") and sklearn_check_version("1.3"): if isinstance(sample_weight, str) and sample_weight == "deprecated": diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 67773e45f5..ba2d168514 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -33,7 +33,7 @@ ) from onedal.utils._array_api import _is_numpy_namespace -from .._config import config_context, get_config +from .._config import config_context from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain, _add_inc_serialization_note from ..base import oneDALEstimator @@ -191,7 +191,7 @@ def location_(self): def _onedal_partial_fit(self, X, queue=None, check_input=True): first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 - if check_input and not get_config()["use_raw_input"]: + if check_input: xp, _ = get_namespace(X) X = validate_data( self, @@ -297,9 +297,8 @@ def _onedal_fit(self, X, queue=None): if hasattr(self, "_onedal_estimator"): self._onedal_estimator._reset() - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X) - X = validate_data(self, X, dtype=[xp.float64, xp.float32], copy=self.copy) + xp, _ = get_namespace(X) + X = validate_data(self, X, dtype=[xp.float64, xp.float32], copy=self.copy) self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_ diff --git a/sklearnex/decomposition/pca.py b/sklearnex/decomposition/pca.py index 905db8a6c3..bdd3db0b4a 100755 --- a/sklearnex/decomposition/pca.py +++ b/sklearnex/decomposition/pca.py @@ -31,7 +31,6 @@ from daal4py.sklearn._utils import sklearn_check_version from onedal._device_offload import _transfer_to_host - from .._config import get_config from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain, register_hyperparameters from ..base import oneDALEstimator @@ -325,30 +324,26 @@ def fit(self, X, y=None): def _onedal_fit(self, X, queue=None): xp, _ = get_namespace(X) - if not get_config()["use_raw_input"]: - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - ensure_2d=True, - copy=self.copy, - ) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + ensure_2d=True, + copy=self.copy, + ) - # `use_raw_input` disabled by hasattr check - if ( - sklearn_check_version("1.5") - and self._fit_svd_solver == "full" - and self.svd_solver == "auto" - ): - self._fit_svd_solver = "covariance_eigh" - # warning should only be emitted if to be offloaded to oneDAL - warn( - "Sklearnex always uses `covariance_eigh` solver instead of `full` " - "when `svd_solver` parameter is set to `auto` " - "for performance purposes." - ) - else: + if ( + sklearn_check_version("1.5") + and self._fit_svd_solver == "full" + and self.svd_solver == "auto" + ): self._fit_svd_solver = "covariance_eigh" + # warning should only be emitted if to be offloaded to oneDAL + warn( + "Sklearnex always uses `covariance_eigh` solver instead of `full` " + "when `svd_solver` parameter is set to `auto` " + "for performance purposes." + ) if self.n_components is not None: self._validate_n_components(X) @@ -437,14 +432,13 @@ def transform(self, X): ) def _onedal_transform(self, X, queue=None): - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X) - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - reset=False, - ) + xp, _ = get_namespace(X) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + reset=False, + ) return self._onedal_estimator.predict(X, queue=queue) diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 73ca765441..8f02270ef0 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -58,7 +58,6 @@ from onedal.primitives import get_tree_state_cls, get_tree_state_reg from onedal.utils.validation import _num_features -from .._config import get_config from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain, register_hyperparameters from ..base import oneDALEstimator @@ -91,24 +90,22 @@ class BaseForest(oneDALEstimator, ABC): _onedal_factory = None def _onedal_fit(self, X, y, sample_weight=None, queue=None): - use_raw_input = get_config().get("use_raw_input", False) is True xp, _ = get_namespace(X, y, sample_weight) - if not use_raw_input: - X, y = validate_data( - self, - X, - y, - multi_output=True, - accept_sparse=False, - dtype=[xp.float64, xp.float32], - ensure_all_finite=not sklearn_check_version( - "1.4" - ), # completed in offload check - y_numeric=not is_classifier(self), # trigger for Regressors - ) + X, y = validate_data( + self, + X, + y, + multi_output=True, + accept_sparse=False, + dtype=[xp.float64, xp.float32], + ensure_all_finite=not sklearn_check_version( + "1.4" + ), # completed in offload check + y_numeric=not is_classifier(self), # trigger for Regressors + ) - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) if y.ndim == 2 and y.shape[1] == 1: warnings.warn( @@ -133,40 +130,26 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): self._n_samples, self.n_outputs_ = y.shape self.n_features_in_ = X.shape[1] - if not use_raw_input: - if is_classifier(self): - if sklearn_check_version("1.9"): - y, expanded_class_weight = self._validate_y_class_weight( - y, sample_weight - ) - else: - y, expanded_class_weight = self._validate_y_class_weight(y) + if is_classifier(self): + if sklearn_check_version("1.9"): + y, expanded_class_weight = self._validate_y_class_weight(y, sample_weight) else: - expanded_class_weight = None - - if expanded_class_weight is not None: - if sample_weight is not None: - sample_weight = sample_weight * expanded_class_weight - else: - sample_weight = expanded_class_weight + y, expanded_class_weight = self._validate_y_class_weight(y) + else: + expanded_class_weight = None - # Decapsulate classes_ attributes following scikit-learn's - # BaseForest.fit. oneDAL does not support multi-output, therefore - # the logic can be hardcoded in comparison to scikit-learn's logic - if hasattr(self, "classes_"): - self.n_classes_ = self.n_classes_[0] - self.classes_ = self.classes_[0] + if expanded_class_weight is not None: + if sample_weight is not None: + sample_weight = sample_weight * expanded_class_weight + else: + sample_weight = expanded_class_weight - else: - # try catch needed for raw_inputs + array_api data where unlike - # numpy the way to yield unique values is via `unique_values` - # This should be removed when refactored for gpu zero-copy - if is_classifier(self): - try: - self.classes_ = xp.unique(y) - except AttributeError: - self.classes_ = xp.unique_values(y) - self.n_classes_ = self.classes_.shape[0] + # Decapsulate classes_ attributes following scikit-learn's + # BaseForest.fit. oneDAL does not support multi-output, therefore + # the logic can be hardcoded in comparison to scikit-learn's logic + if hasattr(self, "classes_"): + self.n_classes_ = self.n_classes_[0] + self.classes_ = self.classes_[0] # conform to scikit-learn internal calculations if self.bootstrap: @@ -914,13 +897,12 @@ def score(self, X, y, sample_weight=None): def _onedal_predict(self, X, queue=None): xp, is_array_api_compliant = get_namespace(X, self.classes_) - if not get_config()["use_raw_input"]: - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - reset=False, - ) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + reset=False, + ) res = self._onedal_estimator.predict(X, queue=queue) @@ -935,14 +917,12 @@ def _onedal_predict(self, X, queue=None): def _onedal_predict_proba(self, X, queue=None): xp, _ = get_namespace(X) - use_raw_input = get_config().get("use_raw_input", False) is True - if not use_raw_input: - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - reset=False, - ) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + reset=False, + ) # TODO: fix probabilities out of [0, 1] interval on oneDAL side out = self._onedal_estimator.predict_proba(X, queue=queue) @@ -1130,15 +1110,13 @@ def score(self, X, y, sample_weight=None): def _onedal_predict(self, X, queue=None): check_is_fitted(self, "_onedal_estimator") xp, _ = get_namespace(X) - use_raw_input = get_config().get("use_raw_input", False) is True - - if not use_raw_input: - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - reset=False, - ) # Warning, order of dtype matters + + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + reset=False, + ) # Warning, order of dtype matters return self._onedal_estimator.predict(X, queue=queue) diff --git a/sklearnex/linear_model/incremental_linear.py b/sklearnex/linear_model/incremental_linear.py index 370cc53def..d85c8dae3c 100644 --- a/sklearnex/linear_model/incremental_linear.py +++ b/sklearnex/linear_model/incremental_linear.py @@ -28,7 +28,6 @@ from onedal.linear_model import ( IncrementalLinearRegression as onedal_IncrementalLinearRegression, ) -from sklearnex._config import get_config from .._device_offload import dispatch, wrap_output_data from .._utils import ( @@ -164,17 +163,16 @@ def _onedal_supported(self, method_name, *data): def _onedal_predict(self, X, queue=None): xp, _ = get_namespace(X) - if not get_config()["use_raw_input"]: - if sklearn_check_version("1.2"): - self._validate_params() + if sklearn_check_version("1.2"): + self._validate_params() - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - copy=self.copy_X, - reset=False, - ) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + copy=self.copy_X, + reset=False, + ) assert hasattr(self, "_onedal_estimator") if self._need_to_finalize: @@ -193,7 +191,7 @@ def _onedal_score(self, X, y, sample_weight=None, queue=None): def _onedal_partial_fit(self, X, y, check_input=True, queue=None): first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 - if check_input and not get_config()["use_raw_input"]: + if check_input: xp, _ = get_namespace(X, y) X, y = validate_data( self, @@ -245,18 +243,17 @@ def _onedal_finalize_fit(self): self._need_to_finalize = False def _onedal_fit(self, X, y, queue=None): - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X, y) + xp, _ = get_namespace(X, y) - X, y = validate_data( - self, - X, - y, - dtype=[xp.float64, xp.float32], - copy=self.copy_X, - multi_output=True, - y_numeric=True, - ) + X, y = validate_data( + self, + X, + y, + dtype=[xp.float64, xp.float32], + copy=self.copy_X, + multi_output=True, + y_numeric=True, + ) n_samples, n_features = X.shape diff --git a/sklearnex/linear_model/linear.py b/sklearnex/linear_model/linear.py index 5c3c2a652c..3c93587c61 100644 --- a/sklearnex/linear_model/linear.py +++ b/sklearnex/linear_model/linear.py @@ -256,17 +256,16 @@ def _onedal_fit(self, X, y, sample_weight, queue=None): xp, _ = get_namespace(X, y) - if not get_config()["use_raw_input"]: - supports_multi_output = daal_check_version((2025, "P", 1)) - X, y = validate_data( - self, - X=X, - y=y, - dtype=[xp.float64, xp.float32], - accept_sparse=["csr", "csc", "coo"], - y_numeric=True, - multi_output=supports_multi_output, - ) + supports_multi_output = daal_check_version((2025, "P", 1)) + X, y = validate_data( + self, + X=X, + y=y, + dtype=[xp.float64, xp.float32], + accept_sparse=["csr", "csc", "coo"], + y_numeric=True, + multi_output=supports_multi_output, + ) if not sklearn_check_version("1.2"): self._normalize = _deprecate_normalize( @@ -305,10 +304,9 @@ def _onedal_fit(self, X, y, sample_weight, queue=None): def _onedal_predict(self, X, queue=None): xp, _ = get_namespace(X) - if not get_config()["use_raw_input"]: - X = validate_data( - self, X, accept_sparse=False, dtype=[xp.float64, xp.float32], reset=False - ) + X = validate_data( + self, X, accept_sparse=False, dtype=[xp.float64, xp.float32], reset=False + ) if not hasattr(self, "_onedal_estimator"): self._initialize_onedal_estimator() diff --git a/sklearnex/linear_model/logistic_regression.py b/sklearnex/linear_model/logistic_regression.py index fd97709695..a53842e796 100644 --- a/sklearnex/linear_model/logistic_regression.py +++ b/sklearnex/linear_model/logistic_regression.py @@ -400,16 +400,14 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): xp, _ = get_namespace(X) xp_y, is_array_api_compliant_y = get_namespace(y) - use_raw_input = get_config().get("use_raw_input", False) is True - if not use_raw_input: - X, y = validate_data( - self, - X, - y, - accept_sparse=_sparsity_enabled, - accept_large_sparse=_sparsity_enabled, - dtype=[xp.float64, xp.float32], - ) + X, y = validate_data( + self, + X, + y, + accept_sparse=_sparsity_enabled, + accept_large_sparse=_sparsity_enabled, + dtype=[xp.float64, xp.float32], + ) self.classes_ = ( xp_y.unique_values(y) @@ -453,16 +451,14 @@ def _onedal_predict(self, X, queue=None): xp, _ = get_namespace(X) xp_y, _ = get_namespace(self.classes_) - use_raw_input = get_config().get("use_raw_input", False) is True - if not use_raw_input: - X = validate_data( - self, - X, - reset=False, - accept_sparse=_sparsity_enabled, - accept_large_sparse=_sparsity_enabled, - dtype=[xp.float64, xp.float32], - ) + X = validate_data( + self, + X, + reset=False, + accept_sparse=_sparsity_enabled, + accept_large_sparse=_sparsity_enabled, + dtype=[xp.float64, xp.float32], + ) assert hasattr(self, "_onedal_estimator") @@ -486,16 +482,14 @@ def _onedal_predict_proba(self, X, queue=None): return daal4py_predict(self, X, "computeClassProbabilities") xp, _ = get_namespace(X) - use_raw_input = get_config().get("use_raw_input", False) is True - if not use_raw_input: - X = validate_data( - self, - X, - reset=False, - accept_sparse=_sparsity_enabled, - accept_large_sparse=_sparsity_enabled, - dtype=[xp.float64, xp.float32], - ) + X = validate_data( + self, + X, + reset=False, + accept_sparse=_sparsity_enabled, + accept_large_sparse=_sparsity_enabled, + dtype=[xp.float64, xp.float32], + ) assert hasattr(self, "_onedal_estimator") res = self._onedal_estimator.predict_proba(X, queue=queue) @@ -537,16 +531,14 @@ def _onedal_decision_function(self, X, queue=None): return super().decision_function(X) xp, _ = get_namespace(X) - use_raw_input = get_config().get("use_raw_input", False) is True - if not use_raw_input: - X = validate_data( - self, - X, - reset=False, - accept_sparse=_sparsity_enabled, - accept_large_sparse=_sparsity_enabled, - dtype=[xp.float64, xp.float32], - ) + X = validate_data( + self, + X, + reset=False, + accept_sparse=_sparsity_enabled, + accept_large_sparse=_sparsity_enabled, + dtype=[xp.float64, xp.float32], + ) assert hasattr(self, "_onedal_estimator") diff --git a/sklearnex/neighbors/common.py b/sklearnex/neighbors/common.py index 5fca6562b7..02cc8f8968 100644 --- a/sklearnex/neighbors/common.py +++ b/sklearnex/neighbors/common.py @@ -34,7 +34,6 @@ from onedal.utils._array_api import _is_numpy_namespace from onedal.utils.validation import _check_array, _num_features, _num_samples -from .._config import get_config from .._utils import PatchingConditionsChain from ..base import oneDALEstimator from ..utils._array_api import get_namespace @@ -242,8 +241,8 @@ def _set_effective_metric(self): """Set effective_metric_ and effective_metric_params_ without validation. Used when we need to set metrics but can't call _fit_validation - (e.g., in SPMD mode with use_raw_input=True where sklearn validation - would try to convert array API to numpy). + (e.g., in SPMD mode where sklearn validation would try to convert + array API to numpy). """ if self.metric_params is not None and "p" in self.metric_params: if self.p is not None: diff --git a/sklearnex/neighbors/knn_classification.py b/sklearnex/neighbors/knn_classification.py index a447230394..e4383da4c5 100755 --- a/sklearnex/neighbors/knn_classification.py +++ b/sklearnex/neighbors/knn_classification.py @@ -28,7 +28,6 @@ from onedal.utils._array_api import _is_numpy_namespace from onedal.utils.validation import _check_classification_targets -from .._config import get_config from .._device_offload import dispatch, wrap_output_data from ..utils._array_api import enable_array_api, get_namespace from ..utils.validation import validate_data @@ -160,20 +159,17 @@ def _onedal_fit(self, X, y, queue=None): xp, _ = get_namespace(X) self._set_effective_metric() - if not get_config()["use_raw_input"]: - X, y = validate_data( - self, - X, - y, - dtype=[xp.float64, xp.float32], - accept_sparse="csr", - multi_output=True, - ) + X, y = validate_data( + self, + X, + y, + dtype=[xp.float64, xp.float32], + accept_sparse="csr", + multi_output=True, + ) # Process classification targets before passing to onedal - self._process_classification_targets( - y, skip_validation=get_config()["use_raw_input"] - ) + self._process_classification_targets(y, skip_validation=False) # Call onedal backend onedal_params = { @@ -213,7 +209,6 @@ def _process_classification_targets(self, y, skip_validation=False): Target values skip_validation : bool, default=False If True, skip check_classification_targets validation. - Used when use_raw_input=True (SPMD mode). """ # Array API support: get namespace from y xp, _ = get_namespace(y) @@ -252,7 +247,7 @@ def _process_classification_targets(self, y, skip_validation=False): self._y = xp.reshape(self._y, (-1,)) def _onedal_predict(self, X, queue=None): - if X is not None and not get_config()["use_raw_input"]: + if X is not None: xp, _ = get_namespace(X) X = validate_data( self, @@ -274,7 +269,7 @@ def _onedal_predict(self, X, queue=None): ) def _onedal_predict_proba(self, X, queue=None): - if X is not None and not get_config()["use_raw_input"]: + if X is not None: xp, _ = get_namespace(X) X = validate_data( self, @@ -293,21 +288,17 @@ def _onedal_predict_proba(self, X, queue=None): def _onedal_kneighbors( self, X=None, n_neighbors=None, return_distance=True, queue=None ): - # Only skip validation when use_raw_input=True (SPMD mode) - use_raw_input = get_config()["use_raw_input"] - # Determine if query is the training data if X is not None: query_is_train = False - if not use_raw_input: - xp, _ = get_namespace(X) - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - accept_sparse="csr", - reset=False, - ) + xp, _ = get_namespace(X) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + accept_sparse="csr", + reset=False, + ) else: query_is_train = True X = self._fit_X diff --git a/sklearnex/neighbors/knn_regression.py b/sklearnex/neighbors/knn_regression.py index 88553efaeb..9b2cb58e62 100755 --- a/sklearnex/neighbors/knn_regression.py +++ b/sklearnex/neighbors/knn_regression.py @@ -26,7 +26,6 @@ from onedal.neighbors import KNeighborsRegressor as onedal_KNeighborsRegressor from onedal.utils._array_api import _is_numpy_namespace -from .._config import get_config from .._device_offload import dispatch, wrap_output_data from ..utils._array_api import enable_array_api, get_namespace from ..utils.validation import validate_data @@ -142,16 +141,15 @@ def _onedal_fit(self, X, y, queue=None): xp, _ = get_namespace(X, y) self._set_effective_metric() - if not get_config()["use_raw_input"]: - X, y = validate_data( - self, - X, - y, - dtype=[xp.float64, xp.float32], - accept_sparse="csr", - multi_output=True, - y_numeric=True, - ) + X, y = validate_data( + self, + X, + y, + dtype=[xp.float64, xp.float32], + accept_sparse="csr", + multi_output=True, + y_numeric=True, + ) self._process_regression_targets(y) onedal_params = { @@ -243,7 +241,7 @@ def _predict_skl_regression(self, X): def _predict_skl(self, X, queue=None): """SKL prediction path - calls kneighbors through sklearnex, computes prediction here.""" - if X is not None and not get_config()["use_raw_input"]: + if X is not None: xp, _ = get_namespace(X) X = validate_data( self, X, dtype=[xp.float64, xp.float32], accept_sparse="csr", reset=False @@ -256,15 +254,14 @@ def _onedal_kneighbors( # Determine if query is the training data if X is not None: query_is_train = False - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X) - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - accept_sparse="csr", - reset=False, - ) + xp, _ = get_namespace(X) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + accept_sparse="csr", + reset=False, + ) else: query_is_train = True X = self._fit_X diff --git a/sklearnex/neighbors/knn_unsupervised.py b/sklearnex/neighbors/knn_unsupervised.py index a07de455c2..19b48a81ef 100755 --- a/sklearnex/neighbors/knn_unsupervised.py +++ b/sklearnex/neighbors/knn_unsupervised.py @@ -24,7 +24,6 @@ from onedal.neighbors import NearestNeighbors as onedal_NearestNeighbors from onedal.utils._array_api import _is_numpy_namespace -from .._config import get_config from .._device_offload import dispatch, wrap_output_data from ..utils._array_api import enable_array_api, get_namespace from ..utils.validation import validate_data @@ -155,13 +154,12 @@ def _onedal_fit(self, X, y=None, queue=None): xp, _ = get_namespace(X) self._set_effective_metric() - if not get_config()["use_raw_input"]: - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - accept_sparse="csr", - ) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + accept_sparse="csr", + ) onedal_params = { "n_neighbors": self.n_neighbors, diff --git a/sklearnex/preview/covariance/covariance.py b/sklearnex/preview/covariance/covariance.py index c34e7b6645..3c780d3eec 100644 --- a/sklearnex/preview/covariance/covariance.py +++ b/sklearnex/preview/covariance/covariance.py @@ -31,7 +31,6 @@ from onedal.utils.validation import _num_features from sklearnex import config_context -from ..._config import get_config from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain, register_hyperparameters from ...base import oneDALEstimator @@ -57,12 +56,11 @@ class EmpiricalCovariance(oneDALEstimator, _sklearn_EmpiricalCovariance): } def _set_covariance(self, covariance): - if not get_config()["use_raw_input"]: - if sklearn_check_version("1.6"): - covariance = check_array(covariance, ensure_all_finite=False) - else: - covariance = check_array(covariance, force_all_finite=False) - assert_all_finite(covariance) + if sklearn_check_version("1.6"): + covariance = check_array(covariance, ensure_all_finite=False) + else: + covariance = check_array(covariance, force_all_finite=False) + assert_all_finite(covariance) # set covariance self.covariance_ = covariance # set precision @@ -86,9 +84,8 @@ def _save_attributes(self): _onedal_covariance = staticmethod(onedal_EmpiricalCovariance) def _onedal_fit(self, X, queue=None): - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X) - X = validate_data(self, X, dtype=[xp.float64, xp.float32]) + xp, _ = get_namespace(X) + X = validate_data(self, X, dtype=[xp.float64, xp.float32]) if X.shape[0] == 1: warnings.warn( diff --git a/sklearnex/preview/decomposition/incremental_pca.py b/sklearnex/preview/decomposition/incremental_pca.py index dd8c1bb265..8e1401502c 100644 --- a/sklearnex/preview/decomposition/incremental_pca.py +++ b/sklearnex/preview/decomposition/incremental_pca.py @@ -22,7 +22,6 @@ from daal4py.sklearn._utils import is_sparse, sklearn_check_version from onedal.decomposition import IncrementalPCA as onedal_IncrementalPCA -from ..._config import get_config from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain, _add_inc_serialization_note from ...base import oneDALEstimator @@ -130,9 +129,8 @@ def _onedal_transform(self, X, queue=None): # does not batch out data like sklearn's ``IncrementalPCA.transform`` if self._need_to_finalize: self._onedal_finalize_fit() - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X) - X = validate_data(self, X, dtype=[xp.float64, xp.float32], reset=False) + xp, _ = get_namespace(X) + X = validate_data(self, X, dtype=[xp.float64, xp.float32], reset=False) return self._onedal_estimator.predict(X, queue=queue) def _onedal_fit_transform(self, X, queue=None): @@ -145,13 +143,8 @@ def _onedal_partial_fit(self, X, check_input=True, queue=None): self._components_ = None if check_input: - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X) - X = validate_data( - self, X, dtype=[xp.float64, xp.float32], reset=first_pass - ) - else: - self.n_features_in_ = X.shape[1] + xp, _ = get_namespace(X) + X = validate_data(self, X, dtype=[xp.float64, xp.float32], reset=first_pass) n_samples, n_features = X.shape @@ -211,13 +204,10 @@ def _onedal_fit(self, X, queue=None): # Taken from sklearn for conformance purposes self.components_ = None - if not get_config()["use_raw_input"]: - if sklearn_check_version("1.2"): - self._validate_params() - xp, _ = get_namespace(X) - X = validate_data(self, X, dtype=[xp.float64, xp.float32], copy=self.copy) - else: - self.n_features_in_ = X.shape[1] + if sklearn_check_version("1.2"): + self._validate_params() + xp, _ = get_namespace(X) + X = validate_data(self, X, dtype=[xp.float64, xp.float32], copy=self.copy) n_samples, n_features = X.shape diff --git a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py index e36311ec6c..1ddb91262b 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py @@ -24,7 +24,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_statistic_data, _get_local_tensor, @@ -84,11 +83,8 @@ def test_basic_stats_spmd_gold(dataframe, queue): "dataframe,queue", get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi -def test_basic_stats_spmd_synthetic( - n_samples, n_features, dataframe, queue, dtype, use_raw_input -): +def test_basic_stats_spmd_synthetic(n_samples, n_features, dataframe, queue, dtype): # Import spmd and batch algo from onedal.basic_statistics import BasicStatistics as BasicStatistics_Batch from sklearnex.spmd.basic_statistics import BasicStatistics as BasicStatistics_SPMD @@ -101,9 +97,7 @@ def test_basic_stats_spmd_synthetic( ) # Ensure results of batch algo match spmd - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_result = BasicStatistics_SPMD().fit(local_dpt_data) + spmd_result = BasicStatistics_SPMD().fit(local_dpt_data) batch_result = BasicStatistics_Batch().fit(data) tol = 1e-5 if dtype == np.float32 else 1e-7 diff --git a/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py index 9626f70733..1f00f14a56 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py @@ -24,7 +24,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_statistic_data, _get_local_tensor, @@ -254,10 +253,9 @@ def test_incremental_basic_statistics_single_option_partial_fit_spmd_gold( @pytest.mark.parametrize("n_samples", [100, 10000]) @pytest.mark.parametrize("n_features", [10, 100]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_incremental_basic_statistics_partial_fit_spmd_synthetic( - dataframe, queue, num_blocks, weighted, n_samples, n_features, dtype, use_raw_input + dataframe, queue, num_blocks, weighted, n_samples, n_features, dtype ): # Import spmd and batch algo from sklearnex.basic_statistics import IncrementalBasicStatistics @@ -297,11 +295,9 @@ def test_incremental_basic_statistics_partial_fit_spmd_synthetic( dpt_weights = _convert_to_dataframe( split_weights[i], sycl_queue=queue, target_df=dataframe ) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - incbs_spmd.partial_fit( - local_dpt_data, sample_weight=local_dpt_weights if weighted else None - ) + incbs_spmd.partial_fit( + local_dpt_data, sample_weight=local_dpt_weights if weighted else None + ) incbs.partial_fit(dpt_data, sample_weight=dpt_weights if weighted else None) for option in options_and_tests: diff --git a/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py b/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py index c5bbb034c0..7607929eb9 100644 --- a/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py @@ -21,7 +21,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_clustering_data, _get_local_tensor, @@ -70,7 +69,6 @@ def test_dbscan_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_dbscan_spmd_synthetic( n_samples, @@ -80,7 +78,6 @@ def test_dbscan_spmd_synthetic( dataframe, queue, dtype, - use_raw_input, ): n_features, eps = n_features_and_eps # Import spmd and batch algo @@ -96,9 +93,7 @@ def test_dbscan_spmd_synthetic( ) # Ensure labels from fit of batch algo matches spmd - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_model = DBSCAN_SPMD(eps=eps, min_samples=min_samples).fit(local_dpt_data) + spmd_model = DBSCAN_SPMD(eps=eps, min_samples=min_samples).fit(local_dpt_data) batch_model = DBSCAN_Batch(eps=eps, min_samples=min_samples).fit(data) _spmd_assert_allclose(spmd_model.labels_, batch_model.labels_) diff --git a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py index 1ed14a02fe..00a7b90c9e 100644 --- a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py @@ -22,7 +22,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _assert_kmeans_labels_allclose, _assert_unordered_allclose, @@ -109,10 +108,9 @@ def test_kmeans_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_kmeans_spmd_synthetic( - n_samples, n_features, n_clusters, dataframe, queue, dtype, use_raw_input + n_samples, n_features, n_clusters, dataframe, queue, dtype ): # Import spmd and batch algo from sklearnex.cluster import KMeans as KMeans_Batch @@ -144,9 +142,7 @@ def test_kmeans_spmd_synthetic( spmd_model = KMeans_SPMD( n_clusters=n_clusters, init=spmd_model_init.cluster_centers_, random_state=0 ) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_model.fit(local_dpt_X_train) + spmd_model.fit(local_dpt_X_train) batch_model = KMeans_Batch( n_clusters=n_clusters, init=spmd_model_init.cluster_centers_, random_state=0 ).fit(X_train) @@ -166,9 +162,7 @@ def test_kmeans_spmd_synthetic( # assert_allclose(spmd_model.n_iter_, batch_model.n_iter_, atol=1) # Ensure predictions of batch algo match spmd - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_result = spmd_model.predict(local_dpt_X_test) + spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test) _assert_kmeans_labels_allclose( diff --git a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py index f30534c616..ef9df83402 100644 --- a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py +++ b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py @@ -23,7 +23,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_statistic_data, _get_local_tensor, @@ -86,10 +85,9 @@ def test_covariance_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_covariance_spmd_synthetic( - n_samples, n_features, assume_centered, dataframe, queue, dtype, use_raw_input + n_samples, n_features, assume_centered, dataframe, queue, dtype ): # Import spmd and batch algo from sklearnex.preview.covariance import ( @@ -105,10 +103,9 @@ def test_covariance_spmd_synthetic( ) # Ensure results of batch algo match spmd - with config_context(use_raw_input=use_raw_input): - spmd_result = EmpiricalCovariance_SPMD(assume_centered=assume_centered).fit( - local_dpt_data - ) + spmd_result = EmpiricalCovariance_SPMD(assume_centered=assume_centered).fit( + local_dpt_data + ) batch_result = EmpiricalCovariance_Batch(assume_centered=assume_centered).fit(data) atol = 1e-5 if dtype == np.float32 else 1e-7 diff --git a/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py b/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py index 034f637fa5..9fe0891a8f 100644 --- a/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +++ b/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py @@ -23,7 +23,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_statistic_data, _get_local_tensor, @@ -151,7 +150,6 @@ def test_incremental_covariance_partial_fit_spmd_gold( "dataframe,queue", get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_incremental_covariance_partial_fit_spmd_synthetic( n_samples, @@ -161,7 +159,6 @@ def test_incremental_covariance_partial_fit_spmd_synthetic( dataframe, queue, dtype, - use_raw_input, ): # Import spmd and batch algo from sklearnex.covariance import IncrementalEmpiricalCovariance @@ -184,9 +181,7 @@ def test_incremental_covariance_partial_fit_spmd_synthetic( local_dpt_data = _convert_to_dataframe( split_local_data[i], sycl_queue=queue, target_df=dataframe ) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - inccov_spmd.partial_fit(local_dpt_data) + inccov_spmd.partial_fit(local_dpt_data) inccov.fit(dpt_data) diff --git a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py index 4683bf9547..84b873fbe6 100644 --- a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py @@ -23,7 +23,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_statistic_data, _get_local_tensor, @@ -219,7 +218,6 @@ def test_incremental_pca_fit_spmd_random( @pytest.mark.parametrize("num_samples", [200, 400]) @pytest.mark.parametrize("num_features", [10, 20]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_incremental_pca_partial_fit_spmd_random( dataframe, @@ -230,7 +228,6 @@ def test_incremental_pca_partial_fit_spmd_random( num_samples, num_features, dtype, - use_raw_input, ): # Import spmd and non-SPMD algo from sklearnex.preview.decomposition import IncrementalPCA @@ -255,9 +252,7 @@ def test_incremental_pca_partial_fit_spmd_random( split_local_X[i], sycl_queue=queue, target_df=dataframe ) dpt_X = _convert_to_dataframe(X_split[i], sycl_queue=queue, target_df=dataframe) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - incpca_spmd.partial_fit(local_dpt_X) + incpca_spmd.partial_fit(local_dpt_X) incpca.partial_fit(dpt_X) for attribute in attributes_to_compare: @@ -268,9 +263,7 @@ def test_incremental_pca_partial_fit_spmd_random( err_msg=f"{attribute} is incorrect", ) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - y_trans_spmd = incpca_spmd.transform(dpt_X_test) + y_trans_spmd = incpca_spmd.transform(dpt_X_test) y_trans = incpca.transform(dpt_X_test) assert_allclose(_as_numpy(y_trans_spmd), _as_numpy(y_trans), atol=tol) diff --git a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py index 2144620881..0408992b1a 100644 --- a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py @@ -23,7 +23,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_statistic_data, _get_local_tensor, @@ -93,10 +92,9 @@ def test_pca_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_pca_spmd_synthetic( - n_samples, n_features, n_components, whiten, dataframe, queue, dtype, use_raw_input + n_samples, n_features, n_components, whiten, dataframe, queue, dtype ): # TODO: Resolve issues with batch fallback and lack of support for n_rows_rank < n_cols if n_components == "mle" or n_components == 3: @@ -116,10 +114,7 @@ def test_pca_spmd_synthetic( ) # Ensure results of batch algo match spmd - with config_context(use_raw_input=use_raw_input): - spmd_result = PCA_SPMD(n_components=n_components, whiten=whiten).fit( - local_dpt_data - ) + spmd_result = PCA_SPMD(n_components=n_components, whiten=whiten).fit(local_dpt_data) batch_result = PCA_Batch(n_components=n_components, whiten=whiten).fit(data) tol = 1e-3 if dtype == np.float32 else 1e-7 diff --git a/sklearnex/spmd/ensemble/tests/test_forest_spmd.py b/sklearnex/spmd/ensemble/tests/test_forest_spmd.py index dbbd2f67ef..c5acb0297d 100644 --- a/sklearnex/spmd/ensemble/tests/test_forest_spmd.py +++ b/sklearnex/spmd/ensemble/tests/test_forest_spmd.py @@ -22,7 +22,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_classification_data, _generate_regression_data, @@ -110,7 +109,6 @@ def test_rfcls_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_rfcls_spmd_synthetic( n_samples, @@ -121,7 +119,6 @@ def test_rfcls_spmd_synthetic( dataframe, queue, dtype, - use_raw_input, ): n_features, n_classes = n_features_and_classes # Import spmd and batch algo @@ -152,15 +149,11 @@ def test_rfcls_spmd_synthetic( local_trees_mode=local_trees_mode, random_state=0, ) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_model.fit(local_dpt_X_train, local_dpt_y_train) + spmd_model.fit(local_dpt_X_train, local_dpt_y_train) batch_model = RandomForestClassifier_Batch( n_estimators=n_estimators, max_depth=max_depth, random_state=0 ).fit(X_train, y_train) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_result = spmd_model.predict(local_dpt_X_test) + spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test) pytest.skip("SPMD and batch random forest results not aligned") @@ -245,7 +238,6 @@ def test_rfreg_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_rfreg_spmd_synthetic( n_samples, @@ -256,7 +248,6 @@ def test_rfreg_spmd_synthetic( dataframe, queue, dtype, - use_raw_input, ): # Import spmd and batch algo from sklearnex.ensemble import RandomForestRegressor as RandomForestRegressor_Batch @@ -280,18 +271,16 @@ def test_rfreg_spmd_synthetic( ) # Ensure predictions of batch algo match spmd - with config_context(use_raw_input=use_raw_input): - spmd_model = RandomForestRegressor_SPMD( - n_estimators=n_estimators, - max_depth=max_depth, - local_trees_mode=local_trees_mode, - random_state=0, - ).fit(local_dpt_X_train, local_dpt_y_train) + spmd_model = RandomForestRegressor_SPMD( + n_estimators=n_estimators, + max_depth=max_depth, + local_trees_mode=local_trees_mode, + random_state=0, + ).fit(local_dpt_X_train, local_dpt_y_train) batch_model = RandomForestRegressor_Batch( n_estimators=n_estimators, max_depth=max_depth, random_state=0 ).fit(X_train, y_train) - with config_context(use_raw_input=use_raw_input): - spmd_result = spmd_model.predict(local_dpt_X_test) + spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test) # TODO: remove skips when SPMD and batch are aligned diff --git a/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py b/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py index c3dcd648f2..62ca93e066 100644 --- a/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py @@ -23,7 +23,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_regression_data, _get_local_tensor, @@ -269,7 +268,6 @@ def test_incremental_linear_regression_fit_spmd_random( @pytest.mark.parametrize("num_features", [5, 10]) @pytest.mark.parametrize("macro_block", [None, 1024]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi def test_incremental_linear_regression_partial_fit_spmd_random( dataframe, @@ -280,7 +278,6 @@ def test_incremental_linear_regression_partial_fit_spmd_random( num_features, macro_block, dtype, - use_raw_input, ): # Import spmd and non-SPMD algo from sklearnex.linear_model import IncrementalLinearRegression @@ -327,9 +324,7 @@ def test_incremental_linear_regression_partial_fit_spmd_random( dpt_X = _convert_to_dataframe(X_split[i], sycl_queue=queue, target_df=dataframe) dpt_y = _convert_to_dataframe(y_split[i], sycl_queue=queue, target_df=dataframe) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - inclin_spmd.partial_fit(local_dpt_X, local_dpt_y) + inclin_spmd.partial_fit(local_dpt_X, local_dpt_y) inclin.partial_fit(dpt_X, dpt_y) assert_allclose(_as_numpy(inclin.coef_), _as_numpy(inclin_spmd.coef_), atol=tol) @@ -338,8 +333,7 @@ def test_incremental_linear_regression_partial_fit_spmd_random( _as_numpy(inclin.intercept_), _as_numpy(inclin_spmd.intercept_), atol=tol ) - with config_context(use_raw_input=use_raw_input): - y_pred_spmd = inclin_spmd.predict(dpt_X_test) + y_pred_spmd = inclin_spmd.predict(dpt_X_test) y_pred = inclin.predict(dpt_X_test) assert_allclose(_as_numpy(y_pred_spmd), _as_numpy(y_pred), atol=tol) diff --git a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py index e5edd2df84..61b5a112e5 100644 --- a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py @@ -23,7 +23,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_regression_data, _get_local_tensor, @@ -105,11 +104,8 @@ def test_linear_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi -def test_linear_spmd_synthetic( - n_samples, n_features, dataframe, queue, dtype, use_raw_input -): +def test_linear_spmd_synthetic(n_samples, n_features, dataframe, queue, dtype): # Import spmd and batch algo from sklearnex.linear_model import LinearRegression as LinearRegression_Batch from sklearnex.spmd.linear_model import LinearRegression as LinearRegression_SPMD @@ -136,10 +132,8 @@ def test_linear_spmd_synthetic( ) # ensure trained model of batch algo matches spmd - # Configure raw input status for spmd estimator spmd_model = LinearRegression_SPMD() - with config_context(use_raw_input=use_raw_input): - spmd_model.fit(local_dpt_X_train, local_dpt_y_train) + spmd_model.fit(local_dpt_X_train, local_dpt_y_train) batch_model = LinearRegression_Batch().fit(X_train, y_train) tol = 1e-3 if dtype == np.float32 else 1e-7 @@ -154,9 +148,7 @@ def test_linear_spmd_synthetic( ) # ensure predictions of batch algo match spmd - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_result = spmd_model.predict(local_dpt_X_test) + spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test) _spmd_assert_allclose(spmd_result, batch_result, rtol=tol, atol=tol) diff --git a/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py b/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py index b34d58819d..38e74a23d5 100644 --- a/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py @@ -23,7 +23,6 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex import config_context from sklearnex.tests.utils.spmd import ( _generate_classification_data, _get_local_tensor, @@ -115,11 +114,8 @@ def test_logistic_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) @pytest.mark.mpi -def test_logistic_spmd_synthetic( - n_samples, n_features, C, tol, dataframe, queue, dtype, use_raw_input -): +def test_logistic_spmd_synthetic(n_samples, n_features, C, tol, dataframe, queue, dtype): # TODO: Resolve numerical issues when n_rows_rank < n_cols if n_samples <= n_features: pytest.skip("Numerical issues when rank rows < columns") @@ -148,9 +144,7 @@ def test_logistic_spmd_synthetic( # Ensure trained model of batch algo matches spmd spmd_model = LogisticRegression_SPMD(random_state=0, solver="newton-cg", C=C, tol=tol) - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_model.fit(local_dpt_X_train, local_dpt_y_train) + spmd_model.fit(local_dpt_X_train, local_dpt_y_train) batch_model = LogisticRegression_Batch( random_state=0, solver="newton-cg", C=C, tol=tol ).fit(dpt_X_train, dpt_y_train) @@ -168,9 +162,7 @@ def test_logistic_spmd_synthetic( ) # Ensure predictions of batch algo match spmd - # Configure raw input status for spmd estimator - with config_context(use_raw_input=use_raw_input): - spmd_result = spmd_model.predict(local_dpt_X_test) + spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(dpt_X_test) _spmd_assert_allclose(_as_numpy(spmd_result), _as_numpy(batch_result)) diff --git a/sklearnex/spmd/neighbors/neighbors.py b/sklearnex/spmd/neighbors/neighbors.py index 8a1b4e661a..9c7741b3de 100644 --- a/sklearnex/spmd/neighbors/neighbors.py +++ b/sklearnex/spmd/neighbors/neighbors.py @@ -57,7 +57,6 @@ class KNeighborsRegressor(base_KNeighborsRegressor): def _onedal_fit(self, X, y, queue=None): # SPMD is always GPU; extract queue from data when not provided - # (use_raw_input=True bypasses queue detection in dispatch) if queue is None: xp, is_array_api = get_namespace(X) if is_array_api and not _is_numpy_namespace(xp): diff --git a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py index c0267a33d0..5d5311ca43 100644 --- a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +++ b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py @@ -112,14 +112,7 @@ def test_knncls_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize( - "use_raw_input,array_api_dispatch", - [ - (True, False), - (False, True), - (False, False), - ], -) +@pytest.mark.parametrize("array_api_dispatch", [True, False]) @pytest.mark.mpi def test_knncls_spmd_synthetic( n_samples, @@ -129,7 +122,6 @@ def test_knncls_spmd_synthetic( dataframe, queue, dtype, - use_raw_input, array_api_dispatch, metric="euclidean", ): @@ -157,20 +149,16 @@ def test_knncls_spmd_synthetic( spmd_model = KNeighborsClassifier_SPMD( n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute" ) - # Configure raw input status and array_api_dispatch for spmd estimator - with config_context( - use_raw_input=use_raw_input, array_api_dispatch=array_api_dispatch - ): + # Configure array_api_dispatch for spmd estimator + with config_context(array_api_dispatch=array_api_dispatch): spmd_model.fit(local_dpt_X_train, local_dpt_y_train) batch_model = KNeighborsClassifier_Batch( n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute" ).fit(X_train, y_train) spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test) batch_dists, batch_indcs = batch_model.kneighbors(X_test) - # Configure raw input status and array_api_dispatch for spmd estimator - with config_context( - use_raw_input=use_raw_input, array_api_dispatch=array_api_dispatch - ): + # Configure array_api_dispatch for spmd estimator + with config_context(array_api_dispatch=array_api_dispatch): spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test) @@ -262,14 +250,7 @@ def test_knnreg_spmd_gold(dataframe, queue): get_dataframes_and_queues(dataframe_filter_="dpnp", device_filter_="gpu"), ) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize( - "use_raw_input,array_api_dispatch", - [ - (True, False), - (False, True), - (False, False), - ], -) +@pytest.mark.parametrize("array_api_dispatch", [True, False]) @pytest.mark.mpi def test_knnreg_spmd_synthetic( n_samples, @@ -280,7 +261,6 @@ def test_knnreg_spmd_synthetic( dataframe, queue, dtype, - use_raw_input, array_api_dispatch, ): # Import spmd and batch algo @@ -306,20 +286,16 @@ def test_knnreg_spmd_synthetic( spmd_model = KNeighborsRegressor_SPMD( n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute" ) - # Configure raw input status and array_api_dispatch for spmd estimator - with config_context( - use_raw_input=use_raw_input, array_api_dispatch=array_api_dispatch - ): + # Configure array_api_dispatch for spmd estimator + with config_context(array_api_dispatch=array_api_dispatch): spmd_model.fit(local_dpt_X_train, local_dpt_y_train) batch_model = KNeighborsRegressor_Batch( n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute" ).fit(X_train, y_train) spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test) batch_dists, batch_indcs = batch_model.kneighbors(X_test) - # Configure raw input status and array_api_dispatch for spmd estimator - with config_context( - use_raw_input=use_raw_input, array_api_dispatch=array_api_dispatch - ): + # Configure array_api_dispatch for spmd estimator + with config_context(array_api_dispatch=array_api_dispatch): spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test)