uxlfoundation
diff --git a/‎daal4py/sklearn/linear_model/logistic_path.py‎
Lines changed: 9 additions & 3 deletions b/‎daal4py/sklearn/linear_model/logistic_path.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎deselected_tests.yaml‎
Lines changed: 5 additions & 0 deletions b/‎deselected_tests.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎doc/sources/algorithms.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/sources/algorithms.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/sources/array_api.rst‎
Lines changed: 5 additions & 0 deletions b/‎doc/sources/array_api.rst‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎examples/sklearnex/logistic_regression_spmd.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/sklearnex/logistic_regression_spmd.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎onedal/linear_model/logistic_regression.py‎
Lines changed: 35 additions & 185 deletions b/‎onedal/linear_model/logistic_regression.py‎
Lines changed: 35 additions & 185 deletions
diff --git a/‎onedal/spmd/linear_model/logistic_regression.py‎
Lines changed: 2 additions & 6 deletions b/‎onedal/spmd/linear_model/logistic_regression.py‎
Lines changed: 2 additions & 6 deletions
@@ -39,7 +39,13 @@
 import daal4py as d4p
 
 from .._n_jobs_support import control_n_jobs
-from .._utils import PatchingConditionsChain, getFPType, is_sparse, sklearn_check_version
+from .._utils import (
+    PatchingConditionsChain,
+    check_is_array_api,
+    getFPType,
+    is_sparse,
+    sklearn_check_version,
+)
 from ..utils.validation import check_feature_names
 from .logistic_loss import (
     _daal4py_cross_entropy_loss_extra_args,
@@ -444,7 +450,7 @@ def daal4py_predict(self, X, resultsToEvaluate):
     _dal_ready = _patching_status.and_conditions(
         [
             (
-                not ((not isinstance(X, np.ndarray)) and hasattr(X, "__dlpack__")),
+                not check_is_array_api(X),
                 "Array API inputs not supported.",
             )
         ]
@@ -772,7 +778,7 @@ def logistic_regression_path_dispatcher(
             ),
             (not is_sparse(X), "X is sparse. Sparse input is not supported."),
             (
-                not ((not isinstance(X, np.ndarray)) and hasattr(X, "__dlpack__")),
+                not check_is_array_api(X),
                 "Array API inputs not supported.",
             ),
             (sample_weight is None, "Sample weights are not supported."),
 
@@ -387,6 +387,11 @@ deselected_tests:
   - linear_model/tests/test_logistic.py::test_logistic_cv_sparse[42-csr_matrix]
   - linear_model/tests/test_logistic.py::test_logistic_cv_sparse[42-csr_array]
   - tests/test_common.py::test_estimators[LogisticRegressionCV(cv=3,max_iter=5,use_legacy_attributes=False)-check_sample_weight_equivalence_on_dense_data]
+  - linear_model/tests/test_logistic.py::test_logistic_regression_array_api_compliance[array_api_strict-CPU_DEVICE-float64-None-False-False-False]
+  - linear_model/tests/test_logistic.py::test_logistic_regression_array_api_compliance[array_api_strict-CPU_DEVICE-float64-None-False-True-False]
+
+  # Logistic regression array API compliance test fails only on Windows due to a convergence issue
+  - linear_model/tests/test_logistic.py::test_logistic_regression_array_api_compliance[array_api_strict-device1-float32-balanced-True-True-True]
 
   # Scikit-learn does not constraint multinomial logistic intercepts to sum to zero.
   # Softmax function is invariant to additions by a constant, so even though the numbers
 
@@ -677,7 +677,7 @@ Classification
        - ``intercept_scaling`` != `1`
        - ``warm_start`` = ``True``
        - ``l1_ratio`` != ``0``
-     - No limitations
+     - Method ``score`` is not supported.
      - Only binary classification is supported
 
 Regression
 
@@ -102,6 +102,7 @@ The following patched classes have support for array API inputs:
 - :obj:`sklearn.ensemble.RandomForestClassifier`
 - :obj:`sklearn.ensemble.RandomForestRegressor`
 - :obj:`sklearn.linear_model.LinearRegression`
+- :obj:`sklearn.linear_model.LogisticRegression`
 - :obj:`sklearn.linear_model.Ridge`
 - :obj:`sklearnex.linear_model.IncrementalLinearRegression`
 - :obj:`sklearnex.linear_model.IncrementalRidge`
@@ -165,6 +166,10 @@ Note that some cases of estimator-specific methods are still fully array API com
 for example, :meth:`sklearn.neighbors.NearestNeighbors.kneighbors` will produce outputs
 of array API classes when fitted to them.
 
+For :obj:`sklearn.linear_model.LogisticRegression`, array API support is limited to inputs that are
+allocated on a GPU device: array API inputs on CPU, other than NumPy arrays, will not be dispatched
+to the accelerated routines from the |sklearnex|.
+
 Example usage
 =============
 
 
@@ -68,7 +68,7 @@ def generate_X_y(par, seed):
 dpnp_y_train = dpnp.asarray(y_train, usm_type="device", sycl_queue=q)
 dpnp_X_test = dpnp.asarray(X_test, usm_type="device", sycl_queue=q)
 
-model_spmd = LogisticRegression()
+model_spmd = LogisticRegression(solver="newton-cg")
 model_spmd.fit(dpnp_X_train, dpnp_y_train)
 
 y_predict = model_spmd.predict(dpnp_X_test)
 
@@ -15,28 +15,16 @@
 # ==============================================================================
 
 from abc import ABCMeta, abstractmethod
-from numbers import Number
 
 import numpy as np
 
-from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d
+from daal4py.sklearn._utils import daal_check_version
 from onedal._device_offload import supports_queue
 from onedal.common._backend import bind_default_backend
-from onedal.utils import _sycl_queue_manager as QM
 
-from .._config import _get_config
 from ..common._estimator_checks import _check_is_fitted
-from ..common._mixin import ClassifierMixin
 from ..datatypes import from_table, to_table
-from ..utils._array_api import _get_sycl_namespace
-from ..utils.validation import (
-    _check_array,
-    _check_n_features,
-    _check_X_y,
-    _is_csr,
-    _num_features,
-    _type_of_target,
-)
+from ..utils.validation import _check_n_features, _is_csr, _num_features
 
 
 class BaseLogisticRegression(metaclass=ABCMeta):
@@ -49,14 +37,13 @@ def __init__(self, tol, C, fit_intercept, solver, max_iter, algorithm):
         self.max_iter = max_iter
         self.algorithm = algorithm
 
-    @abstractmethod
-    def train(self, params, X, y): ...
+    @bind_default_backend("logistic_regression.classification")
+    def train(self, params, X, y, queue=None): ...
 
-    @abstractmethod
-    def infer(self, params, X): ...
+    @bind_default_backend("logistic_regression.classification")
+    def infer(self, params, model, X, queue=None): ...
 
-    # direct access to the backend model constructor
-    @abstractmethod
+    @bind_default_backend("logistic_regression.classification")
     def model(self): ...
 
     def _get_onedal_params(self, is_csr, dtype=np.float32):
@@ -76,172 +63,64 @@ def _get_onedal_params(self, is_csr, dtype=np.float32):
             ),
         }
 
-    def _fit(self, X, y):
-        use_raw_input = _get_config()["use_raw_input"] is True
-
-        sparsity_enabled = daal_check_version((2024, "P", 700))
-        if not use_raw_input:
-            X, y = _check_X_y(
-                X,
-                y,
-                accept_sparse=sparsity_enabled,
-                force_all_finite=True,
-                accept_2d_y=False,
-                dtype=[np.float64, np.float32],
-            )
-            if _type_of_target(y) != "binary":
-                raise ValueError("Only binary classification is supported")
-
-            self.classes_, y = np.unique(y, return_inverse=True)
-            y = y.astype(dtype=np.int32)
-        else:
-            _, xp, _ = _get_sycl_namespace(X)
-            # try catch needed for raw_inputs + array_api data where unlike
-            # numpy the way to yield unique values is via `unique_values`
-            # This should be removed when refactored for gpu zero-copy
-            try:
-                self.classes_ = xp.unique(y)
-            except AttributeError:
-                self.classes_ = xp.unique_values(y)
-
-            n_classes = len(self.classes_)
-            if n_classes != 2:
-                raise ValueError("Only binary classification is supported")
+    @supports_queue
+    def fit(self, X, y, queue=None):
+
         is_csr = _is_csr(X)
 
         self.n_features_in_ = _num_features(X, fallback_1d=True)
-        X_table, y_table = to_table(X, y, queue=QM.get_global_queue())
+
+        X_table, y_table = to_table(X, y, queue=queue)
         params = self._get_onedal_params(is_csr, X_table.dtype)
 
         result = self.train(params, X_table, y_table)
 
         self._onedal_model = result.model
+
         self.n_iter_ = np.array([result.iterations_count])
 
         # _n_inner_iter is the total number of cg-solver iterations
         if daal_check_version((2024, "P", 300)) and self.solver == "newton-cg":
             self._n_inner_iter = result.inner_iterations_count
 
-        coeff = from_table(result.model.packed_coefficients)
+        coeff = from_table(result.model.packed_coefficients, like=X)
         self.coef_, self.intercept_ = coeff[:, 1:], coeff[:, 0]
 
         return self
 
-    def _create_model(self):
-        m = self.model()
-
-        coefficients = self.coef_
-        dtype = get_dtype(coefficients)
-        coefficients = np.asarray(coefficients, dtype=dtype)
-
-        if coefficients.ndim == 2:
-            n_features_in = coefficients.shape[1]
-            assert coefficients.shape[0] == 1
-        else:
-            n_features_in = coefficients.size
-
-        intercept = self.intercept_
-        if not isinstance(intercept, Number):
-            intercept = np.asarray(intercept, dtype=dtype)
-            assert intercept.size == 1
-
-        intercept = _check_array(
-            intercept,
-            dtype=[np.float64, np.float32],
-            force_all_finite=True,
-            ensure_2d=False,
-        )
-        coefficients = _check_array(
-            coefficients,
-            dtype=[np.float64, np.float32],
-            force_all_finite=True,
-            ensure_2d=False,
-        )
-
-        coefficients, intercept = make2d(coefficients), make2d(intercept)
-
-        assert coefficients.shape == (1, n_features_in)
-        assert intercept.shape == (1, 1)
-
-        desired_shape = (1, n_features_in + 1)
-        packed_coefficients = np.zeros(desired_shape, dtype=dtype)
-
-        packed_coefficients[:, 1:] = coefficients
-        if self.fit_intercept:
-            packed_coefficients[:, 0][:, np.newaxis] = intercept
-
-        m.packed_coefficients = to_table(packed_coefficients, queue=QM.get_global_queue())
-
-        self._onedal_model = m
-
-        return m
-
-    def _infer(self, X):
+    def _infer(self, X, queue=None):
         _check_is_fitted(self)
 
-        sparsity_enabled = daal_check_version((2024, "P", 700))
-
-        if not _get_config()["use_raw_input"]:
-            X = _check_array(
-                X,
-                dtype=[np.float64, np.float32],
-                accept_sparse=sparsity_enabled,
-                force_all_finite=True,
-                ensure_2d=False,
-                accept_large_sparse=sparsity_enabled,
-            )
         is_csr = _is_csr(X)
-        _check_n_features(self, X, False)
 
-        X = make2d(X)
+        _check_n_features(self, X, False)
 
-        if hasattr(self, "_onedal_model"):
-            model = self._onedal_model
-        else:
-            model = self._create_model()
+        assert hasattr(self, "_onedal_model")
 
-        X_table = to_table(X, queue=QM.get_global_queue())
-        params = self._get_onedal_params(is_csr, X.dtype)
+        X_table = to_table(X, queue=queue)
+        params = self._get_onedal_params(is_csr, X_table.dtype)
 
-        result = self.infer(params, model, X_table)
+        result = self.infer(params, self._onedal_model, X_table)
         return result
 
-    def _predict(self, X):
-        result = self._infer(X)
-        _, xp, _ = _get_sycl_namespace(X)
-        y = from_table(result.responses, like=X)
-        y = xp.take(xp.asarray(self.classes_), xp.reshape(y, (-1,)), axis=0)
+    @supports_queue
+    def predict(self, X, queue=None, classes=None):
+        result = self._infer(X, queue)
+
+        # Starting from sklearn 1.9, the type of the predicted labels should match the type of self.classes_.
+        # In the general case, the `classes` argument is provided by the sklearnex estimator;
+        # if it is not provided, the result is of the same type as X.
+        y = from_table(result.responses, like=classes if classes is not None else X)
         return y
 
-    def _predict_proba(self, X):
-        result = self._infer(X)
-        _, xp, _ = _get_sycl_namespace(X)
+    @supports_queue
+    def predict_proba(self, X, queue=None):
+        result = self._infer(X, queue)
         y = from_table(result.probabilities, like=X)
-        y = xp.reshape(y, -1)
-        return xp.stack([1 - y, y], axis=1)
-
-    def _predict_log_proba(self, X):
-        _, xp, _ = _get_sycl_namespace(X)
-        y_proba = self._predict_proba(X)
-        # These are the same thresholds used by oneDAL during the model fitting procedure
-        if y_proba.dtype == np.float32:
-            min_prob = 1e-7
-            max_prob = 1.0 - 1e-7
-        else:
-            min_prob = 1e-15
-            max_prob = 1.0 - 1e-15
-        y_proba = xp.clip(y_proba, min_prob, max_prob)
-        return xp.log(y_proba)
-
-    def _decision_function(self, X):
-        _, xp, _ = _get_sycl_namespace(X)
-        raw = xp.matmul(X, xp.reshape(self.coef_, -1))
-        if self.fit_intercept:
-            raw += self.intercept_
-        return raw
-
-
-class LogisticRegression(ClassifierMixin, BaseLogisticRegression):
+        return y
+
+
+class LogisticRegression(BaseLogisticRegression):
 
     def __init__(
         self,
@@ -262,32 +141,3 @@ def __init__(
             max_iter=max_iter,
             algorithm=algorithm,
         )
-
-    @bind_default_backend("logistic_regression.classification")
-    def train(self, params, X, y, queue=None): ...
-
-    @bind_default_backend("logistic_regression.classification")
-    def infer(self, params, X, model, queue=None): ...
-
-    @bind_default_backend("logistic_regression.classification")
-    def model(self): ...
-
-    @supports_queue
-    def fit(self, X, y, queue=None):
-        return self._fit(X, y)
-
-    @supports_queue
-    def predict(self, X, queue=None):
-        return self._predict(X)
-
-    @supports_queue
-    def predict_proba(self, X, queue=None):
-        return self._predict_proba(X)
-
-    @supports_queue
-    def predict_log_proba(self, X, queue=None):
-        return self._predict_log_proba(X)
-
-    @supports_queue
-    def decision_function(self, X, queue=None):
-        return self._decision_function(X)
@@ -32,13 +32,9 @@ def fit(self, X, y, queue=None):
         return super().fit(X, y, queue=queue)
 
     @support_input_format
-    def predict(self, X, queue=None):
-        return super().predict(X, queue=queue)
+    def predict(self, X, queue=None, classes=None):
+        return super().predict(X, queue=queue, classes=classes)
 
     @support_input_format
     def predict_proba(self, X, queue=None):
         return super().predict_proba(X, queue=queue)
-
-    @support_input_format
-    def predict_log_proba(self, X, queue=None):
-        return super().predict_log_proba(X, queue=queue)