From 646ac8c3298bc1731c04b72b4479b439ae4f6454 Mon Sep 17 00:00:00 2001 From: chico Date: Wed, 24 Mar 2021 12:21:02 +0100 Subject: [PATCH 1/4] [feat] Fix random halt problems on traditional pipelines --- autoPyTorch/evaluation/abstract_evaluator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index 5b8d5d5c3..b53b35977 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -351,7 +351,7 @@ def _get_pipeline(self) -> BaseEstimator: if isinstance(self.configuration, int): pipeline = self.pipeline_class(config=self.configuration, random_state=np.random.RandomState(self.seed), - init_params=self.fit_dictionary) + init_params=self._init_params) elif isinstance(self.configuration, Configuration): pipeline = self.pipeline_class(config=self.configuration, dataset_properties=self.dataset_properties, @@ -364,7 +364,7 @@ def _get_pipeline(self) -> BaseEstimator: pipeline = self.pipeline_class(config=self.configuration, dataset_properties=self.dataset_properties, random_state=np.random.RandomState(self.seed), - init_params=self.fit_dictionary) + init_params=self._init_params) else: raise ValueError("Invalid configuration entered") return pipeline From 501a9f44fe44f8495e487717e8399a6c54006dc7 Mon Sep 17 00:00:00 2001 From: chico Date: Sat, 27 Mar 2021 11:10:28 +0100 Subject: [PATCH 2/4] Documentation update --- autoPyTorch/evaluation/abstract_evaluator.py | 49 ++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index b53b35977..cdf24b021 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -54,6 +54,23 @@ class MyTraditionalTabularClassificationPipeline(BaseEstimator): + """ + A wrapper class that holds a pipeline for traditional classification. + Estimators like CatBoost, and Random Forest are considered traditional machine + learning models and are fitted before neural architecture search. + + This class is an interface to fit a pipeline containing a traditional machine + learning model, and is the final object that is stored for inference. + + Attributes: + dataset_properties (Dict[str, Any]): + A dictionary containing dataset specific information + random_state (Optional[Union[int, np.random.RandomState]]): + Object that contains a seed and allows for reproducible results + init_params (Optional[Dict]): + An optional dictionary that is passed to the pipeline's steps. It complies + a similar function as the kwargs + """ def __init__(self, config: str, dataset_properties: Dict[str, Any], random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -98,6 +115,21 @@ def get_default_pipeline_options() -> Dict[str, Any]: class DummyClassificationPipeline(DummyClassifier): + """ + A wrapper class that holds a pipeline for dummy classification. + + A wrapper over DummyClassifier of scikit learn. This estimator is considered the + worst performing model. In case of failure, at least this model will be fitted. + + Attributes: + dataset_properties (Dict[str, Any]): + A dictionary containing dataset specific information + random_state (Optional[Union[int, np.random.RandomState]]): + Object that contains a seed and allows for reproducible results + init_params (Optional[Dict]): + An optional dictionary that is passed to the pipeline's steps. It complies + a similar function as the kwargs + """ def __init__(self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]] = None, init_params: Optional[Dict] = None @@ -148,6 +180,23 @@ def get_default_pipeline_options() -> Dict[str, Any]: class DummyRegressionPipeline(DummyRegressor): + """ + A wrapper class that holds a pipeline for dummy regression. + + A wrapper over DummyRegressor of scikit learn. This estimator is considered the + worst performing model. In case of failure, at least this model will be fitted. + + Attributes: + dataset_properties (Dict[str, Any]): + A dictionary containing dataset specific information + random_state (Optional[Union[int, np.random.RandomState]]): + Object that contains a seed and allows for reproducible results + init_params (Optional[Dict]): + An optional dictionary that is passed to the pipeline's steps. It complies + a similar function as the kwargs + """ + def __init__(self, config: Configuration, + random_state: Optional[Union[int, np.random.RandomState]] = None, def __init__(self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]] = None, init_params: Optional[Dict] = None) -> None: From c34ac76e58ac14ef90b011a9c3113d2f2acd74cd Mon Sep 17 00:00:00 2001 From: chico Date: Sat, 27 Mar 2021 11:12:58 +0100 Subject: [PATCH 3/4] Fix flake --- autoPyTorch/evaluation/abstract_evaluator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index cdf24b021..c14a88496 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -195,8 +195,6 @@ class DummyRegressionPipeline(DummyRegressor): An optional dictionary that is passed to the pipeline's steps. It complies a similar function as the kwargs """ - def __init__(self, config: Configuration, - random_state: Optional[Union[int, np.random.RandomState]] = None, def __init__(self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]] = None, init_params: Optional[Dict] = None) -> None: From 1f61cf777b1ca8fea308bcd23b4dee7b0cd5f52a Mon Sep 17 00:00:00 2001 From: chico Date: Mon, 29 Mar 2021 15:01:20 +0200 Subject: [PATCH 4/4] Flake due to kernel pca errors --- .../components/preprocessing/test_feature_preprocessor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py index 225193217..113e21121 100644 --- a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py +++ b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py @@ -1,3 +1,5 @@ +import flaky + import numpy as np import pytest @@ -51,6 +53,7 @@ def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor): transformed = column_transformer.transform(X['X_train']) assert isinstance(transformed, np.ndarray) + @flaky.flaky(max_runs=3) def test_pipeline_fit_include(self, fit_dictionary_tabular, preprocessor): """ This test ensures that a tabular classification