From c40f2d9f23cfe1e5f117713354837554e31c504d Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Sat, 8 May 2021 18:32:07 +0200
Subject: [PATCH 01/14] In progress, changing tests

---
 test/test_api/test_api.py | 80 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index 62d220bfd..9b69331d5 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -2,6 +2,7 @@
 import pathlib
 import pickle
 import sys
+import time
 import unittest
 
 import numpy as np
@@ -16,7 +17,7 @@
 from sklearn.base import clone
 from sklearn.ensemble import VotingClassifier, VotingRegressor
 
-from smac.runhistory.runhistory import RunHistory
+from smac.runhistory.runhistory import RunHistory, StatusType
 
 import torch
 
@@ -26,13 +27,90 @@
     CrossValTypes,
     HoldoutValTypes,
 )
+from autoPyTorch.evaluation.train_evaluator import TrainEvaluator
 from autoPyTorch.optimizer.smbo import AutoMLSMBO
 from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy
 
 
+# ========
 # Fixtures
 # ========
+class DummyTrainEvaluator(TrainEvaluator):
+
+    def fit_predict_and_loss(self) -> None:
+        self.start_time = time.time()
+        split_id = 0
+        self.logger.info("Starting fit {}".format(split_id))
+
+        pipeline = self._get_pipeline()
+
+        train_split, test_split = self.splits[split_id]
+        self.Y_optimization = self.y_train[test_split]
+        self.Y_actual_train = self.y_train[train_split]
+        y_train_pred, y_opt_pred, y_valid_pred, y_test_pred = self._fit_and_predict(pipeline, split_id,
+                                                                                    train_indices=train_split,
+                                                                                    test_indices=test_split,
+                                                                                    add_pipeline_to_self=True)
+        train_loss = self._loss(self.y_train[train_split], y_train_pred)
+        loss = self._loss(self.y_train[test_split], y_opt_pred)
+        additional_run_info = pipeline.get_additional_run_info() if hasattr(
+            pipeline, 'get_additional_run_info') else {}
+
+        status = StatusType.SUCCESS
+
+        self.finish_up(
+            loss=loss,
+            train_loss=train_loss,
+            opt_pred=y_opt_pred,
+            valid_pred=y_valid_pred,
+            test_pred=y_test_pred,
+            additional_run_info=additional_run_info,
+            file_output=True,
+            status=status,
+        )
+
+
+# create closure for evaluating an algorithm
+def dummy_eval_function(
+    backend,
+    queue,
+    metric,
+    budget: float,
+    config,
+    seed: int,
+    output_y_hat_optimization: bool,
+    num_run: int,
+    include,
+    exclude,
+    disable_file_output,
+    pipeline_config=None,
+    budget_type=None,
+    init_params=None,
+    logger_port=None,
+    all_supported_metrics=True,
+    search_space_updates=None,
+    instance: str = None,
+) -> None:
+    evaluator = TrainEvaluator(
+        backend=backend,
+        queue=queue,
+        metric=metric,
+        configuration=config,
+        seed=seed,
+        num_run=num_run,
+        output_y_hat_optimization=output_y_hat_optimization,
+        include=include,
+        exclude=exclude,
+        disable_file_output=disable_file_output,
+        init_params=init_params,
+        budget=budget,
+        budget_type=budget_type,
+        logger_port=logger_port,
+        all_supported_metrics=all_supported_metrics,
+        pipeline_config=pipeline_config,
+        search_space_updates=search_space_updates
+    )
+    evaluator.fit_predict_and_loss()
 
 
 # Test
 # ========

From 91b1b1f1aa6f60780f5531a1fb57c359afda5a15 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Tue, 11 May 2021 15:34:17 +0200
Subject: [PATCH 02/14] Reduce time for tests

---
 test/conftest.py                              |  35 ++--
 test/test_api/test_api.py                     | 151 ++++++++++--------
 .../components/setup/test_setup_networks.py   |   3 +
 .../test_pipeline/components/training/base.py |   9 +-
 .../components/training/test_training.py      |   9 +-
.../test_tabular_classification.py | 20 ++- test/test_pipeline/test_tabular_regression.py | 27 +++- 7 files changed, 163 insertions(+), 91 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 592d41165..67f9e84cc 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -25,6 +25,9 @@ from autoPyTorch.utils.pipeline import get_dataset_requirements +N_SAMPLES = 200 + + @pytest.fixture(scope="session") def callattr_ahead_of_alltests(request): """ @@ -191,7 +194,7 @@ def session_run_at_end(): def get_tabular_data(task): if task == "classification_numerical_only": X, y = make_classification( - n_samples=200, + n_samples=N_SAMPLES, n_features=4, n_informative=3, n_redundant=1, @@ -207,18 +210,18 @@ def get_tabular_data(task): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] X = X[categorical_columns] - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) elif task == "classification_numerical_and_categorical": X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) elif task == "regression_numerical_only": - X, y = make_regression(n_samples=200, + X, y = make_regression(n_samples=N_SAMPLES, n_features=4, n_informative=3, n_targets=1, @@ -240,8 +243,8 @@ def get_tabular_data(task): else: X[column] = X[column].fillna(0) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] y = (y - y.mean()) / y.std() validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) @@ -256,8 +259,8 @@ def get_tabular_data(task): else: X[column] = X[column].fillna(0) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] y = (y - y.mean()) / y.std() validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) elif task == 'iris': @@ -288,7 +291,7 @@ def get_fit_dictionary(X, y, validator, backend): 'num_run': np.random.randint(50), 'device': 'cpu', 'budget_type': 'epochs', - 'epochs': 100, + 'epochs': 5, 'torch_num_threads': 1, 'early_stopping': 10, 'working_dir': '/tmp', @@ -326,7 +329,7 @@ def dataset(request): @pytest.fixture def dataset_traditional_classifier_num_only(): X, y = make_classification( - n_samples=200, + n_samples=N_SAMPLES, n_features=4, n_informative=3, n_redundant=1, @@ -344,7 +347,7 @@ def dataset_traditional_classifier_categorical_only(): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] X = X[categorical_columns] - X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int) return X, y @@ -352,7 +355,7 @@ def dataset_traditional_classifier_categorical_only(): def dataset_traditional_classifier_num_categorical(): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) y = y.astype(np.int) - X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int) return X, y @@ -456,3 +459,7 @@ def loss_mse(): @pytest.fixture def loss_details(request): return request.getfixturevalue(request.param) + 
+@pytest.fixture +def n_samples(): + return N_SAMPLES \ No newline at end of file diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 9b69331d5..c81b2de8d 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -27,9 +27,15 @@ CrossValTypes, HoldoutValTypes, ) +from autoPyTorch.evaluation.abstract_evaluator import ( + DummyClassificationPipeline, + DummyRegressionPipeline, + fit_and_suppress_warnings +) from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy +from autoPyTorch.constants import REGRESSION_TASKS # ======== @@ -37,38 +43,44 @@ # ======== class DummyTrainEvaluator(TrainEvaluator): - def fit_predict_and_loss(self) -> None: - self.start_time = time.time() - split_id = 0 - self.logger.info("Starting fit {}".format(split_id)) - - pipeline = self._get_pipeline() - - train_split, test_split = self.splits[split_id] - self.Y_optimization = self.y_train[test_split] - self.Y_actual_train = self.y_train[train_split] - y_train_pred, y_opt_pred, y_valid_pred, y_test_pred = self._fit_and_predict(pipeline, split_id, - train_indices=train_split, - test_indices=test_split, - add_pipeline_to_self=True) - train_loss = self._loss(self.y_train[train_split], y_train_pred) - loss = self._loss(self.y_train[test_split], y_opt_pred) - additional_run_info = pipeline.get_additional_run_info() if hasattr( - pipeline, 'get_additional_run_info') else {} - - status = StatusType.SUCCESS - - self.finish_up( - loss=loss, - train_loss=train_loss, - opt_pred=y_opt_pred, - valid_pred=y_valid_pred, - test_pred=y_test_pred, - additional_run_info=additional_run_info, - file_output=True, - status=status, + def _fit_and_predict(self, pipeline, fold: int, train_indices, + test_indices, + add_pipeline_to_self + ): + + if self.task_type in REGRESSION_TASKS: + pipeline = DummyRegressionPipeline(config=1) + else: + pipeline = DummyClassificationPipeline(config=1) + + self.indices[fold] = ((train_indices, test_indices)) + + X = {'train_indices': train_indices, + 'val_indices': test_indices, + 'split_id': fold, + 'num_run': self.num_run, + **self.fit_dictionary} # fit dictionary + y = None + fit_and_suppress_warnings(self.logger, pipeline, X, y) + self.logger.info("Model fitted, now predicting") + ( + Y_train_pred, + Y_opt_pred, + Y_valid_pred, + Y_test_pred + ) = self._predict( + pipeline, + train_indices=train_indices, + test_indices=test_indices, ) + if add_pipeline_to_self: + self.pipeline = pipeline + else: + self.pipelines[fold] = pipeline + + return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred + # create closure for evaluating an algorithm def dummy_eval_function( @@ -112,19 +124,29 @@ def dummy_eval_function( ) evaluator.fit_predict_and_loss() + +def dummy_do_dummy_prediction(): + return + + # Test # ======== +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', + new=dummy_eval_function) @pytest.mark.parametrize('openml_id', (40981, )) -@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, - CrossValTypes.k_fold_cross_validation, - )) -def test_tabular_classification(openml_id, resampling_strategy, backend): +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + )) +def test_tabular_classification(openml_id, resampling_strategy, backend, 
resampling_strategy_args, n_samples): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( data_id=int(openml_id), return_X_y=True, as_frame=True ) + X, y = X.iloc[:n_samples], y.iloc[:n_samples] + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( X, y, random_state=1) @@ -137,21 +159,23 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): estimator = TabularClassificationTask( backend=backend, resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, include_components=include ) - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=50, - enable_traditional_pipeline=False, - ) + with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='accuracy', + total_walltime_limit=40, + func_eval_time_limit_secs=5, + enable_traditional_pipeline=False, + ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_classification' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5 + expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -212,7 +236,6 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, @@ -223,9 +246,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingClassifier) - assert len(model.estimators_) == 5 - assert isinstance(model.estimators_[0].named_steps['network'].get_network(), - torch.nn.Module) + assert len(model.estimators_) == 2 else: pytest.fail(resampling_strategy) @@ -278,10 +299,13 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): @pytest.mark.parametrize('openml_name', ("boston", )) -@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, - CrossValTypes.k_fold_cross_validation, - )) -def test_tabular_regression(openml_name, resampling_strategy, backend): +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', + new=dummy_eval_function) +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + )) +def test_tabular_regression(openml_name, resampling_strategy, backend, resampling_strategy_args, n_samples): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( @@ -289,6 +313,8 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): return_X_y=True, as_frame=True ) + X, 
y = X.iloc[:n_samples], y.iloc[:n_samples] + # normalize values y = (y - y.mean()) / y.std() @@ -312,21 +338,23 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): estimator = TabularRegressionTask( backend=backend, resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, include_components=include ) - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='r2', - total_walltime_limit=100, - func_eval_time_limit_secs=10, - enable_traditional_pipeline=False, + with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='r2', + total_walltime_limit=35, + func_eval_time_limit_secs=5, + enable_traditional_pipeline=False, ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_regression' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5 + expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -383,7 +411,6 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, @@ -393,9 +420,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingRegressor) - assert len(model.estimators_) == 5 - assert isinstance(model.estimators_[0].named_steps['network'].get_network(), - torch.nn.Module) + assert len(model.estimators_) == 2 else: pytest.fail(resampling_strategy) diff --git a/test/test_pipeline/components/setup/test_setup_networks.py b/test/test_pipeline/components/setup/test_setup_networks.py index 6826d7ef2..93ded0102 100644 --- a/test/test_pipeline/components/setup/test_setup_networks.py +++ b/test/test_pipeline/components/setup/test_setup_networks.py @@ -31,6 +31,9 @@ def test_pipeline_fit(self, fit_dictionary_tabular, embedding, backbone, head): """This test makes sure that the pipeline is able to fit every combination of network embedding, backbone, head""" + # increase number of epochs to test for performance + fit_dictionary_tabular['epochs'] = 50 + include = {'network_backbone': [backbone], 'network_head': [head], 'network_embedding': [embedding]} if len(fit_dictionary_tabular['dataset_properties'] diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 10d9ea416..ebf8486bf 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -21,10 +21,11 @@ class BaseTraining(unittest.TestCase): def prepare_trainer(self, trainer: BaseTrainerComponent, - task_type: int): + task_type: int, + epochs=50): if task_type in CLASSIFICATION_TASKS: X, y = make_classification( - n_samples=5000, + n_samples=500, 
n_features=4, n_informative=3, n_redundant=1, @@ -42,7 +43,7 @@ def prepare_trainer(self, elif task_type in REGRESSION_TASKS: X, y = make_regression( - n_samples=5000, + n_samples=500, n_features=4, n_informative=3, n_targets=1, @@ -78,7 +79,7 @@ def prepare_trainer(self, device = torch.device('cpu') logger = logging.getLogger('StandardTrainer - test') metrics = get_metrics(dataset_properties) - epochs = 1000 + epochs = epochs budget_tracker = BudgetTracker( budget_type='epochs', max_epochs=epochs, diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index d6964fa14..f47cbe810 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -165,7 +165,8 @@ def test_regression_epoch_training(self): _, epochs, logger) = self.prepare_trainer(StandardTrainer(), - constants.TABULAR_REGRESSION) + constants.TABULAR_REGRESSION, + 1000) # Train the model counter = 0 @@ -186,7 +187,8 @@ def test_classification_epoch_training(self): _, epochs, logger) = self.prepare_trainer(StandardTrainer(), - constants.TABULAR_CLASSIFICATION) + constants.TABULAR_CLASSIFICATION, + 1000) # Train the model counter = 0 @@ -209,7 +211,8 @@ def test_classification_epoch_training(self): _, epochs, logger) = self.prepare_trainer(MixUpTrainer(alpha=0.5), - constants.TABULAR_CLASSIFICATION) + constants.TABULAR_CLASSIFICATION, + 1000) # Train the model counter = 0 diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 9497c8457..2a7abb2bd 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -7,7 +7,7 @@ UniformIntegerHyperparameter, ) -import flaky +import unittest import numpy as np @@ -53,7 +53,6 @@ def _assert_pipeline_search_space(self, pipeline, search_space_updates): elif isinstance(hyperparameter, CategoricalHyperparameter): assert update.value_range == hyperparameter.choices - @flaky.flaky(max_runs=2) def test_pipeline_fit(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to fit given random combinations of hyperparameters across the pipeline""" @@ -81,7 +80,10 @@ def test_pipeline_fit(self, fit_dictionary_tabular): # Make sure a network was fit assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) +<<<<<<< HEAD @flaky.flaky(max_runs=3) +======= +>>>>>>> Reduce time for tests def test_pipeline_predict(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to predict given a random configuration""" @@ -129,7 +131,6 @@ def test_pipeline_predict_proba(self, fit_dictionary_tabular): assert isinstance(prediction, np.ndarray) assert prediction.shape == expected_output_shape - @flaky.flaky(max_runs=2) def test_pipeline_transform(self, fit_dictionary_tabular): """ In the context of autopytorch, transform expands a fit dictionary with @@ -144,8 +145,11 @@ def test_pipeline_transform(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - # We do not want to make the same early preprocessing operation to the fit dictionary - pipeline.fit(fit_dictionary_tabular.copy()) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + # We do not want to make the same early preprocessing operation to the fit dictionary + 
pipeline.fit(fit_dictionary_tabular.copy()) transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) @@ -377,6 +381,8 @@ def test_constant_pipeline_iris(fit_dictionary_tabular): search_space_updates=search_space_updates) fit_dictionary_tabular['additional_metrics'] = ['balanced_accuracy'] + # increase number of epochs to test for performance + fit_dictionary_tabular['epochs'] = 50 try: pipeline.fit(fit_dictionary_tabular) @@ -422,6 +428,10 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): given the default configuration""" X = fit_dictionary_tabular_dummy['X_train'].copy() y = fit_dictionary_tabular_dummy['y_train'].copy() + + # increase number of epochs to test for performance + fit_dictionary_tabular_dummy['epochs'] = 50 + pipeline = TabularClassificationPipeline( dataset_properties=fit_dictionary_tabular_dummy['dataset_properties']) diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 0215f996f..5b9b73c9c 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -10,6 +10,7 @@ import numpy as np import pytest +import unittest import torch @@ -114,8 +115,11 @@ def test_pipeline_transform(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - # We do not want to make the same early preprocessing operation to the fit dictionary - pipeline.fit(fit_dictionary_tabular.copy()) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + # We do not want to make the same early preprocessing operation to the fit dictionary + pipeline.fit(fit_dictionary_tabular.copy()) transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) @@ -279,6 +283,7 @@ def test_set_range_search_space_updates(self, fit_dictionary_tabular): assert 'fully_connected:units_layer' in e.args[0] +<<<<<<< HEAD @pytest.mark.parametrize("fit_dictionary_tabular_dummy", ["regression"], indirect=True) def test_pipeline_score(fit_dictionary_tabular_dummy): """This test makes sure that the pipeline is able to achieve a decent score on dummy data @@ -296,6 +301,20 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): value_range=[0.0001, 0.001], default_value=0.001) ]) +======= +@pytest.mark.parametrize("fit_dictionary_tabular_dummy", ['regression'], indirect=True) +def test_pipeline_score(fit_dictionary_tabular_dummy): + """This test makes sure that the pipeline is able to achieve a decent score on dummy data + given the default configuration""" + # increase number of epochs to test for performance + fit_dictionary_tabular_dummy['epochs'] = 50 + + X = fit_dictionary_tabular_dummy['X_train'].copy() + y = fit_dictionary_tabular_dummy['y_train'].copy() + + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], +>>>>>>> Reduce time for tests ) cs = pipeline.get_hyperparameter_search_space() @@ -315,4 +334,8 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): # we should be able to get a decent score on this dummy data r2_score = metrics.r2(y, prediction) +<<<<<<< HEAD + assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" +======= assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" +>>>>>>> Reduce time for tests From 56e3ed6db272e11c9edd618fd2360ef0eee7ab93 Mon Sep 17 00:00:00 
2001 From: Ravin Kohli Date: Tue, 11 May 2021 15:36:51 +0200 Subject: [PATCH 03/14] Fix flake in tests --- test/conftest.py | 3 ++- test/test_api/test_api.py | 9 +++---- .../test_tabular_classification.py | 7 +---- test/test_pipeline/test_tabular_regression.py | 27 +------------------ 4 files changed, 7 insertions(+), 39 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 67f9e84cc..cdaf53703 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -460,6 +460,7 @@ def loss_mse(): def loss_details(request): return request.getfixturevalue(request.param) + @pytest.fixture def n_samples(): - return N_SAMPLES \ No newline at end of file + return N_SAMPLES diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index c81b2de8d..a7d19914e 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -2,7 +2,6 @@ import pathlib import pickle import sys -import time import unittest import numpy as np @@ -17,12 +16,11 @@ from sklearn.base import clone from sklearn.ensemble import VotingClassifier, VotingRegressor -from smac.runhistory.runhistory import RunHistory, StatusType - -import torch +from smac.runhistory.runhistory import RunHistory from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.api.tabular_regression import TabularRegressionTask +from autoPyTorch.constants import REGRESSION_TASKS from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, @@ -35,7 +33,6 @@ from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy -from autoPyTorch.constants import REGRESSION_TASKS # ======== @@ -350,7 +347,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin total_walltime_limit=35, func_eval_time_limit_secs=5, enable_traditional_pipeline=False, - ) + ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_regression' diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 2a7abb2bd..f37f58dd3 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -1,5 +1,6 @@ import os import re +import unittest from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -7,8 +8,6 @@ UniformIntegerHyperparameter, ) -import unittest - import numpy as np import pytest @@ -80,10 +79,6 @@ def test_pipeline_fit(self, fit_dictionary_tabular): # Make sure a network was fit assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) -<<<<<<< HEAD - @flaky.flaky(max_runs=3) -======= ->>>>>>> Reduce time for tests def test_pipeline_predict(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to predict given a random configuration""" diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 5b9b73c9c..387d3138c 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -1,5 +1,6 @@ import os import re +import unittest from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -10,7 +11,6 @@ import numpy as np import pytest -import unittest import torch @@ -19,7 +19,6 @@ from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline from autoPyTorch.utils.common import FitRequirement from 
autoPyTorch.utils.hyperparameter_search_space_update import ( - HyperparameterSearchSpaceUpdate, HyperparameterSearchSpaceUpdates, parse_hyperparameter_search_space_updates ) @@ -283,25 +282,6 @@ def test_set_range_search_space_updates(self, fit_dictionary_tabular): assert 'fully_connected:units_layer' in e.args[0] -<<<<<<< HEAD -@pytest.mark.parametrize("fit_dictionary_tabular_dummy", ["regression"], indirect=True) -def test_pipeline_score(fit_dictionary_tabular_dummy): - """This test makes sure that the pipeline is able to achieve a decent score on dummy data - given the default configuration""" - X = fit_dictionary_tabular_dummy['X_train'].copy() - y = fit_dictionary_tabular_dummy['y_train'].copy() - - # lower the learning rate of the optimizer until seeding properly works - # with the default learning rate of 0.01 regression sometimes does not converge - pipeline = TabularRegressionPipeline( - dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], - search_space_updates=HyperparameterSearchSpaceUpdates([ - HyperparameterSearchSpaceUpdate("optimizer", - "AdamOptimizer:lr", - value_range=[0.0001, 0.001], - default_value=0.001) - ]) -======= @pytest.mark.parametrize("fit_dictionary_tabular_dummy", ['regression'], indirect=True) def test_pipeline_score(fit_dictionary_tabular_dummy): """This test makes sure that the pipeline is able to achieve a decent score on dummy data @@ -314,7 +294,6 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): pipeline = TabularRegressionPipeline( dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], ->>>>>>> Reduce time for tests ) cs = pipeline.get_hyperparameter_search_space() @@ -334,8 +313,4 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): # we should be able to get a decent score on this dummy data r2_score = metrics.r2(y, prediction) -<<<<<<< HEAD - assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" -======= assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" ->>>>>>> Reduce time for tests From a626e92be25ddb59fca07e7a2847c7f2037f0e5c Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 11 May 2021 15:47:47 +0200 Subject: [PATCH 04/14] Patch train in other tests also --- .../test_pipeline/test_tabular_classification.py | 16 ++++++++++++---- test/test_pipeline/test_tabular_regression.py | 10 ++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index f37f58dd3..349d170ff 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -90,7 +90,10 @@ def test_pipeline_predict(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of outputs @@ -114,7 +117,10 @@ def test_pipeline_predict_proba(self, fit_dictionary_tabular): pipeline.set_hyperparameters(config) try: - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 
1, {} + pipeline.fit(fit_dictionary_tabular) except Exception as e: pytest.fail(f"Failed on config={config} with {e}") @@ -172,8 +178,10 @@ def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess pipeline = TabularClassificationPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 387d3138c..ec8a7f107 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -90,7 +90,10 @@ def test_pipeline_predict(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) @@ -147,7 +150,10 @@ def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess pipeline = TabularRegressionPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" From f2ce09a580b00557179e3d446db7a081e51e1f32 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 16:46:36 +0200 Subject: [PATCH 05/14] Address comments from shuhei and fransisco: --- test/test_api/test_api.py | 4 +-- .../test_pipeline/components/training/base.py | 1 + .../components/training/test_training.py | 29 ++++++++++++------- .../test_tabular_classification.py | 3 ++ test/test_pipeline/test_tabular_regression.py | 3 ++ 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index a7d19914e..4fbfa929d 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -165,8 +165,8 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric='accuracy', - total_walltime_limit=40, - func_eval_time_limit_secs=5, + total_walltime_limit=50, + func_eval_time_limit_secs=10, enable_traditional_pipeline=False, ) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index ebf8486bf..88068603c 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -20,6 +20,7 @@ class BaseTraining(unittest.TestCase): def prepare_trainer(self, + n_samples: int, trainer: BaseTrainerComponent, task_type: int, epochs=50): diff --git 
a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index f47cbe810..9c1d79016 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -30,6 +30,9 @@ from test.test_pipeline.components.training.base import BaseTraining # noqa (E402: module level import not at top of file) +OVERFIT_EPOCHS = 1000 + + class BaseDataLoaderTest(unittest.TestCase): def test_get_set_config_space(self): """ @@ -123,7 +126,7 @@ def test_fit_transform(self): class BaseTrainerComponentTest(BaseTraining, unittest.TestCase): - def test_evaluate(self): + def test_evaluate(self, n_samples): """ Makes sure we properly evaluate data, returning a proper loss and metric @@ -135,7 +138,8 @@ def test_evaluate(self): loader, criterion, epochs, - logger) = self.prepare_trainer(BaseTrainerComponent(), + logger) = self.prepare_trainer(n_samples, + BaseTrainerComponent(), constants.TABULAR_CLASSIFICATION) prev_loss, prev_metrics = trainer.evaluate(loader, epoch=1, writer=None) @@ -157,16 +161,17 @@ def test_evaluate(self): class StandardTrainerTest(BaseTraining, unittest.TestCase): - def test_regression_epoch_training(self): + def test_regression_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(StandardTrainer(), + logger) = self.prepare_trainer(n_samples, + StandardTrainer(), constants.TABULAR_REGRESSION, - 1000) + OVERFIT_EPOCHS) # Train the model counter = 0 @@ -179,16 +184,17 @@ def test_regression_epoch_training(self): if counter > epochs: self.fail(f"Could not overfit a dummy regression under {epochs} epochs") - def test_classification_epoch_training(self): + def test_classification_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(StandardTrainer(), + logger) = self.prepare_trainer(n_samples, + StandardTrainer(), constants.TABULAR_CLASSIFICATION, - 1000) + OVERFIT_EPOCHS) # Train the model counter = 0 @@ -203,16 +209,17 @@ def test_classification_epoch_training(self): class MixUpTrainerTest(BaseTraining, unittest.TestCase): - def test_classification_epoch_training(self): + def test_classification_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(MixUpTrainer(alpha=0.5), + logger) = self.prepare_trainer(n_samples, + MixUpTrainer(alpha=0.5), constants.TABULAR_CLASSIFICATION, - 1000) + OVERFIT_EPOCHS) # Train the model counter = 0 diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 349d170ff..c90eb2a04 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -444,6 +444,9 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): pipeline.fit(fit_dictionary_tabular_dummy) + # Ensure that the network is an instance of torch Module + assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) + # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of classes ("num_classes" in dataset_properties) expected_output_shape = (X.shape[0], diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index ec8a7f107..3df3c6c41 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -308,6 +308,9 @@ def 
test_pipeline_score(fit_dictionary_tabular_dummy): pipeline.fit(fit_dictionary_tabular_dummy) + # Ensure that the network is an instance of torch Module + assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) + # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) expected_output_shape = (X.shape[0], From e12db796a005329e994aac19d81bdc40663c5577 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 18:17:50 +0200 Subject: [PATCH 06/14] Move base training to pytest --- .../test_pipeline/components/training/base.py | 2 +- .../components/training/test_training.py | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 88068603c..07ba94f2a 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -17,7 +17,7 @@ from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent, BudgetTracker -class BaseTraining(unittest.TestCase): +class BaseTraining: def prepare_trainer(self, n_samples: int, diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index 9c1d79016..83eb94198 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -6,6 +6,8 @@ import numpy as np +import pytest + from sklearn.base import clone import torch @@ -124,8 +126,7 @@ def test_fit_transform(self): loader.val_data_loader) -class BaseTrainerComponentTest(BaseTraining, unittest.TestCase): - +class TestBaseTrainerComponent(BaseTraining): def test_evaluate(self, n_samples): """ Makes sure we properly evaluate data, returning a proper loss @@ -143,7 +144,7 @@ def test_evaluate(self, n_samples): constants.TABULAR_CLASSIFICATION) prev_loss, prev_metrics = trainer.evaluate(loader, epoch=1, writer=None) - self.assertIn('accuracy', prev_metrics) + assert 'accuracy' in prev_metrics # Fit the model self.train_model(model, @@ -155,12 +156,12 @@ def test_evaluate(self, n_samples): # Loss and metrics should have improved after fit # And the prediction should be better than random loss, metrics = trainer.evaluate(loader, epoch=1, writer=None) - self.assertGreater(prev_loss, loss) - self.assertGreater(metrics['accuracy'], prev_metrics['accuracy']) - self.assertGreater(metrics['accuracy'], 0.5) + assert prev_loss > loss + assert metrics['accuracy'] > prev_metrics['accuracy'] + assert metrics['accuracy']> 0.5 -class StandardTrainerTest(BaseTraining, unittest.TestCase): +class StandardTrainerTest(BaseTraining): def test_regression_epoch_training(self, n_samples): (trainer, _, @@ -182,7 +183,7 @@ def test_regression_epoch_training(self, n_samples): r2 = metrics['r2'] if counter > epochs: - self.fail(f"Could not overfit a dummy regression under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy regression under {epochs} epochs") def test_classification_epoch_training(self, n_samples): (trainer, @@ -205,10 +206,10 @@ def test_classification_epoch_training(self, n_samples): accuracy = metrics['accuracy'] if counter > epochs: - self.fail(f"Could not overfit a dummy classification under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy classification under {epochs} epochs") -class 
MixUpTrainerTest(BaseTraining, unittest.TestCase): +class MixUpTrainerTest(BaseTraining): def test_classification_epoch_training(self, n_samples): (trainer, _, @@ -230,7 +231,7 @@ def test_classification_epoch_training(self, n_samples): accuracy = metrics['accuracy'] if counter > epochs: - self.fail(f"Could not overfit a dummy classification under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy classification under {epochs} epochs") class TrainerTest(unittest.TestCase): From 91e030911959be11f0f5463fc572e401d120228e Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 18:19:04 +0200 Subject: [PATCH 07/14] Fix flake in tests --- test/test_pipeline/components/training/base.py | 1 - test/test_pipeline/components/training/test_training.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 07ba94f2a..bfc99e555 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -1,5 +1,4 @@ import logging -import unittest from sklearn.datasets import make_classification, make_regression diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index 83eb94198..98ea47716 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -158,7 +158,7 @@ def test_evaluate(self, n_samples): loss, metrics = trainer.evaluate(loader, epoch=1, writer=None) assert prev_loss > loss assert metrics['accuracy'] > prev_metrics['accuracy'] - assert metrics['accuracy']> 0.5 + assert metrics['accuracy'] > 0.5 class StandardTrainerTest(BaseTraining): From e60d45802e090940219e36bee2645300d28a3f1c Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 19:07:11 +0200 Subject: [PATCH 08/14] forgot to pass n_samples --- test/test_pipeline/components/training/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index bfc99e555..2c8aead4c 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -1,6 +1,7 @@ import logging from sklearn.datasets import make_classification, make_regression +from sklearn.model_selection import ParameterGrid #, ParameterSampler import torch @@ -25,7 +26,7 @@ def prepare_trainer(self, epochs=50): if task_type in CLASSIFICATION_TASKS: X, y = make_classification( - n_samples=500, + n_samples=n_samples, n_features=4, n_informative=3, n_redundant=1, @@ -43,7 +44,7 @@ def prepare_trainer(self, elif task_type in REGRESSION_TASKS: X, y = make_regression( - n_samples=500, + n_samples=n_samples, n_features=4, n_informative=3, n_targets=1, From 0f13c5ec2badc7f07554bc5e268fe93cb9c84bdb Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 19:09:35 +0200 Subject: [PATCH 09/14] stupid error --- test/test_pipeline/components/training/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 2c8aead4c..d7cb2ebd1 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -1,7 +1,6 @@ import logging from sklearn.datasets import make_classification, make_regression -from sklearn.model_selection import ParameterGrid #, 
ParameterSampler import torch From 592278ab67e5351675b03c692f84cd5ef6f53ea5 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Fri, 14 May 2021 12:45:20 +0200 Subject: [PATCH 10/14] Address comments from shuhei, remove hardcoding and fix bug in dummy eval function --- test/test_api/test_api.py | 122 +++++--------------------------------- test/test_api/utils.py | 98 ++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 106 deletions(-) create mode 100644 test/test_api/utils.py diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 4fbfa929d..280617306 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -3,6 +3,7 @@ import pickle import sys import unittest +from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_function import numpy as np @@ -20,120 +21,27 @@ from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.api.tabular_regression import TabularRegressionTask -from autoPyTorch.constants import REGRESSION_TASKS from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, ) -from autoPyTorch.evaluation.abstract_evaluator import ( - DummyClassificationPipeline, - DummyRegressionPipeline, - fit_and_suppress_warnings -) -from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy -# ======== -# Fixtures -# ======== -class DummyTrainEvaluator(TrainEvaluator): - - def _fit_and_predict(self, pipeline, fold: int, train_indices, - test_indices, - add_pipeline_to_self - ): - - if self.task_type in REGRESSION_TASKS: - pipeline = DummyRegressionPipeline(config=1) - else: - pipeline = DummyClassificationPipeline(config=1) - - self.indices[fold] = ((train_indices, test_indices)) - - X = {'train_indices': train_indices, - 'val_indices': test_indices, - 'split_id': fold, - 'num_run': self.num_run, - **self.fit_dictionary} # fit dictionary - y = None - fit_and_suppress_warnings(self.logger, pipeline, X, y) - self.logger.info("Model fitted, now predicting") - ( - Y_train_pred, - Y_opt_pred, - Y_valid_pred, - Y_test_pred - ) = self._predict( - pipeline, - train_indices=train_indices, - test_indices=test_indices, - ) - - if add_pipeline_to_self: - self.pipeline = pipeline - else: - self.pipelines[fold] = pipeline - - return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred - - -# create closure for evaluating an algorithm -def dummy_eval_function( - backend, - queue, - metric, - budget: float, - config, - seed: int, - output_y_hat_optimization: bool, - num_run: int, - include, - exclude, - disable_file_output, - pipeline_config=None, - budget_type=None, - init_params=None, - logger_port=None, - all_supported_metrics=True, - search_space_updates=None, - instance: str = None, -) -> None: - evaluator = TrainEvaluator( - backend=backend, - queue=queue, - metric=metric, - configuration=config, - seed=seed, - num_run=num_run, - output_y_hat_optimization=output_y_hat_optimization, - include=include, - exclude=exclude, - disable_file_output=disable_file_output, - init_params=init_params, - budget=budget, - budget_type=budget_type, - logger_port=logger_port, - all_supported_metrics=all_supported_metrics, - pipeline_config=pipeline_config, - search_space_updates=search_space_updates - ) - evaluator.fit_predict_and_loss() - - -def dummy_do_dummy_prediction(): - return +CV_NUM_SPLITS = 2 +HOLDOUT_NUM_SPLITS = 1 +# ==== # Test -# ======== +# ==== 
@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', new=dummy_eval_function) @pytest.mark.parametrize('openml_id', (40981, )) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) )) def test_tabular_classification(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): @@ -165,14 +73,15 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric='accuracy', - total_walltime_limit=50, - func_eval_time_limit_secs=10, + total_walltime_limit=30, + func_eval_time_limit_secs=5, enable_traditional_pipeline=False, ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_classification' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 + expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation \ + else CV_NUM_SPLITS assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -243,7 +152,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingClassifier) - assert len(model.estimators_) == 2 + assert len(model.estimators_) == CV_NUM_SPLITS else: pytest.fail(resampling_strategy) @@ -300,7 +209,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl new=dummy_eval_function) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) )) def test_tabular_regression(openml_name, resampling_strategy, backend, resampling_strategy_args, n_samples): @@ -344,14 +253,15 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric='r2', - total_walltime_limit=35, + total_walltime_limit=30, func_eval_time_limit_secs=5, enable_traditional_pipeline=False, ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_regression' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 + expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation\ + else CV_NUM_SPLITS assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -417,7 +327,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingRegressor) - assert len(model.estimators_) == 2 + assert len(model.estimators_) == CV_NUM_SPLITS else: pytest.fail(resampling_strategy) diff --git a/test/test_api/utils.py b/test/test_api/utils.py new file mode 100644 index 
000000000..0fa84b55b --- /dev/null +++ b/test/test_api/utils.py @@ -0,0 +1,98 @@ +from autoPyTorch.constants import REGRESSION_TASKS +from autoPyTorch.evaluation.abstract_evaluator import ( + DummyClassificationPipeline, + DummyRegressionPipeline, + fit_and_suppress_warnings +) +from autoPyTorch.evaluation.train_evaluator import TrainEvaluator + + +# ======== +# Fixtures +# ======== +class DummyTrainEvaluator(TrainEvaluator): + + def _fit_and_predict(self, pipeline, fold: int, train_indices, + test_indices, + add_pipeline_to_self + ): + + if self.task_type in REGRESSION_TASKS: + pipeline = DummyRegressionPipeline(config=1) + else: + pipeline = DummyClassificationPipeline(config=1) + + self.indices[fold] = ((train_indices, test_indices)) + + X = {'train_indices': train_indices, + 'val_indices': test_indices, + 'split_id': fold, + 'num_run': self.num_run, + **self.fit_dictionary} # fit dictionary + y = None + fit_and_suppress_warnings(self.logger, pipeline, X, y) + self.logger.info("Model fitted, now predicting") + ( + Y_train_pred, + Y_opt_pred, + Y_valid_pred, + Y_test_pred + ) = self._predict( + pipeline, + train_indices=train_indices, + test_indices=test_indices, + ) + + if add_pipeline_to_self: + self.pipeline = pipeline + else: + self.pipelines[fold] = pipeline + + return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred + + +# create closure for evaluating an algorithm +def dummy_eval_function( + backend, + queue, + metric, + budget: float, + config, + seed: int, + output_y_hat_optimization: bool, + num_run: int, + include, + exclude, + disable_file_output, + pipeline_config=None, + budget_type=None, + init_params=None, + logger_port=None, + all_supported_metrics=True, + search_space_updates=None, + instance: str = None, +) -> None: + evaluator = DummyTrainEvaluator( + backend=backend, + queue=queue, + metric=metric, + configuration=config, + seed=seed, + num_run=num_run, + output_y_hat_optimization=output_y_hat_optimization, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + init_params=init_params, + budget=budget, + budget_type=budget_type, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + pipeline_config=pipeline_config, + search_space_updates=search_space_updates + ) + evaluator.fit_predict_and_loss() + + +def dummy_do_dummy_prediction(): + return From aa8b3d666c27e4a9c44e0a15818bcb7b848cfae9 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Fri, 14 May 2021 13:57:22 +0200 Subject: [PATCH 11/14] Skip ensemble test for python >=3.7 and introduce random state for feature processors --- .../feature_preprocessing/KernelPCA.py | 5 ++--- .../feature_preprocessing/Nystroem.py | 5 ++--- .../feature_preprocessing/PolynomialFeatures.py | 5 ++--- .../feature_preprocessing/PowerTransformer.py | 5 ++--- .../feature_preprocessing/RandomKitchenSinks.py | 5 ++--- .../feature_preprocessing/TruncatedSVD.py | 5 ++--- .../base_feature_preprocessor.py | 14 ++++++++++++-- test/test_ensemble/test_ensemble.py | 2 ++ .../preprocessing/test_feature_preprocessor.py | 9 +++++++-- 9 files changed, 33 insertions(+), 22 deletions(-) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py index 2a4737c4d..0d00e8ddb 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py +++ 
b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py @@ -23,15 +23,14 @@ class KernelPCA(autoPyTorchFeaturePreprocessingComponent): def __init__(self, n_components: int = 10, kernel: str = 'rbf', degree: int = 3, gamma: float = 0.01, coef0: float = 0.0, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[np.random.RandomState] = None ) -> None: self.n_components = n_components self.kernel = kernel self.degree = degree self.gamma = gamma self.coef0 = coef0 - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) self.add_fit_requirements([ FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)]) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py index 0a8f6c63d..519be2e30 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py @@ -23,15 +23,14 @@ class Nystroem(autoPyTorchFeaturePreprocessingComponent): def __init__(self, n_components: int = 10, kernel: str = 'rbf', degree: int = 3, gamma: float = 0.01, coef0: float = 0.0, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[np.random.RandomState] = None ) -> None: self.n_components = n_components self.kernel = kernel self.degree = degree self.gamma = gamma self.coef0 = coef0 - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py index a41c0a26d..5906b9651 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py @@ -19,13 +19,12 @@ class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent): def __init__(self, degree: int = 2, interaction_only: bool = False, include_bias: bool = False, - random_state: Optional[Union[int, np.random.RandomState]] = None): + random_state: Optional[np.random.RandomState] = None): self.degree = degree self.interaction_only = interaction_only self.include_bias = include_bias - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures( diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py index 767a0f6c1..e968ff34f 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py @@ -17,11 +17,10 @@ class 
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
index 767a0f6c1..e968ff34f 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
@@ -17,11 +17,10 @@ class PowerTransformer(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, standardize: bool = True,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.standardize = standardize
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson",
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
index 9dbf26cbc..9d60638ce 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
@@ -20,12 +20,11 @@ class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 100,
                  gamma: float = 1.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.gamma = gamma
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
index bfe4568b3..29b24d627 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
@@ -18,11 +18,10 @@ class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, target_dim: int = 128,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.target_dim = target_dim
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
index 8c85bbf30..eaad1d360 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
@@ -1,4 +1,8 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+
+from sklearn.utils import check_random_state
 
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
     autoPyTorchTabularPreprocessingComponent
@@ -8,7 +12,13 @@ class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     _required_properties: List[str] = ['handles_sparse']
 
-    def __init__(self) -> None:
+    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
+        if random_state is None:
+            # Feature preprocessors need a random state for sampling-based
+            # components such as Nystroem or KernelPCA
+            self.random_state = check_random_state(1)
+        else:
+            self.random_state = random_state
         super().__init__()
 
     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
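The new base-class behaviour leans on sklearn's check_random_state contract. A quick sketch of that contract, with illustrative values only (nothing in this diff asserts them):

    import numpy as np
    from sklearn.utils import check_random_state

    check_random_state(None)             # returns numpy's global RandomState
    check_random_state(1)                # returns np.random.RandomState(1)
    rs = np.random.RandomState(11)
    assert check_random_state(rs) is rs  # instances are passed through as-is

    # Two generators built from the same seed draw identical samples, which is
    # what makes the seeded preprocessors reproducible across test runs:
    a, b = np.random.RandomState(5), np.random.RandomState(5)
    assert (a.rand(3) == b.rand(3)).all()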
diff --git a/test/test_ensemble/test_ensemble.py b/test/test_ensemble/test_ensemble.py
index e0e4c3bb1..cd0f02e72 100644
--- a/test/test_ensemble/test_ensemble.py
+++ b/test/test_ensemble/test_ensemble.py
@@ -690,6 +690,8 @@ def test_ensemble_builder_process_realrun(dask_client, ensemble_backend):
 
 @flaky(max_runs=3)
 @unittest.mock.patch('autoPyTorch.ensemble.ensemble_builder.EnsembleBuilder.fit_ensemble')
+@pytest.mark.skipif(sys.version_info >= (3, 7),
+                    reason="Causes out-of-memory errors in CI")
 def test_ensemble_builder_nbest_remembered(fit_ensemble, ensemble_backend, dask_client):
     """
     Makes sure ensemble builder returns the size of the ensemble that pynisher allowed
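Worth noting about the decorator stack above: the skipif mark is resolved before the test body ever runs, so on Python >= 3.7 the test is skipped outright and the flaky retries never come into play. A standalone sketch of the same gating pattern (the test itself is made up):

    import sys

    import pytest
    from flaky import flaky

    @flaky(max_runs=3)  # retries apply only when the test actually runs
    @pytest.mark.skipif(sys.version_info >= (3, 7),
                        reason="Causes out-of-memory errors in CI")
    def test_memory_hungry_ensemble():
        ...  # skipped at collection time on Python >= 3.7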
diff --git a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py
index a5c342804..822112fca 100644
--- a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py
+++ b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py
@@ -14,6 +14,11 @@ from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
 
 
+@pytest.fixture
+def random_state():
+    return 11
+
+
 @pytest.fixture(params=['TruncatedSVD', 'PolynomialFeatures', 'PowerTransformer',
                         'Nystroem', 'KernelPCA', 'RandomKitchenSinks'])
 def preprocessor(request):
@@ -24,10 +29,10 @@
                          'classification_numerical_and_categorical'], indirect=True)
 class TestFeaturePreprocessors:
 
-    def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor):
+    def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor, random_state):
         preprocessor = FeatureProprocessorChoice(
             dataset_properties=fit_dictionary_tabular['dataset_properties']
-        ).get_components()[preprocessor]()
+        ).get_components()[preprocessor](random_state=random_state)
         configuration = preprocessor. \
             get_hyperparameter_search_space(dataset_properties=fit_dictionary_tabular["dataset_properties"]) \
             .get_default_configuration().get_dictionary()

From 342b3b82dffe513f9356cdecfc630f86f7a2bba5 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Fri, 14 May 2021 14:15:06 +0200
Subject: [PATCH 12/14] fix flake

---
 .../tabular_preprocessing/feature_preprocessing/KernelPCA.py    | 2 +-
 .../tabular_preprocessing/feature_preprocessing/Nystroem.py     | 2 +-
 .../feature_preprocessing/PolynomialFeatures.py                 | 2 +-
 .../feature_preprocessing/PowerTransformer.py                   | 2 +-
 .../feature_preprocessing/RandomKitchenSinks.py                 | 2 +-
 .../tabular_preprocessing/feature_preprocessing/TruncatedSVD.py | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
index 0d00e8ddb..883f6d636 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
index 519be2e30..17a785466 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
index 5906b9651..b64b32eb6 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
index e968ff34f..af187c50d 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
index 9d60638ce..f8983e815 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
index 29b24d627..69410d32f 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
@@ -1,5 +1,5 @@
 from math import floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (

From 80aa5abea4eb28432cc87dfa1db309ea7211d42a Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Mon, 17 May 2021 11:09:16 +0200
Subject: [PATCH 13/14] Remove example workflow

---
 .github/workflows/examples.yml | 39 ----------------------------------
 1 file changed, 39 deletions(-)
 delete mode 100644 .github/workflows/examples.yml

diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
deleted file mode 100644
index 538b44edd..000000000
--- a/.github/workflows/examples.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: Examples
-
-on: [push, pull_request]
-
-jobs:
-  ubuntu:
-
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: [3.8]
-      fail-fast: false
-      max-parallel: 2
-
-    steps:
-    - uses: actions/checkout@v2
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install test dependencies
-      run: |
-        git submodule update --init --recursive
-        python -m pip install --upgrade pip
-        pip install -e .[examples]
-        which python
-        pip freeze
-    - name: Store repository status
-      id: status-before
-      run: |
-        echo "::set-output name=BEFORE::$(git status --porcelain -b)"
-    - name: Run tests
-      run: |
-        python examples/20_basics/example_image_classification.py
-        python examples/20_basics/example_tabular_classification.py
-        python examples/20_basics/example_tabular_regression.py
-        python examples/40_advanced/example_custom_configuration_space.py
-        python examples/40_advanced/example_resampling_strategy.py
-        python examples/40_advanced/example_visualization.py

From 8425143625632c6022f5b64b06bcf8233eaf6166 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Mon, 17 May 2021 12:54:38 +0200
Subject: [PATCH 14/14] Remove "-> None" from __init__ in feature preprocessing

---
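Dropping the explicit "-> None" should be invisible to type checkers: under mypy's rules, an __init__ with at least one annotated parameter is already treated as fully typed, and its return type defaults to None. A minimal sketch (the class is illustrative, not from the codebase):

    from typing import Optional

    import numpy as np

    class Component:
        # One annotated parameter is enough for mypy to check the body;
        # __init__'s return type is implicitly None.
        def __init__(self, random_state: Optional[np.random.RandomState] = None):
            self.random_state = random_state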
 .../tabular_preprocessing/feature_preprocessing/KernelPCA.py    | 2 +-
 .../tabular_preprocessing/feature_preprocessing/Nystroem.py     | 2 +-
 .../feature_preprocessing/RandomKitchenSinks.py                 | 2 +-
 .../feature_preprocessing/base_feature_preprocessor.py          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
index 883f6d636..a03a35331 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
@@ -24,7 +24,7 @@ def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
                  random_state: Optional[np.random.RandomState] = None
-                 ) -> None:
+                 ):
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
index 17a785466..d00697c21 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
@@ -24,7 +24,7 @@ def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
                  random_state: Optional[np.random.RandomState] = None
-                 ) -> None:
+                 ):
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
index f8983e815..a3267391a 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
@@ -21,7 +21,7 @@ class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 100,
                  gamma: float = 1.0,
                  random_state: Optional[np.random.RandomState] = None
-                 ) -> None:
+                 ):
         self.n_components = n_components
         self.gamma = gamma
         super().__init__(random_state=random_state)
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
index eaad1d360..d11f69b90 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
@@ -12,7 +12,7 @@ class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     _required_properties: List[str] = ['handles_sparse']
 
-    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
+    def __init__(self, random_state: Optional[np.random.RandomState] = None):
         if random_state is None:
             # Feature preprocessors need a random state for sampling-based
             # components such as Nystroem or KernelPCA