From c40f2d9f23cfe1e5f117713354837554e31c504d Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Sat, 8 May 2021 18:32:07 +0200
Subject: [PATCH 01/14] In progress, changing tests

---
 test/test_api/test_api.py | 80 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index 62d220bfd..9b69331d5 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -2,6 +2,7 @@
 import pathlib
 import pickle
 import sys
+import time
 import unittest
 
 import numpy as np
@@ -16,7 +17,7 @@
 from sklearn.base import clone
 from sklearn.ensemble import VotingClassifier, VotingRegressor
 
-from smac.runhistory.runhistory import RunHistory
+from smac.runhistory.runhistory import RunHistory, StatusType
 
 import torch
 
@@ -26,13 +27,90 @@
     CrossValTypes,
     HoldoutValTypes,
 )
+from autoPyTorch.evaluation.train_evaluator import TrainEvaluator
 from autoPyTorch.optimizer.smbo import AutoMLSMBO
 from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy
 
 
+# ========
 # Fixtures
 # ========
+class DummyTrainEvaluator(TrainEvaluator):
+
+    def fit_predict_and_loss(self) -> None:
+        self.start_time = time.time()
+        split_id = 0
+        self.logger.info("Starting fit {}".format(split_id))
+
+        pipeline = self._get_pipeline()
+
+        train_split, test_split = self.splits[split_id]
+        self.Y_optimization = self.y_train[test_split]
+        self.Y_actual_train = self.y_train[train_split]
+        y_train_pred, y_opt_pred, y_valid_pred, y_test_pred = self._fit_and_predict(pipeline, split_id,
+                                                                                    train_indices=train_split,
+                                                                                    test_indices=test_split,
+                                                                                    add_pipeline_to_self=True)
+        train_loss = self._loss(self.y_train[train_split], y_train_pred)
+        loss = self._loss(self.y_train[test_split], y_opt_pred)
+        additional_run_info = pipeline.get_additional_run_info() if hasattr(
+            pipeline, 'get_additional_run_info') else {}
+
+        status = StatusType.SUCCESS
+
+        self.finish_up(
+            loss=loss,
+            train_loss=train_loss,
+            opt_pred=y_opt_pred,
+            valid_pred=y_valid_pred,
+            test_pred=y_test_pred,
+            additional_run_info=additional_run_info,
+            file_output=True,
+            status=status,
+        )
+
+
+# create closure for evaluating an algorithm
+def dummy_eval_function(
+    backend,
+    queue,
+    metric,
+    budget: float,
+    config,
+    seed: int,
+    output_y_hat_optimization: bool,
+    num_run: int,
+    include,
+    exclude,
+    disable_file_output,
+    pipeline_config=None,
+    budget_type=None,
+    init_params=None,
+    logger_port=None,
+    all_supported_metrics=True,
+    search_space_updates=None,
+    instance: str = None,
+) -> None:
+    evaluator = TrainEvaluator(
+        backend=backend,
+        queue=queue,
+        metric=metric,
+        configuration=config,
+        seed=seed,
+        num_run=num_run,
+        output_y_hat_optimization=output_y_hat_optimization,
+        include=include,
+        exclude=exclude,
+        disable_file_output=disable_file_output,
+        init_params=init_params,
+        budget=budget,
+        budget_type=budget_type,
+        logger_port=logger_port,
+        all_supported_metrics=all_supported_metrics,
+        pipeline_config=pipeline_config,
+        search_space_updates=search_space_updates
+    )
+    evaluator.fit_predict_and_loss()
 
 
 # Test
 # ========

From 91b1b1f1aa6f60780f5531a1fb57c359afda5a15 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Tue, 11 May 2021 15:34:17 +0200
Subject: [PATCH 02/14] Reduce time for tests

---
 test/conftest.py                              |  35 ++--
 test/test_api/test_api.py                     | 151 ++++++++++--------
 .../components/setup/test_setup_networks.py   |   3 +
 .../test_pipeline/components/training/base.py |   9 +-
 .../components/training/test_training.py      |   9 +-
.../test_tabular_classification.py | 20 ++- test/test_pipeline/test_tabular_regression.py | 27 +++- 7 files changed, 163 insertions(+), 91 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 592d41165..67f9e84cc 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -25,6 +25,9 @@ from autoPyTorch.utils.pipeline import get_dataset_requirements +N_SAMPLES = 200 + + @pytest.fixture(scope="session") def callattr_ahead_of_alltests(request): """ @@ -191,7 +194,7 @@ def session_run_at_end(): def get_tabular_data(task): if task == "classification_numerical_only": X, y = make_classification( - n_samples=200, + n_samples=N_SAMPLES, n_features=4, n_informative=3, n_redundant=1, @@ -207,18 +210,18 @@ def get_tabular_data(task): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] X = X[categorical_columns] - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) elif task == "classification_numerical_and_categorical": X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) elif task == "regression_numerical_only": - X, y = make_regression(n_samples=200, + X, y = make_regression(n_samples=N_SAMPLES, n_features=4, n_informative=3, n_targets=1, @@ -240,8 +243,8 @@ def get_tabular_data(task): else: X[column] = X[column].fillna(0) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] y = (y - y.mean()) / y.std() validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) @@ -256,8 +259,8 @@ def get_tabular_data(task): else: X[column] = X[column].fillna(0) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] y = (y - y.mean()) / y.std() validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) elif task == 'iris': @@ -288,7 +291,7 @@ def get_fit_dictionary(X, y, validator, backend): 'num_run': np.random.randint(50), 'device': 'cpu', 'budget_type': 'epochs', - 'epochs': 100, + 'epochs': 5, 'torch_num_threads': 1, 'early_stopping': 10, 'working_dir': '/tmp', @@ -326,7 +329,7 @@ def dataset(request): @pytest.fixture def dataset_traditional_classifier_num_only(): X, y = make_classification( - n_samples=200, + n_samples=N_SAMPLES, n_features=4, n_informative=3, n_redundant=1, @@ -344,7 +347,7 @@ def dataset_traditional_classifier_categorical_only(): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] X = X[categorical_columns] - X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int) return X, y @@ -352,7 +355,7 @@ def dataset_traditional_classifier_categorical_only(): def dataset_traditional_classifier_num_categorical(): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) y = y.astype(np.int) - X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int) return X, y @@ -456,3 +459,7 @@ def loss_mse(): @pytest.fixture def loss_details(request): return request.getfixturevalue(request.param) + 
+@pytest.fixture +def n_samples(): + return N_SAMPLES \ No newline at end of file diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 9b69331d5..c81b2de8d 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -27,9 +27,15 @@ CrossValTypes, HoldoutValTypes, ) +from autoPyTorch.evaluation.abstract_evaluator import ( + DummyClassificationPipeline, + DummyRegressionPipeline, + fit_and_suppress_warnings +) from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy +from autoPyTorch.constants import REGRESSION_TASKS # ======== @@ -37,38 +43,44 @@ # ======== class DummyTrainEvaluator(TrainEvaluator): - def fit_predict_and_loss(self) -> None: - self.start_time = time.time() - split_id = 0 - self.logger.info("Starting fit {}".format(split_id)) - - pipeline = self._get_pipeline() - - train_split, test_split = self.splits[split_id] - self.Y_optimization = self.y_train[test_split] - self.Y_actual_train = self.y_train[train_split] - y_train_pred, y_opt_pred, y_valid_pred, y_test_pred = self._fit_and_predict(pipeline, split_id, - train_indices=train_split, - test_indices=test_split, - add_pipeline_to_self=True) - train_loss = self._loss(self.y_train[train_split], y_train_pred) - loss = self._loss(self.y_train[test_split], y_opt_pred) - additional_run_info = pipeline.get_additional_run_info() if hasattr( - pipeline, 'get_additional_run_info') else {} - - status = StatusType.SUCCESS - - self.finish_up( - loss=loss, - train_loss=train_loss, - opt_pred=y_opt_pred, - valid_pred=y_valid_pred, - test_pred=y_test_pred, - additional_run_info=additional_run_info, - file_output=True, - status=status, + def _fit_and_predict(self, pipeline, fold: int, train_indices, + test_indices, + add_pipeline_to_self + ): + + if self.task_type in REGRESSION_TASKS: + pipeline = DummyRegressionPipeline(config=1) + else: + pipeline = DummyClassificationPipeline(config=1) + + self.indices[fold] = ((train_indices, test_indices)) + + X = {'train_indices': train_indices, + 'val_indices': test_indices, + 'split_id': fold, + 'num_run': self.num_run, + **self.fit_dictionary} # fit dictionary + y = None + fit_and_suppress_warnings(self.logger, pipeline, X, y) + self.logger.info("Model fitted, now predicting") + ( + Y_train_pred, + Y_opt_pred, + Y_valid_pred, + Y_test_pred + ) = self._predict( + pipeline, + train_indices=train_indices, + test_indices=test_indices, ) + if add_pipeline_to_self: + self.pipeline = pipeline + else: + self.pipelines[fold] = pipeline + + return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred + # create closure for evaluating an algorithm def dummy_eval_function( @@ -112,19 +124,29 @@ def dummy_eval_function( ) evaluator.fit_predict_and_loss() + +def dummy_do_dummy_prediction(): + return + + # Test # ======== +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', + new=dummy_eval_function) @pytest.mark.parametrize('openml_id', (40981, )) -@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, - CrossValTypes.k_fold_cross_validation, - )) -def test_tabular_classification(openml_id, resampling_strategy, backend): +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + )) +def test_tabular_classification(openml_id, resampling_strategy, backend, 
resampling_strategy_args, n_samples): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( data_id=int(openml_id), return_X_y=True, as_frame=True ) + X, y = X.iloc[:n_samples], y.iloc[:n_samples] + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( X, y, random_state=1) @@ -137,21 +159,23 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): estimator = TabularClassificationTask( backend=backend, resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, include_components=include ) - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=50, - enable_traditional_pipeline=False, - ) + with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='accuracy', + total_walltime_limit=40, + func_eval_time_limit_secs=5, + enable_traditional_pipeline=False, + ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_classification' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5 + expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -212,7 +236,6 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, @@ -223,9 +246,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingClassifier) - assert len(model.estimators_) == 5 - assert isinstance(model.estimators_[0].named_steps['network'].get_network(), - torch.nn.Module) + assert len(model.estimators_) == 2 else: pytest.fail(resampling_strategy) @@ -278,10 +299,13 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): @pytest.mark.parametrize('openml_name', ("boston", )) -@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, - CrossValTypes.k_fold_cross_validation, - )) -def test_tabular_regression(openml_name, resampling_strategy, backend): +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', + new=dummy_eval_function) +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + )) +def test_tabular_regression(openml_name, resampling_strategy, backend, resampling_strategy_args, n_samples): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( @@ -289,6 +313,8 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): return_X_y=True, as_frame=True ) + X, 
y = X.iloc[:n_samples], y.iloc[:n_samples] + # normalize values y = (y - y.mean()) / y.std() @@ -312,21 +338,23 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): estimator = TabularRegressionTask( backend=backend, resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, include_components=include ) - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='r2', - total_walltime_limit=100, - func_eval_time_limit_secs=10, - enable_traditional_pipeline=False, + with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='r2', + total_walltime_limit=35, + func_eval_time_limit_secs=5, + enable_traditional_pipeline=False, ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_regression' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5 + expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -383,7 +411,6 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, @@ -393,9 +420,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingRegressor) - assert len(model.estimators_) == 5 - assert isinstance(model.estimators_[0].named_steps['network'].get_network(), - torch.nn.Module) + assert len(model.estimators_) == 2 else: pytest.fail(resampling_strategy) diff --git a/test/test_pipeline/components/setup/test_setup_networks.py b/test/test_pipeline/components/setup/test_setup_networks.py index 6826d7ef2..93ded0102 100644 --- a/test/test_pipeline/components/setup/test_setup_networks.py +++ b/test/test_pipeline/components/setup/test_setup_networks.py @@ -31,6 +31,9 @@ def test_pipeline_fit(self, fit_dictionary_tabular, embedding, backbone, head): """This test makes sure that the pipeline is able to fit every combination of network embedding, backbone, head""" + # increase number of epochs to test for performance + fit_dictionary_tabular['epochs'] = 50 + include = {'network_backbone': [backbone], 'network_head': [head], 'network_embedding': [embedding]} if len(fit_dictionary_tabular['dataset_properties'] diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 10d9ea416..ebf8486bf 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -21,10 +21,11 @@ class BaseTraining(unittest.TestCase): def prepare_trainer(self, trainer: BaseTrainerComponent, - task_type: int): + task_type: int, + epochs=50): if task_type in CLASSIFICATION_TASKS: X, y = make_classification( - n_samples=5000, + n_samples=500, 
n_features=4, n_informative=3, n_redundant=1, @@ -42,7 +43,7 @@ def prepare_trainer(self, elif task_type in REGRESSION_TASKS: X, y = make_regression( - n_samples=5000, + n_samples=500, n_features=4, n_informative=3, n_targets=1, @@ -78,7 +79,7 @@ def prepare_trainer(self, device = torch.device('cpu') logger = logging.getLogger('StandardTrainer - test') metrics = get_metrics(dataset_properties) - epochs = 1000 + epochs = epochs budget_tracker = BudgetTracker( budget_type='epochs', max_epochs=epochs, diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index d6964fa14..f47cbe810 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -165,7 +165,8 @@ def test_regression_epoch_training(self): _, epochs, logger) = self.prepare_trainer(StandardTrainer(), - constants.TABULAR_REGRESSION) + constants.TABULAR_REGRESSION, + 1000) # Train the model counter = 0 @@ -186,7 +187,8 @@ def test_classification_epoch_training(self): _, epochs, logger) = self.prepare_trainer(StandardTrainer(), - constants.TABULAR_CLASSIFICATION) + constants.TABULAR_CLASSIFICATION, + 1000) # Train the model counter = 0 @@ -209,7 +211,8 @@ def test_classification_epoch_training(self): _, epochs, logger) = self.prepare_trainer(MixUpTrainer(alpha=0.5), - constants.TABULAR_CLASSIFICATION) + constants.TABULAR_CLASSIFICATION, + 1000) # Train the model counter = 0 diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 9497c8457..2a7abb2bd 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -7,7 +7,7 @@ UniformIntegerHyperparameter, ) -import flaky +import unittest import numpy as np @@ -53,7 +53,6 @@ def _assert_pipeline_search_space(self, pipeline, search_space_updates): elif isinstance(hyperparameter, CategoricalHyperparameter): assert update.value_range == hyperparameter.choices - @flaky.flaky(max_runs=2) def test_pipeline_fit(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to fit given random combinations of hyperparameters across the pipeline""" @@ -81,7 +80,10 @@ def test_pipeline_fit(self, fit_dictionary_tabular): # Make sure a network was fit assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) +<<<<<<< HEAD @flaky.flaky(max_runs=3) +======= +>>>>>>> Reduce time for tests def test_pipeline_predict(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to predict given a random configuration""" @@ -129,7 +131,6 @@ def test_pipeline_predict_proba(self, fit_dictionary_tabular): assert isinstance(prediction, np.ndarray) assert prediction.shape == expected_output_shape - @flaky.flaky(max_runs=2) def test_pipeline_transform(self, fit_dictionary_tabular): """ In the context of autopytorch, transform expands a fit dictionary with @@ -144,8 +145,11 @@ def test_pipeline_transform(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - # We do not want to make the same early preprocessing operation to the fit dictionary - pipeline.fit(fit_dictionary_tabular.copy()) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + # We do not want to make the same early preprocessing operation to the fit dictionary + 
pipeline.fit(fit_dictionary_tabular.copy()) transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) @@ -377,6 +381,8 @@ def test_constant_pipeline_iris(fit_dictionary_tabular): search_space_updates=search_space_updates) fit_dictionary_tabular['additional_metrics'] = ['balanced_accuracy'] + # increase number of epochs to test for performance + fit_dictionary_tabular['epochs'] = 50 try: pipeline.fit(fit_dictionary_tabular) @@ -422,6 +428,10 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): given the default configuration""" X = fit_dictionary_tabular_dummy['X_train'].copy() y = fit_dictionary_tabular_dummy['y_train'].copy() + + # increase number of epochs to test for performance + fit_dictionary_tabular_dummy['epochs'] = 50 + pipeline = TabularClassificationPipeline( dataset_properties=fit_dictionary_tabular_dummy['dataset_properties']) diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 0215f996f..5b9b73c9c 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -10,6 +10,7 @@ import numpy as np import pytest +import unittest import torch @@ -114,8 +115,11 @@ def test_pipeline_transform(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - # We do not want to make the same early preprocessing operation to the fit dictionary - pipeline.fit(fit_dictionary_tabular.copy()) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + # We do not want to make the same early preprocessing operation to the fit dictionary + pipeline.fit(fit_dictionary_tabular.copy()) transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) @@ -279,6 +283,7 @@ def test_set_range_search_space_updates(self, fit_dictionary_tabular): assert 'fully_connected:units_layer' in e.args[0] +<<<<<<< HEAD @pytest.mark.parametrize("fit_dictionary_tabular_dummy", ["regression"], indirect=True) def test_pipeline_score(fit_dictionary_tabular_dummy): """This test makes sure that the pipeline is able to achieve a decent score on dummy data @@ -296,6 +301,20 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): value_range=[0.0001, 0.001], default_value=0.001) ]) +======= +@pytest.mark.parametrize("fit_dictionary_tabular_dummy", ['regression'], indirect=True) +def test_pipeline_score(fit_dictionary_tabular_dummy): + """This test makes sure that the pipeline is able to achieve a decent score on dummy data + given the default configuration""" + # increase number of epochs to test for performance + fit_dictionary_tabular_dummy['epochs'] = 50 + + X = fit_dictionary_tabular_dummy['X_train'].copy() + y = fit_dictionary_tabular_dummy['y_train'].copy() + + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], +>>>>>>> Reduce time for tests ) cs = pipeline.get_hyperparameter_search_space() @@ -315,4 +334,8 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): # we should be able to get a decent score on this dummy data r2_score = metrics.r2(y, prediction) +<<<<<<< HEAD + assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" +======= assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" +>>>>>>> Reduce time for tests From 56e3ed6db272e11c9edd618fd2360ef0eee7ab93 Mon Sep 17 00:00:00 
2001 From: Ravin Kohli Date: Tue, 11 May 2021 15:36:51 +0200 Subject: [PATCH 03/14] Fix flake in tests --- test/conftest.py | 3 ++- test/test_api/test_api.py | 9 +++---- .../test_tabular_classification.py | 7 +---- test/test_pipeline/test_tabular_regression.py | 27 +------------------ 4 files changed, 7 insertions(+), 39 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 67f9e84cc..cdaf53703 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -460,6 +460,7 @@ def loss_mse(): def loss_details(request): return request.getfixturevalue(request.param) + @pytest.fixture def n_samples(): - return N_SAMPLES \ No newline at end of file + return N_SAMPLES diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index c81b2de8d..a7d19914e 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -2,7 +2,6 @@ import pathlib import pickle import sys -import time import unittest import numpy as np @@ -17,12 +16,11 @@ from sklearn.base import clone from sklearn.ensemble import VotingClassifier, VotingRegressor -from smac.runhistory.runhistory import RunHistory, StatusType - -import torch +from smac.runhistory.runhistory import RunHistory from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.api.tabular_regression import TabularRegressionTask +from autoPyTorch.constants import REGRESSION_TASKS from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, @@ -35,7 +33,6 @@ from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy -from autoPyTorch.constants import REGRESSION_TASKS # ======== @@ -350,7 +347,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin total_walltime_limit=35, func_eval_time_limit_secs=5, enable_traditional_pipeline=False, - ) + ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_regression' diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 2a7abb2bd..f37f58dd3 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -1,5 +1,6 @@ import os import re +import unittest from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -7,8 +8,6 @@ UniformIntegerHyperparameter, ) -import unittest - import numpy as np import pytest @@ -80,10 +79,6 @@ def test_pipeline_fit(self, fit_dictionary_tabular): # Make sure a network was fit assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) -<<<<<<< HEAD - @flaky.flaky(max_runs=3) -======= ->>>>>>> Reduce time for tests def test_pipeline_predict(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to predict given a random configuration""" diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 5b9b73c9c..387d3138c 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -1,5 +1,6 @@ import os import re +import unittest from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -10,7 +11,6 @@ import numpy as np import pytest -import unittest import torch @@ -19,7 +19,6 @@ from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline from autoPyTorch.utils.common import FitRequirement from 
autoPyTorch.utils.hyperparameter_search_space_update import ( - HyperparameterSearchSpaceUpdate, HyperparameterSearchSpaceUpdates, parse_hyperparameter_search_space_updates ) @@ -283,25 +282,6 @@ def test_set_range_search_space_updates(self, fit_dictionary_tabular): assert 'fully_connected:units_layer' in e.args[0] -<<<<<<< HEAD -@pytest.mark.parametrize("fit_dictionary_tabular_dummy", ["regression"], indirect=True) -def test_pipeline_score(fit_dictionary_tabular_dummy): - """This test makes sure that the pipeline is able to achieve a decent score on dummy data - given the default configuration""" - X = fit_dictionary_tabular_dummy['X_train'].copy() - y = fit_dictionary_tabular_dummy['y_train'].copy() - - # lower the learning rate of the optimizer until seeding properly works - # with the default learning rate of 0.01 regression sometimes does not converge - pipeline = TabularRegressionPipeline( - dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], - search_space_updates=HyperparameterSearchSpaceUpdates([ - HyperparameterSearchSpaceUpdate("optimizer", - "AdamOptimizer:lr", - value_range=[0.0001, 0.001], - default_value=0.001) - ]) -======= @pytest.mark.parametrize("fit_dictionary_tabular_dummy", ['regression'], indirect=True) def test_pipeline_score(fit_dictionary_tabular_dummy): """This test makes sure that the pipeline is able to achieve a decent score on dummy data @@ -314,7 +294,6 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): pipeline = TabularRegressionPipeline( dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], ->>>>>>> Reduce time for tests ) cs = pipeline.get_hyperparameter_search_space() @@ -334,8 +313,4 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): # we should be able to get a decent score on this dummy data r2_score = metrics.r2(y, prediction) -<<<<<<< HEAD - assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" -======= assert r2_score >= 0.5, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}" ->>>>>>> Reduce time for tests From a626e92be25ddb59fca07e7a2847c7f2037f0e5c Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 11 May 2021 15:47:47 +0200 Subject: [PATCH 04/14] Patch train in other tests also --- .../test_pipeline/test_tabular_classification.py | 16 ++++++++++++---- test/test_pipeline/test_tabular_regression.py | 10 ++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index f37f58dd3..349d170ff 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -90,7 +90,10 @@ def test_pipeline_predict(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of outputs @@ -114,7 +117,10 @@ def test_pipeline_predict_proba(self, fit_dictionary_tabular): pipeline.set_hyperparameters(config) try: - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 
1, {} + pipeline.fit(fit_dictionary_tabular) except Exception as e: pytest.fail(f"Failed on config={config} with {e}") @@ -172,8 +178,10 @@ def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess pipeline = TabularClassificationPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 387d3138c..ec8a7f107 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -90,7 +90,10 @@ def test_pipeline_predict(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) @@ -147,7 +150,10 @@ def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess pipeline = TabularRegressionPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" From f2ce09a580b00557179e3d446db7a081e51e1f32 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 16:46:36 +0200 Subject: [PATCH 05/14] Address comments from shuhei and fransisco: --- test/test_api/test_api.py | 4 +-- .../test_pipeline/components/training/base.py | 1 + .../components/training/test_training.py | 29 ++++++++++++------- .../test_tabular_classification.py | 3 ++ test/test_pipeline/test_tabular_regression.py | 3 ++ 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index a7d19914e..4fbfa929d 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -165,8 +165,8 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric='accuracy', - total_walltime_limit=40, - func_eval_time_limit_secs=5, + total_walltime_limit=50, + func_eval_time_limit_secs=10, enable_traditional_pipeline=False, ) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index ebf8486bf..88068603c 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -20,6 +20,7 @@ class BaseTraining(unittest.TestCase): def prepare_trainer(self, + n_samples: int, trainer: BaseTrainerComponent, task_type: int, epochs=50): diff --git 
a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index f47cbe810..9c1d79016 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -30,6 +30,9 @@ from test.test_pipeline.components.training.base import BaseTraining # noqa (E402: module level import not at top of file) +OVERFIT_EPOCHS = 1000 + + class BaseDataLoaderTest(unittest.TestCase): def test_get_set_config_space(self): """ @@ -123,7 +126,7 @@ def test_fit_transform(self): class BaseTrainerComponentTest(BaseTraining, unittest.TestCase): - def test_evaluate(self): + def test_evaluate(self, n_samples): """ Makes sure we properly evaluate data, returning a proper loss and metric @@ -135,7 +138,8 @@ def test_evaluate(self): loader, criterion, epochs, - logger) = self.prepare_trainer(BaseTrainerComponent(), + logger) = self.prepare_trainer(n_samples, + BaseTrainerComponent(), constants.TABULAR_CLASSIFICATION) prev_loss, prev_metrics = trainer.evaluate(loader, epoch=1, writer=None) @@ -157,16 +161,17 @@ def test_evaluate(self): class StandardTrainerTest(BaseTraining, unittest.TestCase): - def test_regression_epoch_training(self): + def test_regression_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(StandardTrainer(), + logger) = self.prepare_trainer(n_samples, + StandardTrainer(), constants.TABULAR_REGRESSION, - 1000) + OVERFIT_EPOCHS) # Train the model counter = 0 @@ -179,16 +184,17 @@ def test_regression_epoch_training(self): if counter > epochs: self.fail(f"Could not overfit a dummy regression under {epochs} epochs") - def test_classification_epoch_training(self): + def test_classification_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(StandardTrainer(), + logger) = self.prepare_trainer(n_samples, + StandardTrainer(), constants.TABULAR_CLASSIFICATION, - 1000) + OVERFIT_EPOCHS) # Train the model counter = 0 @@ -203,16 +209,17 @@ def test_classification_epoch_training(self): class MixUpTrainerTest(BaseTraining, unittest.TestCase): - def test_classification_epoch_training(self): + def test_classification_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(MixUpTrainer(alpha=0.5), + logger) = self.prepare_trainer(n_samples, + MixUpTrainer(alpha=0.5), constants.TABULAR_CLASSIFICATION, - 1000) + OVERFIT_EPOCHS) # Train the model counter = 0 diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 349d170ff..c90eb2a04 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -444,6 +444,9 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): pipeline.fit(fit_dictionary_tabular_dummy) + # Ensure that the network is an instance of torch Module + assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) + # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of classes ("num_classes" in dataset_properties) expected_output_shape = (X.shape[0], diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index ec8a7f107..3df3c6c41 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -308,6 +308,9 @@ def 
test_pipeline_score(fit_dictionary_tabular_dummy): pipeline.fit(fit_dictionary_tabular_dummy) + # Ensure that the network is an instance of torch Module + assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) + # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) expected_output_shape = (X.shape[0], From e12db796a005329e994aac19d81bdc40663c5577 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 18:17:50 +0200 Subject: [PATCH 06/14] Move base training to pytest --- .../test_pipeline/components/training/base.py | 2 +- .../components/training/test_training.py | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 88068603c..07ba94f2a 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -17,7 +17,7 @@ from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent, BudgetTracker -class BaseTraining(unittest.TestCase): +class BaseTraining: def prepare_trainer(self, n_samples: int, diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index 9c1d79016..83eb94198 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -6,6 +6,8 @@ import numpy as np +import pytest + from sklearn.base import clone import torch @@ -124,8 +126,7 @@ def test_fit_transform(self): loader.val_data_loader) -class BaseTrainerComponentTest(BaseTraining, unittest.TestCase): - +class TestBaseTrainerComponent(BaseTraining): def test_evaluate(self, n_samples): """ Makes sure we properly evaluate data, returning a proper loss @@ -143,7 +144,7 @@ def test_evaluate(self, n_samples): constants.TABULAR_CLASSIFICATION) prev_loss, prev_metrics = trainer.evaluate(loader, epoch=1, writer=None) - self.assertIn('accuracy', prev_metrics) + assert 'accuracy' in prev_metrics # Fit the model self.train_model(model, @@ -155,12 +156,12 @@ def test_evaluate(self, n_samples): # Loss and metrics should have improved after fit # And the prediction should be better than random loss, metrics = trainer.evaluate(loader, epoch=1, writer=None) - self.assertGreater(prev_loss, loss) - self.assertGreater(metrics['accuracy'], prev_metrics['accuracy']) - self.assertGreater(metrics['accuracy'], 0.5) + assert prev_loss > loss + assert metrics['accuracy'] > prev_metrics['accuracy'] + assert metrics['accuracy']> 0.5 -class StandardTrainerTest(BaseTraining, unittest.TestCase): +class StandardTrainerTest(BaseTraining): def test_regression_epoch_training(self, n_samples): (trainer, _, @@ -182,7 +183,7 @@ def test_regression_epoch_training(self, n_samples): r2 = metrics['r2'] if counter > epochs: - self.fail(f"Could not overfit a dummy regression under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy regression under {epochs} epochs") def test_classification_epoch_training(self, n_samples): (trainer, @@ -205,10 +206,10 @@ def test_classification_epoch_training(self, n_samples): accuracy = metrics['accuracy'] if counter > epochs: - self.fail(f"Could not overfit a dummy classification under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy classification under {epochs} epochs") -class 
MixUpTrainerTest(BaseTraining, unittest.TestCase): +class MixUpTrainerTest(BaseTraining): def test_classification_epoch_training(self, n_samples): (trainer, _, @@ -230,7 +231,7 @@ def test_classification_epoch_training(self, n_samples): accuracy = metrics['accuracy'] if counter > epochs: - self.fail(f"Could not overfit a dummy classification under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy classification under {epochs} epochs") class TrainerTest(unittest.TestCase): From 91e030911959be11f0f5463fc572e401d120228e Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 18:19:04 +0200 Subject: [PATCH 07/14] Fix flake in tests --- test/test_pipeline/components/training/base.py | 1 - test/test_pipeline/components/training/test_training.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 07ba94f2a..bfc99e555 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -1,5 +1,4 @@ import logging -import unittest from sklearn.datasets import make_classification, make_regression diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index 83eb94198..98ea47716 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -158,7 +158,7 @@ def test_evaluate(self, n_samples): loss, metrics = trainer.evaluate(loader, epoch=1, writer=None) assert prev_loss > loss assert metrics['accuracy'] > prev_metrics['accuracy'] - assert metrics['accuracy']> 0.5 + assert metrics['accuracy'] > 0.5 class StandardTrainerTest(BaseTraining): From e60d45802e090940219e36bee2645300d28a3f1c Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 19:07:11 +0200 Subject: [PATCH 08/14] forgot to pass n_samples --- test/test_pipeline/components/training/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index bfc99e555..2c8aead4c 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -1,6 +1,7 @@ import logging from sklearn.datasets import make_classification, make_regression +from sklearn.model_selection import ParameterGrid #, ParameterSampler import torch @@ -25,7 +26,7 @@ def prepare_trainer(self, epochs=50): if task_type in CLASSIFICATION_TASKS: X, y = make_classification( - n_samples=500, + n_samples=n_samples, n_features=4, n_informative=3, n_redundant=1, @@ -43,7 +44,7 @@ def prepare_trainer(self, elif task_type in REGRESSION_TASKS: X, y = make_regression( - n_samples=500, + n_samples=n_samples, n_features=4, n_informative=3, n_targets=1, From 0f13c5ec2badc7f07554bc5e268fe93cb9c84bdb Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 12 May 2021 19:09:35 +0200 Subject: [PATCH 09/14] stupid error --- test/test_pipeline/components/training/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 2c8aead4c..d7cb2ebd1 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -1,7 +1,6 @@ import logging from sklearn.datasets import make_classification, make_regression -from sklearn.model_selection import ParameterGrid #, 
ParameterSampler import torch From 592278ab67e5351675b03c692f84cd5ef6f53ea5 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Fri, 14 May 2021 12:45:20 +0200 Subject: [PATCH 10/14] Address comments from shuhei, remove hardcoding and fix bug in dummy eval function --- test/test_api/test_api.py | 122 +++++--------------------------------- test/test_api/utils.py | 98 ++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 106 deletions(-) create mode 100644 test/test_api/utils.py diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 4fbfa929d..280617306 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -3,6 +3,7 @@ import pickle import sys import unittest +from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_function import numpy as np @@ -20,120 +21,27 @@ from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.api.tabular_regression import TabularRegressionTask -from autoPyTorch.constants import REGRESSION_TASKS from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, ) -from autoPyTorch.evaluation.abstract_evaluator import ( - DummyClassificationPipeline, - DummyRegressionPipeline, - fit_and_suppress_warnings -) -from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy -# ======== -# Fixtures -# ======== -class DummyTrainEvaluator(TrainEvaluator): - - def _fit_and_predict(self, pipeline, fold: int, train_indices, - test_indices, - add_pipeline_to_self - ): - - if self.task_type in REGRESSION_TASKS: - pipeline = DummyRegressionPipeline(config=1) - else: - pipeline = DummyClassificationPipeline(config=1) - - self.indices[fold] = ((train_indices, test_indices)) - - X = {'train_indices': train_indices, - 'val_indices': test_indices, - 'split_id': fold, - 'num_run': self.num_run, - **self.fit_dictionary} # fit dictionary - y = None - fit_and_suppress_warnings(self.logger, pipeline, X, y) - self.logger.info("Model fitted, now predicting") - ( - Y_train_pred, - Y_opt_pred, - Y_valid_pred, - Y_test_pred - ) = self._predict( - pipeline, - train_indices=train_indices, - test_indices=test_indices, - ) - - if add_pipeline_to_self: - self.pipeline = pipeline - else: - self.pipelines[fold] = pipeline - - return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred - - -# create closure for evaluating an algorithm -def dummy_eval_function( - backend, - queue, - metric, - budget: float, - config, - seed: int, - output_y_hat_optimization: bool, - num_run: int, - include, - exclude, - disable_file_output, - pipeline_config=None, - budget_type=None, - init_params=None, - logger_port=None, - all_supported_metrics=True, - search_space_updates=None, - instance: str = None, -) -> None: - evaluator = TrainEvaluator( - backend=backend, - queue=queue, - metric=metric, - configuration=config, - seed=seed, - num_run=num_run, - output_y_hat_optimization=output_y_hat_optimization, - include=include, - exclude=exclude, - disable_file_output=disable_file_output, - init_params=init_params, - budget=budget, - budget_type=budget_type, - logger_port=logger_port, - all_supported_metrics=all_supported_metrics, - pipeline_config=pipeline_config, - search_space_updates=search_space_updates - ) - evaluator.fit_predict_and_loss() - - -def dummy_do_dummy_prediction(): - return +CV_NUM_SPLITS = 2 +HOLDOUT_NUM_SPLITS = 1 +# ==== # Test -# ======== +# ==== 
@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', new=dummy_eval_function) @pytest.mark.parametrize('openml_id', (40981, )) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) )) def test_tabular_classification(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): @@ -165,14 +73,15 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric='accuracy', - total_walltime_limit=50, - func_eval_time_limit_secs=10, + total_walltime_limit=30, + func_eval_time_limit_secs=5, enable_traditional_pipeline=False, ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_classification' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 + expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation \ + else CV_NUM_SPLITS assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -243,7 +152,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingClassifier) - assert len(model.estimators_) == 2 + assert len(model.estimators_) == CV_NUM_SPLITS else: pytest.fail(resampling_strategy) @@ -300,7 +209,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl new=dummy_eval_function) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) )) def test_tabular_regression(openml_name, resampling_strategy, backend, resampling_strategy_args, n_samples): @@ -344,14 +253,15 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric='r2', - total_walltime_limit=35, + total_walltime_limit=30, func_eval_time_limit_secs=5, enable_traditional_pipeline=False, ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_regression' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 2 + expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation\ + else CV_NUM_SPLITS assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -417,7 +327,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingRegressor) - assert len(model.estimators_) == 2 + assert len(model.estimators_) == CV_NUM_SPLITS else: pytest.fail(resampling_strategy) diff --git a/test/test_api/utils.py b/test/test_api/utils.py new file mode 100644 index 
000000000..0fa84b55b --- /dev/null +++ b/test/test_api/utils.py @@ -0,0 +1,98 @@ +from autoPyTorch.constants import REGRESSION_TASKS +from autoPyTorch.evaluation.abstract_evaluator import ( + DummyClassificationPipeline, + DummyRegressionPipeline, + fit_and_suppress_warnings +) +from autoPyTorch.evaluation.train_evaluator import TrainEvaluator + + +# ======== +# Fixtures +# ======== +class DummyTrainEvaluator(TrainEvaluator): + + def _fit_and_predict(self, pipeline, fold: int, train_indices, + test_indices, + add_pipeline_to_self + ): + + if self.task_type in REGRESSION_TASKS: + pipeline = DummyRegressionPipeline(config=1) + else: + pipeline = DummyClassificationPipeline(config=1) + + self.indices[fold] = ((train_indices, test_indices)) + + X = {'train_indices': train_indices, + 'val_indices': test_indices, + 'split_id': fold, + 'num_run': self.num_run, + **self.fit_dictionary} # fit dictionary + y = None + fit_and_suppress_warnings(self.logger, pipeline, X, y) + self.logger.info("Model fitted, now predicting") + ( + Y_train_pred, + Y_opt_pred, + Y_valid_pred, + Y_test_pred + ) = self._predict( + pipeline, + train_indices=train_indices, + test_indices=test_indices, + ) + + if add_pipeline_to_self: + self.pipeline = pipeline + else: + self.pipelines[fold] = pipeline + + return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred + + +# create closure for evaluating an algorithm +def dummy_eval_function( + backend, + queue, + metric, + budget: float, + config, + seed: int, + output_y_hat_optimization: bool, + num_run: int, + include, + exclude, + disable_file_output, + pipeline_config=None, + budget_type=None, + init_params=None, + logger_port=None, + all_supported_metrics=True, + search_space_updates=None, + instance: str = None, +) -> None: + evaluator = DummyTrainEvaluator( + backend=backend, + queue=queue, + metric=metric, + configuration=config, + seed=seed, + num_run=num_run, + output_y_hat_optimization=output_y_hat_optimization, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + init_params=init_params, + budget=budget, + budget_type=budget_type, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + pipeline_config=pipeline_config, + search_space_updates=search_space_updates + ) + evaluator.fit_predict_and_loss() + + +def dummy_do_dummy_prediction(): + return From aa8b3d666c27e4a9c44e0a15818bcb7b848cfae9 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Fri, 14 May 2021 13:57:22 +0200 Subject: [PATCH 11/14] Skip ensemble test for python >=3.7 and introduce random state for feature processors --- .../feature_preprocessing/KernelPCA.py | 5 ++--- .../feature_preprocessing/Nystroem.py | 5 ++--- .../feature_preprocessing/PolynomialFeatures.py | 5 ++--- .../feature_preprocessing/PowerTransformer.py | 5 ++--- .../feature_preprocessing/RandomKitchenSinks.py | 5 ++--- .../feature_preprocessing/TruncatedSVD.py | 5 ++--- .../base_feature_preprocessor.py | 14 ++++++++++++-- test/test_ensemble/test_ensemble.py | 2 ++ .../preprocessing/test_feature_preprocessor.py | 9 +++++++-- 9 files changed, 33 insertions(+), 22 deletions(-) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py index 2a4737c4d..0d00e8ddb 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py +++ 
b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py @@ -23,15 +23,14 @@ class KernelPCA(autoPyTorchFeaturePreprocessingComponent): def __init__(self, n_components: int = 10, kernel: str = 'rbf', degree: int = 3, gamma: float = 0.01, coef0: float = 0.0, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[np.random.RandomState] = None ) -> None: self.n_components = n_components self.kernel = kernel self.degree = degree self.gamma = gamma self.coef0 = coef0 - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) self.add_fit_requirements([ FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)]) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py index 0a8f6c63d..519be2e30 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py @@ -23,15 +23,14 @@ class Nystroem(autoPyTorchFeaturePreprocessingComponent): def __init__(self, n_components: int = 10, kernel: str = 'rbf', degree: int = 3, gamma: float = 0.01, coef0: float = 0.0, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[np.random.RandomState] = None ) -> None: self.n_components = n_components self.kernel = kernel self.degree = degree self.gamma = gamma self.coef0 = coef0 - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py index a41c0a26d..5906b9651 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py @@ -19,13 +19,12 @@ class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent): def __init__(self, degree: int = 2, interaction_only: bool = False, include_bias: bool = False, - random_state: Optional[Union[int, np.random.RandomState]] = None): + random_state: Optional[np.random.RandomState] = None): self.degree = degree self.interaction_only = interaction_only self.include_bias = include_bias - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures( diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py index 767a0f6c1..e968ff34f 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py @@ -17,11 +17,10 @@ class 
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
index 767a0f6c1..e968ff34f 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
@@ -17,11 +17,10 @@ class PowerTransformer(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, standardize: bool = True,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.standardize = standardize
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson",
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
index 9dbf26cbc..9d60638ce 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
@@ -20,12 +20,11 @@ class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 100,
                  gamma: float = 1.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.gamma = gamma
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
index bfe4568b3..29b24d627 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
@@ -18,11 +18,10 @@ class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, target_dim: int = 128,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.target_dim = target_dim
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
index 8c85bbf30..eaad1d360 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
@@ -1,4 +1,8 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+
+from sklearn.utils import check_random_state
 
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
     autoPyTorchTabularPreprocessingComponent
@@ -8,7 +12,13 @@ class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     _required_properties: List[str] = ['handles_sparse']
 
-    def __init__(self) -> None:
+    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
+        if random_state is None:
+            # Feature preprocessors need a random state for sampling-based
+            # components such as Nystroem or KernelPCA
+            self.random_state = check_random_state(1)
+        else:
+            self.random_state = random_state
         super().__init__()
 
     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
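The new base-class behaviour leans on sklearn's check_random_state contract. A quick sketch of that contract, with illustrative values only (nothing in this diff asserts them):

    import numpy as np
    from sklearn.utils import check_random_state

    check_random_state(None)             # returns numpy's global RandomState
    check_random_state(1)                # returns np.random.RandomState(1)
    rs = np.random.RandomState(11)
    assert check_random_state(rs) is rs  # instances are passed through as-is

    # Two generators built from the same seed draw identical samples, which is
    # what makes the seeded preprocessors reproducible across test runs:
    a, b = np.random.RandomState(5), np.random.RandomState(5)
    assert (a.rand(3) == b.rand(3)).all()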
diff --git a/test/test_ensemble/test_ensemble.py b/test/test_ensemble/test_ensemble.py
index e0e4c3bb1..cd0f02e72 100644
--- a/test/test_ensemble/test_ensemble.py
+++ b/test/test_ensemble/test_ensemble.py
@@ -690,6 +690,8 @@ def test_ensemble_builder_process_realrun(dask_client, ensemble_backend):
 
 @flaky(max_runs=3)
 @unittest.mock.patch('autoPyTorch.ensemble.ensemble_builder.EnsembleBuilder.fit_ensemble')
+@pytest.mark.skipif(sys.version_info >= (3, 7),
+                    reason="Causes out-of-memory errors in CI")
 def test_ensemble_builder_nbest_remembered(fit_ensemble, ensemble_backend, dask_client):
     """
     Makes sure ensemble builder returns the size of the ensemble that pynisher allowed
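Worth noting about the decorator stack above: the skipif mark is resolved before the test body ever runs, so on Python >= 3.7 the test is skipped outright and the flaky retries never come into play. A standalone sketch of the same gating pattern (the test itself is made up):

    import sys

    import pytest
    from flaky import flaky

    @flaky(max_runs=3)  # retries apply only when the test actually runs
    @pytest.mark.skipif(sys.version_info >= (3, 7),
                        reason="Causes out-of-memory errors in CI")
    def test_memory_hungry_ensemble():
        ...  # skipped at collection time on Python >= 3.7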
diff --git a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py
index a5c342804..822112fca 100644
--- a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py
+++ b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py
@@ -14,6 +14,11 @@ from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
 
 
+@pytest.fixture
+def random_state():
+    return 11
+
+
 @pytest.fixture(params=['TruncatedSVD', 'PolynomialFeatures', 'PowerTransformer',
                         'Nystroem', 'KernelPCA', 'RandomKitchenSinks'])
 def preprocessor(request):
@@ -24,10 +29,10 @@
                          'classification_numerical_and_categorical'], indirect=True)
 class TestFeaturePreprocessors:
 
-    def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor):
+    def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor, random_state):
         preprocessor = FeatureProprocessorChoice(
             dataset_properties=fit_dictionary_tabular['dataset_properties']
-        ).get_components()[preprocessor]()
+        ).get_components()[preprocessor](random_state=random_state)
         configuration = preprocessor. \
             get_hyperparameter_search_space(dataset_properties=fit_dictionary_tabular["dataset_properties"]) \
             .get_default_configuration().get_dictionary()

From 342b3b82dffe513f9356cdecfc630f86f7a2bba5 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Fri, 14 May 2021 14:15:06 +0200
Subject: [PATCH 12/14] fix flake

---
 .../tabular_preprocessing/feature_preprocessing/KernelPCA.py    | 2 +-
 .../tabular_preprocessing/feature_preprocessing/Nystroem.py     | 2 +-
 .../feature_preprocessing/PolynomialFeatures.py                 | 2 +-
 .../feature_preprocessing/PowerTransformer.py                   | 2 +-
 .../feature_preprocessing/RandomKitchenSinks.py                 | 2 +-
 .../tabular_preprocessing/feature_preprocessing/TruncatedSVD.py | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
index 0d00e8ddb..883f6d636 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
index 519be2e30..17a785466 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
index 5906b9651..b64b32eb6 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
index e968ff34f..af187c50d 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
index 9d60638ce..f8983e815 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
@@ -1,5 +1,5 @@
 from math import ceil, floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
index 29b24d627..69410d32f 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
@@ -1,5 +1,5 @@
 from math import floor
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (

From 80aa5abea4eb28432cc87dfa1db309ea7211d42a Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Mon, 17 May 2021 11:09:16 +0200
Subject: [PATCH 13/14] Remove example workflow

---
 .github/workflows/examples.yml | 39 ----------------------------------
 1 file changed, 39 deletions(-)
 delete mode 100644 .github/workflows/examples.yml

diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
deleted file mode 100644
index 538b44edd..000000000
--- a/.github/workflows/examples.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: Examples
-
-on: [push, pull_request]
-
-jobs:
-  ubuntu:
-
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: [3.8]
-      fail-fast: false
-      max-parallel: 2
-
-    steps:
-    - uses: actions/checkout@v2
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install test dependencies
-      run: |
-        git submodule update --init --recursive
-        python -m pip install --upgrade pip
-        pip install -e .[examples]
-        which python
-        pip freeze
-    - name: Store repository status
-      id: status-before
-      run: |
-        echo "::set-output name=BEFORE::$(git status --porcelain -b)"
-    - name: Run tests
-      run: |
-        python examples/20_basics/example_image_classification.py
-        python examples/20_basics/example_tabular_classification.py
-        python examples/20_basics/example_tabular_regression.py
-        python examples/40_advanced/example_custom_configuration_space.py
-        python examples/40_advanced/example_resampling_strategy.py
-        python examples/40_advanced/example_visualization.py

From 8425143625632c6022f5b64b06bcf8233eaf6166 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Mon, 17 May 2021 12:54:38 +0200
Subject: [PATCH 14/14] Remove "-> None" from __init__ in feature preprocessing

---
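Dropping the explicit "-> None" should be invisible to type checkers: under mypy's rules, an __init__ with at least one annotated parameter is already treated as fully typed, and its return type defaults to None. A minimal sketch (the class is illustrative, not from the codebase):

    from typing import Optional

    import numpy as np

    class Component:
        # One annotated parameter is enough for mypy to check the body;
        # __init__'s return type is implicitly None.
        def __init__(self, random_state: Optional[np.random.RandomState] = None):
            self.random_state = random_state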
 .../tabular_preprocessing/feature_preprocessing/KernelPCA.py    | 2 +-
 .../tabular_preprocessing/feature_preprocessing/Nystroem.py     | 2 +-
 .../feature_preprocessing/RandomKitchenSinks.py                 | 2 +-
 .../feature_preprocessing/base_feature_preprocessor.py          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
index 883f6d636..a03a35331 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
@@ -24,7 +24,7 @@ def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
                  random_state: Optional[np.random.RandomState] = None
-                 ) -> None:
+                 ):
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
index 17a785466..d00697c21 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
@@ -24,7 +24,7 @@ def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
                  random_state: Optional[np.random.RandomState] = None
-                 ) -> None:
+                 ):
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
index f8983e815..a3267391a 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
@@ -21,7 +21,7 @@ class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 100,
                  gamma: float = 1.0,
                  random_state: Optional[np.random.RandomState] = None
-                 ) -> None:
+                 ):
         self.n_components = n_components
         self.gamma = gamma
         super().__init__(random_state=random_state)
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
index eaad1d360..d11f69b90 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
@@ -12,7 +12,7 @@ class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     _required_properties: List[str] = ['handles_sparse']
 
-    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
+    def __init__(self, random_state: Optional[np.random.RandomState] = None):
         if random_state is None:
             # Feature preprocessors need a random state for sampling-based
             # components such as Nystroem or KernelPCA