From cdcf7664fd7ddf866829947180adcac17e1a29c6 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 30 Nov 2021 15:29:54 +0100 Subject: [PATCH 01/32] Add fit pipeline with tests --- autoPyTorch/api/base_task.py | 279 ++++++++++++++++--- autoPyTorch/api/tabular_classification.py | 97 ++++--- autoPyTorch/api/tabular_regression.py | 102 ++++--- autoPyTorch/evaluation/abstract_evaluator.py | 46 +-- autoPyTorch/evaluation/tae.py | 2 +- autoPyTorch/evaluation/train_evaluator.py | 25 +- test/test_api/test_api.py | 111 +++++++- 7 files changed, 512 insertions(+), 150 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index b4d20165e..b33c6d3ae 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -27,7 +27,7 @@ import pandas as pd -from smac.runhistory.runhistory import DataOrigin, RunHistory +from smac.runhistory.runhistory import DataOrigin, RunHistory, RunInfo, RunValue from smac.stats.stats import Stats from smac.tae import StatusType @@ -233,7 +233,11 @@ def __init__( " HyperparameterSearchSpaceUpdates got {}".format(type(self.search_space_updates))) @abstractmethod - def build_pipeline(self, dataset_properties: Dict[str, Any]) -> BasePipeline: + def build_pipeline(self, dataset_properties: Dict[str, Any], + include_components: Optional[Dict] = None, + exclude_components: Optional[Dict] = None, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + ) -> BasePipeline: """ Build pipeline according to current task and for the passed dataset properties @@ -244,7 +248,21 @@ def build_pipeline(self, dataset_properties: Dict[str, Any]) -> BasePipeline: Returns: """ - raise NotImplementedError + raise NotImplementedError("Function called on BaseTask, this can only be called by " + "specific task which is a child of the BaseTask") + + @abstractmethod + def get_dataset(self, + X_train: Union[List, pd.DataFrame, np.ndarray], + y_train: Union[List, pd.DataFrame, np.ndarray], + X_test: Union[List, pd.DataFrame, np.ndarray], + y_test: Union[List, pd.DataFrame, np.ndarray], + resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + dataset_name: Optional[str] = None, + ) -> BaseDataset: + raise NotImplementedError("Function called on BaseTask, this can only be called by " + "specific task which is a child of the BaseTask") @property def run_history(self) -> RunHistory: @@ -563,7 +581,7 @@ def _do_dummy_prediction(self) -> None: initial_num_run=num_run, stats=stats, memory_limit=memory_limit, - disable_file_output=True if len(self._disable_file_output) > 0 else False, + disable_file_output=self._disable_file_output, all_supported_metrics=self._all_supported_metrics ) @@ -647,7 +665,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs: initial_num_run=self._backend.get_next_num_run(), stats=stats, memory_limit=memory_limit, - disable_file_output=True if len(self._disable_file_output) > 0 else False, + disable_file_output=self._disable_file_output, all_supported_metrics=self._all_supported_metrics ) dask_futures.append([ @@ -743,7 +761,7 @@ def _search( tae_func: Optional[Callable] = None, all_supported_metrics: bool = True, precision: int = 32, - disable_file_output: List = [], + disable_file_output: Optional[List[str]] = None, load_models: bool = True, portfolio_selection: Optional[str] = None, dask_client: Optional[dask.distributed.Client] = None @@ -844,9 +862,8 @@ def _search( precision (int: default=32): Numeric 
precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (Union[bool, List]): - If True, disable model and prediction output. - Can also be used as a list to pass more fine-grained + disable_file_output (Optional[List]): + Used as a list to pass more fine-grained information on what to save. Allowed elements in the list are: + `y_optimization`: @@ -860,6 +877,8 @@ def _search( pipelines fit on each fold. + `y_test`: do not save the predictions for the test set. + + `all`: + do not save any of the above. load_models (bool: default=True): Whether to load the models after fitting AutoPyTorch. portfolio_selection (Optional[str]): @@ -901,7 +920,7 @@ def _search( self._backend.setup_logger(port=self._logger_port) self._all_supported_metrics = all_supported_metrics - self._disable_file_output = disable_file_output + self._disable_file_output = disable_file_output if disable_file_output is not None else [] self._memory_limit = memory_limit self._time_for_task = total_walltime_limit # Save start time to backend @@ -1223,10 +1242,29 @@ def refit( return self - def fit(self, - dataset: BaseDataset, - pipeline_config: Optional[Configuration] = None, - split_id: int = 0) -> BasePipeline: + def fit_pipeline( + self, + configuration: Configuration, + dataset: Optional[BaseDataset] = None, + X_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + dataset_name: Optional[str] = None, + resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes]] = None, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + run_time_limit_secs: int = 60, + memory_limit: Optional[int] = None, + eval_metric: Optional[str] = None, + all_supported_metrics: bool = False, + budget_type: Optional[str] = None, + include_components: Optional[Dict] = None, + exclude_components: Optional[Dict] = None, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, + budget: Optional[float] = None, + pipeline_options: Optional[Dict] = None, + disable_file_output: Optional[List[str]] = None, + ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue, BaseDataset]: """ Fit a pipeline on the given task for the budget. A pipeline configuration can be specified if None, @@ -1237,24 +1275,110 @@ def fit(self, methods. Args: - dataset (Dataset): - The argument that will provide the dataset splits. It can either - be a dictionary with the splits, or the dataset object which can - generate the splits based on different restrictions. - split_id (int: default=0): - split id to fit on. - pipeline_config (Optional[Configuration]): - configuration to fit the pipeline with. If None, - uses default + X_train, y_train, X_test, y_test: Union[np.ndarray, List, pd.DataFrame] + A pair of features (X_train) and targets (y_train) used to fit a + pipeline. Additionally, a holdout of this pairs (X_test, y_test) can + be provided to track the generalization performance of each stage. + dataset_name (Optional[str]): + Name of the dataset, if None, random value is used. + resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), + (default=HoldoutValTypes.holdout_validation): + strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): + Arguments required for the chosen resampling strategy. 
If None, uses
+                the default values provided in DEFAULT_RESAMPLING_PARAMETERS
+                in ```datasets/resampling_strategy.py```.
+            run_time_limit_secs (int: default=60):
+                Time limit for a single call to the machine learning model.
+                Model fitting will be terminated if the machine learning algorithm
+                runs over the time limit. Set this value high enough so that
+                typical machine learning algorithms can be fit on the training
+                data.
+            memory_limit (Optional[int]):
+                Memory limit in MB for the machine learning algorithm. autopytorch
+                will stop fitting the machine learning algorithm if it tries
+                to allocate more than memory_limit MB. If None is provided,
+                no memory limit is set. In case of multi-processing, memory_limit
+                will be per job. This memory limit also applies to the ensemble
+                creation process.
+            eval_metric (str):
+                Name of the metric that is used to evaluate a pipeline.
+            all_supported_metrics (bool: default=False):
+                if True, all metrics supporting current task will be calculated
+                for each pipeline and results will be available via cv_results
+            budget_type (str):
+                Type of budget to be used when fitting the pipeline.
+                It can be one of:
+
+                + `epochs`: The training of each pipeline will be terminated after
+                    a number of epochs have passed. This number of epochs is determined by the
+                    budget argument of this method.
+                + `runtime`: The training of each pipeline will be terminated after
+                    a number of seconds have passed. This number of seconds is determined by the
+                    budget argument of this method. The overall fitting time of a pipeline is
+                    controlled by func_eval_time_limit_secs. 'runtime' only controls the allocated
+                    time to train a pipeline, but it does not consider the overall time it takes
+                    to create a pipeline (data loading and preprocessing, other i/o operations, etc.).
+            include_components (Optional[Dict]):
+                If None, all possible components are used.
+                Otherwise specifies set of components to use.
+            exclude_components (Optional[Dict]):
+                If None, all possible components are used.
+                Otherwise specifies set of components not to use.
+                Incompatible with include_components.
+            search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+                Updates to be made to the hyperparameter search space of the pipeline
+            budget (Optional[float]):
+                Budget to fit a single run of the pipeline. If not
+                provided, uses the default in the pipeline config
+            pipeline_options (Optional[Dict]):
+                Valid config options include "device",
+                "torch_num_threads", "early_stopping", "use_tensorboard_logger",
+                "metrics_during_training"
+            disable_file_output (Optional[List]):
+                Used as a list to pass more fine-grained
+                information on what to save. Allowed elements in the list are:
+
+                + `y_optimization`:
+                    do not save the predictions for the optimization set,
+                    which would later on be used to build an ensemble. Note that SMAC
+                    optimizes a metric evaluated on the optimization set.
+                + `pipeline`:
+                    do not save any individual pipeline files
+                + `pipelines`:
+                    In case of cross validation, disables saving the joint model of the
+                    pipelines fit on each fold.
+                + `y_test`:
+                    do not save the predictions for the test set.
+                + `all`:
+                    do not save any of the above.
+            configuration: (Configuration)
+                configuration to fit the pipeline with. 
Returns:
-            BasePipeline:
-                fitted pipeline
+            (BasePipeline): fitted pipeline
+            (RunInfo): Run information
+            (RunValue): Result of fitting the pipeline
+            (BaseDataset): Dataset created from the given tensors
         """
-        self.dataset_name = dataset.dataset_name
 
-        if self._logger is None:
-            self._logger = self._get_logger(str(self.dataset_name))
+        if dataset is None:
+            assert X_train is not None and \
+                y_train is not None, "No dataset provided, must provide X_train, y_train tensors"
+            dataset = self.get_dataset(X_train=X_train,
+                                       y_train=y_train,
+                                       X_test=X_test,
+                                       y_test=y_test,
+                                       resampling_strategy=resampling_strategy,
+                                       resampling_strategy_args=resampling_strategy_args,
+                                       dataset_name=dataset_name
+                                       )
+
+        # TAE expects each configuration to have a config_id.
+        # As fitting a single pipeline is not part of the
+        # search process, it makes sense to set it to 0
+        if not hasattr(configuration, 'config_id') or configuration.config_id is None:
+            configuration.__setattr__('config_id', 0)
 
         # get dataset properties
         dataset_requirements = get_dataset_requirements(
@@ -1265,21 +1389,98 @@ def fit(self,
         dataset_properties = dataset.get_dataset_properties(dataset_requirements)
         self._backend.save_datamanager(dataset)
 
-        # build pipeline
-        pipeline = self.build_pipeline(dataset_properties)
-        if pipeline_config is not None:
-            pipeline.set_hyperparameters(pipeline_config)
+        if self._logger is None:
+            # dataset_name is created inside the constructor of BaseDataset
+            # we expect it to be not None. This is for mypy
+            assert dataset.dataset_name is not None
+            self._logger = self._get_logger(dataset.dataset_name)
+
+        if include_components is None:
+            include_components = self.include_components
+        if exclude_components is None:
+            exclude_components = self.exclude_components
+        if search_space_updates is None:
+            search_space_updates = self.search_space_updates
+
+        scenario_mock = unittest.mock.Mock()
+        scenario_mock.wallclock_limit = run_time_limit_secs
+        # This stats object is a hack - maybe the SMAC stats object should
+        # already be generated here!
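+        # (Only `wallclock_limit` is set explicitly on the mock; any other
+        # attribute SMAC reads from it resolves to an auto-created Mock.)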
+        stats = Stats(scenario_mock)
+
+        if memory_limit is None:
+            if hasattr(self, '_memory_limit') and self._memory_limit is not None:
+                memory_limit = self._memory_limit
+
+        metric = get_metrics(dataset_properties=dataset_properties,
+                             names=[eval_metric] if eval_metric is not None else None,
+                             all_supported_metrics=False).pop()
+
+        pipeline_options = {**self.pipeline_options, **pipeline_options} if pipeline_options is not None \
+            else self.pipeline_options.copy()
+
+        assert pipeline_options is not None
+
+        if budget_type is not None:
+            pipeline_options.update({'budget_type': budget_type})
+        else:
+            budget_type = pipeline_options['budget_type']
 
-        # initialise fit dictionary
-        X = self._get_fit_dictionary(
-            dataset_properties=dataset_properties,
-            dataset=dataset,
-            split_id=split_id)
+        budget = budget if budget is not None else pipeline_options[budget_type]
 
-        fit_and_suppress_warnings(self._logger, pipeline, X, y=None)
+        if disable_file_output is None:
+            disable_file_output = self._disable_file_output if hasattr(self, '_disable_file_output') \
+                and self._disable_file_output is not None else []
+
+        stats.start_timing()
+
+        tae = ExecuteTaFuncWithQueue(
+            backend=self._backend,
+            seed=self.seed,
+            metric=metric,
+            logger_port=self._logger_port,
+            cost_for_crash=get_cost_of_crash(metric),
+            abort_on_first_run_crash=False,
+            initial_num_run=self._backend.get_next_num_run(),
+            stats=stats,
+            memory_limit=memory_limit,
+            disable_file_output=disable_file_output,
+            all_supported_metrics=all_supported_metrics,
+            budget_type=budget_type,
+            include=include_components,
+            exclude=exclude_components,
+            search_space_updates=search_space_updates,
+            pipeline_config=pipeline_options,
+            pynisher_context=self._multiprocessing_context
+        )
+
+        run_info, run_value = tae.run_wrapper(
+            RunInfo(config=configuration,
+                    budget=budget,
+                    seed=self.seed,
+                    cutoff=run_time_limit_secs,
+                    capped=False,
+                    instance_specific=None,
+                    instance=None)
+        )
+
+        fitted_pipeline: Optional[BasePipeline] = None
+        if 'all' in disable_file_output or 'pipeline' in disable_file_output:
+            self._logger.warning("File output is disabled. 
No pipeline can be returned")
+        elif run_value.status == StatusType.SUCCESS:
+            if self.resampling_strategy in CrossValTypes:
+                load_function = self._backend.load_cv_model_by_seed_and_id_and_budget
+            else:
+                load_function = self._backend.load_model_by_seed_and_id_and_budget
+            fitted_pipeline = load_function(
+                seed=self.seed,
+                idx=run_info.config.config_id + tae.initial_num_run,
+                budget=float(run_info.budget),
+            )
 
         self._clean_logger()
-        return pipeline
+
+        return fitted_pipeline, run_info, run_value, dataset
 
     def predict(
         self,
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index d83f1dc01..3f8019e58 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -1,5 +1,3 @@
-import os
-import uuid
 from typing import Any, Callable, Dict, List, Optional, Union
 
 import numpy as np
@@ -106,18 +104,55 @@ def __init__(
             task_type=TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION],
         )
 
-    def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularClassificationPipeline:
-        """
-        Build pipeline according to current task and for the passed dataset properties
+    def build_pipeline(
+        self,
+        dataset_properties: Dict[str, Any],
+        include_components: Optional[Dict] = None,
+        exclude_components: Optional[Dict] = None,
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+    ) -> TabularClassificationPipeline:
+        return TabularClassificationPipeline(dataset_properties=dataset_properties,
+                                             include=include_components,
+                                             exclude=exclude_components,
+                                             search_space_updates=search_space_updates)
 
-        Args:
-            dataset_properties (Dict[str,Any])
+    def get_dataset(
+        self,
+        X_train: Union[List, pd.DataFrame, np.ndarray],
+        y_train: Union[List, pd.DataFrame, np.ndarray],
+        X_test: Union[List, pd.DataFrame, np.ndarray],
+        y_test: Union[List, pd.DataFrame, np.ndarray],
+        resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None,
+        resampling_strategy_args: Optional[Dict[str, Any]] = None,
+        dataset_name: Optional[str] = None,
+    ) -> TabularDataset:
 
-        Returns:
-            TabularClassificationPipeline:
-                Pipeline compatible with the given dataset properties. 
- """ - return TabularClassificationPipeline(dataset_properties=dataset_properties) + resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy + resampling_strategy_args = resampling_strategy_args if resampling_strategy_args is not None else \ + self.resampling_strategy_args + + # Create a validator object to make sure that the data provided by + # the user matches the autopytorch requirements + InputValidator = TabularInputValidator( + is_classification=True, + logger_port=self._logger_port, + ) + + # Fit a input validator to check the provided data + # Also, an encoder is fit to both train and test data, + # to prevent unseen categories during inference + InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test) + + dataset = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test, + validator=InputValidator, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, + dataset_name=dataset_name + ) + + return dataset def search( self, @@ -138,7 +173,7 @@ def search( get_smac_object_callback: Optional[Callable] = None, all_supported_metrics: bool = True, precision: int = 32, - disable_file_output: List = [], + disable_file_output: Optional[List[str]] = None, load_models: bool = True, portfolio_selection: Optional[str] = None, ) -> 'BaseTask': @@ -237,9 +272,8 @@ def search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (Union[bool, List]): - If True, disable model and prediction output. - Can also be used as a list to pass more fine-grained + disable_file_output (List): + Used as a list to pass more fine-grained information on what to save. Allowed elements in the list are: + `y_optimization`: @@ -253,6 +287,8 @@ def search( pipelines fit on each fold. + `y_test`: do not save the predictions for the test set. + + `all`: + do not save any of the above. load_models (bool: default=True): Whether to load the models after fitting AutoPyTorch. 
portfolio_selection (Optional[str]): @@ -269,32 +305,15 @@ def search( self """ - if dataset_name is None: - dataset_name = str(uuid.uuid1(clock_seq=os.getpid())) - - # we have to create a logger for at this point for the validator - self._logger = self._get_logger(dataset_name) - # Create a validator object to make sure that the data provided by - # the user matches the autopytorch requirements - self.InputValidator = TabularInputValidator( - is_classification=True, - logger_port=self._logger_port, - ) - - # Fit a input validator to check the provided data - # Also, an encoder is fit to both train and test data, - # to prevent unseen categories during inference - self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test) - - self.dataset = TabularDataset( - X=X_train, Y=y_train, - X_test=X_test, Y_test=y_test, - validator=self.InputValidator, - dataset_name=dataset_name, + self.dataset = self.get_dataset( + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, resampling_strategy=self.resampling_strategy, resampling_strategy_args=self.resampling_strategy_args, - ) + dataset_name=dataset_name) return self._search( dataset=self.dataset, diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index a68990732..4d35a71b3 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -1,5 +1,3 @@ -import os -import uuid from typing import Any, Callable, Dict, List, Optional, Union import numpy as np @@ -107,18 +105,55 @@ def __init__( task_type=TASK_TYPES_TO_STRING[TABULAR_REGRESSION], ) - def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularRegressionPipeline: - """ - Build pipeline according to current task and for the passed dataset properties + def build_pipeline( + self, + dataset_properties: Dict[str, Any], + include_components: Optional[Dict] = None, + exclude_components: Optional[Dict] = None, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + ) -> TabularRegressionPipeline: + return TabularRegressionPipeline(dataset_properties=dataset_properties, + include=include_components, + exclude=exclude_components, + search_space_updates=search_space_updates) - Args: - dataset_properties (Dict[str,Any]) + def get_dataset( + self, + X_train: Union[List, pd.DataFrame, np.ndarray], + y_train: Union[List, pd.DataFrame, np.ndarray], + X_test: Union[List, pd.DataFrame, np.ndarray], + y_test: Union[List, pd.DataFrame, np.ndarray], + resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + dataset_name: Optional[str] = None, + ) -> TabularDataset: - Returns: - TabularRegressionPipeline: - Pipeline compatible with the given dataset properties. 
- """ - return TabularRegressionPipeline(dataset_properties=dataset_properties) + resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy + resampling_strategy_args = resampling_strategy_args if resampling_strategy_args is not None else \ + self.resampling_strategy_args + + # Create a validator object to make sure that the data provided by + # the user matches the autopytorch requirements + InputValidator = TabularInputValidator( + is_classification=False, + logger_port=self._logger_port, + ) + + # Fit a input validator to check the provided data + # Also, an encoder is fit to both train and test data, + # to prevent unseen categories during inference + InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test) + + dataset = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test, + validator=InputValidator, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, + dataset_name=dataset_name + ) + + return dataset def search( self, @@ -139,7 +174,7 @@ def search( get_smac_object_callback: Optional[Callable] = None, all_supported_metrics: bool = True, precision: int = 32, - disable_file_output: List = [], + disable_file_output: Optional[List[str]] = None, load_models: bool = True, portfolio_selection: Optional[str] = None, ) -> 'BaseTask': @@ -155,8 +190,8 @@ def search( A pair of features (X_train) and targets (y_train) used to fit a pipeline. Additionally, a holdout of this pairs (X_test, y_test) can be provided to track the generalization performance of each stage. - optimize_metric (str): name of the metric that is used to - evaluate a pipeline. + optimize_metric (str): + Name of the metric that is used to evaluate a pipeline. budget_type (str): Type of budget to be used when fitting the pipeline. It can be one of: @@ -238,9 +273,8 @@ def search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (Union[bool, List]): - If True, disable model and prediction output. - Can also be used as a list to pass more fine-grained + disable_file_output (Optional[List]): + Used as a list to pass more fine-grained information on what to save. Allowed elements in the list are: + `y_optimization`: @@ -254,6 +288,8 @@ def search( pipelines fit on each fold. + `y_test`: do not save the predictions for the test set. + + `all`: + do not save any of the above. load_models (bool: default=True): Whether to load the models after fitting AutoPyTorch. 
portfolio_selection (Optional[str]): @@ -270,32 +306,14 @@ def search( self """ - if dataset_name is None: - dataset_name = str(uuid.uuid1(clock_seq=os.getpid())) - - # we have to create a logger for at this point for the validator - self._logger = self._get_logger(dataset_name) - - # Create a validator object to make sure that the data provided by - # the user matches the autopytorch requirements - self.InputValidator = TabularInputValidator( - is_classification=False, - logger_port=self._logger_port, - ) - - # Fit a input validator to check the provided data - # Also, an encoder is fit to both train and test data, - # to prevent unseen categories during inference - self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test) - - self.dataset = TabularDataset( - X=X_train, Y=y_train, - X_test=X_test, Y_test=y_test, - validator=self.InputValidator, - dataset_name=dataset_name, + self.dataset = self.get_dataset( + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, resampling_strategy=self.resampling_strategy, resampling_strategy_args=self.resampling_strategy_args, - ) + dataset_name=dataset_name) return self._search( dataset=self.dataset, diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index 027c7211a..93c0d0f9b 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -375,10 +375,23 @@ class AbstractEvaluator(object): An optional dictionary to include components of the pipeline steps. exclude (Optional[Dict[str, Any]]): An optional dictionary to exclude components of the pipeline steps. - disable_file_output (Union[bool, List[str]]): - By default, the model, it's predictions and other metadata is stored on disk - for each finished configuration. This argument allows the user to skip - saving certain file type, for example the model, from being written to disk. + disable_file_output (Optional[List]): + Used as a list to pass more fine-grained + information on what to save. Allowed elements in the list are: + + + `y_optimization`: + do not save the predictions for the optimization set, + which would later on be used to build an ensemble. Note that SMAC + optimizes a metric evaluated on the optimization set. + + `pipeline`: + do not save any individual pipeline files + + `pipelines`: + In case of cross validation, disables saving the joint model of the + pipelines fit on each fold. + + `y_test`: + do not save the predictions for the test set. + + `all`: + do not save any of the above. init_params (Optional[Dict[str, Any]]): Optional argument that is passed to each pipeline step. It is the equivalent of kwargs for the pipeline steps. 
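[Aside, not part of the diff: a minimal sketch of how the fine-grained
`disable_file_output` contract documented above is exercised from the
top-level API. The dataset id, seed and time limits are illustrative
assumptions, not values taken from this series.]

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask

X, y = sklearn.datasets.fetch_openml(data_id=40984, return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1)

api = TabularClassificationTask(seed=42)
api.search(
    X_train=X_train, y_train=y_train,
    X_test=X_test, y_test=y_test,
    optimize_metric='accuracy',
    total_walltime_limit=300,
    func_eval_time_limit_secs=50,
    # Skip writing the individual and cross-validated pipeline files, but keep
    # the 'y_optimization' predictions that ensemble building relies on.
    disable_file_output=['pipeline', 'pipelines'],
)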
@@ -404,7 +417,7 @@ def __init__(self, backend: Backend, num_run: Optional[int] = None, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Union[bool, List[str]] = False, + disable_file_output: Optional[List[str]] = None, init_params: Optional[Dict[str, Any]] = None, logger_port: Optional[int] = None, all_supported_metrics: bool = True, @@ -448,12 +461,7 @@ def __init__(self, backend: Backend, # Flag to save target for ensemble self.output_y_hat_optimization = output_y_hat_optimization - if isinstance(disable_file_output, bool): - self.disable_file_output: bool = disable_file_output - elif isinstance(disable_file_output, List): - self.disabled_file_outputs: List[str] = disable_file_output - else: - raise ValueError('disable_file_output should be either a bool or a list') + self.disable_file_output = disable_file_output if disable_file_output is not None else [] self.pipeline_class: Optional[Union[BaseEstimator, BasePipeline]] = None if self.task_type in REGRESSION_TASKS: @@ -835,19 +843,17 @@ def file_output( # Abort if we don't want to output anything. if hasattr(self, 'disable_file_output'): - if self.disable_file_output: + if 'all' in self.disable_file_output: return None, {} - else: - self.disabled_file_outputs = [] # This file can be written independently of the others down bellow - if 'y_optimization' not in self.disabled_file_outputs: + if 'y_optimization' not in self.disable_file_output: if self.output_y_hat_optimization: self.backend.save_targets_ensemble(self.Y_optimization) if hasattr(self, 'pipelines') and self.pipelines is not None: if self.pipelines[0] is not None and len(self.pipelines) > 0: - if 'pipelines' not in self.disabled_file_outputs: + if 'pipelines' not in self.disable_file_output: if self.task_type in CLASSIFICATION_TASKS: pipelines = VotingClassifier(estimators=None, voting='soft', ) else: @@ -861,7 +867,7 @@ def file_output( pipelines = None if hasattr(self, 'pipeline') and self.pipeline is not None: - if 'pipeline' not in self.disabled_file_outputs: + if 'pipeline' not in self.disable_file_output: pipeline = self.pipeline else: pipeline = None @@ -877,15 +883,15 @@ def file_output( cv_model=pipelines, ensemble_predictions=( Y_optimization_pred if 'y_optimization' not in - self.disabled_file_outputs else None + self.disable_file_output else None ), valid_predictions=( Y_valid_pred if 'y_valid' not in - self.disabled_file_outputs else None + self.disable_file_output else None ), test_predictions=( Y_test_pred if 'y_test' not in - self.disabled_file_outputs else None + self.disable_file_output else None ), ) diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py index d99251d3d..89a9838c9 100644 --- a/autoPyTorch/evaluation/tae.py +++ b/autoPyTorch/evaluation/tae.py @@ -109,7 +109,7 @@ def __init__( include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, memory_limit: Optional[int] = None, - disable_file_output: bool = False, + disable_file_output: Optional[List] = None, init_params: Dict[str, Any] = None, budget_type: str = None, ta: Optional[Callable] = None, diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py index 37926a8c0..bdff3549f 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -79,10 +79,23 @@ class TrainEvaluator(AbstractEvaluator): An optional dictionary to include components of the pipeline steps. 
exclude (Optional[Dict[str, Any]]): An optional dictionary to exclude components of the pipeline steps. - disable_file_output (Union[bool, List[str]]): - By default, the model, it's predictions and other metadata is stored on disk - for each finished configuration. This argument allows the user to skip - saving certain file type, for example the model, from being written to disk. + disable_file_output (Optional[List]): + Used as a list to pass more fine-grained + information on what to save. Allowed elements in the list are: + + + `y_optimization`: + do not save the predictions for the optimization set, + which would later on be used to build an ensemble. Note that SMAC + optimizes a metric evaluated on the optimization set. + + `pipeline`: + do not save any individual pipeline files + + `pipelines`: + In case of cross validation, disables saving the joint model of the + pipelines fit on each fold. + + `y_test`: + do not save the predictions for the test set. + + `all`: + do not save any of the above. init_params (Optional[Dict[str, Any]]): Optional argument that is passed to each pipeline step. It is the equivalent of kwargs for the pipeline steps. @@ -107,7 +120,7 @@ def __init__(self, backend: Backend, queue: Queue, num_run: Optional[int] = None, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Union[bool, List] = False, + disable_file_output: Optional[List] = [], init_params: Optional[Dict[str, Any]] = None, logger_port: Optional[int] = None, keep_models: Optional[bool] = None, @@ -397,7 +410,7 @@ def eval_function( num_run: int, include: Optional[Dict[str, Any]], exclude: Optional[Dict[str, Any]], - disable_file_output: Union[bool, List], + disable_file_output: List, pipeline_config: Optional[Dict[str, Any]] = None, budget_type: str = None, init_params: Optional[Dict[str, Any]] = None, diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 5cb271eb0..1e6009081 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -2,6 +2,7 @@ import os import pathlib import pickle +import tempfile import unittest from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_function @@ -17,14 +18,14 @@ import sklearn import sklearn.datasets -from sklearn.base import BaseEstimator -from sklearn.base import clone +from sklearn.base import BaseEstimator, clone from sklearn.ensemble import VotingClassifier, VotingRegressor -from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.api.tabular_regression import TabularRegressionTask +from autoPyTorch.datasets.base_dataset import BaseDataset from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, @@ -645,3 +646,107 @@ def test_build_pipeline(api_type, fit_dictionary_tabular): pipeline = api.build_pipeline(fit_dictionary_tabular['dataset_properties']) assert isinstance(pipeline, BaseEstimator) assert len(pipeline.steps) > 0 + + +@pytest.mark.parametrize("disable_file_output", [['all'], None]) +@pytest.mark.parametrize('openml_id', (40984,)) +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, {'val_share': 0.8}), + (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + ) + ) +@pytest.mark.parametrize("budget", [15, 20]) +def test_pipeline_fit(openml_id, + resampling_strategy, + resampling_strategy_args, + 
backend, + disable_file_output, + budget, + n_samples): + # Get the data and check that contents of data-manager make sense + X, y = sklearn.datasets.fetch_openml( + data_id=int(openml_id), + return_X_y=True, as_frame=True + ) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X[:n_samples], y[:n_samples], random_state=1) + + # Search for a good configuration + estimator = TabularClassificationTask( + backend=backend, + resampling_strategy=resampling_strategy, + ) + + dataset = estimator.get_dataset(X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args) + + configuration = estimator.get_search_space(dataset).get_default_configuration() + pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset, + configuration=configuration, + run_time_limit_secs=50, + disable_file_output=disable_file_output, + budget_type='epochs', + budget=budget + ) + assert isinstance(dataset, BaseDataset) + assert isinstance(run_info, RunInfo) + assert isinstance(run_info.config, Configuration) + + assert isinstance(run_value, RunValue) + assert 'SUCCESS' in str(run_value.status) + + if disable_file_output is None: + if resampling_strategy in CrossValTypes: + assert isinstance(pipeline, BaseEstimator) + X_test = dataset.test_tensors[0] + preds = pipeline.predict_proba(X_test) + assert isinstance(preds, np.ndarray) + + score = accuracy(dataset.test_tensors[1], preds) + assert isinstance(score, float) + assert score > 0.7 + else: + assert isinstance(pipeline, BasePipeline) + # To make sure we fitted the model, there should be a + # run summary object with accuracy + run_summary = pipeline.named_steps['trainer'].run_summary + assert run_summary is not None + X_test = dataset.test_tensors[0] + preds = pipeline.predict(X_test) + assert isinstance(preds, np.ndarray) + + score = accuracy(dataset.test_tensors[1], preds) + assert isinstance(score, float) + assert score > 0.7 + else: + assert pipeline is None + assert run_value.cost < 0.3 + + # Make sure that the pipeline can be pickled + dump_file = os.path.join(tempfile.gettempdir(), 'automl.dump.pkl') + with open(dump_file, 'wb') as f: + pickle.dump(pipeline, f) + + num_run_dir = estimator._backend.get_numrun_directory( + run_info.seed, run_value.additional_info['num_run'], budget=float(budget)) + + cv_model_path = os.path.join(num_run_dir, estimator._backend.get_cv_model_filename( + run_info.seed, run_value.additional_info['num_run'], budget=float(budget))) + model_path = os.path.join(num_run_dir, estimator._backend.get_model_filename( + run_info.seed, run_value.additional_info['num_run'], budget=float(budget))) + + if disable_file_output: + # No file output is expected + assert not os.path.exists(num_run_dir) + else: + # We expect the model path always + # And the cv model only on 'cv' + assert os.path.exists(model_path) + if resampling_strategy in CrossValTypes: + assert os.path.exists(cv_model_path) + elif resampling_strategy in HoldoutValTypes: + assert not os.path.exists(cv_model_path) From bc5b469ededc88599afb216f724cd964fc8a1ffb Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 30 Nov 2021 15:46:55 +0100 Subject: [PATCH 02/32] Add documentation for get dataset --- autoPyTorch/api/base_task.py | 56 +++++++++++++++++------ autoPyTorch/api/tabular_classification.py | 29 +++++++++++- autoPyTorch/api/tabular_regression.py | 29 +++++++++++- autoPyTorch/datasets/tabular_dataset.py | 4 +- 4 files changed, 99 
insertions(+), 19 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index b33c6d3ae..53c785d66 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -252,15 +252,41 @@ def build_pipeline(self, dataset_properties: Dict[str, Any], "specific task which is a child of the BaseTask") @abstractmethod - def get_dataset(self, - X_train: Union[List, pd.DataFrame, np.ndarray], - y_train: Union[List, pd.DataFrame, np.ndarray], - X_test: Union[List, pd.DataFrame, np.ndarray], - y_test: Union[List, pd.DataFrame, np.ndarray], - resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, - resampling_strategy_args: Optional[Dict[str, Any]] = None, - dataset_name: Optional[str] = None, - ) -> BaseDataset: + def get_dataset( + self, + X_train: Union[List, pd.DataFrame, np.ndarray], + y_train: Union[List, pd.DataFrame, np.ndarray], + X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + dataset_name: Optional[str] = None, + ) -> BaseDataset: + """ + Returns an object of a child class of `BaseDataset` according to the current task. + + Args: + X_train (Union[List, pd.DataFrame, np.ndarray]): + Training feature set. + y_train (Union[List, pd.DataFrame, np.ndarray]): + Training target set. + X_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): + Testing feature set + y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): + Testing target set + resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + Strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): + arguments required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. + dataset_name (Optional[str], optional): + name of the dataset, used as experiment name. + + Returns: + BaseDataset: + the dataset object + """ raise NotImplementedError("Function called on BaseTask, this can only be called by " "specific task which is a child of the BaseTask") @@ -1356,10 +1382,14 @@ def fit_pipeline( configuration to fit the pipeline with. 
Returns: - (BasePipeline): fitted pipeline - (RunInfo): Run information - (RunValue): Result of fitting the pipeline - (BaseDataset): Dataset created from the given tensors + (BasePipeline): + fitted pipeline + (RunInfo): + Run information + (RunValue): + Result of fitting the pipeline + (BaseDataset): + Dataset created from the given tensors """ if dataset is None: diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 3f8019e58..289b76c7d 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -120,12 +120,37 @@ def get_dataset( self, X_train: Union[List, pd.DataFrame, np.ndarray], y_train: Union[List, pd.DataFrame, np.ndarray], - X_test: Union[List, pd.DataFrame, np.ndarray], - y_test: Union[List, pd.DataFrame, np.ndarray], + X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, ) -> TabularDataset: + """ + Returns an object of `TabularDataset` according to the current task. + + Args: + X_train (Union[List, pd.DataFrame, np.ndarray]): + Training feature set. + y_train (Union[List, pd.DataFrame, np.ndarray]): + Training target set. + X_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): + Testing feature set + y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): + Testing target set + resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + Strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): + arguments required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. + dataset_name (Optional[str], optional): + name of the dataset, used as experiment name. + + Returns: + TabularDataset: + the dataset object + """ resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy resampling_strategy_args = resampling_strategy_args if resampling_strategy_args is not None else \ diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index 4d35a71b3..bd5b2e937 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -121,12 +121,37 @@ def get_dataset( self, X_train: Union[List, pd.DataFrame, np.ndarray], y_train: Union[List, pd.DataFrame, np.ndarray], - X_test: Union[List, pd.DataFrame, np.ndarray], - y_test: Union[List, pd.DataFrame, np.ndarray], + X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, ) -> TabularDataset: + """ + Returns an object of `TabularDataset` according to the current task. + + Args: + X_train (Union[List, pd.DataFrame, np.ndarray]): + Training feature set. + y_train (Union[List, pd.DataFrame, np.ndarray]): + Training target set. + X_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): + Testing feature set + y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): + Testing target set + resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + Strategy to split the training data. 
+ resampling_strategy_args (Optional[Dict[str, Any]]): + arguments required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. + dataset_name (Optional[str], optional): + name of the dataset, used as experiment name. + + Returns: + TabularDataset: + the dataset object + """ resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy resampling_strategy_args = resampling_strategy_args if resampling_strategy_args is not None else \ diff --git a/autoPyTorch/datasets/tabular_dataset.py b/autoPyTorch/datasets/tabular_dataset.py index c2e229868..16335dfbb 100644 --- a/autoPyTorch/datasets/tabular_dataset.py +++ b/autoPyTorch/datasets/tabular_dataset.py @@ -35,8 +35,8 @@ class TabularDataset(BaseDataset): resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), (default=HoldoutValTypes.holdout_validation): strategy to split the training data. - resampling_strategy_args (Optional[Dict[str, Any]]): arguments - required for the chosen resampling strategy. If None, uses + resampling_strategy_args (Optional[Dict[str, Any]]): + arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS in ```datasets/resampling_strategy.py```. shuffle: Whether to shuffle the data before performing splits From 0359c8ccf6999626510cbcfa3d4f503f3b3c86e9 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 30 Nov 2021 16:52:54 +0100 Subject: [PATCH 03/32] update documentation --- autoPyTorch/api/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 53c785d66..f44062db6 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1327,7 +1327,7 @@ def fit_pipeline( no memory limit is set. In case of multi-processing, memory_limit will be per job. This memory limit also applies to the ensemble creation process. - eval_metric (str): + eval_metric (Optional[str]): Name of the metric that is used to evaluate a pipeline. 
all_supported_metrics (bool: default=False):
             if True, all metrics supporting current task will be calculated
             for each pipeline and results will be available via cv_results
 
From 75eb604f3c71d9030c3e017b783b006408f57734 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Tue, 30 Nov 2021 17:39:26 +0100
Subject: [PATCH 04/32] fix tests

---
 autoPyTorch/api/base_task.py              | 54 ++++++++++++++++++++++-
 autoPyTorch/api/tabular_classification.py | 24 +++++-----
 autoPyTorch/api/tabular_regression.py     | 22 ++++-----
 test/test_api/test_api.py                 |  3 ---
 4 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index f44062db6..6fd728dd9 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -252,6 +252,48 @@ def build_pipeline(self, dataset_properties: Dict[str, Any],
                    "specific task which is a child of the BaseTask")
 
     @abstractmethod
+    def _get_dataset_input_validator(
+        self,
+        X_train: Union[List, pd.DataFrame, np.ndarray],
+        y_train: Union[List, pd.DataFrame, np.ndarray],
+        X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
+        y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
+        resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None,
+        resampling_strategy_args: Optional[Dict[str, Any]] = None,
+        dataset_name: Optional[str] = None,
+    ) -> Tuple[BaseDataset, BaseInputValidator]:
+        """
+        Returns an object of a child class of `BaseDataset` and
+        an object of a child class of `BaseInputValidator` according
+        to the current task.
+
+        Args:
+            X_train (Union[List, pd.DataFrame, np.ndarray]):
+                Training feature set.
+            y_train (Union[List, pd.DataFrame, np.ndarray]):
+                Training target set.
+            X_test (Optional[Union[List, pd.DataFrame, np.ndarray]]):
+                Testing feature set
+            y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]):
+                Testing target set
+            resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]):
+                Strategy to split the training data.
+            resampling_strategy_args (Optional[Dict[str, Any]]):
+                arguments required for the chosen resampling strategy. If None, uses
+                the default values provided in DEFAULT_RESAMPLING_PARAMETERS
+                in ```datasets/resampling_strategy.py```.
+            dataset_name (Optional[str], optional):
+                name of the dataset, used as experiment name. 
+ + Returns: + BaseDataset: + the dataset object + BaseInputValidator: + fitted input validator + """ + raise NotImplementedError("Function called on BaseTask, this can only be called by " + "specific task which is a child of the BaseTask") + def get_dataset( self, X_train: Union[List, pd.DataFrame, np.ndarray], @@ -287,8 +329,16 @@ def get_dataset( BaseDataset: the dataset object """ - raise NotImplementedError("Function called on BaseTask, this can only be called by " - "specific task which is a child of the BaseTask") + dataset, _ = self._get_dataset_input_validator( + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, + dataset_name=dataset_name) + + return dataset @property def run_history(self) -> RunHistory: diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 289b76c7d..069121d6f 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -116,7 +116,7 @@ def build_pipeline( exclude=exclude_components, search_space_updates=search_space_updates) - def get_dataset( + def _get_dataset_input_validator( self, X_train: Union[List, pd.DataFrame, np.ndarray], y_train: Union[List, pd.DataFrame, np.ndarray], @@ -125,9 +125,10 @@ def get_dataset( resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, - ) -> TabularDataset: + ) -> Tuple[TabularDataset, TabularInputValidator]: """ - Returns an object of `TabularDataset` according to the current task. + Returns an object of `TabularDataset` and an object of + `TabularInputValidator` according to the current task. Args: X_train (Union[List, pd.DataFrame, np.ndarray]): @@ -144,12 +145,13 @@ def get_dataset( arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS in ```datasets/resampling_strategy.py```. - dataset_name (Optional[str], optional): + dataset_name (Optional[str]): name of the dataset, used as experiment name. - Returns: TabularDataset: - the dataset object + the dataset object. + TabularInputValidator: + the input validator fitted on the data. """ resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy @@ -177,7 +179,7 @@ def get_dataset( dataset_name=dataset_name ) - return dataset + return dataset, InputValidator def search( self, @@ -331,7 +333,7 @@ def search( """ - self.dataset = self.get_dataset( + self.dataset, self.InputValidator = self._get_dataset_input_validator( X_train=X_train, y_train=y_train, X_test=X_test, @@ -377,7 +379,7 @@ def predict( """ if self.InputValidator is None or not self.InputValidator._is_fitted: raise ValueError("predict() is only supported after calling search. 
Kindly call first " - "the estimator fit() method.") + "the estimator search() method.") X_test = self.InputValidator.feature_validator.transform(X_test) predicted_probabilities = super().predict(X_test, batch_size=batch_size, @@ -397,6 +399,6 @@ def predict_proba(self, batch_size: Optional[int] = None, n_jobs: int = 1) -> np.ndarray: if self.InputValidator is None or not self.InputValidator._is_fitted: raise ValueError("predict() is only supported after calling search. Kindly call first " - "the estimator fit() method.") + "the estimator search() method.") X_test = self.InputValidator.feature_validator.transform(X_test) return super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs) diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index bd5b2e937..6828ef8ad 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -117,7 +117,7 @@ def build_pipeline( exclude=exclude_components, search_space_updates=search_space_updates) - def get_dataset( + def _get_dataset_input_validator( self, X_train: Union[List, pd.DataFrame, np.ndarray], y_train: Union[List, pd.DataFrame, np.ndarray], @@ -126,9 +126,10 @@ def get_dataset( resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, - ) -> TabularDataset: + ) -> Tuple[TabularDataset, TabularInputValidator]: """ - Returns an object of `TabularDataset` according to the current task. + Returns an object of `TabularDataset` and an object of + `TabularInputValidator` according to the current task. Args: X_train (Union[List, pd.DataFrame, np.ndarray]): @@ -145,12 +146,13 @@ def get_dataset( arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS in ```datasets/resampling_strategy.py```. - dataset_name (Optional[str], optional): + dataset_name (Optional[str]): name of the dataset, used as experiment name. - Returns: TabularDataset: - the dataset object + the dataset object. + TabularInputValidator: + the input validator fitted on the data. """ resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy @@ -178,7 +180,7 @@ def get_dataset( dataset_name=dataset_name ) - return dataset + return dataset, InputValidator def search( self, @@ -331,7 +333,7 @@ def search( self """ - self.dataset = self.get_dataset( + self.dataset, self.InputValidator = self._get_dataset_input_validator( X_train=X_train, y_train=y_train, X_test=X_test, @@ -367,7 +369,7 @@ def predict( ) -> np.ndarray: if self.InputValidator is None or not self.InputValidator._is_fitted: raise ValueError("predict() is only supported after calling search. 
Kindly call first "
-                             "the estimator fit() method.")
+                             "the estimator search() method.")
         X_test = self.InputValidator.feature_validator.transform(X_test)
         predicted_values = super().predict(X_test, batch_size=batch_size,
diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index 1e6009081..286dc1307 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -217,9 +217,6 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl
     # Make sure that a configuration space is stored in the estimator
     assert isinstance(estimator.get_search_space(), CS.ConfigurationSpace)
 
-    # test fit on dummy data
-    assert isinstance(estimator.fit(dataset=backend.load_datamanager()), BasePipeline)
-
 
 @pytest.mark.parametrize('openml_name', ("boston", ))
 @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function',

From 136f619be252b18c3b51044184fdb3dbf9f9012f Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Tue, 30 Nov 2021 19:43:54 +0100
Subject: [PATCH 05/32] remove permutation importance from visualisation
 example

---
 .../example_single_configuration.py           | 86 +++++++++++++++++++
 examples/40_advanced/example_visualization.py | 15 ----
 2 files changed, 86 insertions(+), 15 deletions(-)
 create mode 100644 examples/40_advanced/example_single_configuration.py

diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py
new file mode 100644
index 000000000..846118b12
--- /dev/null
+++ b/examples/40_advanced/example_single_configuration.py
@@ -0,0 +1,86 @@
+# -*- encoding: utf-8 -*-
+"""
+==========================
+Fit a single configuration
+==========================
+*Auto-PyTorch* searches for the best combination of machine learning algorithms
+and their hyper-parameter configuration for a given task.
+This example shows how one can fit one of these pipelines, both with a user-defined
+configuration and with a randomly sampled one from the configuration space.
+The pipelines that Auto-PyTorch fits are compatible with Scikit-Learn API. 
You can +get further documentation about Scikit-Learn models here: _ +""" +import os +import tempfile as tmp +import warnings + +os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() +os.environ['OMP_NUM_THREADS'] = '1' +os.environ['OPENBLAS_NUM_THREADS'] = '1' +os.environ['MKL_NUM_THREADS'] = '1' + +warnings.simplefilter(action='ignore', category=UserWarning) +warnings.simplefilter(action='ignore', category=FutureWarning) + +import sklearn.datasets +import sklearn.metrics + +from autoPyTorch.api.tabular_classification import TabularClassificationTask +from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes + + +############################################################################ +# Data Loading +# ============ + +X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, test_size=0.5, random_state=3 +) + +############################################################################ +# Define an estimator +# ============================ + +estimator = TabularClassificationTask( + resampling_strategy=HoldoutValTypes.holdout_validation, + resampling_strategy_args={'val_share': 0.33}, + temporary_directory='./tmp/temp', + output_directory='./tmp/out', + delete_output_folder_after_terminate=False, + delete_tmp_folder_after_terminate=False +) + +############################################################################ +# Get a random configuration of the pipeline for current dataset +# =============================================================== + +dataset = estimator.get_dataset(X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + dataset_name='kr-vs-kp') +configuration = estimator.get_search_space(dataset).sample_configuration() + +print("Passed Configuration:", configuration) +########################################################################### +# Fit the configuration +# ================================== + +pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset, + disable_file_output=False, + configuration=configuration, + budget_type='epochs', + budget=20, + run_time_limit_secs=200 + ) + +# This object complies with Scikit-Learn Pipeline API. +# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html +print(pipeline.named_steps) + +# The fit_pipeline command also returns a named tuple with the pipeline constraints +print(run_info) + +# The fit_pipeline command also returns a named tuple with train/test performance +print(run_value) diff --git a/examples/40_advanced/example_visualization.py b/examples/40_advanced/example_visualization.py index 37c1c6dc3..a88899e81 100644 --- a/examples/40_advanced/example_visualization.py +++ b/examples/40_advanced/example_visualization.py @@ -149,18 +149,3 @@ grid=True, ) plt.show() - -# We then can understand the importance of each input feature using -# a permutation importance analysis. This is done as a proof of concept, to -# showcase that we can leverage of scikit-learn API. 
-result = permutation_importance(estimator, X_train, y_train, n_repeats=5, - scoring='accuracy', - random_state=seed) -sorted_idx = result.importances_mean.argsort() - -fig, ax = plt.subplots() -ax.boxplot(result.importances[sorted_idx].T, - vert=False, labels=X_test.columns[sorted_idx]) -ax.set_title("Permutation Importances (Train set)") -fig.tight_layout() -plt.show() From 47313634e58a7ef523a3e769a88ac307aa901ac5 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 30 Nov 2021 21:40:10 +0100 Subject: [PATCH 06/32] change disable_file_output --- examples/40_advanced/example_single_configuration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index 846118b12..6c78559ec 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -68,7 +68,6 @@ # ================================== pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset, - disable_file_output=False, configuration=configuration, budget_type='epochs', budget=20, From af48ebf53ae5f9d0ad8a8f792bc53612ab479b17 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 12:57:59 +0100 Subject: [PATCH 07/32] add --- autoPyTorch/api/base_task.py | 12 ++++--- autoPyTorch/api/tabular_classification.py | 6 ++-- autoPyTorch/api/tabular_regression.py | 6 ++-- autoPyTorch/evaluation/abstract_evaluator.py | 15 +++++--- autoPyTorch/evaluation/train_evaluator.py | 8 +++-- autoPyTorch/evaluation/utils.py | 36 +++++++++++++++++++ .../example_single_configuration.py | 6 ++-- .../test_abstract_evaluator.py | 32 +++++++++++++++++ test/test_evaluation/test_utils.py | 17 +++++++++ 9 files changed, 120 insertions(+), 18 deletions(-) create mode 100644 test/test_evaluation/test_utils.py diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 6fd728dd9..9d1847c00 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -938,9 +938,10 @@ def _search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (Optional[List]): + disable_file_output (List[Union[str, DisableFileOutputParameters]]): Used as a list to pass more fine-grained - information on what to save. Allowed elements in the list are: + information on what to save. Must be a member of `DisableFileOutputParameters`. + Allowed elements in the list are: + `y_optimization`: do not save the predictions for the optimization set, @@ -955,6 +956,7 @@ def _search( do not save the predictions for the test set. + `all`: do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. load_models (bool: default=True): Whether to load the models after fitting AutoPyTorch. portfolio_selection (Optional[str]): @@ -1411,9 +1413,10 @@ def fit_pipeline( Valid config options include "device", "torch_num_threads", "early_stopping", "use_tensorboard_logger", "metrics_during_training" - disable_file_output (Optional[List]): + disable_file_output (List[Union[str, DisableFileOutputParameters]]): Used as a list to pass more fine-grained - information on what to save. Allowed elements in the list are: + information on what to save. Must be a member of `DisableFileOutputParameters`. 
+ Allowed elements in the list are: + `y_optimization`: do not save the predictions for the optimization set, @@ -1428,6 +1431,7 @@ def fit_pipeline( do not save the predictions for the test set. + `all`: do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. configuration: (Configuration) configuration to fit the pipeline with. diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 069121d6f..255b70579 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -299,9 +299,10 @@ def search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (List): + disable_file_output (List[Union[str, DisableFileOutputParameters]]): Used as a list to pass more fine-grained - information on what to save. Allowed elements in the list are: + information on what to save. Must be a member of `DisableFileOutputParameters`. + Allowed elements in the list are: + `y_optimization`: do not save the predictions for the optimization set, @@ -316,6 +317,7 @@ def search( do not save the predictions for the test set. + `all`: do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. load_models (bool: default=True): Whether to load the models after fitting AutoPyTorch. portfolio_selection (Optional[str]): diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index 6828ef8ad..d9ddfb674 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -300,9 +300,10 @@ def search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (Optional[List]): + disable_file_output (List[Union[str, DisableFileOutputParameters]]): Used as a list to pass more fine-grained - information on what to save. Allowed elements in the list are: + information on what to save. Must be a member of `DisableFileOutputParameters`. + Allowed elements in the list are: + `y_optimization`: do not save the predictions for the optimization set, @@ -317,6 +318,7 @@ def search( do not save the predictions for the test set. + `all`: do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. load_models (bool: default=True): Whether to load the models after fitting AutoPyTorch. portfolio_selection (Optional[str]): diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index 93c0d0f9b..d70f0b756 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -33,8 +33,9 @@ ) from autoPyTorch.datasets.base_dataset import BaseDataset, BaseDatasetPropertiesType from autoPyTorch.evaluation.utils import ( + DisableFileOutputParameters, VotingRegressorWrapper, - convert_multioutput_multiclass_to_multilabel + convert_multioutput_multiclass_to_multilabel, ) from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric @@ -375,9 +376,10 @@ class AbstractEvaluator(object): An optional dictionary to include components of the pipeline steps. exclude (Optional[Dict[str, Any]]): An optional dictionary to exclude components of the pipeline steps. 
- disable_file_output (Optional[List]): + disable_file_output (List[Union[str, DisableFileOutputParameters]]): Used as a list to pass more fine-grained - information on what to save. Allowed elements in the list are: + information on what to save. Must be a member of `DisableFileOutputParameters`. + Allowed elements in the list are: + `y_optimization`: do not save the predictions for the optimization set, @@ -392,6 +394,7 @@ class AbstractEvaluator(object): do not save the predictions for the test set. + `all`: do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. init_params (Optional[Dict[str, Any]]): Optional argument that is passed to each pipeline step. It is the equivalent of kwargs for the pipeline steps. @@ -461,7 +464,11 @@ def __init__(self, backend: Backend, # Flag to save target for ensemble self.output_y_hat_optimization = output_y_hat_optimization - self.disable_file_output = disable_file_output if disable_file_output is not None else [] + disable_file_output = disable_file_output if disable_file_output is not None else [] + # check compatibility of disable file output + DisableFileOutputParameters.check_compatibility(disable_file_output) + + self.disable_file_output = disable_file_output self.pipeline_class: Optional[Union[BaseEstimator, BasePipeline]] = None if self.task_type in REGRESSION_TASKS: diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py index bdff3549f..8b07421a3 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -79,9 +79,10 @@ class TrainEvaluator(AbstractEvaluator): An optional dictionary to include components of the pipeline steps. exclude (Optional[Dict[str, Any]]): An optional dictionary to exclude components of the pipeline steps. - disable_file_output (Optional[List]): + disable_file_output (List[Union[str, DisableFileOutputParameters]]): Used as a list to pass more fine-grained - information on what to save. Allowed elements in the list are: + information on what to save. Must be a member of `DisableFileOutputParameters`. + Allowed elements in the list are: + `y_optimization`: do not save the predictions for the optimization set, @@ -96,6 +97,7 @@ class TrainEvaluator(AbstractEvaluator): do not save the predictions for the test set. + `all`: do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. init_params (Optional[Dict[str, Any]]): Optional argument that is passed to each pipeline step. It is the equivalent of kwargs for the pipeline steps. 
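To put the `disable_file_output` documentation above in context, a caller can steer these options directly from the user-facing API. The sketch below is illustrative only: the dataset id, metric and time budgets are placeholder values, and it assumes the `search()` keyword arguments shown elsewhere in this series.

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask

X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

estimator = TabularClassificationTask()

# Keep the optimization-set predictions (needed to build the ensemble), but
# skip writing test-set predictions and the joint cross-validation models.
estimator.search(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    optimize_metric='accuracy',
    total_walltime_limit=300,
    func_eval_time_limit_secs=50,
    disable_file_output=['y_test', 'pipelines'],
)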
@@ -120,7 +122,7 @@ def __init__(self, backend: Backend, queue: Queue, num_run: Optional[int] = None, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Optional[List] = [], + disable_file_output: Optional[List[str]] = None, init_params: Optional[Dict[str, Any]] = None, logger_port: Optional[int] = None, keep_models: Optional[bool] = None, diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py index 1bf93fa84..f729c21f8 100644 --- a/autoPyTorch/evaluation/utils.py +++ b/autoPyTorch/evaluation/utils.py @@ -1,4 +1,5 @@ import queue +from enum import Enum from multiprocessing.queues import Queue from typing import List, Optional, Union @@ -102,3 +103,38 @@ def _predict(self, X: np.ndarray) -> np.ndarray: predictions.append(pred.ravel()) return np.asarray(predictions).T + + +class DisableFileOutputParameters(Enum): + """ + Contains literals that can be passed in to `disable_file_output` list. + These include: + + + `y_optimization`: + do not save the predictions for the optimization set, + which would later on be used to build an ensemble. Note that SMAC + optimizes a metric evaluated on the optimization set. + + `pipeline`: + do not save any individual pipeline files + + `pipelines`: + In case of cross validation, disables saving the joint model of the + pipelines fit on each fold. + + `y_test`: + do not save the predictions for the test set. + + `all`: + do not save any of the above. + """ + pipeline = 'pipeline' + pipelines = 'pipelines' + y_optimization = 'y_optimization' + y_test = 'y_test' + all = 'all' + + @classmethod + def check_compatibility(cls, disable_file_output: List) -> None: + for item in disable_file_output: + if item not in cls.__members__: + if not isinstance(item, cls): + raise ValueError(f"Expected {item} to be in the members (" + f"{list(cls.__members__.keys())}) of {cls.__name__}" + f" or an instance.") diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index 6c78559ec..c491c16e8 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -40,7 +40,7 @@ ############################################################################ # Define an estimator -# ============================ +# =================== estimator = TabularClassificationTask( resampling_strategy=HoldoutValTypes.holdout_validation, @@ -65,13 +65,13 @@ print("Passed Configuration:", configuration) ########################################################################### # Fit the configuration -# ================================== +# ===================== pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset, configuration=configuration, budget_type='epochs', budget=20, - run_time_limit_secs=200 + run_time_limit_secs=70 ) # This object complies with Scikit-Learn Pipeline API. 
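The `DisableFileOutputParameters` enum added in this patch accepts both plain strings that match its member names and the enum members themselves; anything else is rejected with a `ValueError`. A short sketch of the intended behaviour, using only names defined in the hunks above:

from autoPyTorch.evaluation.utils import DisableFileOutputParameters

# Member names given as plain strings pass the check...
DisableFileOutputParameters.check_compatibility(['pipeline', 'y_test'])

# ...and so do the enum members themselves.
DisableFileOutputParameters.check_compatibility([DisableFileOutputParameters.all])

# Unknown values raise, mirroring the tests added below.
try:
    DisableFileOutputParameters.check_compatibility(['model'])
except ValueError as err:
    print(err)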
diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index 6cec57fb4..cb16e9a35 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -314,3 +314,35 @@ def test_error_unsupported_budget_type(self): self.assertIsInstance(e, ValueError) shutil.rmtree(self.working_directory, ignore_errors=True) + + def test_error_unsupported_disable_file_output_parameters(self): + shutil.rmtree(self.working_directory, ignore_errors=True) + os.mkdir(self.working_directory) + + queue_mock = unittest.mock.Mock() + + context = BackendContext( + prefix='autoPyTorch', + temporary_directory=os.path.join(self.working_directory, 'tmp'), + output_directory=os.path.join(self.working_directory, 'out'), + delete_tmp_folder_after_terminate=True, + delete_output_folder_after_terminate=True, + ) + with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock: + load_datamanager_mock.return_value = get_multiclass_classification_datamanager() + + backend = Backend(context, prefix='autoPyTorch') + + try: + AbstractEvaluator( + backend=backend, + output_y_hat_optimization=False, + queue=queue_mock, + metric=accuracy, + budget=0, + configuration=1, + disable_file_output=['model']) + except Exception as e: + self.assertIsInstance(e, ValueError) + + shutil.rmtree(self.working_directory, ignore_errors=True) diff --git a/test/test_evaluation/test_utils.py b/test/test_evaluation/test_utils.py new file mode 100644 index 000000000..93d2e5195 --- /dev/null +++ b/test/test_evaluation/test_utils.py @@ -0,0 +1,17 @@ +import pytest + +from autoPyTorch.evaluation.utils import DisableFileOutputParameters + +def test_disable_file_output_string_no_error(): + disable_file_output = ['pipeline', 'pipelines'] + DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) + +def test_disable_file_output_string_error(): + disable_file_output = ['model'] + with pytest.raises(ValueError, match=r"Expected .*? to be in the members (.*?) of" + r" DisableFileOutputParameters or an instance."): + DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) + +def test_disable_file_output_enum_no_error(): + disable_file_output = [DisableFileOutputParameters.pipeline, DisableFileOutputParameters.pipelines] + DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) \ No newline at end of file From 3df4e06eca267f0fbaab885181e397164c55a5da Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 13:00:16 +0100 Subject: [PATCH 08/32] fix flake --- test/test_evaluation/test_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_evaluation/test_utils.py b/test/test_evaluation/test_utils.py index 93d2e5195..a67b29e1a 100644 --- a/test/test_evaluation/test_utils.py +++ b/test/test_evaluation/test_utils.py @@ -2,16 +2,19 @@ from autoPyTorch.evaluation.utils import DisableFileOutputParameters + def test_disable_file_output_string_no_error(): disable_file_output = ['pipeline', 'pipelines'] DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) + def test_disable_file_output_string_error(): disable_file_output = ['model'] with pytest.raises(ValueError, match=r"Expected .*? to be in the members (.*?) 
of" r" DisableFileOutputParameters or an instance."): DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) + def test_disable_file_output_enum_no_error(): disable_file_output = [DisableFileOutputParameters.pipeline, DisableFileOutputParameters.pipelines] - DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) \ No newline at end of file + DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) From e8289e43490259307d5f046c49598d75ae5e6bf0 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 15:22:25 +0100 Subject: [PATCH 09/32] fix test and examples --- examples/40_advanced/example_single_configuration.py | 6 +----- test/test_evaluation/test_abstract_evaluator.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index c491c16e8..f8e3a6910 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -45,10 +45,6 @@ estimator = TabularClassificationTask( resampling_strategy=HoldoutValTypes.holdout_validation, resampling_strategy_args={'val_share': 0.33}, - temporary_directory='./tmp/temp', - output_directory='./tmp/out', - delete_output_folder_after_terminate=False, - delete_tmp_folder_after_terminate=False ) ############################################################################ @@ -71,7 +67,7 @@ configuration=configuration, budget_type='epochs', budget=20, - run_time_limit_secs=70 + run_time_limit_secs=100 ) # This object complies with Scikit-Learn Pipeline API. diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index cb16e9a35..b08b26db4 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -129,7 +129,7 @@ def test_disable_file_output(self): ae = AbstractEvaluator( backend=self.backend_mock, queue=queue_mock, - disable_file_output=True, + disable_file_output=['all'], metric=accuracy, logger_port=unittest.mock.Mock(), budget=0, From 4018d026ae4c21f05b5fb43221f478ffd54788ee Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 15:54:59 +0100 Subject: [PATCH 10/32] change type of disable_file_output --- autoPyTorch/api/base_task.py | 9 ++-- autoPyTorch/api/tabular_classification.py | 5 ++- autoPyTorch/api/tabular_regression.py | 5 ++- autoPyTorch/evaluation/abstract_evaluator.py | 40 ++++++++--------- autoPyTorch/evaluation/tae.py | 9 +++- autoPyTorch/evaluation/train_evaluator.py | 43 ++++++++++--------- .../test_abstract_evaluator.py | 3 +- 7 files changed, 62 insertions(+), 52 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 9d1847c00..061844059 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -45,6 +45,7 @@ from autoPyTorch.ensemble.singlebest_ensemble import SingleBest from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash +from autoPyTorch.evaluation.utils import DisableFileOutputParameters from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import get_available_traditional_learners @@ -837,7 +838,7 @@ def _search( tae_func: 
Optional[Callable] = None, all_supported_metrics: bool = True, precision: int = 32, - disable_file_output: Optional[List[str]] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, load_models: bool = True, portfolio_selection: Optional[str] = None, dask_client: Optional[dask.distributed.Client] = None @@ -938,7 +939,7 @@ def _search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (List[Union[str, DisableFileOutputParameters]]): + disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): Used as a list to pass more fine-grained information on what to save. Must be a member of `DisableFileOutputParameters`. Allowed elements in the list are: @@ -1341,7 +1342,7 @@ def fit_pipeline( search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, budget: Optional[float] = None, pipeline_options: Optional[Dict] = None, - disable_file_output: Optional[List[str]] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue, BaseDataset]: """ Fit a pipeline on the given task for the budget. @@ -1413,7 +1414,7 @@ def fit_pipeline( Valid config options include "device", "torch_num_threads", "early_stopping", "use_tensorboard_logger", "metrics_during_training" - disable_file_output (List[Union[str, DisableFileOutputParameters]]): + disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): Used as a list to pass more fine-grained information on what to save. Must be a member of `DisableFileOutputParameters`. Allowed elements in the list are: diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 255b70579..7d71b84a4 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -16,6 +16,7 @@ HoldoutValTypes, ) from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.evaluation.utils import DisableFileOutputParameters from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates @@ -200,7 +201,7 @@ def search( get_smac_object_callback: Optional[Callable] = None, all_supported_metrics: bool = True, precision: int = 32, - disable_file_output: Optional[List[str]] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, load_models: bool = True, portfolio_selection: Optional[str] = None, ) -> 'BaseTask': @@ -299,7 +300,7 @@ def search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (List[Union[str, DisableFileOutputParameters]]): + disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): Used as a list to pass more fine-grained information on what to save. Must be a member of `DisableFileOutputParameters`. 
Allowed elements in the list are: diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index d9ddfb674..7a59651ef 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -16,6 +16,7 @@ HoldoutValTypes, ) from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.evaluation.utils import DisableFileOutputParameters from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates @@ -201,7 +202,7 @@ def search( get_smac_object_callback: Optional[Callable] = None, all_supported_metrics: bool = True, precision: int = 32, - disable_file_output: Optional[List[str]] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, load_models: bool = True, portfolio_selection: Optional[str] = None, ) -> 'BaseTask': @@ -300,7 +301,7 @@ def search( precision (int: default=32): Numeric precision used when loading ensemble data. Can be either '16', '32' or '64'. - disable_file_output (List[Union[str, DisableFileOutputParameters]]): + disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): Used as a list to pass more fine-grained information on what to save. Must be a member of `DisableFileOutputParameters`. Allowed elements in the list are: diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index d70f0b756..0b7cc105f 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -376,25 +376,25 @@ class AbstractEvaluator(object): An optional dictionary to include components of the pipeline steps. exclude (Optional[Dict[str, Any]]): An optional dictionary to exclude components of the pipeline steps. - disable_file_output (List[Union[str, DisableFileOutputParameters]]): - Used as a list to pass more fine-grained - information on what to save. Must be a member of `DisableFileOutputParameters`. - Allowed elements in the list are: - - + `y_optimization`: - do not save the predictions for the optimization set, - which would later on be used to build an ensemble. Note that SMAC - optimizes a metric evaluated on the optimization set. - + `pipeline`: - do not save any individual pipeline files - + `pipelines`: - In case of cross validation, disables saving the joint model of the - pipelines fit on each fold. - + `y_test`: - do not save the predictions for the test set. - + `all`: - do not save any of the above. - For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. + disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): + Used as a list to pass more fine-grained + information on what to save. Must be a member of `DisableFileOutputParameters`. + Allowed elements in the list are: + + + `y_optimization`: + do not save the predictions for the optimization set, + which would later on be used to build an ensemble. Note that SMAC + optimizes a metric evaluated on the optimization set. + + `pipeline`: + do not save any individual pipeline files + + `pipelines`: + In case of cross validation, disables saving the joint model of the + pipelines fit on each fold. + + `y_test`: + do not save the predictions for the test set. + + `all`: + do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. 
init_params (Optional[Dict[str, Any]]): Optional argument that is passed to each pipeline step. It is the equivalent of kwargs for the pipeline steps. @@ -420,7 +420,7 @@ def __init__(self, backend: Backend, num_run: Optional[int] = None, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Optional[List[str]] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, init_params: Optional[Dict[str, Any]] = None, logger_port: Optional[int] = None, all_supported_metrics: bool = True, diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py index 89a9838c9..683870304 100644 --- a/autoPyTorch/evaluation/tae.py +++ b/autoPyTorch/evaluation/tae.py @@ -24,7 +24,12 @@ import autoPyTorch.evaluation.train_evaluator from autoPyTorch.automl_common.common.utils.backend import Backend -from autoPyTorch.evaluation.utils import empty_queue, extract_learning_curve, read_queue +from autoPyTorch.evaluation.utils import ( + DisableFileOutputParameters, + empty_queue, + extract_learning_curve, + read_queue +) from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric from autoPyTorch.utils.common import dict_repr, replace_string_bool_to_bool from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates @@ -109,7 +114,7 @@ def __init__( include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, memory_limit: Optional[int] = None, - disable_file_output: Optional[List] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, init_params: Dict[str, Any] = None, budget_type: str = None, ta: Optional[Callable] = None, diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py index 8b07421a3..1bf1bce4c 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -18,6 +18,7 @@ AbstractEvaluator, fit_and_suppress_warnings ) +from autoPyTorch.evaluation.utils import DisableFileOutputParameters from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric from autoPyTorch.utils.common import dict_repr, subsampler from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates @@ -79,25 +80,25 @@ class TrainEvaluator(AbstractEvaluator): An optional dictionary to include components of the pipeline steps. exclude (Optional[Dict[str, Any]]): An optional dictionary to exclude components of the pipeline steps. - disable_file_output (List[Union[str, DisableFileOutputParameters]]): - Used as a list to pass more fine-grained - information on what to save. Must be a member of `DisableFileOutputParameters`. - Allowed elements in the list are: - - + `y_optimization`: - do not save the predictions for the optimization set, - which would later on be used to build an ensemble. Note that SMAC - optimizes a metric evaluated on the optimization set. - + `pipeline`: - do not save any individual pipeline files - + `pipelines`: - In case of cross validation, disables saving the joint model of the - pipelines fit on each fold. - + `y_test`: - do not save the predictions for the test set. - + `all`: - do not save any of the above. - For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. + disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): + Used as a list to pass more fine-grained + information on what to save. 
Must be a member of `DisableFileOutputParameters`. + Allowed elements in the list are: + + + `y_optimization`: + do not save the predictions for the optimization set, + which would later on be used to build an ensemble. Note that SMAC + optimizes a metric evaluated on the optimization set. + + `pipeline`: + do not save any individual pipeline files + + `pipelines`: + In case of cross validation, disables saving the joint model of the + pipelines fit on each fold. + + `y_test`: + do not save the predictions for the test set. + + `all`: + do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. init_params (Optional[Dict[str, Any]]): Optional argument that is passed to each pipeline step. It is the equivalent of kwargs for the pipeline steps. @@ -122,7 +123,7 @@ def __init__(self, backend: Backend, queue: Queue, num_run: Optional[int] = None, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Optional[List[str]] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, init_params: Optional[Dict[str, Any]] = None, logger_port: Optional[int] = None, keep_models: Optional[bool] = None, @@ -412,7 +413,7 @@ def eval_function( num_run: int, include: Optional[Dict[str, Any]], exclude: Optional[Dict[str, Any]], - disable_file_output: List, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, pipeline_config: Optional[Dict[str, Any]] = None, budget_type: str = None, init_params: Optional[Dict[str, Any]] = None, diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index b08b26db4..a0be2c3f3 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -13,6 +13,7 @@ from autoPyTorch.automl_common.common.utils.backend import Backend, BackendContext from autoPyTorch.evaluation.abstract_evaluator import AbstractEvaluator +from autoPyTorch.evaluation.utils import DisableFileOutputParameters from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy this_directory = os.path.dirname(__file__) @@ -129,7 +130,7 @@ def test_disable_file_output(self): ae = AbstractEvaluator( backend=self.backend_mock, queue=queue_mock, - disable_file_output=['all'], + disable_file_output=[DisableFileOutputParameters.all], metric=accuracy, logger_port=unittest.mock.Mock(), budget=0, From add889066911b9abf153fbaec2320b01adafa5d4 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 16:29:14 +0100 Subject: [PATCH 11/32] Address comments from eddie --- autoPyTorch/api/base_task.py | 33 ++++++++++++----------- autoPyTorch/api/tabular_classification.py | 8 +++--- autoPyTorch/api/tabular_regression.py | 11 ++++---- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 061844059..847b255af 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -11,7 +11,7 @@ import typing import unittest.mock import warnings -from abc import abstractmethod +from abc import ABC, abstractmethod from typing import Any, Callable, Dict, List, Optional, Tuple, Union from ConfigSpace.configuration_space import Configuration, ConfigurationSpace @@ -105,7 +105,7 @@ def send_warnings_to_log( return prediction -class BaseTask: +class BaseTask(ABC): """ Base class for the tasks that serve as API to the pipelines. 
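Turning `BaseTask` into an ABC forces the subclasses to implement `build_pipeline` and `get_dataset`, and it is also the reason the test fixes later in this series set `BaseTask.__abstractmethods__ = set()` before instantiating the class directly. A generic sketch of that mechanism, with placeholder names unrelated to autoPyTorch:

from abc import ABC, abstractmethod


class Base(ABC):
    @abstractmethod
    def build(self) -> None:
        raise NotImplementedError


# Base() at this point raises TypeError: the abstract method is unimplemented.

# The test-suite workaround: clearing the recorded set of abstract methods
# lets Python instantiate the class without concrete overrides.
Base.__abstractmethods__ = set()
instance = Base()  # succeeds; build() is still unimplemented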
@@ -135,10 +135,10 @@ class BaseTask: delete_tmp_folder_after_terminate (bool): Determines whether to delete the temporary directory, when finished - include_components (Optional[Dict]): + include_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components to use. - exclude_components (Optional[Dict]): + exclude_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components not to use. Incompatible with include components @@ -160,8 +160,8 @@ def __init__( output_directory: Optional[str] = None, delete_tmp_folder_after_terminate: bool = True, delete_output_folder_after_terminate: bool = True, - include_components: Optional[Dict] = None, - exclude_components: Optional[Dict] = None, + include_components: Optional[Dict[str, Any]] = None, + exclude_components: Optional[Dict[str, Any]] = None, backend: Optional[Backend] = None, resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, @@ -318,12 +318,13 @@ def get_dataset( y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): - Strategy to split the training data. + Strategy to split the training data. if None, uses + HoldoutValTypes.holdout_validation resampling_strategy_args (Optional[Dict[str, Any]]): arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS in ```datasets/resampling_strategy.py```. - dataset_name (Optional[str], optional): + dataset_name (Optional[str]): name of the dataset, used as experiment name. Returns: @@ -1337,8 +1338,8 @@ def fit_pipeline( eval_metric: Optional[str] = None, all_supported_metrics: bool = False, budget_type: Optional[str] = None, - include_components: Optional[Dict] = None, - exclude_components: Optional[Dict] = None, + include_components: Optional[Dict[str, Any]] = None, + exclude_components: Optional[Dict[str, Any]] = None, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, budget: Optional[float] = None, pipeline_options: Optional[Dict] = None, @@ -1360,14 +1361,14 @@ def fit_pipeline( be provided to track the generalization performance of each stage. dataset_name (Optional[str]): Name of the dataset, if None, random value is used. - resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), - (default=HoldoutValTypes.holdout_validation): - strategy to split the training data. + resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + Strategy to split the training data. if None, uses + HoldoutValTypes.holdout_validation resampling_strategy_args (Optional[Dict[str, Any]]): Arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS in ```datasets/resampling_strategy.py```. - run_time_limit_secs (int: default=120): + run_time_limit_secs (int: default=60): Time limit for a single call to the machine learning model. Model fitting will be terminated if the machine learning algorithm runs over the time limit. Set this value high enough so that @@ -1398,10 +1399,10 @@ def fit_pipeline( controlled by func_eval_time_limit_secs. 'runtime' only controls the allocated time to train a pipeline, but it does not consider the overall time it takes to create a pipeline (data loading and preprocessing, other i/o operations, etc.). 
- include_components (Optional[Dict]): + include_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components to use. - exclude_components (Optional[Dict]): + exclude_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components not to use. Incompatible with include components diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 7d71b84a4..d77c35f6a 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -53,10 +53,10 @@ class TabularClassificationTask(BaseTask): delete_tmp_folder_after_terminate (bool): Determines whether to delete the temporary directory, when finished - include_components (Optional[Dict]): + include_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components to use. - exclude_components (Optional[Dict]): + exclude_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components not to use. Incompatible with include components. @@ -77,8 +77,8 @@ def __init__( output_directory: Optional[str] = None, delete_tmp_folder_after_terminate: bool = True, delete_output_folder_after_terminate: bool = True, - include_components: Optional[Dict] = None, - exclude_components: Optional[Dict] = None, + include_components: Optional[Dict[str, Any]] = None, + exclude_components: Optional[Dict[str, Any]] = None, resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, backend: Optional[Backend] = None, diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index 7a59651ef..56d49beba 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -53,10 +53,10 @@ class TabularRegressionTask(BaseTask): delete_tmp_folder_after_terminate (bool): Determines whether to delete the temporary directory, when finished - include_components (Optional[Dict]): + include_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components to use. - exclude_components (Optional[Dict]): + exclude_components (Optional[Dict[str, Any]]): If None, all possible components are used. Otherwise specifies set of components not to use. Incompatible with include components. @@ -78,8 +78,8 @@ def __init__( output_directory: Optional[str] = None, delete_tmp_folder_after_terminate: bool = True, delete_output_folder_after_terminate: bool = True, - include_components: Optional[Dict] = None, - exclude_components: Optional[Dict] = None, + include_components: Optional[Dict[str, Any]] = None, + exclude_components: Optional[Dict[str, Any]] = None, resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, backend: Optional[Backend] = None, @@ -142,7 +142,8 @@ def _get_dataset_input_validator( y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): - Strategy to split the training data. + Strategy to split the training data. if None, uses + HoldoutValTypes.holdout_validation resampling_strategy_args (Optional[Dict[str, Any]]): arguments required for the chosen resampling strategy. 
If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS From d8739cd91a109bf9ec92cfa4317e4540d69b6092 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 17:01:04 +0100 Subject: [PATCH 12/32] fix docstring in api --- autoPyTorch/api/base_task.py | 63 ++++++++++++++++------- autoPyTorch/api/tabular_classification.py | 49 ++++++++++++++---- autoPyTorch/api/tabular_regression.py | 48 +++++++++++++---- 3 files changed, 123 insertions(+), 37 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 847b255af..9002d8114 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -136,12 +136,15 @@ class BaseTask(ABC): Determines whether to delete the temporary directory, when finished include_components (Optional[Dict[str, Any]]): - If None, all possible components are used. - Otherwise specifies set of components to use. + Dictionary containing components to include. Key is the node + name and Value is an Iterable of the names of the components + to include. Only these components will be present in the + search space. exclude_components (Optional[Dict[str, Any]]): - If None, all possible components are used. - Otherwise specifies set of components not to use. - Incompatible with include components + Dictionary containing components to exclude. Key is the node + name and Value is an Iterable of the names of the components + to exclude. All except these components will be present in + the search space. search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): Search space updates that can be used to modify the search space of particular components or choice modules of the pipeline @@ -234,19 +237,37 @@ def __init__( " HyperparameterSearchSpaceUpdates got {}".format(type(self.search_space_updates))) @abstractmethod - def build_pipeline(self, dataset_properties: Dict[str, Any], - include_components: Optional[Dict] = None, - exclude_components: Optional[Dict] = None, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None - ) -> BasePipeline: + def build_pipeline( + self, + dataset_properties: Dict[str, BaseDatasetPropertiesType], + include_components: Optional[Dict[str, Any]] = None, + exclude_components: Optional[Dict[str, Any]] = None, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + ) -> BasePipeline: """ Build pipeline according to current task and for the passed dataset properties Args: - dataset_properties (Dict[str,Any]) + dataset_properties (Dict[str, Any]): + Characteristics of the dataset to guide the pipeline + choices of components + include_components (Optional[Dict[str, Any]]): + Dictionary containing components to include. Key is the node + name and Value is an Iterable of the names of the components + to include. Only these components will be present in the + search space. + exclude_components (Optional[Dict[str, Any]]): + Dictionary containing components to exclude. Key is the node + name and Value is an Iterable of the names of the components + to exclude. All except these components will be present in + the search space. 
+ search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): + Search space updates that can be used to modify the search + space of particular components or choice modules of the pipeline Returns: + BasePipeline """ raise NotImplementedError("Function called on BaseTask, this can only be called by " @@ -278,7 +299,8 @@ def _get_dataset_input_validator( y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): - Strategy to split the training data. + Strategy to split the training data. if None, uses + HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS @@ -319,7 +341,7 @@ def get_dataset( Testing target set resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): Strategy to split the training data. if None, uses - HoldoutValTypes.holdout_validation + HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS @@ -1363,7 +1385,7 @@ def fit_pipeline( Name of the dataset, if None, random value is used. resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): Strategy to split the training data. if None, uses - HoldoutValTypes.holdout_validation + HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): Arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS @@ -1400,12 +1422,15 @@ def fit_pipeline( time to train a pipeline, but it does not consider the overall time it takes to create a pipeline (data loading and preprocessing, other i/o operations, etc.). include_components (Optional[Dict[str, Any]]): - If None, all possible components are used. - Otherwise specifies set of components to use. + Dictionary containing components to include. Key is the node + name and Value is an Iterable of the names of the components + to include. Only these components will be present in the + search space. exclude_components (Optional[Dict[str, Any]]): - If None, all possible components are used. - Otherwise specifies set of components not to use. - Incompatible with include components + Dictionary containing components to exclude. Key is the node + name and Value is an Iterable of the names of the components + to exclude. All except these components will be present in + the search space. search_space_updates(Optional[HyperparameterSearchSpaceUpdates]): Updates to be made to the hyperparameter search space of the pipeline budget (Optional[float]): diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index d77c35f6a..aeb69277c 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -11,6 +11,7 @@ TASK_TYPES_TO_STRING, ) from autoPyTorch.data.tabular_validator import TabularInputValidator +from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, @@ -54,12 +55,15 @@ class TabularClassificationTask(BaseTask): Determines whether to delete the temporary directory, when finished include_components (Optional[Dict[str, Any]]): - If None, all possible components are used. 
- Otherwise specifies set of components to use. + Dictionary containing components to include. Key is the node + name and Value is an Iterable of the names of the components + to include. Only these components will be present in the + search space. exclude_components (Optional[Dict[str, Any]]): - If None, all possible components are used. - Otherwise specifies set of components not to use. - Incompatible with include components. + Dictionary containing components to exclude. Key is the node + name and Value is an Iterable of the names of the components + to exclude. All except these components will be present in + the search space. search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): search space updates that can be used to modify the search space of particular components or choice modules of the pipeline @@ -107,11 +111,37 @@ def __init__( def build_pipeline( self, - dataset_properties: Dict[str, Any], - include_components: Optional[Dict] = None, - exclude_components: Optional[Dict] = None, + dataset_properties: Dict[str, BaseDatasetPropertiesType], + include_components: Optional[Dict[str, Any]] = None, + exclude_components: Optional[Dict[str, Any]] = None, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None ) -> TabularClassificationPipeline: + """ + Build pipeline according to current task + and for the passed dataset properties + + Args: + dataset_properties (Dict[str, Any]): + Characteristics of the dataset to guide the pipeline + choices of components + include_components (Optional[Dict[str, Any]]): + Dictionary containing components to include. Key is the node + name and Value is an Iterable of the names of the components + to include. Only these components will be present in the + search space. + exclude_components (Optional[Dict[str, Any]]): + Dictionary containing components to exclude. Key is the node + name and Value is an Iterable of the names of the components + to exclude. All except these components will be present in + the search space. + search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): + Search space updates that can be used to modify the search + space of particular components or choice modules of the pipeline + + Returns: + TabularClassificationPipeline + + """ return TabularClassificationPipeline(dataset_properties=dataset_properties, include=include_components, exclude=exclude_components, @@ -141,7 +171,8 @@ def _get_dataset_input_validator( y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): - Strategy to split the training data. + Strategy to split the training data. if None, uses + HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): arguments required for the chosen resampling strategy. 
If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index 56d49beba..f429b210c 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -11,6 +11,7 @@ TASK_TYPES_TO_STRING ) from autoPyTorch.data.tabular_validator import TabularInputValidator +from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, @@ -54,12 +55,15 @@ class TabularRegressionTask(BaseTask): Determines whether to delete the temporary directory, when finished include_components (Optional[Dict[str, Any]]): - If None, all possible components are used. - Otherwise specifies set of components to use. + Dictionary containing components to include. Key is the node + name and Value is an Iterable of the names of the components + to include. Only these components will be present in the + search space. exclude_components (Optional[Dict[str, Any]]): - If None, all possible components are used. - Otherwise specifies set of components not to use. - Incompatible with include components. + Dictionary containing components to exclude. Key is the node + name and Value is an Iterable of the names of the components + to exclude. All except these components will be present in + the search space. search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): search space updates that can be used to modify the search space of particular components or choice modules of the pipeline @@ -108,11 +112,37 @@ def __init__( def build_pipeline( self, - dataset_properties: Dict[str, Any], - include_components: Optional[Dict] = None, - exclude_components: Optional[Dict] = None, + dataset_properties: Dict[str, BaseDatasetPropertiesType], + include_components: Optional[Dict[str, Any]] = None, + exclude_components: Optional[Dict[str, Any]] = None, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None ) -> TabularRegressionPipeline: + """ + Build pipeline according to current task + and for the passed dataset properties + + Args: + dataset_properties (Dict[str, Any]): + Characteristics of the dataset to guide the pipeline + choices of components + include_components (Optional[Dict[str, Any]]): + Dictionary containing components to include. Key is the node + name and Value is an Iterable of the names of the components + to include. Only these components will be present in the + search space. + exclude_components (Optional[Dict[str, Any]]): + Dictionary containing components to exclude. Key is the node + name and Value is an Iterable of the names of the components + to exclude. All except these components will be present in + the search space. + search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): + Search space updates that can be used to modify the search + space of particular components or choice modules of the pipeline + + Returns: + TabularRegressionPipeline: + + """ return TabularRegressionPipeline(dataset_properties=dataset_properties, include=include_components, exclude=exclude_components, @@ -143,7 +173,7 @@ def _get_dataset_input_validator( Testing target set resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): Strategy to split the training data. if None, uses - HoldoutValTypes.holdout_validation + HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): arguments required for the chosen resampling strategy. 
If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS From f1ea974e4acbfc19ad63d07159743195eb5ed112 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 20:10:12 +0100 Subject: [PATCH 13/32] fix tests for base api --- test/test_api/test_base_api.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_api/test_base_api.py b/test/test_api/test_base_api.py index 126b702e6..44319b809 100644 --- a/test/test_api/test_base_api.py +++ b/test/test_api/test_base_api.py @@ -1,7 +1,7 @@ import logging import re import unittest -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import numpy as np @@ -20,6 +20,7 @@ # ==== @pytest.mark.parametrize("fit_dictionary_tabular", ['classification_categorical_only'], indirect=True) def test_nonsupported_arguments(fit_dictionary_tabular): + BaseTask.__abstractmethods__ = set() with pytest.raises(ValueError, match=r".*Expected search space updates to be of instance.*"): api = BaseTask(search_space_updates='None') @@ -82,6 +83,7 @@ def test_pipeline_predict_function(): @pytest.mark.parametrize("fit_dictionary_tabular", ['classification_categorical_only'], indirect=True) def test_show_models(fit_dictionary_tabular): + BaseTask.__abstractmethods__ = set() api = BaseTask() api.ensemble_ = MagicMock() api.models_ = [TabularClassificationPipeline(dataset_properties=fit_dictionary_tabular['dataset_properties'])] @@ -94,6 +96,7 @@ def test_show_models(fit_dictionary_tabular): def test_set_pipeline_config(): # checks if we can correctly change the pipeline options + BaseTask.__abstractmethods__ = set() estimator = BaseTask() pipeline_options = {"device": "cuda", "budget_type": "epochs", @@ -110,6 +113,7 @@ def test_set_pipeline_config(): (3, 50, 'runtime', {'budget_type': 'runtime', 'runtime': 50}), ]) def test_pipeline_get_budget(fit_dictionary_tabular, min_budget, max_budget, budget_type, expected): + BaseTask.__abstractmethods__ = set() estimator = BaseTask(task_type='tabular_classification', ensemble_size=0) # Fixture pipeline config From 38471f1aec556a276c4560e4e011b2171126f1ce Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 20:12:53 +0100 Subject: [PATCH 14/32] fix tests for base api --- test/test_api/test_base_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_api/test_base_api.py b/test/test_api/test_base_api.py index 44319b809..3b379dbd6 100644 --- a/test/test_api/test_base_api.py +++ b/test/test_api/test_base_api.py @@ -1,7 +1,7 @@ import logging import re import unittest -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock import numpy as np From 02ac9de77215e3bc2562be1d164daed44dd8238d Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Dec 2021 21:13:18 +0100 Subject: [PATCH 15/32] fix tests after rebase --- test/test_utils/test_results_manager.py | 1 + test/test_utils/test_results_visualizer.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/test/test_utils/test_results_manager.py b/test/test_utils/test_results_manager.py index 8998009a4..496aec7fa 100644 --- a/test/test_utils/test_results_manager.py +++ b/test/test_utils/test_results_manager.py @@ -352,6 +352,7 @@ def test_metric_results(metric, scores, ensemble_ends_later): def test_search_results_sprint_statistics(): + BaseTask.__abstractmethods__ = set() api = BaseTask() for method in ['get_search_results', 'sprint_statistics', 'get_incumbent_results']: with pytest.raises(RuntimeError): diff --git 
a/test/test_utils/test_results_visualizer.py b/test/test_utils/test_results_visualizer.py index c463fa063..e31571ef0 100644 --- a/test/test_utils/test_results_visualizer.py +++ b/test/test_utils/test_results_visualizer.py @@ -146,6 +146,7 @@ def test_set_plot_args(params): # TODO @pytest.mark.parametrize('metric_name', ('unknown', 'accuracy')) def test_raise_error_in_plot_perf_over_time_in_base_task(metric_name): + BaseTask.__abstractmethods__ = set() api = BaseTask() if metric_name == 'unknown': @@ -159,6 +160,7 @@ def test_raise_error_in_plot_perf_over_time_in_base_task(metric_name): @pytest.mark.parametrize('metric_name', ('balanced_accuracy', 'accuracy')) def test_plot_perf_over_time(metric_name): # TODO dummy_history = [{'Timestamp': datetime(2022, 1, 1), 'train_accuracy': 1, 'test_accuracy': 1}] + BaseTask.__abstractmethods__ = set() api = BaseTask() run_history_data = json.load(open(os.path.join(os.path.dirname(__file__), 'runhistory.json'), From fd32939a59aa3236fd228cadb91afec202eed709 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 7 Dec 2021 10:57:34 +0100 Subject: [PATCH 16/32] reduce dataset size in example --- examples/40_advanced/example_single_configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index f8e3a6910..a0fe454b6 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -44,7 +44,7 @@ estimator = TabularClassificationTask( resampling_strategy=HoldoutValTypes.holdout_validation, - resampling_strategy_args={'val_share': 0.33}, + resampling_strategy_args={'val_share': 0.5}, ) ############################################################################ From 39587505659b9e9e586930d51de9a9d47b81582f Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 7 Dec 2021 10:59:05 +0100 Subject: [PATCH 17/32] remove optional from doc string --- autoPyTorch/api/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 9002d8114..2839f86e8 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -305,7 +305,7 @@ def _get_dataset_input_validator( arguments required for the chosen resampling strategy. If None, uses the default values provided in DEFAULT_RESAMPLING_PARAMETERS in ```datasets/resampling_strategy.py```. - dataset_name (Optional[str], optional): + dataset_name (Optional[str]): name of the dataset, used as experiment name. Returns: From c33381ad6f60da9d41dd8b78bd0eb302ea3798f4 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 7 Dec 2021 11:42:10 +0100 Subject: [PATCH 18/32] Handle unsuccessful fitting of pipeline better --- autoPyTorch/api/base_task.py | 28 +++++++------ .../example_single_configuration.py | 8 ++-- test/test_api/test_api.py | 42 +++++++++++++++++++ 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 2839f86e8..6dde20560 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1576,19 +1576,23 @@ def fit_pipeline( ) fitted_pipeline: Optional[BasePipeline] = None - if 'all' in disable_file_output or 'pipeline' in disable_file_output: - self._logger.warning("File output is disabled. 
No pipeline can returned") - elif run_value.status == StatusType.SUCCESS: - if self.resampling_strategy in CrossValTypes: - load_function = self._backend.load_cv_model_by_seed_and_id_and_budget - else: - load_function = self._backend.load_model_by_seed_and_id_and_budget - fitted_pipeline = load_function( - seed=self.seed, - idx=run_info.config.config_id + tae.initial_num_run, - budget=float(run_info.budget), - ) + if run_value.status == StatusType.SUCCESS: + if 'all' in disable_file_output or 'pipeline' in disable_file_output: + self._logger.warning("File output is disabled. No pipeline can returned") + elif run_value.status == StatusType.SUCCESS: + if self.resampling_strategy in CrossValTypes: + load_function = self._backend.load_cv_model_by_seed_and_id_and_budget + else: + load_function = self._backend.load_model_by_seed_and_id_and_budget + fitted_pipeline = load_function( + seed=self.seed, + idx=run_info.config.config_id + tae.initial_num_run, + budget=float(run_info.budget), + ) + else: + warnings.warn(f"Fitting pipeline failed with status: {run_value.status}" + f", aditional_info: {run_value.additional_info}") self._clean_logger() return fitted_pipeline, run_info, run_value, dataset diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index a0fe454b6..564fb71e3 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -70,12 +70,12 @@ run_time_limit_secs=100 ) -# This object complies with Scikit-Learn Pipeline API. -# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html -print(pipeline.named_steps) - # The fit_pipeline command also returns a named tuple with the pipeline constraints print(run_info) # The fit_pipeline command also returns a named tuple with train/test performance print(run_value) + +# This object complies with Scikit-Learn Pipeline API. 
+# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html +print(pipeline.named_steps) diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 286dc1307..25180a775 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -747,3 +747,45 @@ def test_pipeline_fit(openml_id, assert os.path.exists(cv_model_path) elif resampling_strategy in HoldoutValTypes: assert not os.path.exists(cv_model_path) + +@pytest.mark.parametrize('openml_id', (40984,)) +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, {'val_share': 0.8}), + ) + ) +def test_pipeline_fit_error( + openml_id, + resampling_strategy, + resampling_strategy_args, + backend, + n_samples +): + # Get the data and check that contents of data-manager make sense + X, y = sklearn.datasets.fetch_openml( + data_id=int(openml_id), + return_X_y=True, as_frame=True + ) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X[:n_samples], y[:n_samples], random_state=1) + + # Search for a good configuration + estimator = TabularClassificationTask( + backend=backend, + resampling_strategy=resampling_strategy, + ) + + dataset = estimator.get_dataset(X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args) + + configuration = estimator.get_search_space(dataset).get_default_configuration() + pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset, + configuration=configuration, + run_time_limit_secs=7, + ) + + assert 'TIMEOUT' in str(run_value.status) + assert pipeline is None From dff0e5c71b9c8b2e53359421d4334cc7e2f81c32 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 7 Dec 2021 11:45:45 +0100 Subject: [PATCH 19/32] fix flake in tests --- test/test_api/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 25180a775..fda013612 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -748,6 +748,7 @@ def test_pipeline_fit(openml_id, elif resampling_strategy in HoldoutValTypes: assert not os.path.exists(cv_model_path) + @pytest.mark.parametrize('openml_id', (40984,)) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, {'val_share': 0.8}), From eb648e5b768196ceb08485e433be1c0b99fbeae3 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 7 Dec 2021 14:01:24 +0100 Subject: [PATCH 20/32] change to default configuration for documentation --- examples/40_advanced/example_single_configuration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index 564fb71e3..23581f3f8 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -48,7 +48,7 @@ ) ############################################################################ -# Get a random configuration of the pipeline for current dataset +# Get a configuration of the pipeline for current dataset # =============================================================== dataset = estimator.get_dataset(X_train=X_train, @@ -56,7 +56,7 @@ X_test=X_test, y_test=y_test, dataset_name='kr-vs-kp') -configuration = estimator.get_search_space(dataset).sample_configuration() +configuration = 
estimator.get_search_space(dataset).get_default_configuration() print("Passed Configuration:", configuration) ########################################################################### From 974ea1c6b3b1600f054e9c3d3953837364c088f1 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 7 Dec 2021 14:35:33 +0100 Subject: [PATCH 21/32] add warning for no ensemble created when y_optimization in disable_file_output --- autoPyTorch/api/base_task.py | 8 ++++++++ autoPyTorch/evaluation/utils.py | 19 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 6dde20560..c6b77dbc4 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1023,6 +1023,14 @@ def _search( self._all_supported_metrics = all_supported_metrics self._disable_file_output = disable_file_output if disable_file_output is not None else [] + if ( + DisableFileOutputParameters.check_value_in_iterable(self._disable_file_output, + DisableFileOutputParameters.y_optimization) + and self.ensemble_size > 1 + ): + self._logger.warning(f"No ensemble will be created when {DisableFileOutputParameters.y_optimization}" + f" is in disable_file_output") + self._memory_limit = memory_limit self._time_for_task = total_walltime_limit # Save start time to backend diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py index f729c21f8..3b6ed0669 100644 --- a/autoPyTorch/evaluation/utils.py +++ b/autoPyTorch/evaluation/utils.py @@ -1,7 +1,7 @@ import queue from enum import Enum from multiprocessing.queues import Queue -from typing import List, Optional, Union +from typing import Iterable, List, Optional, Union import numpy as np @@ -138,3 +138,20 @@ def check_compatibility(cls, disable_file_output: List) -> None: raise ValueError(f"Expected {item} to be in the members (" f"{list(cls.__members__.keys())}) of {cls.__name__}" f" or an instance.") + + @staticmethod + def check_value_in_iterable(container: Iterable, parameter: "DisableFileOutputParameters") -> bool: + """ + checks if parameter is in the container either as + the parameter itself or as its value. + + Args: + container (Iterable): + Iterable to check in. + parameter (DisableFileOutputParameters): + parameter to check. + Returns: + bool: + whether parameter is in `container` + """ + return parameter in container or parameter.value in container From cc19e4c6f28917d3c7671672daac7ebaabe2d678 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 7 Dec 2021 17:04:38 +0100 Subject: [PATCH 22/32] reduce budget for single configuration --- examples/40_advanced/example_single_configuration.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index 23581f3f8..e4a3078f9 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -66,7 +66,7 @@ pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset, configuration=configuration, budget_type='epochs', - budget=20, + budget=10, run_time_limit_secs=100 ) @@ -78,4 +78,5 @@ # This object complies with Scikit-Learn Pipeline API. 
# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
+if pipeline is not None:
     print(pipeline.named_steps)

From ab93ee6df0997fcdbb3494b634b44d093a47bda5 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Tue, 7 Dec 2021 17:44:54 +0100
Subject: [PATCH 23/32] address comments from eddie

---
 autoPyTorch/api/base_task.py        |  4 +--
 autoPyTorch/evaluation/utils.py     |  9 +++---
 .../example_single_configuration.py |  1 -
 test/test_evaluation/test_utils.py  | 30 ++++++++++++++-----
 4 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index c6b77dbc4..904fc8778 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -1588,7 +1588,7 @@ def fit_pipeline(
         if run_value.status == StatusType.SUCCESS:
             if 'all' in disable_file_output or 'pipeline' in disable_file_output:
                 self._logger.warning("File output is disabled. No pipeline can be returned")
-            elif run_value.status == StatusType.SUCCESS:
+            else:
                 if self.resampling_strategy in CrossValTypes:
                     load_function = self._backend.load_cv_model_by_seed_and_id_and_budget
                 else:
@@ -1600,7 +1600,7 @@ def fit_pipeline(
                 )
             else:
                 warnings.warn(f"Fitting pipeline failed with status: {run_value.status}"
-                              f", aditional_info: {run_value.additional_info}")
+                              f", additional_info: {run_value.additional_info}")
diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py
index 3b6ed0669..0dacb23b7 100644
--- a/autoPyTorch/evaluation/utils.py
+++ b/autoPyTorch/evaluation/utils.py
@@ -133,11 +133,10 @@ class DisableFileOutputParameters(Enum):
     @classmethod
     def check_compatibility(cls, disable_file_output: List) -> None:
         for item in disable_file_output:
-            if item not in cls.__members__:
-                if not isinstance(item, cls):
-                    raise ValueError(f"Expected {item} to be in the members ("
-                                     f"{list(cls.__members__.keys())}) of {cls.__name__}"
-                                     f" or an instance.")
+            if item not in cls.__members__ and not isinstance(item, cls):
+                raise ValueError(f"Expected {item} to be in the members ("
+                                 f"{list(cls.__members__.keys())}) of {cls.__name__}"
+                                 f" or an instance.")
diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py
index e4a3078f9..453ac4636 100644
--- a/examples/40_advanced/example_single_configuration.py
+++ b/examples/40_advanced/example_single_configuration.py
@@ -78,5 +78,4 @@
 # This object complies with Scikit-Learn Pipeline API.
 # https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
-if pipeline is not None:
 print(pipeline.named_steps)
diff --git a/test/test_evaluation/test_utils.py b/test/test_evaluation/test_utils.py
index a67b29e1a..c6743ac47 100644
--- a/test/test_evaluation/test_utils.py
+++ b/test/test_evaluation/test_utils.py
@@ -1,20 +1,34 @@
+"""
+Tests the functionality in autoPyTorch.evaluation.utils
+"""
 import pytest

 from autoPyTorch.evaluation.utils import DisableFileOutputParameters


-def test_disable_file_output_string_no_error():
-    disable_file_output = ['pipeline', 'pipelines']
+@pytest.mark.parametrize('disable_file_output',
+                         [['pipeline', 'pipelines'],
+                          [DisableFileOutputParameters.pipelines, DisableFileOutputParameters.pipeline]])
+def test_disable_file_output_no_error(disable_file_output):
+    """
+    Checks that `DisableFileOutputParameters.check_compatibility`
+    does not raise an error for the parameterized values of `disable_file_output`.
+
+    Args:
+        disable_file_output ([List[Union[str, DisableFileOutputParameters]]]):
+            Options that should be compatible with the `DisableFileOutputParameters`
+            defined in `autoPyTorch`.
+    """
     DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output)


-def test_disable_file_output_string_error():
+def test_disable_file_output_error():
+    """
+    Checks that `DisableFileOutputParameters.check_compatibility` raises an error
+    for a value not present in `DisableFileOutputParameters` and ensures that the
+    expected error is raised.
+    """
     disable_file_output = ['model']
     with pytest.raises(ValueError, match=r"Expected .*? to be in the members (.*?) of"
                                          r" DisableFileOutputParameters or an instance."):
         DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output)
-
-
-def test_disable_file_output_enum_no_error():
-    disable_file_output = [DisableFileOutputParameters.pipeline, DisableFileOutputParameters.pipelines]
-    DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output)
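A usage sketch of the contract that PATCH 23 settles on for `disable_file_output`. This mirrors the tests rewritten above and assumes only the `DisableFileOutputParameters` API shown in these diffs:

    from autoPyTorch.evaluation.utils import DisableFileOutputParameters

    # Plain strings and enum members are interchangeable:
    DisableFileOutputParameters.check_compatibility(
        disable_file_output=['pipeline', DisableFileOutputParameters.pipelines])

    # An unknown option such as 'model' raises the ValueError asserted in
    # test_disable_file_output_error above:
    DisableFileOutputParameters.check_compatibility(disable_file_output=['model'])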
From c246b20aafd740ae8d311762906d48dd92470117 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Thu, 9 Dec 2021 10:42:29 +0100
Subject: [PATCH 24/32] address comments from shuhei

---
 autoPyTorch/api/base_task.py                 | 4 ++--
 autoPyTorch/evaluation/abstract_evaluator.py | 5 ++---
 autoPyTorch/evaluation/utils.py              | 9 ++++++---
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index 904fc8778..db374b1b8 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -1024,8 +1024,8 @@ def _search(
         self._all_supported_metrics = all_supported_metrics
         self._disable_file_output = disable_file_output if disable_file_output is not None else []
         if (
-            DisableFileOutputParameters.check_value_in_iterable(self._disable_file_output,
-                                                                DisableFileOutputParameters.y_optimization)
+            DisableFileOutputParameters.is_in_iterable(self._disable_file_output,
+                                                       DisableFileOutputParameters.y_optimization)
             and self.ensemble_size > 1
         ):
             self._logger.warning(f"No ensemble will be created when {DisableFileOutputParameters.y_optimization}"
diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py
index 0b7cc105f..b21183dda 100644
--- a/autoPyTorch/evaluation/abstract_evaluator.py
+++ b/autoPyTorch/evaluation/abstract_evaluator.py
@@ -849,9 +849,8 @@
         )

         # Abort if we don't want to output anything. 
- if hasattr(self, 'disable_file_output'): - if 'all' in self.disable_file_output: - return None, {} + if 'all' in self.disable_file_output: + return None, {} # This file can be written independently of the others down bellow if 'y_optimization' not in self.disable_file_output: diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py index 0dacb23b7..d75b70a60 100644 --- a/autoPyTorch/evaluation/utils.py +++ b/autoPyTorch/evaluation/utils.py @@ -131,15 +131,18 @@ class DisableFileOutputParameters(Enum): all = 'all' @classmethod - def check_compatibility(cls, disable_file_output: List) -> None: + def check_compatibility( + cls, + disable_file_output: List[Union[str, 'DisableFileOutputParameters']] + ) -> None: for item in disable_file_output: if item not in cls.__members__ and not isinstance(item, cls): raise ValueError(f"Expected {item} to be in the members (" f"{list(cls.__members__.keys())}) of {cls.__name__}" - f" or an instance.") + f" either as an instance or the string value of the member.") @staticmethod - def check_value_in_iterable(container: Iterable, parameter: "DisableFileOutputParameters") -> bool: + def is_in_iterable(container: Iterable, parameter: "DisableFileOutputParameters") -> bool: """ checks if parameter is in the container either as the parameter itself or as its value. From a0a4e757cefc095480523d447778bc7f2c775c53 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 9 Dec 2021 12:56:24 +0100 Subject: [PATCH 25/32] Add autoPyTorchEnum --- autoPyTorch/api/base_task.py | 3 +-- autoPyTorch/evaluation/utils.py | 27 ++++++-------------------- autoPyTorch/utils/common.py | 20 +++++++++++++++++++ test/test_evaluation/test_utils.py | 3 ++- test/test_utils/test_common.py | 31 ++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 24 deletions(-) create mode 100644 test/test_utils/test_common.py diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index db374b1b8..ac1df9950 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1024,8 +1024,7 @@ def _search( self._all_supported_metrics = all_supported_metrics self._disable_file_output = disable_file_output if disable_file_output is not None else [] if ( - DisableFileOutputParameters.is_in_iterable(self._disable_file_output, - DisableFileOutputParameters.y_optimization) + DisableFileOutputParameters.y_optimization in self._disable_file_output and self.ensemble_size > 1 ): self._logger.warning(f"No ensemble will be created when {DisableFileOutputParameters.y_optimization}" diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py index d75b70a60..37e5fa36d 100644 --- a/autoPyTorch/evaluation/utils.py +++ b/autoPyTorch/evaluation/utils.py @@ -1,7 +1,6 @@ import queue -from enum import Enum from multiprocessing.queues import Queue -from typing import Iterable, List, Optional, Union +from typing import List, Optional, Union import numpy as np @@ -9,6 +8,9 @@ from smac.runhistory.runhistory import RunValue +from autoPyTorch.utils.common import autoPyTorchEnum + + __all__ = [ 'read_queue', 'convert_multioutput_multiclass_to_multilabel', @@ -105,7 +107,7 @@ def _predict(self, X: np.ndarray) -> np.ndarray: return np.asarray(predictions).T -class DisableFileOutputParameters(Enum): +class DisableFileOutputParameters(autoPyTorchEnum): """ Contains literals that can be passed in to `disable_file_output` list. 
These include: @@ -139,21 +141,4 @@ def check_compatibility( if item not in cls.__members__ and not isinstance(item, cls): raise ValueError(f"Expected {item} to be in the members (" f"{list(cls.__members__.keys())}) of {cls.__name__}" - f" either as an instance or the string value of the member.") - - @staticmethod - def is_in_iterable(container: Iterable, parameter: "DisableFileOutputParameters") -> bool: - """ - checks if parameter is in the container either as - the parameter itself or as its value. - - Args: - container (Iterable): - Iterable to check in. - parameter (DisableFileOutputParameters): - parameter to check. - Returns: - bool: - whether parameter is in `container` - """ - return parameter in container or parameter.value in container + f" or as string value of a member.") diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py index 7be8a233c..8da9ad6c7 100644 --- a/autoPyTorch/utils/common.py +++ b/autoPyTorch/utils/common.py @@ -1,3 +1,4 @@ +from enum import Enum from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Sequence, Type, Union from ConfigSpace.configuration_space import ConfigurationSpace @@ -75,6 +76,25 @@ def __str__(self) -> str: self.hyperparameter, self.value_range, self.default_value, self.log) +class autoPyTorchEnum(str, Enum): + """ + Utility class for enums in autoPyTorch. + Allows users to use strings, while we internally use + this enum + """ + def __eq__(self, other: Any) -> bool: + if isinstance(other, autoPyTorchEnum): + return type(self) == type(other) and self.value == other.value + elif isinstance(other, str): + return bool(self.value == other) + else: + raise RuntimeError(f"Unsupported type {type(other)}." + f"{self} only supports `str` and `{self}`") + + def __hash__(self) -> int: + return hash(self.value) + + def custom_collate_fn(batch: List) -> List[Optional[torch.Tensor]]: """ In the case of not providing a y tensor, in a diff --git a/test/test_evaluation/test_utils.py b/test/test_evaluation/test_utils.py index c6743ac47..e81eea38b 100644 --- a/test/test_evaluation/test_utils.py +++ b/test/test_evaluation/test_utils.py @@ -30,5 +30,6 @@ def test_disable_file_output_error(): """ disable_file_output = ['model'] with pytest.raises(ValueError, match=r"Expected .*? to be in the members (.*?) of" - r" DisableFileOutputParameters or an instance."): + r" DisableFileOutputParameters or as string value" + r" of a member."): DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) diff --git a/test/test_utils/test_common.py b/test/test_utils/test_common.py new file mode 100644 index 000000000..45044680b --- /dev/null +++ b/test/test_utils/test_common.py @@ -0,0 +1,31 @@ +""" +This tests the functionality in autoPyTorch/utils/common. +""" +import pytest + +from autoPyTorch.utils.common import autoPyTorchEnum + + +class SubEnum(autoPyTorchEnum): + x = "x" + y = "y" + + +@pytest.mark.parametrize('iter', + [[SubEnum.x], + ["x"], + {SubEnum.x: "hello"}, + {'x': 'hello'}]) +def test_autopytorch_enum(iter): + """ + This test ensures that a subclass of `autoPyTorchEnum` + can be used with strings. 
+
+    Args:
+        iter (Iterable):
+            iterable to check for compatibility
+    """
+
+    e = SubEnum.x
+
+    assert e in iter
\ No newline at end of file
From a0fef7700fd0be8696db1c8a477d4ddedf4f6754 Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Fri, 10 Dec 2021 12:39:42 +0100
Subject: [PATCH 26/32] fix flake in tests

---
 test/test_utils/test_common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_utils/test_common.py b/test/test_utils/test_common.py
index 45044680b..8a042bed6 100644
--- a/test/test_utils/test_common.py
+++ b/test/test_utils/test_common.py
@@ -27,5 +27,5 @@ def test_autopytorch_enum(iter):
     """
     e = SubEnum.x
-
-    assert e in iter
\ No newline at end of file
+
+    assert e in iter
From 8094ff11ebeeda8d823ae92df505477f6e7f53de Mon Sep 17 00:00:00 2001
From: Ravin Kohli
Date: Sun, 19 Dec 2021 15:46:50 +0100
Subject: [PATCH 27/32] address comments from shuhei

---
 autoPyTorch/api/base_task.py | 55 ++++++++++++++++++++++--------------
 autoPyTorch/utils/common.py  |  6 ++--
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index ac1df9950..c656be8d6 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -314,8 +314,7 @@ def _get_dataset_input_validator(
             BaseInputValidator:
                 fitted input validator
         """
-        raise NotImplementedError("Function called on BaseTask, this can only be called by "
-                                  "specific task which is a child of the BaseTask")
+        raise NotImplementedError

     def get_dataset(
         self,
@@ -1495,8 +1494,7 @@ def fit_pipeline(
         # TAE expects each configuration to have a config_id.
        # For fitting a pipeline as it is not part of the
        # search process, it makes sense to set it to 0
-        if hasattr(configuration, 'config_id') or configuration.config_id is None:
-            configuration.__setattr__('config_id', 0)
+        configuration.__setattr__('config_id', 0)

         # get dataset properties
         dataset_requirements = get_dataset_requirements(
@@ -1582,28 +1580,43 @@ def fit_pipeline(
             instance=None)
         )

-        fitted_pipeline: Optional[BasePipeline] = None
+        fitted_pipeline = self._get_fitted_pipeline(
+            pipeline_idx=run_info.config.config_id + tae.initial_num_run,
+            run_info=run_info,
+            run_value=run_value,
+            disable_file_output=disable_file_output
+        )

-        if run_value.status == StatusType.SUCCESS:
-            if 'all' in disable_file_output or 'pipeline' in disable_file_output:
-                self._logger.warning("File output is disabled. No pipeline can be returned")
-            else:
-                if self.resampling_strategy in CrossValTypes:
-                    load_function = self._backend.load_cv_model_by_seed_and_id_and_budget
-                else:
-                    load_function = self._backend.load_model_by_seed_and_id_and_budget
-                fitted_pipeline = load_function(
-                    seed=self.seed,
-                    idx=run_info.config.config_id + tae.initial_num_run,
-                    budget=float(run_info.budget),
-                )
-        else:
-            warnings.warn(f"Fitting pipeline failed with status: {run_value.status}"
-                          f", additional_info: {run_value.additional_info}")
         self._clean_logger()

         return fitted_pipeline, run_info, run_value, dataset

+    def _get_fitted_pipeline(
+        self,
+        pipeline_idx: int,
+        run_info: RunInfo,
+        run_value: RunValue,
+        disable_file_output: List[Union[str, DisableFileOutputParameters]]
+    ) -> Optional[BasePipeline]:
+        if run_value.status != StatusType.SUCCESS:
+            warnings.warn(f"Fitting pipeline failed with status: {run_value.status}"
+                          f", additional_info: {run_value.additional_info}")
+            return None
+        elif any(c in disable_file_output for c in ['all', 'pipeline']):
+            self._logger.warning("File output is disabled. 
No pipeline can be returned")
+            return None
+
+        if self.resampling_strategy in CrossValTypes:
+            load_function = self._backend.load_cv_model_by_seed_and_id_and_budget
+        else:
+            load_function = self._backend.load_model_by_seed_and_id_and_budget
+
+        return load_function(
+            seed=self.seed,
+            idx=pipeline_idx,
+            budget=float(run_info.budget),
+        )
+
     def predict(
         self,
         X_test: np.ndarray,
diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py
index 8da9ad6c7..1488d5fcd 100644
--- a/autoPyTorch/utils/common.py
+++ b/autoPyTorch/utils/common.py
@@ -88,8 +88,10 @@ def __eq__(self, other: Any) -> bool:
         elif isinstance(other, str):
             return bool(self.value == other)
         else:
-            raise RuntimeError(f"Unsupported type {type(other)}."
-                               f"{self} only supports `str` and `{self}`")
+            enum_name = self.__class__.__name__
+            raise RuntimeError(f"Unsupported type {type(other)}. "
+                               f"{enum_name} only supports `str` and "
+                               f"`{enum_name}`")

     def __hash__(self) -> int:
         return hash(self.value)
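A minimal sketch of the `autoPyTorchEnum` comparison semantics that PATCH 27 refines. `SubEnum` mirrors the helper class defined in test_common.py, and the error text assumes the message introduced just above:

    from autoPyTorch.utils.common import autoPyTorchEnum

    class SubEnum(autoPyTorchEnum):
        x = "x"

    assert SubEnum.x == "x"              # members compare equal to their string value
    assert SubEnum.x in ["x", "y"]       # so membership tests work with plain strings
    assert SubEnum.x in {"x": "hello"}   # hashing also falls back to the value

    SubEnum.x == 1  # raises RuntimeError: Unsupported type <class 'int'>. SubEnum only supports `str` and `SubEnum`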
From 4d90706d295d736e8aa52341dc887df58b648140 Mon Sep 17 00:00:00 2001
From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com>
Date: Sun, 19 Dec 2021 15:52:56 +0100
Subject: [PATCH 28/32] Apply suggestions from code review

Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com>
---
 autoPyTorch/api/base_task.py                 | 17 +++-----
 autoPyTorch/evaluation/abstract_evaluator.py |  4 +-
 test/test_utils/test_common.py               | 44 +++++++++++++++++++-
 3 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index c656be8d6..5f9fb0f05 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -1511,12 +1511,9 @@ def fit_pipeline(
         assert dataset.dataset_name is not None
         self._logger = self._get_logger(dataset.dataset_name)

-        if include_components is None:
-            include_components = self.include_components
-        if exclude_components is None:
-            exclude_components = self.exclude_components
-        if search_space_updates is None:
-            search_space_updates = self.search_space_updates
+        include_components = self.include_components if include_components is None else include_components
+        exclude_components = self.exclude_components if exclude_components is None else exclude_components
+        search_space_updates = self.search_space_updates if search_space_updates is None else search_space_updates

         scenario_mock = unittest.mock.Mock()
         scenario_mock.wallclock_limit = run_time_limit_secs
         # already be generated here!
         stats = Stats(scenario_mock)

-        if memory_limit is None:
-            if hasattr(self, '_memory_limit') and self._memory_limit is not None:
-                memory_limit = self._memory_limit
+        if memory_limit is None and getattr(self, '_memory_limit', None) is not None:
+            memory_limit = self._memory_limit

         metric = get_metrics(dataset_properties=dataset_properties,
                              names=[eval_metric] if eval_metric is not None else None,
@@ -1545,8 +1541,7 @@ def fit_pipeline(
         budget = budget if budget is not None else pipeline_options[budget_type]

         if disable_file_output is None:
-            disable_file_output = self._disable_file_output if hasattr(self, '_disable_file_output') \
-                and self._disable_file_output is not None else []
+            disable_file_output = getattr(self, '_disable_file_output', [])

         stats.start_timing()
diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py
index b21183dda..f98c69dd4 100644
--- a/autoPyTorch/evaluation/abstract_evaluator.py
+++ b/autoPyTorch/evaluation/abstract_evaluator.py
@@ -857,7 +857,7 @@ def file_output(
         if self.output_y_hat_optimization:
             self.backend.save_targets_ensemble(self.Y_optimization)

-        if hasattr(self, 'pipelines') and self.pipelines is not None:
+        if getattr(self, 'pipelines', None) is not None:
             if self.pipelines[0] is not None and len(self.pipelines) > 0:
                 if 'pipelines' not in self.disable_file_output:
                     if self.task_type in CLASSIFICATION_TASKS:
@@ -872,7 +872,7 @@ def file_output(
             else:
                 pipelines = None

-        if hasattr(self, 'pipeline') and self.pipeline is not None:
+        if getattr(self, 'pipeline', None) is not None:
             if 'pipeline' not in self.disable_file_output:
                 pipeline = self.pipeline
             else:
diff --git a/test/test_utils/test_common.py b/test/test_utils/test_common.py
index 8a042bed6..023c7aea0 100644
--- a/test/test_utils/test_common.py
+++ b/test/test_utils/test_common.py
@@ -12,10 +12,12 @@

 @pytest.mark.parametrize('iter',
-                         [[SubEnum.x],
+                         ([SubEnum.x],
                           ["x"],
                           {SubEnum.x: "hello"},
-                          {'x': 'hello'}])
+                          {'x': 'hello'},
+                          SubEnum,
+                          ["x", "y"]))
 def test_autopytorch_enum(iter):
     """
     This test ensures that a subclass of `autoPyTorchEnum`
     can be used with strings.
@@ -29,3 +31,41 @@ def test_autopytorch_enum(iter):
     e = SubEnum.x

     assert e in iter
+
+class DummyEnum(Enum):  # You need to move it on top
+    x = "x"
+
+
+@pytest.mark.parametrize('iter',
+                         [[SubEnum.y],
+                          ["y"],
+                          {SubEnum.y: "hello"},
+                          {'y': 'hello'}])
+def test_autopytorch_enum_false(iter):
+    """
+    This test ensures that a subclass of `autoPyTorchEnum`
+    can be used with strings.
+    Args:
+        iter (Iterable):
+            iterable to check for compatibility
+    """
+
+    e = SubEnum.x
+
+    assert e not in iter
+
+
+@pytest.mark.parametrize('others', (1, 2.0, SubEnum, DummyEnum.x))
+def test_raise_errors_autopytorch_enum(others):
+    """
+    This test ensures that a subclass of `autoPyTorchEnum`
+    raises error properly.
+    Args:
+        others (Any):
+            Variable to compare with SubEnum. 
+ """ + + with pytest.raises(RuntimeError): + SubEnum.x == others + + From c7cc712ea41292f3659920be0baa569a4544c38f Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Sun, 19 Dec 2021 16:03:46 +0100 Subject: [PATCH 29/32] fix flake --- autoPyTorch/api/base_task.py | 15 +++++++++++---- autoPyTorch/evaluation/abstract_evaluator.py | 2 +- test/test_utils/test_common.py | 11 ++++++----- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 5f9fb0f05..f2ecf3441 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1491,6 +1491,10 @@ def fit_pipeline( dataset_name=dataset_name ) + # dataset_name is created inside the constructor of BaseDataset + # we expect it to be not None. This is for mypy + assert dataset.dataset_name is not None + # TAE expects each configuration to have a config_id. # For fitting a pipeline as it is not part of the # search process, it makes sense to set it to 0 @@ -1506,9 +1510,6 @@ def fit_pipeline( self._backend.save_datamanager(dataset) if self._logger is None: - # dataset_name is created inside the constructor of BaseDataset - # we expect it to be not None. This is for mypy - assert dataset.dataset_name is not None self._logger = self._get_logger(dataset.dataset_name) include_components = self.include_components if include_components is None else include_components @@ -1576,6 +1577,7 @@ def fit_pipeline( ) fitted_pipeline = self._get_fitted_pipeline( + dataset_name=dataset.dataset_name, pipeline_idx=run_info.config.config_id + tae.initial_num_run, run_info=run_info, run_value=run_value, @@ -1588,11 +1590,16 @@ def fit_pipeline( def _get_fitted_pipeline( self, + dataset_name: str, pipeline_idx: int, run_info: RunInfo, run_value: RunValue, disable_file_output: List[Union[str, DisableFileOutputParameters]] ) -> Optional[BasePipeline]: + + if self._logger is None: + self._logger = self._get_logger(str(dataset_name)) + if run_value.status != StatusType.SUCCESS: warnings.warn(f"Fitting pipeline failed with status: {run_value.status}" f", additional_info: {run_value.additional_info}") @@ -1606,7 +1613,7 @@ def _get_fitted_pipeline( else: load_function = self._backend.load_model_by_seed_and_id_and_budget - return load_function( + return load_function( # type: ignore[no-any-return] seed=self.seed, idx=pipeline_idx, budget=float(run_info.budget), diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index f98c69dd4..2f792b7a8 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -858,7 +858,7 @@ def file_output( self.backend.save_targets_ensemble(self.Y_optimization) if getattr(self, 'pipelines', None) is not None: - if self.pipelines[0] is not None and len(self.pipelines) > 0: + if self.pipelines[0] is not None and len(self.pipelines) > 0: # type: ignore[index, arg-type] if 'pipelines' not in self.disable_file_output: if self.task_type in CLASSIFICATION_TASKS: pipelines = VotingClassifier(estimators=None, voting='soft', ) diff --git a/test/test_utils/test_common.py b/test/test_utils/test_common.py index 023c7aea0..ea3dec563 100644 --- a/test/test_utils/test_common.py +++ b/test/test_utils/test_common.py @@ -1,6 +1,8 @@ """ This tests the functionality in autoPyTorch/utils/common. 
""" +from enum import Enum + import pytest from autoPyTorch.utils.common import autoPyTorchEnum @@ -11,6 +13,10 @@ class SubEnum(autoPyTorchEnum): y = "y" +class DummyEnum(Enum): # You need to move it on top + x = "x" + + @pytest.mark.parametrize('iter', ([SubEnum.x], ["x"], @@ -32,9 +38,6 @@ def test_autopytorch_enum(iter): assert e in iter -class DummyEnum(Enum): # You need to move it on top - x = "x" - @pytest.mark.parametrize('iter', [[SubEnum.y], @@ -67,5 +70,3 @@ def test_raise_errors_autopytorch_enum(others): with pytest.raises(RuntimeError): SubEnum.x == others - - From 14113f97f4145157933adf14959b333cd42a6e86 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 20 Dec 2021 11:50:45 +0100 Subject: [PATCH 30/32] use **dataset_kwargs --- autoPyTorch/api/base_task.py | 54 +++++++++++++----------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index f2ecf3441..2400a55fd 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1353,14 +1353,6 @@ def refit( def fit_pipeline( self, configuration: Configuration, - dataset: Optional[BaseDataset] = None, - X_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - y_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - dataset_name: Optional[str] = None, - resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes]] = None, - resampling_strategy_args: Optional[Dict[str, Any]] = None, run_time_limit_secs: int = 60, memory_limit: Optional[int] = None, eval_metric: Optional[str] = None, @@ -1372,6 +1364,7 @@ def fit_pipeline( budget: Optional[float] = None, pipeline_options: Optional[Dict] = None, disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, + **dataset_kwargs: Any ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue, BaseDataset]: """ Fit a pipeline on the given task for the budget. @@ -1383,19 +1376,6 @@ def fit_pipeline( methods. Args: - X_train, y_train, X_test, y_test: Union[np.ndarray, List, pd.DataFrame] - A pair of features (X_train) and targets (y_train) used to fit a - pipeline. Additionally, a holdout of this pairs (X_test, y_test) can - be provided to track the generalization performance of each stage. - dataset_name (Optional[str]): - Name of the dataset, if None, random value is used. - resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): - Strategy to split the training data. if None, uses - HoldoutValTypes.holdout_validation. - resampling_strategy_args (Optional[Dict[str, Any]]): - Arguments required for the chosen resampling strategy. If None, uses - the default values provided in DEFAULT_RESAMPLING_PARAMETERS - in ```datasets/resampling_strategy.py```. run_time_limit_secs (int: default=60): Time limit for a single call to the machine learning model. Model fitting will be terminated if the machine learning algorithm @@ -1465,8 +1445,15 @@ def fit_pipeline( + `all`: do not save any of the above. For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. - configuration: (Configuration) + configuration (Configuration): configuration to fit the pipeline with. 
+            **dataset_kwargs (Any):
+                Can contain either `dataset (BaseDataset)` object or
+                keyword arguments specifying the dataset like X_train, y_train,
+                X_test, y_test (Optional[Union[List, pd.DataFrame, np.ndarray]] = None)
+                and other parameters like dataset_name (str),
+                resampling_strategy (Union[HoldoutValTypes, CrossValTypes]), 
+                resampling_strategy_args (Dict[str, Any]).

         Returns:
             (BasePipeline):
                 fitted pipeline
             (smac.runhistory.runhistory.RunInfo):
                 Run information
             (smac.runhistory.runhistory.RunValue):
                 Result of fitting the pipeline
             (BaseDataset):
                 Dataset created from the given tensors
-        """
+        """ 

-        if dataset is None:
-            assert X_train is not None and \
-                y_train is not None, "No dataset provided, must provide X_train, y_train tensors"
-            dataset = self.get_dataset(X_train=X_train,
-                                       y_train=y_train,
-                                       X_test=X_test,
-                                       y_test=y_test,
-                                       resampling_strategy=resampling_strategy,
-                                       resampling_strategy_args=resampling_strategy_args,
-                                       dataset_name=dataset_name
-                                       )
+        if 'dataset' not in dataset_kwargs:
+            if (
+                dataset_kwargs.get('X_train', None) is None
+                or dataset_kwargs.get('y_train', None) is None
+            ):
+                raise ValueError("No dataset provided, must provide X_train, y_train tensors")
+
+            dataset = self.get_dataset(**dataset_kwargs)
+        else:
+            dataset = dataset_kwargs['dataset']

         # dataset_name is created inside the constructor of BaseDataset
         # we expect it to be not None. This is for mypy
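A sketch of a call against the `**dataset_kwargs` interface introduced in PATCH 30, assuming an estimator, configuration, and data splits prepared as in examples/40_advanced/example_single_configuration.py (note that PATCH 32 below replaces this calling convention with explicit keyword-only arguments):

    # Either hand over a prepared dataset ...
    pipeline, run_info, run_value, dataset = estimator.fit_pipeline(
        configuration, dataset=dataset)

    # ... or let fit_pipeline build one from raw tensors:
    pipeline, run_info, run_value, dataset = estimator.fit_pipeline(
        configuration, X_train=X_train, y_train=y_train,
        X_test=X_test, y_test=y_test, dataset_name='kr-vs-kp')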
Returns: @@ -1464,7 +1464,7 @@ def fit_pipeline( Result of fitting the pipeline (BaseDataset): Dataset created from the given tensors - """ + """ if 'dataset' not in dataset_kwargs: if ( From 24aac05da7b522d9e1214b4dbff8dc4e99871b66 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 20 Dec 2021 16:27:38 +0100 Subject: [PATCH 32/32] change to enforce keyword args --- autoPyTorch/api/base_task.py | 57 +++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index d2f39822f..531125bff 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1353,6 +1353,15 @@ def refit( def fit_pipeline( self, configuration: Configuration, + *, + dataset: Optional[BaseDataset] = None, + X_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + dataset_name: Optional[str] = None, + resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes]] = None, + resampling_strategy_args: Optional[Dict[str, Any]] = None, run_time_limit_secs: int = 60, memory_limit: Optional[int] = None, eval_metric: Optional[str] = None, @@ -1364,7 +1373,6 @@ def fit_pipeline( budget: Optional[float] = None, pipeline_options: Optional[Dict] = None, disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - **dataset_kwargs: Any ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue, BaseDataset]: """ Fit a pipeline on the given task for the budget. @@ -1376,6 +1384,26 @@ def fit_pipeline( methods. Args: + configuration (Configuration): + configuration to fit the pipeline with. + dataset (BaseDataset): + An object of the appropriate child class of `BaseDataset`, + that will be used to fit the pipeline + X_train, y_train, X_test, y_test: Union[np.ndarray, List, pd.DataFrame] + A pair of features (X_train) and targets (y_train) used to fit a + pipeline. Additionally, a holdout of this pairs (X_test, y_test) can + be provided to track the generalization performance of each stage. + dataset_name (Optional[str]): + Name of the dataset, if None, random value is used. + resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + Strategy to split the training data. if None, uses + HoldoutValTypes.holdout_validation. + resampling_strategy_args (Optional[Dict[str, Any]]): + Arguments required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. + dataset_name (Optional[str]): + name of the dataset, used as experiment name. run_time_limit_secs (int: default=60): Time limit for a single call to the machine learning model. Model fitting will be terminated if the machine learning algorithm @@ -1445,15 +1473,6 @@ def fit_pipeline( + `all`: do not save any of the above. For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. - configuration (Configuration): - configuration to fit the pipeline with. 
- **dataset_kwargs (Any): - Can contain either `dataset (BaseDataset)` object or - keyword arguments specifying the dataset like X_train, y_train, - X_test, y_test (Optional[Union[List, pd.DataFrame, np.ndarray]] = None) - and other parameters like dataset_name (str), - resampling_strategy (Union[HoldoutValTypes, CrossValTypes]), - resampling_strategy_args (Dict[str, Any]). Returns: (BasePipeline): @@ -1466,16 +1485,20 @@ def fit_pipeline( Dataset created from the given tensors """ - if 'dataset' not in dataset_kwargs: + if dataset is None: if ( - dataset_kwargs.get('X_train', None) is not None - and dataset_kwargs.get('y_train', None) is not None + X_train is not None + and y_train is not None ): raise ValueError("No dataset provided, must provide X_train, y_train tensors") - - dataset = self.get_dataset(**dataset_kwargs) - else: - dataset = dataset_kwargs['dataset'] + dataset = self.get_dataset(X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, + dataset_name=dataset_name + ) # dataset_name is created inside the constructor of BaseDataset # we expect it to be not None. This is for mypy