
Commit 55c01b7

Create fit evaluator, no resampling strategy and fix bug for test statistics

Fix mypy and flake errors
Fix the check for X_test while making the test data loader
Fix bug in Lookahead hyperparameters where "Lookahead" was repeated in the hyperparameter name
Make passing tests in the API easier
Fix bug in the trainer's weighted-loss code for regression
1 parent 613eb62 commit 55c01b7

16 files changed: +338 / -14 lines

autoPyTorch/api/base_task.py

Lines changed: 1 addition & 0 deletions
@@ -1502,6 +1502,7 @@ def fit_pipeline(
             (BaseDataset):
                 Dataset created from the given tensors
         """
+        self.dataset_name = dataset.dataset_name
 
         if dataset is None:
             if (
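Note: the added assignment dereferences `dataset` one line before the existing `if dataset is None:` guard, so a `None` dataset would now raise `AttributeError` before reaching that guard. A minimal sketch of the guarded ordering, purely illustrative (the commit itself assigns unconditionally; `BaseDataset` is the type from the diff):

from typing import Optional

def resolve_dataset_name(dataset: Optional["BaseDataset"]) -> Optional[str]:
    # Sketch: read the name only when a dataset was actually passed,
    # mirroring the None-check that already follows in fit_pipeline.
    return dataset.dataset_name if dataset is not None else None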

autoPyTorch/api/tabular_classification.py

Lines changed: 8 additions & 0 deletions
@@ -91,9 +91,17 @@ def __init__(
         output_directory: Optional[str] = None,
         delete_tmp_folder_after_terminate: bool = True,
         delete_output_folder_after_terminate: bool = True,
+<<<<<<< HEAD
         include_components: Optional[Dict[str, Any]] = None,
         exclude_components: Optional[Dict[str, Any]] = None,
         resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation,
+=======
+        include_components: Optional[Dict] = None,
+        exclude_components: Optional[Dict] = None,
+        resampling_strategy: Union[CrossValTypes,
+                                   HoldoutValTypes,
+                                   NoResamplingStrategyTypes] = HoldoutValTypes.holdout_validation,
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         backend: Optional[Backend] = None,
         search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None

autoPyTorch/api/tabular_regression.py

Lines changed: 8 additions & 0 deletions
@@ -92,9 +92,17 @@ def __init__(
         output_directory: Optional[str] = None,
         delete_tmp_folder_after_terminate: bool = True,
         delete_output_folder_after_terminate: bool = True,
+<<<<<<< HEAD
         include_components: Optional[Dict[str, Any]] = None,
         exclude_components: Optional[Dict[str, Any]] = None,
         resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation,
+=======
+        include_components: Optional[Dict] = None,
+        exclude_components: Optional[Dict] = None,
+        resampling_strategy: Union[CrossValTypes,
+                                   HoldoutValTypes,
+                                   NoResamplingStrategyTypes] = HoldoutValTypes.holdout_validation,
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         backend: Optional[Backend] = None,
         search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
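Note: once the conflict markers are resolved, either side of the signature has the same practical effect: the tabular API classes now accept a no-resampling strategy. A hedged usage sketch (class and enum names as they appear in the diff; all other constructor arguments left at their defaults):

from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes

# Sketch: fit pipelines on all of the training data, holding out no
# validation split (the default remains HoldoutValTypes.holdout_validation).
api = TabularClassificationTask(
    resampling_strategy=NoResamplingStrategyTypes.no_resampling,
)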

autoPyTorch/datasets/base_dataset.py

Lines changed: 34 additions & 0 deletions
@@ -112,7 +112,13 @@ def __init__(
         dataset_name: Optional[str] = None,
         val_tensors: Optional[BaseDatasetInputType] = None,
         test_tensors: Optional[BaseDatasetInputType] = None,
+<<<<<<< HEAD
         resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation,
+=======
+        resampling_strategy: Union[CrossValTypes,
+                                   HoldoutValTypes,
+                                   NoResamplingStrategyTypes] = HoldoutValTypes.holdout_validation,
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         shuffle: Optional[bool] = True,
         seed: Optional[int] = 42,
@@ -129,7 +135,12 @@ def __init__(
                 validation data
             test_tensors (An optional tuple of objects that have a __len__ and a __getitem__ attribute):
                 test data
+<<<<<<< HEAD
             resampling_strategy (RESAMPLING_STRATEGIES: default=HoldoutValTypes.holdout_validation):
+=======
+            resampling_strategy (Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]),
+                (default=HoldoutValTypes.holdout_validation):
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
                 strategy to split the training data.
             resampling_strategy_args (Optional[Dict[str, Any]]): arguments
                 required for the chosen resampling strategy. If None, uses
@@ -151,10 +162,17 @@ def __init__(
         if not hasattr(train_tensors[0], 'shape'):
             type_check(train_tensors, val_tensors)
         self.train_tensors, self.val_tensors, self.test_tensors = train_tensors, val_tensors, test_tensors
+<<<<<<< HEAD
         self.cross_validators: Dict[str, CrossValFunc] = {}
         self.holdout_validators: Dict[str, HoldOutFunc] = {}
         self.no_resampling_validators: Dict[str, NoResamplingFunc] = {}
         self.random_state = np.random.RandomState(seed=seed)
+=======
+        self.cross_validators: Dict[str, CROSS_VAL_FN] = {}
+        self.holdout_validators: Dict[str, HOLDOUT_FN] = {}
+        self.no_resampling_validators: Dict[str, NO_RESAMPLING_FN] = {}
+        self.rng = np.random.RandomState(seed=seed)
+>>>>>>> Fix mypy and flake
         self.shuffle = shuffle
         self.resampling_strategy = resampling_strategy
         self.resampling_strategy_args = resampling_strategy_args
@@ -171,7 +189,11 @@ def __init__(
         # Make sure cross validation splits are created once
         self.cross_validators = CrossValFuncs.get_cross_validators(*CrossValTypes)
         self.holdout_validators = HoldOutFuncs.get_holdout_validators(*HoldoutValTypes)
+<<<<<<< HEAD
         self.no_resampling_validators = NoResamplingFuncs.get_no_resampling_validators(*NoResamplingStrategyTypes)
+=======
+        self.no_resampling_validators = get_no_resampling_validators(*NoResamplingStrategyTypes)
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
 
         self.splits = self.get_splits_from_resampling_strategy()
 
@@ -272,8 +294,12 @@ def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], Optional[
                 )
             )
         elif isinstance(self.resampling_strategy, NoResamplingStrategyTypes):
+<<<<<<< HEAD
             splits.append((self.no_resampling_validators[self.resampling_strategy.name](self.random_state,
                                                                                         self._get_indices()), None))
+=======
+            splits.append((self.no_resampling_validators[self.resampling_strategy.name](self._get_indices()), None))
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
         else:
             raise ValueError(f"Unsupported resampling strategy={self.resampling_strategy}")
         return splits
@@ -345,7 +371,11 @@ def create_holdout_val_split(
             self.random_state, val_share, self._get_indices(), **kwargs)
         return train, val
 
+<<<<<<< HEAD
     def get_dataset(self, split_id: int, train: bool) -> Dataset:
+=======
+    def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset:
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
         """
         The above split methods employ the Subset to internally subsample the whole dataset.
@@ -360,6 +390,7 @@ def get_dataset(self, split_id: int, train: bool) -> Dataset:
             Dataset: the reduced dataset to be used for testing
         """
         # Subset creates a dataset. Splits is a (train_indices, test_indices) tuple
+<<<<<<< HEAD
         if split_id >= len(self.splits):  # old version: split_id > len(self.splits)
             raise IndexError(f"self.splits index out of range, got split_id={split_id}"
                              f" (>= num_splits={len(self.splits)})")
@@ -368,6 +399,9 @@ def get_dataset(self, split_id: int, train: bool) -> Dataset:
             raise ValueError("Specified fold (or subset) does not exist")
 
         return TransformSubset(self, indices, train=train)
+=======
+        return TransformSubset(self, self.splits[split_id][0], train=train)
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
 
     def replace_data(self, X_train: BaseDatasetInputType,
                      X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset':
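Note: whichever side of the conflict survives, the no-resampling branch appends a split whose validation half is `None`, unlike the holdout and cross-validation branches. A small sketch of the contract downstream code has to honour:

import numpy as np

# Sketch: shape of the split produced by the NoResamplingStrategyTypes branch.
indices = np.arange(100)
splits = [(indices.tolist(), None)]  # (train_indices, val_indices)

train_indices, val_indices = splits[0]
assert val_indices is None  # consumers must tolerate a missing val split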

autoPyTorch/datasets/resampling_strategy.py

Lines changed: 14 additions & 0 deletions
@@ -39,6 +39,11 @@ def __call__(self, random_state: np.random.RandomState, val_share: float,
         ...
 
 
+class NO_RESAMPLING_FN(Protocol):
+    def __call__(self, indices: np.ndarray) -> np.ndarray:
+        ...
+
+
 class CrossValTypes(IntEnum):
     """The type of cross validation
 
@@ -85,13 +90,22 @@ def is_stratified(self) -> bool:
 
 class NoResamplingStrategyTypes(IntEnum):
     no_resampling = 8
+<<<<<<< HEAD
 
     def is_stratified(self) -> bool:
         return False
 
 
 # TODO: replace it with another way
 ResamplingStrategies = Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]
+=======
+    shuffle_no_resampling = 9
+
+
+# TODO: replace it with another way
+RESAMPLING_STRATEGIES = [CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]
+
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
 
 DEFAULT_RESAMPLING_PARAMETERS: Dict[
     ResamplingStrategies,
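Note: the new `NO_RESAMPLING_FN` protocol takes only `indices`, while the HEAD-side call in `base_dataset.py` also passes `self.random_state`. A sketch of validators matching the HEAD-side convention; the shuffled variant is an assumption inferred from the `shuffle_no_resampling = 9` member in the other branch:

import numpy as np

def no_resampling(random_state: np.random.RandomState,
                  indices: np.ndarray) -> np.ndarray:
    # "No resampling": every index is training data; nothing is held out.
    return indices

def shuffle_no_resampling(random_state: np.random.RandomState,
                          indices: np.ndarray) -> np.ndarray:
    # Hypothetical shuffled variant: permute in place before returning.
    random_state.shuffle(indices)
    return indices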

autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 6 additions & 3 deletions
@@ -686,9 +686,9 @@ def _loss(self, y_true: np.ndarray, y_hat: np.ndarray) -> Dict[str, float]:
             y_true, y_hat, self.task_type, metrics)
 
     def finish_up(self, loss: Dict[str, float], train_loss: Dict[str, float],
-                  opt_pred: np.ndarray, valid_pred: Optional[np.ndarray],
-                  test_pred: Optional[np.ndarray], additional_run_info: Optional[Dict],
-                  file_output: bool, status: StatusType
+                  valid_pred: Optional[np.ndarray], test_pred: Optional[np.ndarray],
+                  additional_run_info: Optional[Dict], file_output: bool, status: StatusType,
+                  opt_pred: Optional[np.ndarray],
                   ) -> Optional[Tuple[float, float, int, Dict]]:
         """This function does everything necessary after the fitting is done:
@@ -730,6 +730,9 @@ def finish_up(self, loss: Dict[str, float], train_loss: Dict[str, float],
                 Additional run information, like train/test loss
         """
 
+        assert opt_pred is not None, "Cases where 'opt_pred' is None should be handled " \
+                                     "specifically with special child classes"
+
         self.duration = time.time() - self.starttime
 
         if file_output:
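Note: moving `opt_pred` from the third positional slot to the end silently re-binds any positional caller. A toy model of the hazard (parameter names follow the diff; the body is illustrative only):

from typing import List, Optional

def finish_up_new(loss: float, train_loss: float,
                  valid_pred: Optional[List[float]],
                  test_pred: Optional[List[float]],
                  opt_pred: Optional[List[float]] = None) -> List[float]:
    # mirrors the new assert: None is only valid for special child classes
    assert opt_pred is not None, "None opt_pred handled by special child classes"
    return opt_pred

# A caller written against the old order (loss, train_loss, opt_pred, ...)
# would now feed opt_pred into valid_pred; keyword arguments stay correct.
finish_up_new(0.1, 0.2, valid_pred=None, test_pred=None, opt_pred=[0.9])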

autoPyTorch/evaluation/train_evaluator.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,11 @@ def __init__(self, backend: Backend, queue: Queue,
151151
pipeline_config=pipeline_config,
152152
search_space_updates=search_space_updates
153153
)
154+
assert isinstance(self.datamanager.resampling_strategy, (CrossValTypes, HoldoutValTypes)),\
155+
"This Evaluator is used for HPO Search. " \
156+
"Val Split is required for HPO search. " \
157+
"Expected 'self.resampling_strategy' in" \
158+
" '(CrossValTypes, HoldoutValTypes)' got {}".format(self.datamanager.resampling_strategy)
154159

155160
if not isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
156161
raise ValueError(
@@ -408,7 +413,11 @@ def _predict(self, pipeline: BaseEstimator,
408413

409414

410415
# create closure for evaluating an algorithm
416+
<<<<<<< HEAD
411417
def eval_train_function(
418+
=======
419+
def eval_function(
420+
>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
412421
backend: Backend,
413422
queue: Queue,
414423
metric: autoPyTorchMetric,

autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py

Lines changed: 12 additions & 3 deletions
@@ -59,15 +59,15 @@ def __init__(self, batch_size: int = 64,
                 FitRequirement("Backend", (Backend,), user_defined=True, dataset_property=False),
                 FitRequirement("is_small_preprocess", (bool,), user_defined=True, dataset_property=True)])
 
-    def transform(self, X: np.ndarray) -> np.ndarray:
+    def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         """The transform function calls the transform function of the
         underlying model and returns the transformed array.
 
         Args:
-            X (np.ndarray): input features
+            X (Dict[str, Any]): 'X' dictionary
 
         Returns:
-            np.ndarray: Transformed features
+            (Dict[str, Any]): the updated 'X' dictionary
         """
         X.update({'train_data_loader': self.train_data_loader,
                   'val_data_loader': self.val_data_loader,
@@ -107,7 +107,11 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
         # Overwrite the datamanager with the pre-processed data
         datamanager.replace_data(X['X_train'], X['X_test'] if 'X_test' in X else None)
 
+<<<<<<< HEAD
         train_dataset = datamanager.get_dataset(split_id=X['split_id'], train=True)
+=======
+        train_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=True)
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
 
         self.train_data_loader = torch.utils.data.DataLoader(
             train_dataset,
@@ -119,8 +123,13 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
             collate_fn=custom_collate_fn,
         )
 
+<<<<<<< HEAD
         if X.get('val_indices', None) is not None:
             val_dataset = datamanager.get_dataset(split_id=X['split_id'], train=False)
+=======
+        if X['val_indices'] is not None:
+            val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False)
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
             self.val_data_loader = torch.utils.data.DataLoader(
                 val_dataset,
                 batch_size=min(self.batch_size, len(val_dataset)),
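Note: the two sides differ in how they probe for a validation split. The HEAD spelling tolerates a missing key, which is exactly what a no-resampling fit produces. A minimal sketch:

from typing import Any, Dict

X: Dict[str, Any] = {'split_id': 0}  # no 'val_indices' key at all

# HEAD branch: evaluates to False without raising.
if X.get('val_indices', None) is not None:
    pass

# The other branch's X['val_indices'] would raise KeyError here.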

autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ class AdversarialTrainer(BaseTrainerComponent):
     def __init__(
         self,
         epsilon: float,
-        weighted_loss: int = 1,
+        weighted_loss: int = 0,
         random_state: Optional[np.random.RandomState] = None,
         use_stochastic_weight_averaging: bool = False,
         use_snapshot_ensemble: bool = False,

autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 
 
 class StandardTrainer(BaseTrainerComponent):
-    def __init__(self, weighted_loss: bool = False,
+    def __init__(self, weighted_loss: int = 0,
                  use_stochastic_weight_averaging: bool = False,
                  use_snapshot_ensemble: bool = False,
                  se_lastk: int = 3,

autoPyTorch/pipeline/components/training/trainer/__init__.py

Lines changed: 20 additions & 2 deletions
@@ -382,7 +382,11 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
 
         val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {}
         if self.eval_valid_each_epoch(X):
+<<<<<<< HEAD
             if X['val_data_loader']:
+=======
+            if 'val_data_loader' in X and X['val_data_loader']:
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
                 val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
             if 'test_data_loader' in X and X['test_data_loader']:
                 test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer)
@@ -436,10 +440,17 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
 
         # wrap up -- add score if not evaluating every epoch
         if not self.eval_valid_each_epoch(X):
+<<<<<<< HEAD
             if X['val_data_loader']:
                 val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
             if 'test_data_loader' in X and X['val_data_loader']:
                 test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer)
+=======
+            if 'val_data_loader' in X and X['val_data_loader']:
+                val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
+            if 'test_data_loader' in X and X['test_data_loader']:
+                test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'])
+>>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
             self.run_summary.add_performance(
                 epoch=epoch,
                 start_time=start_time,
@@ -652,8 +663,15 @@ def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, H
 
         # iterate over all search space updates of this node and filter out the ones that have the given prefix
         for key in updates.keys():
-            if key.startswith(Lookahead.__name__):
-                result[key[len(Lookahead.__name__) + 1:]] = updates[key]
+            if Lookahead.__name__ in key:
+                # need to also remove the Lookahead prefix from the hyperparameter name
+                new_update = HyperparameterSearchSpace(
+                    updates[key].hyperparameter.replace('{}:'.format(Lookahead.__name__), ''),
+                    value_range=updates[key].value_range,
+                    default_value=updates[key].default_value,
+                    log=updates[key].log
+                )
+                result[key.replace('{}:'.format(Lookahead.__name__), '')] = new_update
             else:
                 result[key] = updates[key]
         return result
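Note: this hunk is the "lookahead was repeated for the hyperparameter name" fix from the commit message: the old code stripped the prefix from the dict key but left it inside the stored hyperparameter name. A toy reproduction (`la_steps` is an illustrative hyperparameter name, not taken from the diff):

prefix = 'Lookahead'
key = name = 'Lookahead:la_steps'

# old behaviour: the key loses the prefix, the stored name keeps it
old_key, old_name = key[len(prefix) + 1:], name    # 'la_steps', 'Lookahead:la_steps'

# new behaviour: both are rewritten consistently
new_key = key.replace('{}:'.format(prefix), '')    # 'la_steps'
new_name = name.replace('{}:'.format(prefix), '')  # 'la_steps'
assert new_key == new_name == 'la_steps'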

autoPyTorch/pipeline/components/training/trainer/base_trainer.py

Lines changed: 2 additions & 2 deletions
@@ -206,7 +206,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
     """
     Base class for training
     Args:
-        weighted_loss (int, default=1): In case of classification, whether to weight
+        weighted_loss (int, default=0): In case of classification, whether to weight
             the loss function according to the distribution of classes in the target
         use_stochastic_weight_averaging (bool, default=True): whether to use stochastic
             weight averaging. Stochastic weight averaging is a simple average of
@@ -221,7 +221,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
         random_state:
         **lookahead_config:
     """
-    def __init__(self, weighted_loss: int = 1,
+    def __init__(self, weighted_loss: int = 0,
                  use_stochastic_weight_averaging: bool = True,
                  use_snapshot_ensemble: bool = True,
                  se_lastk: int = 3,
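Note: per the docstring, `weighted_loss` weights the classification loss by the class distribution of the target, which has no analogue for regression; flipping the default to 0 matches the "fix bug in trainer weighted loss code for regression" item in the commit message. A hedged sketch of what the weighting means (illustrative helper, not the library's implementation):

import numpy as np

def class_weights(y: np.ndarray) -> np.ndarray:
    # inverse-frequency weights; a perfectly balanced target yields all 1.0
    counts = np.bincount(y)
    return counts.sum() / (len(counts) * counts)

print(class_weights(np.array([0, 0, 0, 1])))  # -> [0.667, 2.0]: rare class up-weighted
# Regression targets have no classes to count, hence the safer default of 0.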

autoPyTorch/pipeline/components/training/trainer/cutout_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
class CutOut:
1919
def __init__(self, patch_ratio: float,
2020
cutout_prob: float,
21-
weighted_loss: int = 1,
21+
weighted_loss: int = 0,
2222
random_state: Optional[np.random.RandomState] = None,
2323
use_stochastic_weight_averaging: bool = False,
2424
use_snapshot_ensemble: bool = False,

autoPyTorch/pipeline/components/training/trainer/mixup_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
class MixUp:
1919
def __init__(self, alpha: float,
20-
weighted_loss: int = 1,
20+
weighted_loss: int = 0,
2121
random_state: Optional[np.random.RandomState] = None,
2222
use_stochastic_weight_averaging: bool = False,
2323
use_snapshot_ensemble: bool = False,
