automl · ravinkohli · Aug 12, 2022 · Aug 8, 2022 · Aug 8, 2022 · Aug 9, 2022
diff --git a/autoPyTorch/utils/results_manager.py b/autoPyTorch/utils/results_manager.py
@@ -1,6 +1,6 @@
 import io
 from datetime import datetime
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from ConfigSpace.configuration_space import Configuration
 
@@ -28,6 +28,9 @@
 ]
 
 
+OPTIONAL_INFERENCE_CHOICES = ('test',)  # inference choices whose loss/score data is allowed to be missing
+
+
 def cost2metric(cost: float, metric: autoPyTorchMetric) -> float:
     """
     Revert cost metric evaluated in SMAC to the original metric.
@@ -69,7 +72,7 @@ def _extract_metrics_info(
     run_value: RunValue,
     scoring_functions: List[autoPyTorchMetric],
     inference_name: str
-) -> Dict[str, float]:
+) -> Dict[str, Optional[float]]:
     """
     Extract the metric information given a run_value
     and a list of metrics of interest.
@@ -97,7 +100,14 @@ def _extract_metrics_info(
     if inference_name not in inference_choices:
         raise ValueError(f'inference_name must be in {inference_choices}, but got {inference_choices}')
 
-    cost_info = run_value.additional_info[f'{inference_name}_loss']
+    cost_info = run_value.additional_info.get(f'{inference_name}_loss', None)
+    if cost_info is None:
+        if inference_name not in OPTIONAL_INFERENCE_CHOICES:
+            raise ValueError(f"Expected loss for {inference_name} set to not be None, but got {cost_info}")
+        else:
+            # Additional info for metrics is not available in this case.
+            return {metric.name: None for metric in scoring_functions}
+
     avail_metrics = cost_info.keys()
 
     return {
@@ -175,7 +185,7 @@ def _update(self, data: Dict[str, Any]) -> None:
             )
 
         self._train_scores.append(data[f'train_{self.metric_name}'])
-        self._test_scores.append(data[f'test_{self.metric_name}'])
+        self._test_scores.append(data.get(f'test_{self.metric_name}', None))
         self._end_times.append(datetime.timestamp(data['Timestamp']))
 
     def _sort_by_endtime(self) -> None:
@@ -413,11 +423,31 @@ def _extract_results_from_run_history(self, run_history: RunHistory) -> None:
             config = run_history.ids_config[run_key.config_id]
             self._update(config=config, run_key=run_key, run_value=run_value)
 
+        self._check_null_in_optional_inference_choices()
+
         self.rank_opt_scores = scipy.stats.rankdata(
             -1 * self._metric._sign * self.opt_scores,  # rank order
             method='min'
         )
 
+    def _check_null_in_optional_inference_choices(self) -> None:
+        """
+        Blank out uninformative scores of the optional inference choices.
+
+        For each optional inference choice and each scoring function, if every recorded
+        score is either None or the metric's worst possible result, the scores are
+        replaced by a list of None with one entry per element of `self.status_types`.
+        """
+        for choice in OPTIONAL_INFERENCE_CHOICES:
+            attr_name = f'{choice}_metric_dict'
+            current, refreshed = getattr(self, attr_name), {}
+            for metric in self._scoring_functions:
+                values = current[metric.name]
+                # Nothing but None / worst-case entries: treat the whole series as missing.
+                missing = all(v is None or v == metric._worst_possible_result for v in values)
+                refreshed[metric.name] = [None] * len(self.status_types) if missing else values
+            setattr(self, attr_name, refreshed)
+
 
 class MetricResults:
     def __init__(
@@ -486,12 +516,24 @@ def _extract_results(self) -> None:
         for inference_name in ['train', 'test', 'opt']:
             # TODO: Extract information from self.search_results
             data = getattr(self.search_results, f'{inference_name}_metric_dict')[metric_name]
+            if all([d is None for d in data]):
+                if inference_name not in OPTIONAL_INFERENCE_CHOICES:
+                    raise ValueError(f"Expected {metric_name} score for {inference_name} set"
+                                     f" to not be None, but got {data}")
+                else:
+                    continue
             self.data[f'single::{inference_name}::{metric_name}'] = np.array(data)
 
             if self.ensemble_results.empty() or inference_name == 'opt':
                 continue
 
             data = getattr(self.ensemble_results, f'{inference_name}_scores')
+            if all([d is None for d in data]):
+                if inference_name not in OPTIONAL_INFERENCE_CHOICES:
+                    raise ValueError(f"Expected {metric_name} score for {inference_name} set"
+                                     f" to not be None, but got {data}")
+                else:
+                    continue
             self.data[f'ensemble::{inference_name}::{metric_name}'] = np.array(data)
 
     def get_ensemble_merged_data(self) -> Dict[str, np.ndarray]:
@@ -516,6 +558,8 @@ def get_ensemble_merged_data(self) -> Dict[str, np.ndarray]:
         cur, timestep_size, sign = 0, self.cum_times.size, self.metric._sign
         key_train, key_test = f'ensemble::train::{self.metric.name}', f'ensemble::test::{self.metric.name}'
 
+        all_test_perfs_null = all([perf is None for perf in test_scores])
+
         train_perfs = np.full_like(self.cum_times, self.metric._worst_possible_result)
         test_perfs = np.full_like(self.cum_times, self.metric._worst_possible_result)
 
@@ -530,9 +574,16 @@ def get_ensemble_merged_data(self) -> Dict[str, np.ndarray]:
             time_index = min(cur, timestep_size - 1)
             # If there already exists a previous allocated value, update by a better value
             train_perfs[time_index] = sign * max(sign * train_perfs[time_index], sign * train_score)
-            test_perfs[time_index] = sign * max(sign * test_perfs[time_index], sign * test_score)
+            # test_perfs can be none when X_test is not passed
+            if not all_test_perfs_null:
+                test_perfs[time_index] = sign * max(sign * test_perfs[time_index], sign * test_score)
+
+        update_dict = {key_train: train_perfs}
+        if not all_test_perfs_null:
+            update_dict[key_test] = test_perfs
+
+        data.update(update_dict)
 
-        data.update({key_train: train_perfs, key_test: test_perfs})
         return data
 
 

diff --git a/autoPyTorch/utils/results_visualizer.py b/autoPyTorch/utils/results_visualizer.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from autoPyTorch.utils.results_manager import MetricResults
+from autoPyTorch.utils.results_manager import MetricResults, OPTIONAL_INFERENCE_CHOICES
 
 
 plt.rcParams["font.family"] = "Times New Roman"
@@ -318,7 +318,15 @@ def plot_perf_over_time(
         minimize = (results.metric._sign == -1)
 
         for key in data.keys():
+            inference_name = key.split('::')[1]
             _label, _color, _perfs = labels[key], colors[key], data[key]
+            all_null_perfs = all([perf is None for perf in _perfs])
+
+            if all_null_perfs:
+                if inference_name not in OPTIONAL_INFERENCE_CHOICES:
+                    raise ValueError(f"Expected loss for {inference_name} set to not be None")
+                else:
+                    continue
             # Take the best results over time
             _cum_perfs = np.minimum.accumulate(_perfs) if minimize else np.maximum.accumulate(_perfs)