From a5374ede181bf3d88e5c3a8fab9c81fb7c596534 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 2 Sep 2021 13:53:08 +0200 Subject: [PATCH 01/26] To test locally --- .../setup/network_backbone/ResNetBackbone.py | 20 ++++++++++++++-- .../network_backbone/ShapedResNetBackbone.py | 15 ++++++++++++ .../setup/network_backbone/utils.py | 24 +++++++++++++++---- .../example_custom_configuration_space.py | 11 ++++++++- 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 069ca4679..2020d285a 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,6 +139,13 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_shake_drop", value_range=(True, False), default_value=True, @@ -180,9 +187,12 @@ def get_hyperparameter_search_space( if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -192,6 +202,10 @@ def get_hyperparameter_search_space( shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. @@ -327,11 +341,13 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: if self.config["multi_branch_choice"] == 'shake-shake': x1 = self.layers(x) x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(is_training=self.training, + is_cuda=x.is_cuda, + method=self.config['shake_shake_method']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, method='shake-drop') bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index e0867cdd3..aaaf54faf 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,6 +145,14 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), @@ -188,9 +196,12 @@ def get_hyperparameter_search_space( # type: ignore[override] if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -200,5 +211,9 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index ef19beac8..95a990eb2 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -92,15 +92,31 @@ def backward(ctx: typing.Any, shake_drop = ShakeDropFunction.apply -def shake_get_alpha_beta(is_training: bool, is_cuda: bool - ) -> typing.Tuple[torch.tensor, torch.tensor]: +def shake_get_alpha_beta( + is_training: bool, + is_cuda: bool, + method: str +) -> typing.Tuple[torch.tensor, torch.tensor]: + if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods - alpha = torch.rand(1) - beta = torch.rand(1) + if method != 'even-even': + alpha = torch.rand(1) + else: + alpha = torch.FloatTensor([0.5]) + if method in ['shake-shake', 'shake-drop']: + beta = torch.rand(1) + elif 'even' in method: # 'shake-even' or 'even-even' + beta = torch.FloatTensor([0.5]) + elif method == 'M3': + beta = torch.FloatTensor( + [torch.rand(1)*(0.5 - alpha)*alpha if alpha < 0.5 else torch.rand(1)*(alpha - 0.5)*alpha] + ) + else: + raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") if is_cuda: alpha = alpha.cuda() diff --git a/examples/tabular/40_advanced/example_custom_configuration_space.py b/examples/tabular/40_advanced/example_custom_configuration_space.py index 6a3764b94..b95ceeaa5 100644 --- a/examples/tabular/40_advanced/example_custom_configuration_space.py +++ b/examples/tabular/40_advanced/example_custom_configuration_space.py @@ -54,6 +54,15 @@ def get_search_space_updates(): hyperparameter='ResNetBackbone:dropout', value_range=[0, 0.5], default_value=0.2) + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:multi_branch_choice', + value_range=['shake-shake'], + default_value='shake-shake') + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:shake_shake_method', + value_range=['M3'], + default_value='M3' + ) return updates @@ -74,7 +83,7 @@ def get_search_space_updates(): # ================================================== api = TabularClassificationTask( search_space_updates=get_search_space_updates(), - include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], + include_components={'network_backbone': ['ResNetBackbone'], 'encoder': ['OneHotEncoder']} ) From 06ad6584746f5852dd4d31a3c7706ef44a218159 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Sep 2021 18:26:43 +0200 Subject: [PATCH 02/26] fix bug in trainer choice fit --- .../pipeline/components/training/trainer/base_trainer_choice.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 2dcb8fe16..7119df201 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -410,7 +410,6 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic # change model update_model_state_dict_from_swa(X['network'], self.choice.swa_model.state_dict()) if self.choice.use_snapshot_ensemble: - swa_utils.update_bn(X['train_data_loader'], model.double()) # we update only the last network which pertains to the stochastic weight averaging model swa_utils.update_bn(X['train_data_loader'], self.choice.model_snapshots[-1].double()) From 1942279d1ec5aabfcc2d2127ddc3bd9dfd056293 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 8 Sep 2021 13:16:20 +0200 Subject: [PATCH 03/26] fix ensemble bug --- autoPyTorch/api/base_task.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index c2e220875..6ed0559e0 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -397,6 +397,7 @@ def _clean_logger(self) -> None: self.logging_server.join(timeout=5) self.logging_server.terminate() del self.stop_logging_server + self._logger = None def _create_dask_client(self) -> None: """ @@ -491,6 +492,23 @@ def _load_models(self) -> bool: return True + def _cleanup(self) -> None: + """ + Closes the different servers created during api search. + Returns: + None + """ + if self._logger is not None: + self._logger.info("Closing the dask infrastructure") + self._close_dask_client() + self._logger.info("Finished closing the dask infrastructure") + + # Clean up the logger + self._logger.info("Starting to clean up the logger") + self._clean_logger() + else: + self._close_dask_client() + def _load_best_individual_model(self) -> SingleBest: """ In case of failure during ensemble building, @@ -923,6 +941,8 @@ def _search( self._stopwatch.stop_task(traditional_task_name) # ============> Starting ensemble + self.precision = precision + self.opt_metric = optimize_metric elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name) time_left_for_ensembles = max(0, total_walltime_limit - elapsed_time) proc_ensemble = None @@ -1024,18 +1044,12 @@ def _search( pd.DataFrame(self.ensemble_performance_history).to_json( os.path.join(self._backend.internals_directory, 'ensemble_history.json')) - self._logger.info("Closing the dask infrastructure") - self._close_dask_client() - self._logger.info("Finished closing the dask infrastructure") - if load_models: self._logger.info("Loading models...") self._load_models() self._logger.info("Finished loading models...") - # Clean up the logger - self._logger.info("Starting to clean up the logger") - self._clean_logger() + self._cleanup() return self @@ -1506,7 +1520,7 @@ def predict( predictions = self.ensemble_.predict(all_predictions) - self._clean_logger() + self._cleanup() return predictions @@ -1543,10 +1557,7 @@ def __getstate__(self) -> Dict[str, Any]: return self.__dict__ def __del__(self) -> None: - # Clean up the logger - self._clean_logger() - - self._close_dask_client() + self._cleanup() # When a multiprocessing work is done, the # objects are deleted. We don't want to delete run areas From 2dc88500566ac67ad30018fcba00a4e7e62d1cb3 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 8 Sep 2021 16:48:58 +0200 Subject: [PATCH 04/26] Correct bug in cleanup --- autoPyTorch/api/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 6ed0559e0..19951a3a5 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -498,7 +498,7 @@ def _cleanup(self) -> None: Returns: None """ - if self._logger is not None: + if hasattr(self, '_logger') and self._logger is not None: self._logger.info("Closing the dask infrastructure") self._close_dask_client() self._logger.info("Finished closing the dask infrastructure") From a80eb9e83cbd4847f3d23fb60378cc2453b23700 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 2 Sep 2021 13:53:08 +0200 Subject: [PATCH 05/26] To test locally --- .../setup/network_backbone/ResNetBackbone.py | 20 ++++++++++++++-- .../network_backbone/ShapedResNetBackbone.py | 15 ++++++++++++ .../setup/network_backbone/utils.py | 24 +++++++++++++++---- .../example_custom_configuration_space.py | 11 ++++++++- 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 069ca4679..2020d285a 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,6 +139,13 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_shake_drop", value_range=(True, False), default_value=True, @@ -180,9 +187,12 @@ def get_hyperparameter_search_space( if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -192,6 +202,10 @@ def get_hyperparameter_search_space( shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. @@ -327,11 +341,13 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: if self.config["multi_branch_choice"] == 'shake-shake': x1 = self.layers(x) x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(is_training=self.training, + is_cuda=x.is_cuda, + method=self.config['shake_shake_method']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, method='shake-drop') bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index e0867cdd3..aaaf54faf 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,6 +145,14 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), @@ -188,9 +196,12 @@ def get_hyperparameter_search_space( # type: ignore[override] if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -200,5 +211,9 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index ef19beac8..95a990eb2 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -92,15 +92,31 @@ def backward(ctx: typing.Any, shake_drop = ShakeDropFunction.apply -def shake_get_alpha_beta(is_training: bool, is_cuda: bool - ) -> typing.Tuple[torch.tensor, torch.tensor]: +def shake_get_alpha_beta( + is_training: bool, + is_cuda: bool, + method: str +) -> typing.Tuple[torch.tensor, torch.tensor]: + if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods - alpha = torch.rand(1) - beta = torch.rand(1) + if method != 'even-even': + alpha = torch.rand(1) + else: + alpha = torch.FloatTensor([0.5]) + if method in ['shake-shake', 'shake-drop']: + beta = torch.rand(1) + elif 'even' in method: # 'shake-even' or 'even-even' + beta = torch.FloatTensor([0.5]) + elif method == 'M3': + beta = torch.FloatTensor( + [torch.rand(1)*(0.5 - alpha)*alpha if alpha < 0.5 else torch.rand(1)*(alpha - 0.5)*alpha] + ) + else: + raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") if is_cuda: alpha = alpha.cuda() diff --git a/examples/tabular/40_advanced/example_custom_configuration_space.py b/examples/tabular/40_advanced/example_custom_configuration_space.py index 6a3764b94..b95ceeaa5 100644 --- a/examples/tabular/40_advanced/example_custom_configuration_space.py +++ b/examples/tabular/40_advanced/example_custom_configuration_space.py @@ -54,6 +54,15 @@ def get_search_space_updates(): hyperparameter='ResNetBackbone:dropout', value_range=[0, 0.5], default_value=0.2) + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:multi_branch_choice', + value_range=['shake-shake'], + default_value='shake-shake') + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:shake_shake_method', + value_range=['M3'], + default_value='M3' + ) return updates @@ -74,7 +83,7 @@ def get_search_space_updates(): # ================================================== api = TabularClassificationTask( search_space_updates=get_search_space_updates(), - include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], + include_components={'network_backbone': ['ResNetBackbone'], 'encoder': ['OneHotEncoder']} ) From 06d80d471898b41b59104a3bc92800f02f275b54 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 16 Sep 2021 14:44:10 +0200 Subject: [PATCH 06/26] Cleanup for removing time debug statements --- .../tabular_preprocessing/TabularColumnTransformer.py | 3 --- .../pipeline/components/training/trainer/base_trainer.py | 5 ----- .../components/training/trainer/base_trainer_choice.py | 3 --- .../pipeline/components/training/trainer/cutout_utils.py | 2 -- .../pipeline/components/training/trainer/mixup_utils.py | 2 -- 5 files changed, 15 deletions(-) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py index 5fcf5cfb5..c7ca61e09 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -23,7 +23,6 @@ def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = N self.add_fit_requirements([ FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True), FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True)]) - self.fit_time = None def get_column_transformer(self) -> ColumnTransformer: """ @@ -48,7 +47,6 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": Returns: "TabularColumnTransformer": an instance of self """ - start_time = time.time() self.check_requirements(X, y) numerical_pipeline = 'passthrough' @@ -74,7 +72,6 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": X_train = X['backend'].load_datamanager().train_tensors[0] self.preprocessor.fit(X_train) - self.fit_time = time.time() - start_time return self diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 188504da3..6040f32e9 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -371,16 +371,12 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, outputs_data = list() targets_data = list() - batch_load_start_time = time.time() for step, (data, targets) in enumerate(train_loader): - self.data_loading_times.append(time.time() - batch_load_start_time) - batch_train_start = time.time() if self.budget_tracker.is_max_time_reached(): break loss, outputs = self.train_step(data, targets) - self.batch_fit_times.append(time.time() - batch_train_start) # save for metric evaluation outputs_data.append(outputs.detach().cpu()) targets_data.append(targets.detach().cpu()) @@ -395,7 +391,6 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, loss, epoch * len(train_loader) + step, ) - batch_load_start_time = time.time() if self.scheduler: if 'ReduceLROnPlateau' in self.scheduler.__class__.__name__: diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 7119df201..a344e92ce 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -77,7 +77,6 @@ def __init__(self, (torch.utils.data.DataLoader,), user_defined=False, dataset_property=False)] self.checkpoint_dir = None # type: Optional[str] - self.fit_time = None def get_fit_requirements(self) -> Optional[List[FitRequirement]]: return self._fit_requirements @@ -264,7 +263,6 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom Returns: A instance of self """ - start_time = time.time() # Make sure that the prerequisites are there self.check_requirements(X, y) @@ -287,7 +285,6 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom self.choice: autoPyTorchComponent = cast(autoPyTorchComponent, self.choice) if self.choice.use_snapshot_ensemble: X['network_snapshots'].extend(self.choice.model_snapshots) - self.fit_time = time.time() - start_time return self.choice def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoice': diff --git a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py index c58546a4c..c7feb2214 100644 --- a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py @@ -53,8 +53,6 @@ def __init__(self, patch_ratio: float, self.lookahead_config = lookahead_config self.patch_ratio = patch_ratio self.cutout_prob = cutout_prob - self.batch_fit_times = [] - self.data_loading_times = [] def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 ) -> Callable: diff --git a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py index b1cf37972..a2325b91c 100644 --- a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py @@ -51,8 +51,6 @@ def __init__(self, alpha: float, f'{Lookahead.__name__}:la_alpha': 0.6} self.lookahead_config = lookahead_config self.alpha = alpha - self.batch_fit_times = [] - self.data_loading_times = [] def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 ) -> Callable: From d8b553aa2262825786440c26779e1f39b142d6e5 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 20 Sep 2021 15:55:20 +0200 Subject: [PATCH 07/26] ablation for adversarial --- .../components/training/trainer/AdversarialTrainer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py index c5a536dd0..36d586919 100644 --- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py @@ -189,12 +189,17 @@ def get_hyperparameter_search_space( default_value=3), epsilon: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="epsilon", - value_range=(0.05, 0.2), - default_value=0.2), + value_range=(0.001, 0.15), + default_value=0.007, + log=True), ) -> ConfigurationSpace: cs = ConfigurationSpace() + epsilon = HyperparameterSearchSpace(hyperparameter="epsilon", + value_range=(0.007, 0.007), + default_value=0.007) add_hyperparameter(cs, epsilon, UniformFloatHyperparameter) + add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter) snapshot_ensemble_flag = False if any(use_snapshot_ensemble.value_range): From 34712b3b2d2c7a5ab810f795cf80abcf4090adb4 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 21 Sep 2021 13:08:54 +0200 Subject: [PATCH 08/26] shuffle false in dataloader --- .../components/training/data_loader/base_data_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 5b8e445ac..8dff86052 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,7 +112,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=True, + shuffle=False, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), drop_last=X.get('drop_last', True), From 49f40dc2715f9e40ec38455e073c5116e2ee2b1a Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 21 Sep 2021 14:44:48 +0200 Subject: [PATCH 09/26] drop last false in dataloader --- .../components/training/data_loader/base_data_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 8dff86052..7302ac6f5 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,10 +112,10 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=False, + shuffle=True, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), - drop_last=X.get('drop_last', True), + drop_last=X.get('drop_last', False), collate_fn=custom_collate_fn, ) From f4ea158a4c5611137a2522dfa4237b32b4ca1941 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 23 Sep 2021 15:39:25 +0200 Subject: [PATCH 10/26] fix bug for validation set, and cutout and cutmix --- autoPyTorch/api/base_task.py | 2 +- .../pipeline/components/training/trainer/RowCutMixTrainer.py | 2 +- .../pipeline/components/training/trainer/RowCutOutTrainer.py | 2 +- autoPyTorch/utils/backend.py | 5 +++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 19951a3a5..14aa6ab83 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1244,7 +1244,7 @@ def fit_pipeline(self, dataset_requirements = get_dataset_requirements( info=self._get_required_dataset_properties(dataset)) dataset_properties = dataset.get_dataset_properties(dataset_requirements) - self._backend.save_datamanager(dataset) + self._backend.replace_datamanager(dataset) if self._logger is None: self._logger = self._get_logger(dataset.dataset_name) diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 20d02c793..f1b606046 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -36,7 +36,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y, 'y_b': y[index], 'lam': 1} size = X.shape[1] - indices = torch.tensor(self.random_state.choice(range(1, size), max(1, np.int32(size * lam)), + indices = torch.tensor(self.random_state.choice(range(size), max(1, np.int32(size * lam)), replace=False)) X[:, indices] = X[index, :][:, indices] diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index c09603523..d7bd23f4e 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -37,7 +37,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} size = X.shape[1] - indices = self.random_state.choice(range(1, size), max(1, np.int32(size * self.patch_ratio)), + indices = self.random_state.choice(range(size), max(1, np.int32(size * self.patch_ratio)), replace=False) """if not isinstance(self.numerical_columns, typing.Iterable): diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index 713c7d572..7a7399a9f 100644 --- a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -328,6 +328,11 @@ def load_datamanager(self) -> BaseDataset: with open(filepath, 'rb') as fh: return pickle.load(fh) + def replace_datamanager(self, datamanager: BaseDataset): + warnings.warn("Original dataset will be overwritten with the provided dataset") + os.remove(self._get_datamanager_pickle_filename()) + self.save_datamanager(datamanager=datamanager) + def get_runs_directory(self) -> str: return os.path.join(self.internals_directory, 'runs') From fca1399b6875f5f6e225c3334c811ff9a44645d8 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 2 Sep 2021 13:53:08 +0200 Subject: [PATCH 11/26] To test locally --- .../setup/network_backbone/ResNetBackbone.py | 20 ++++++++++++++-- .../network_backbone/ShapedResNetBackbone.py | 15 ++++++++++++ .../setup/network_backbone/utils.py | 24 +++++++++++++++---- .../example_custom_configuration_space.py | 11 ++++++++- 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 069ca4679..2020d285a 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,6 +139,13 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_shake_drop", value_range=(True, False), default_value=True, @@ -180,9 +187,12 @@ def get_hyperparameter_search_space( if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -192,6 +202,10 @@ def get_hyperparameter_search_space( shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. @@ -327,11 +341,13 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: if self.config["multi_branch_choice"] == 'shake-shake': x1 = self.layers(x) x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(is_training=self.training, + is_cuda=x.is_cuda, + method=self.config['shake_shake_method']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, method='shake-drop') bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index e0867cdd3..aaaf54faf 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,6 +145,14 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), @@ -188,9 +196,12 @@ def get_hyperparameter_search_space( # type: ignore[override] if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -200,5 +211,9 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index ef19beac8..95a990eb2 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -92,15 +92,31 @@ def backward(ctx: typing.Any, shake_drop = ShakeDropFunction.apply -def shake_get_alpha_beta(is_training: bool, is_cuda: bool - ) -> typing.Tuple[torch.tensor, torch.tensor]: +def shake_get_alpha_beta( + is_training: bool, + is_cuda: bool, + method: str +) -> typing.Tuple[torch.tensor, torch.tensor]: + if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods - alpha = torch.rand(1) - beta = torch.rand(1) + if method != 'even-even': + alpha = torch.rand(1) + else: + alpha = torch.FloatTensor([0.5]) + if method in ['shake-shake', 'shake-drop']: + beta = torch.rand(1) + elif 'even' in method: # 'shake-even' or 'even-even' + beta = torch.FloatTensor([0.5]) + elif method == 'M3': + beta = torch.FloatTensor( + [torch.rand(1)*(0.5 - alpha)*alpha if alpha < 0.5 else torch.rand(1)*(alpha - 0.5)*alpha] + ) + else: + raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") if is_cuda: alpha = alpha.cuda() diff --git a/examples/tabular/40_advanced/example_custom_configuration_space.py b/examples/tabular/40_advanced/example_custom_configuration_space.py index 6a3764b94..b95ceeaa5 100644 --- a/examples/tabular/40_advanced/example_custom_configuration_space.py +++ b/examples/tabular/40_advanced/example_custom_configuration_space.py @@ -54,6 +54,15 @@ def get_search_space_updates(): hyperparameter='ResNetBackbone:dropout', value_range=[0, 0.5], default_value=0.2) + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:multi_branch_choice', + value_range=['shake-shake'], + default_value='shake-shake') + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:shake_shake_method', + value_range=['M3'], + default_value='M3' + ) return updates @@ -74,7 +83,7 @@ def get_search_space_updates(): # ================================================== api = TabularClassificationTask( search_space_updates=get_search_space_updates(), - include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], + include_components={'network_backbone': ['ResNetBackbone'], 'encoder': ['OneHotEncoder']} ) From 209a4e82a362cb0edf15432bafb8a526f7c19b3e Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Fri, 24 Sep 2021 12:36:54 +0200 Subject: [PATCH 12/26] shuffle = False --- .../components/training/data_loader/base_data_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 7302ac6f5..bf0f23fa6 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,7 +112,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=True, + shuffle=False, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), drop_last=X.get('drop_last', False), From d18fcca083ebd15c7644146792026c809f3f3f6d Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 2 Sep 2021 13:53:08 +0200 Subject: [PATCH 13/26] To test locally --- .../setup/network_backbone/ResNetBackbone.py | 20 ++++++++++++++-- .../network_backbone/ShapedResNetBackbone.py | 15 ++++++++++++ .../setup/network_backbone/utils.py | 24 +++++++++++++++---- .../example_custom_configuration_space.py | 11 ++++++++- 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 069ca4679..2020d285a 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,6 +139,13 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_shake_drop", value_range=(True, False), default_value=True, @@ -180,9 +187,12 @@ def get_hyperparameter_search_space( if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -192,6 +202,10 @@ def get_hyperparameter_search_space( shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. @@ -327,11 +341,13 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: if self.config["multi_branch_choice"] == 'shake-shake': x1 = self.layers(x) x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(is_training=self.training, + is_cuda=x.is_cuda, + method=self.config['shake_shake_method']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, method='shake-drop') bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index e0867cdd3..aaaf54faf 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,6 +145,14 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), @@ -188,9 +196,12 @@ def get_hyperparameter_search_space( # type: ignore[override] if skip_connection_flag: + shake_shake_flag = False shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True + elif 'shake-shake' in multi_branch_choice.value_range: + shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -200,5 +211,9 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag: + method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index ef19beac8..95a990eb2 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -92,15 +92,31 @@ def backward(ctx: typing.Any, shake_drop = ShakeDropFunction.apply -def shake_get_alpha_beta(is_training: bool, is_cuda: bool - ) -> typing.Tuple[torch.tensor, torch.tensor]: +def shake_get_alpha_beta( + is_training: bool, + is_cuda: bool, + method: str +) -> typing.Tuple[torch.tensor, torch.tensor]: + if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods - alpha = torch.rand(1) - beta = torch.rand(1) + if method != 'even-even': + alpha = torch.rand(1) + else: + alpha = torch.FloatTensor([0.5]) + if method in ['shake-shake', 'shake-drop']: + beta = torch.rand(1) + elif 'even' in method: # 'shake-even' or 'even-even' + beta = torch.FloatTensor([0.5]) + elif method == 'M3': + beta = torch.FloatTensor( + [torch.rand(1)*(0.5 - alpha)*alpha if alpha < 0.5 else torch.rand(1)*(alpha - 0.5)*alpha] + ) + else: + raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") if is_cuda: alpha = alpha.cuda() diff --git a/examples/tabular/40_advanced/example_custom_configuration_space.py b/examples/tabular/40_advanced/example_custom_configuration_space.py index 6a3764b94..b95ceeaa5 100644 --- a/examples/tabular/40_advanced/example_custom_configuration_space.py +++ b/examples/tabular/40_advanced/example_custom_configuration_space.py @@ -54,6 +54,15 @@ def get_search_space_updates(): hyperparameter='ResNetBackbone:dropout', value_range=[0, 0.5], default_value=0.2) + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:multi_branch_choice', + value_range=['shake-shake'], + default_value='shake-shake') + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:shake_shake_method', + value_range=['M3'], + default_value='M3' + ) return updates @@ -74,7 +83,7 @@ def get_search_space_updates(): # ================================================== api = TabularClassificationTask( search_space_updates=get_search_space_updates(), - include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], + include_components={'network_backbone': ['ResNetBackbone'], 'encoder': ['OneHotEncoder']} ) From b38bfb3be8bf06ce65e893cc50104c65572998e8 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Sun, 26 Sep 2021 18:15:18 +0200 Subject: [PATCH 14/26] updates to search space --- .../pipeline/components/setup/optimizer/AdamWOptimizer.py | 2 +- .../components/training/data_loader/base_data_loader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py index 4d11c3026..cc0123af8 100644 --- a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py +++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py @@ -97,7 +97,7 @@ def get_hyperparameter_search_space( weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay", value_range=(1E-7, 0.1), default_value=1E-4, - log=True), + log=False), ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index bf0f23fa6..7302ac6f5 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,7 +112,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=False, + shuffle=True, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), drop_last=X.get('drop_last', False), From 8c2f2ac259211150a34f02097f9d920918c8ef22 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Sun, 26 Sep 2021 18:19:17 +0200 Subject: [PATCH 15/26] updates to search space --- .../pipeline/components/setup/optimizer/AdamWOptimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py index cc0123af8..a415ff1c6 100644 --- a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py +++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py @@ -95,7 +95,7 @@ def get_hyperparameter_search_space( default_value=True, ), weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay", - value_range=(1E-7, 0.1), + value_range=(1E-5, 0.1), default_value=1E-4, log=False), ) -> ConfigurationSpace: From f0676b119c973b56d5e6bbe35551aba82c0ca71a Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Sun, 26 Sep 2021 22:08:38 +0200 Subject: [PATCH 16/26] update branch with search space --- autoPyTorch/pipeline/base_pipeline.py | 5 +++-- .../network_backbone/ShapedResNetBackbone.py | 17 +++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 842f63271..80d59a68f 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -451,12 +451,13 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], continue raise ValueError("Unknown hyperparameter for component {}. " "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, + "to be in {} got {}. choice is {}".format(node.__class__.__name__, component. get_hyperparameter_search_space( dataset_properties=self.dataset_properties). get_hyperparameter_names(), - split_hyperparameter[1])) + split_hyperparameter[1], + component.__name__)) else: if update.hyperparameter not in node.get_hyperparameter_search_space( dataset_properties=self.dataset_properties): diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index aaaf54faf..c18a7aea0 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,13 +145,18 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + # shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + # hyperparameter="shake_shake_method", + # value_range=('shake-shake', + # 'shake-even', + # 'even-even', + # 'M3'), + # default_value='shake-shake', + # ), shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - hyperparameter="shake_shake_method", - value_range=('shake-shake', - 'shake-even', - 'even-even', - 'M3'), - default_value='shake-shake', + hyperparameter="shake_shake_method", + value_range=('even-even', ), + default_value='even-even', ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", From 82d950ca1ca939a015947f2451f0ce4c8f3a652f Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 27 Sep 2021 11:34:00 +0200 Subject: [PATCH 17/26] undo search space update --- .../network_backbone/ShapedResNetBackbone.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index c18a7aea0..aaaf54faf 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,18 +145,13 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), - # shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - # hyperparameter="shake_shake_method", - # value_range=('shake-shake', - # 'shake-even', - # 'even-even', - # 'M3'), - # default_value='shake-shake', - # ), shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - hyperparameter="shake_shake_method", - value_range=('even-even', ), - default_value='even-even', + hyperparameter="shake_shake_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", From 30ba55ebbb47c236d769624ad54e45190d266155 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 27 Sep 2021 11:57:02 +0200 Subject: [PATCH 18/26] fix bug in shake shake flag --- .../components/setup/network_backbone/ResNetBackbone.py | 2 +- .../components/setup/network_backbone/ShapedResNetBackbone.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 2020d285a..2d553c743 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -191,7 +191,7 @@ def get_hyperparameter_search_space( shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True - elif 'shake-shake' in multi_branch_choice.value_range: + if 'shake-shake' in multi_branch_choice.value_range: shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index aaaf54faf..5dba55805 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -200,7 +200,7 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_drop_prob_flag = False if 'shake-drop' in multi_branch_choice.value_range: shake_drop_prob_flag = True - elif 'shake-shake' in multi_branch_choice.value_range: + if 'shake-shake' in multi_branch_choice.value_range: shake_shake_flag = True mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) From e406f5bc97e848c3b8493379479a5248dc9e9a5f Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 27 Sep 2021 17:26:38 +0200 Subject: [PATCH 19/26] limit to shake-even --- .../network_backbone/ShapedResNetBackbone.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index 5dba55805..6f68f62ae 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,13 +145,18 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + # shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + # hyperparameter="shake_shake_method", + # value_range=('shake-shake', + # 'shake-even', + # 'even-even', + # 'M3'), + # default_value='shake-shake', + # ), shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - hyperparameter="shake_shake_method", - value_range=('shake-shake', - 'shake-even', - 'even-even', - 'M3'), - default_value='shake-shake', + hyperparameter="shake_shake_method", + value_range=('shake-even', ), + default_value='shake-even', ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", From 863cc06c70b3000d2d55d091b7355d7f29a961fe Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 27 Sep 2021 18:00:56 +0200 Subject: [PATCH 20/26] restrict to even even --- .../components/setup/network_backbone/ShapedResNetBackbone.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index 6f68f62ae..6f16bb3c0 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -155,8 +155,8 @@ def get_hyperparameter_search_space( # type: ignore[override] # ), shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="shake_shake_method", - value_range=('shake-even', ), - default_value='shake-even', + value_range=('even-even', ), + default_value='even-even', ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", From 292178197068a59a6a9c2ec869349dfec0ffd96f Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 29 Sep 2021 12:06:16 +0200 Subject: [PATCH 21/26] Add even even and others for shake-drop also --- .../setup/network_backbone/ResNetBackbone.py | 29 +++++++-------- .../network_backbone/ShapedResNetBackbone.py | 35 +++++++------------ .../setup/network_backbone/utils.py | 11 +++--- 3 files changed, 32 insertions(+), 43 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 2d553c743..806d6f40d 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,13 +139,14 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), - shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="shake_shake_method", - value_range=('shake-shake', - 'shake-even', - 'even-even', - 'M3'), - default_value='shake-shake', - ), + shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_alpha_beta_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_shake_drop", value_range=(True, False), default_value=True, @@ -187,12 +188,8 @@ def get_hyperparameter_search_space( if skip_connection_flag: - shake_shake_flag = False - shake_drop_prob_flag = False - if 'shake-drop' in multi_branch_choice.value_range: - shake_drop_prob_flag = True - if 'shake-shake' in multi_branch_choice.value_range: - shake_shake_flag = True + shake_shake_flag = 'shake-shake' in multi_branch_choice.value_range + shake_drop_prob_flag = 'shake-drop' in multi_branch_choice.value_range mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -202,10 +199,10 @@ def get_hyperparameter_search_space( shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) - if shake_shake_flag: - method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + if shake_shake_flag or shake_drop_prob_flag: + method = get_hyperparameter(shake_alpha_beta_method, CategoricalHyperparameter) cs.add_hyperparameter(method) - cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) + cs.add_condition(CS.InCondition(method, mb_choice, ["shake-shake", "shake-drop"])) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index 6f16bb3c0..12c6d4e74 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,19 +145,14 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), - # shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - # hyperparameter="shake_shake_method", - # value_range=('shake-shake', - # 'shake-even', - # 'even-even', - # 'M3'), - # default_value='shake-shake', - # ), - shake_shake_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - hyperparameter="shake_shake_method", - value_range=('even-even', ), - default_value='even-even', - ), + shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_alpha_beta_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), @@ -201,12 +196,8 @@ def get_hyperparameter_search_space( # type: ignore[override] if skip_connection_flag: - shake_shake_flag = False - shake_drop_prob_flag = False - if 'shake-drop' in multi_branch_choice.value_range: - shake_drop_prob_flag = True - if 'shake-shake' in multi_branch_choice.value_range: - shake_shake_flag = True + shake_shake_flag = 'shake-shake' in multi_branch_choice.value_range + shake_drop_prob_flag = 'shake-drop' in multi_branch_choice.value_range mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -216,9 +207,9 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) - if shake_shake_flag: - method = get_hyperparameter(shake_shake_method, CategoricalHyperparameter) + if shake_shake_flag or shake_drop_prob_flag: + method = get_hyperparameter(shake_alpha_beta_method, CategoricalHyperparameter) cs.add_hyperparameter(method) - cs.add_condition(CS.EqualsCondition(method, mb_choice, "shake-shake")) + cs.add_condition(CS.InCondition(method, mb_choice, ["shake-shake", "shake-drop"])) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 95a990eb2..9413c1bc3 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -103,13 +103,14 @@ def shake_get_alpha_beta( return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods - if method != 'even-even': - alpha = torch.rand(1) - else: + if method == 'even-even': alpha = torch.FloatTensor([0.5]) - if method in ['shake-shake', 'shake-drop']: + else: + alpha = torch.rand(1) + + if method == 'shake-shake': beta = torch.rand(1) - elif 'even' in method: # 'shake-even' or 'even-even' + elif method in ['shake-even', 'even-even']: beta = torch.FloatTensor([0.5]) elif method == 'M3': beta = torch.FloatTensor( From e9359da25ab94e9a7bb8386b90d5a2be118dabea Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 29 Sep 2021 17:17:59 +0200 Subject: [PATCH 22/26] fix bug in passing alpha beta method --- .../components/setup/network_backbone/ResNetBackbone.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 806d6f40d..10f509741 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -340,11 +340,12 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: x2 = self.shake_shake_layers(x) alpha, beta = shake_get_alpha_beta(is_training=self.training, is_cuda=x.is_cuda, - method=self.config['shake_shake_method']) + method=self.config['shake_alpha_beta_method']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, method='shake-drop') + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, + method=self.config['shake_alpha_beta_method']) bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], From 7f25e6f75d7b9b3449031ada86587a56da12e7a2 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 29 Sep 2021 17:38:01 +0200 Subject: [PATCH 23/26] restrict to only even even --- .../setup/network_backbone/ShapedResNetBackbone.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index 12c6d4e74..a4f2e7629 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,12 +145,17 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + # shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + # hyperparameter="shake_alpha_beta_method", + # value_range=('shake-shake', + # 'shake-even', + # 'even-even', + # 'M3'), + # default_value='shake-shake', + # ), shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="shake_alpha_beta_method", - value_range=('shake-shake', - 'shake-even', - 'even-even', - 'M3'), + value_range=('even-even',), default_value='shake-shake', ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( From dd5cb5b5a856152821afc779528038fff39bfe96 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 29 Sep 2021 17:46:05 +0200 Subject: [PATCH 24/26] fix silly bug: --- .../components/setup/network_backbone/ShapedResNetBackbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index a4f2e7629..e535a66bb 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -156,7 +156,7 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="shake_alpha_beta_method", value_range=('even-even',), - default_value='shake-shake', + default_value='even-even', ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", From 0bb84366f99456c3b00b281df49fd0c66bcba86e Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 30 Sep 2021 12:09:35 +0200 Subject: [PATCH 25/26] remove imputer and ordinal encoder for categorical transformer in feature validator --- autoPyTorch/data/tabular_feature_validator.py | 30 +++++++++---------- .../network_backbone/ShapedResNetBackbone.py | 20 ++++++------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 16185817b..28d64a4b1 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -41,26 +41,26 @@ def get_tabular_preprocessors(): preprocessors['numerical'] = list() preprocessors['categorical'] = list() + # preprocessors['categorical'].append(SimpleImputer(strategy='constant', + # # Train data is numpy + # # as of this point, where + # # Ordinal Encoding is using + # # for categorical. Only + # # Numbers are allowed + # # fill_value='!missing!', + # fill_value=-1, + # copy=False)) + + # preprocessors['categorical'].append(OrdinalEncoder( + # handle_unknown='use_encoded_value', + # unknown_value=-1)) + preprocessors['categorical'].append(OneHotEncoder( categories='auto', sparse=False, handle_unknown='ignore')) - preprocessors['categorical'].append(SimpleImputer(strategy='constant', - # Train data is numpy - # as of this point, where - # Ordinal Encoding is using - # for categorical. Only - # Numbers are allowed - # fill_value='!missing!', - fill_value=-1, - copy=False)) - - preprocessors['categorical'].append(OrdinalEncoder( - handle_unknown='use_encoded_value', - unknown_value=-1)) - preprocessors['numerical'].append(SimpleImputer(strategy='median', - copy=False)) + copy=False)) preprocessors['numerical'].append(StandardScaler(with_mean=True, with_std=True, copy=False)) return preprocessors diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index e535a66bb..b9ce18ede 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,19 +145,19 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), - # shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - # hyperparameter="shake_alpha_beta_method", - # value_range=('shake-shake', - # 'shake-even', - # 'even-even', - # 'M3'), - # default_value='shake-shake', - # ), shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="shake_alpha_beta_method", - value_range=('even-even',), - default_value='even-even', + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', ), + # shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + # hyperparameter="shake_alpha_beta_method", + # value_range=('even-even',), + # default_value='even-even', + # ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), From 89e595e608e951bf1d19846c78004df01eb4a5bd Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 30 Sep 2021 16:06:09 +0200 Subject: [PATCH 26/26] Address comments from shuhei --- .../setup/network_backbone/ShapedResNetBackbone.py | 5 ----- .../pipeline/components/setup/network_backbone/utils.py | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index b9ce18ede..12c6d4e74 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -153,11 +153,6 @@ def get_hyperparameter_search_space( # type: ignore[override] 'M3'), default_value='shake-shake', ), - # shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - # hyperparameter="shake_alpha_beta_method", - # value_range=('even-even',), - # default_value='even-even', - # ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 9413c1bc3..9a1f9dd4e 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -97,7 +97,10 @@ def shake_get_alpha_beta( is_cuda: bool, method: str ) -> typing.Tuple[torch.tensor, torch.tensor]: - + """ + The methods used in this function have been introduced in 'ShakeShake Regularisation' + https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. + """ if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda())