import copy
import logging.handlers
import os
import random
import tempfile
import time

import numpy as np

import openml

import pytest

import sklearn.datasets
import sklearn.model_selection

import torch

from autoPyTorch.automl_common.common.utils.backend import create
from autoPyTorch.data.tabular_validator import TabularInputValidator
from autoPyTorch.datasets.resampling_strategy import (
    CrossValTypes,
    HoldoutValTypes,
)
from autoPyTorch.datasets.tabular_dataset import TabularDataset
from autoPyTorch.optimizer.utils import read_return_initial_configurations
from autoPyTorch.pipeline.components.training.metrics.metrics import (
    accuracy,
    balanced_accuracy,
    roc_auc,
)
from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
from autoPyTorch.utils.pipeline import get_dataset_requirements


def get_backend_dirs_for_openml_task(openml_task_id):
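    """Create unique temporary and output directory paths for an OpenML task.

    The time.time() suffix keeps repeated runs of the same task from
    colliding in the system temp directory.
    """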
    temporary_directory = os.path.join(tempfile.gettempdir(), f"tmp_{openml_task_id}_{time.time()}")
    output_directory = os.path.join(tempfile.gettempdir(), f"out_{openml_task_id}_{time.time()}")
    return temporary_directory, output_directory


def get_fit_dictionary(openml_task_id):
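    """Build the fit dictionary that TabularClassificationPipeline.fit expects.

    Downloads the OpenML task, splits the data, creates a backend and a
    TabularDataset, and collects the training settings into one dictionary.
    """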
    # Make sure everything from here onwards is reproducible.
    # CUDA is seeded as well so that future GPU tests behave the same way.
    seed = 42
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)

    task = openml.tasks.get_task(openml_task_id)
    temporary_directory, output_directory = get_backend_dirs_for_openml_task(openml_task_id)
    backend = create(
        temporary_directory=temporary_directory,
        output_directory=output_directory,
        delete_tmp_folder_after_terminate=False,
        delete_output_folder_after_terminate=False,
        prefix='autoPyTorch'
    )
    X, y = sklearn.datasets.fetch_openml(data_id=task.dataset_id, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, random_state=seed)
    validator = TabularInputValidator(
        is_classification='classification' in task.task_type.lower()).fit(X.copy(), y.copy())
    resampling_strategy = (
        CrossValTypes.stratified_k_fold_cross_validation
        if 'cross' in str(task.estimation_procedure)
        else HoldoutValTypes.holdout_validation
    )
    datamanager = TabularDataset(
        dataset_name=openml.datasets.get_dataset(task.dataset_id, download_data=False).name,
        X=X_train, Y=y_train,
        validator=validator,
        X_test=X_test, Y_test=y_test,
        resampling_strategy=resampling_strategy,
    )

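    # Derive the dataset requirements/properties and assemble everything the
    # pipeline needs to fit: data tensors, split indices and training settings.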
    info = datamanager.get_required_dataset_info()

    dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info))
    fit_dictionary = {
        'X_train': datamanager.train_tensors[0],
        'y_train': datamanager.train_tensors[1],
        'train_indices': datamanager.splits[0][0],
        'val_indices': datamanager.splits[0][1],
        'dataset_properties': dataset_properties,
        'num_run': openml_task_id,
        'device': 'cpu',
        'budget_type': 'epochs',
        'epochs': 200,
        'torch_num_threads': 1,
        'early_stopping': 100,
        'working_dir': '/tmp',
        'use_tensorboard_logger': False,
        'metrics_during_training': True,
        'split_id': 0,
        'backend': backend,
        'logger_port': logging.handlers.DEFAULT_TCP_LOGGING_PORT,
    }
    backend.save_datamanager(datamanager)
    return fit_dictionary


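# Each case is (OpenML task id, portfolio index or 'default', metric,
# lower bound that the validation score must reach).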
@pytest.mark.parametrize(
    'openml_task_id,configuration,scorer,lower_bound_score',
    (
        # Australian
        (146818, 0, balanced_accuracy, 0.85),
        (146818, 1, roc_auc, 0.90),
        (146818, 2, balanced_accuracy, 0.80),
        (146818, 3, balanced_accuracy, 0.85),
        # credit-g
        (31, 0, accuracy, 0.75),
        (31, 1, accuracy, 0.75),
        (31, 2, accuracy, 0.75),
        (31, 3, accuracy, 0.70),
        (31, 4, accuracy, 0.70),
        # segment
        (146822, 'default', accuracy, 0.90),
        # kr-vs-kp
        (3, 'default', accuracy, 0.90),
        # vehicle
        (53, 'default', accuracy, 0.75),
    ),
)
def test_can_properly_fit_a_config(openml_task_id, configuration, scorer, lower_bound_score):
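    """Fit one configuration (from the greedy portfolio or the default) and
    check determinism, a validation-score lower bound, training progress and
    non-zero layer outputs.
    """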

    fit_dictionary = get_fit_dictionary(openml_task_id)
    fit_dictionary['additional_metrics'] = [scorer.name]
    fit_dictionary['optimize_metric'] = scorer.name

    pipeline = TabularClassificationPipeline(
        dataset_properties=fit_dictionary['dataset_properties'])
    cs = pipeline.get_hyperparameter_search_space()
    if configuration == 'default':
        config = cs.get_default_configuration()
    else:
        # Otherwise `configuration` is an index into the greedy portfolio
        config = read_return_initial_configurations(
            config_space=cs,
            portfolio_selection="greedy",
        )[configuration]
    pipeline.set_hyperparameters(config)
    pipeline.fit(copy.deepcopy(fit_dictionary))

    # First we make sure performance is deterministic.
    # As we use the validation performance for early stopping, this is
    # not the true generalization performance, but the goal is to verify
    # that we can learn the data and catch bad configurations.

    # Sadly, when using batch norm, the results depend on the current
    # torch manual seed. Set the seed to zero here to make this test reproducible.
    torch.manual_seed(0)
    val_indices = fit_dictionary['val_indices']
    train_data, target_data = fit_dictionary['backend'].load_datamanager().train_tensors
    predictions = pipeline.predict(train_data[val_indices])
    score = scorer(fit_dictionary['y_train'][val_indices], predictions)
    assert pytest.approx(score) >= lower_bound_score

    # Check that we reverted to the best score
    run_summary = pipeline.named_steps['trainer'].run_summary

    # Then check that training progressed nicely.
    # We fit a line to the per-epoch training scores to get the overall tendency:
    # some epochs might be bad, but overall we should make progress.
    train_scores = [run_summary.performance_tracker['train_metrics'][e][scorer.name]
                    for e in range(1, len(run_summary.performance_tracker['train_metrics']) + 1)]
    slope, intercept = np.polyfit(np.arange(len(train_scores)), train_scores, 1)
    if scorer._sign > 0:
        # We expect an increasing training trajectory
        assert train_scores[0] < train_scores[-1]
        assert slope > 0
    else:
        # We expect a decreasing training trajectory
        assert train_scores[0] > train_scores[-1]
        assert slope < 0

    # We do not expect the network to output zeros during training.
    # This check guards against a dropout bug we had, where the dropout
    # probability was a bool instead of a float.
    network = pipeline.named_steps['network'].network
    network.train()
    global_accumulator = {}

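    # Record the mean activation of every module so that all-zero outputs
    # can be detected after a forward pass.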
    def forward_hook(module, X_in, X_out):
        global_accumulator[f"{id(module)}_{module.__class__.__name__}"] = torch.mean(X_out)

    for _, module in network.named_modules():
        module.register_forward_hook(forward_hook)
    pipeline.predict(train_data[val_indices])
    for module_name, mean_tensor in global_accumulator.items():
        # The global accumulator holds the mean output of each layer of the
        # network. If the output of any layer is all zeros, this check fails.
        assert mean_tensor != 0, module_name