From 19a5cf77014ff5a729f49dc80611598852dd0396 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 8 Aug 2022 18:16:48 +0200 Subject: [PATCH 1/3] fix update of pipeline config options in fit pipeline --- autoPyTorch/api/base_task.py | 2 +- test/test_api/test_api.py | 56 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index c5468eae7..12d451ca0 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1632,7 +1632,7 @@ def fit_pipeline( names=[eval_metric] if eval_metric is not None else None, all_supported_metrics=False).pop() - pipeline_options = self.pipeline_options.copy().update(pipeline_options) if pipeline_options is not None \ + pipeline_options = {**self.pipeline_options, **pipeline_options} if pipeline_options is not None \ else self.pipeline_options.copy() assert pipeline_options is not None diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 465d74c6b..6a9d9d6ba 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -932,6 +932,62 @@ def test_pipeline_fit(openml_id, assert not os.path.exists(cv_model_path) +@pytest.mark.parametrize('openml_id', (40984,)) +@pytest.mark.parametrize("budget", [1]) +def test_pipeline_fit_pass_pipeline_options( + openml_id, + backend, + budget, + n_samples): + # Get the data and check that contents of data-manager make sense + X, y = sklearn.datasets.fetch_openml( + data_id=int(openml_id), + return_X_y=True, as_frame=True + ) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X[:n_samples], y[:n_samples], random_state=1) + + # Search for a good configuration + estimator = TabularClassificationTask( + backend=backend, + ensemble_size=0 + ) + + dataset = estimator.get_dataset(X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test) + + configuration = estimator.get_search_space(dataset).get_default_configuration() + pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset, + configuration=configuration, + run_time_limit_secs=50, + budget_type='epochs', + budget=budget, + pipeline_options={'early_stopping': 100} + ) + assert isinstance(dataset, BaseDataset) + assert isinstance(run_info, RunInfo) + assert isinstance(run_info.config, Configuration) + + assert isinstance(run_value, RunValue) + assert 'SUCCESS' in str(run_value.status) + + # Make sure that the pipeline can be pickled + dump_file = os.path.join(tempfile.gettempdir(), 'automl.dump.pkl') + with open(dump_file, 'wb') as f: + pickle.dump(pipeline, f) + + num_run_dir = estimator._backend.get_numrun_directory( + run_info.seed, run_value.additional_info['num_run'], budget=float(budget)) + model_path = os.path.join(num_run_dir, estimator._backend.get_model_filename( + run_info.seed, run_value.additional_info['num_run'], budget=float(budget))) + + # We expect the model path always + # And the cv model only on 'cv' + assert os.path.exists(model_path) + + @pytest.mark.parametrize('openml_id', (40984,)) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, {'val_share': 0.8}), From 09bf3c69c2fb81bd4f3ed2d0bf11ce8a85e87e3b Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 8 Aug 2022 18:18:04 +0200 Subject: [PATCH 2/3] fix flake and test --- test/test_api/test_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 6a9d9d6ba..118bda718 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -938,7 +938,8 @@ def test_pipeline_fit_pass_pipeline_options( openml_id, backend, budget, - n_samples): + n_samples +): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( data_id=int(openml_id), From 08bddb68f56859fab842b7aeb5c5e12269729510 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Fri, 12 Aug 2022 13:27:12 +0200 Subject: [PATCH 3/3] suggestions from review --- test/test_api/test_api.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 118bda718..d95f3943f 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -932,8 +932,7 @@ def test_pipeline_fit(openml_id, assert not os.path.exists(cv_model_path) -@pytest.mark.parametrize('openml_id', (40984,)) -@pytest.mark.parametrize("budget", [1]) +@pytest.mark.parametrize('openml_id,budget', [(40984, 1)]) def test_pipeline_fit_pass_pipeline_options( openml_id, backend, @@ -985,7 +984,6 @@ def test_pipeline_fit_pass_pipeline_options( run_info.seed, run_value.additional_info['num_run'], budget=float(budget))) # We expect the model path always - # And the cv model only on 'cv' assert os.path.exists(model_path)