Skip to content

Commit 4f1dc75

Browse files
author
Github Actions
committed
Ravin Kohli: Adds more examples to customise AutoPyTorch. (#124)
1 parent 03e19b9 commit 4f1dc75

File tree

53 files changed

+3664
-31551
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+3664
-31551
lines changed

refactor_development/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: d4f3d04cddab6e3e314b10c7fdfafae2
3+
config: fab3c6c6521ed874dac2c35d9201a857
44
tags: 645f666f9bcd5a90fca523b33c5a78b7

refactor_development/_downloads/0baaec1666f007b22da0886cb1b9e240/example_tabular_regression.py renamed to refactor_development/_downloads/000ffe6d9d5014b4165debb6cbf446f8/example_tabular_regression.py

Lines changed: 18 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,15 @@
33
Tabular Regression
44
======================
55
6-
The following example shows how to fit a sample classification model
6+
The following example shows how to fit a sample regression model
77
with AutoPyTorch
88
"""
99
import os
1010
import tempfile as tmp
11-
import typing
1211
import warnings
1312

14-
from sklearn.datasets import make_regression
15-
16-
from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator
13+
import sklearn.datasets
14+
import sklearn.model_selection
1715

1816
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
1917
os.environ['OMP_NUM_THREADS'] = '1'
@@ -23,54 +21,16 @@
2321
warnings.simplefilter(action='ignore', category=UserWarning)
2422
warnings.simplefilter(action='ignore', category=FutureWarning)
2523

26-
from sklearn import model_selection, preprocessing
27-
2824
from autoPyTorch.api.tabular_regression import TabularRegressionTask
29-
from autoPyTorch.datasets.tabular_dataset import TabularDataset
30-
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
31-
32-
33-
def get_search_space_updates():
34-
"""
35-
Search space updates to the task can be added using HyperparameterSearchSpaceUpdates
36-
Returns:
37-
HyperparameterSearchSpaceUpdates
38-
"""
39-
updates = HyperparameterSearchSpaceUpdates()
40-
updates.append(node_name="data_loader",
41-
hyperparameter="batch_size",
42-
value_range=[16, 512],
43-
default_value=32)
44-
updates.append(node_name="lr_scheduler",
45-
hyperparameter="CosineAnnealingLR:T_max",
46-
value_range=[50, 60],
47-
default_value=55)
48-
updates.append(node_name='network_backbone',
49-
hyperparameter='ResNetBackbone:dropout',
50-
value_range=[0, 0.5],
51-
default_value=0.2)
52-
return updates
5325

5426

5527
if __name__ == '__main__':
28+
5629
############################################################################
5730
# Data Loading
5831
# ============
59-
60-
# Get the training data for tabular regression
61-
# X, y = datasets.fetch_openml(name="cholesterol", return_X_y=True)
62-
63-
# Use dummy data for now since there are problems with categorical columns
64-
X, y = make_regression(
65-
n_samples=5000,
66-
n_features=4,
67-
n_informative=3,
68-
n_targets=1,
69-
shuffle=True,
70-
random_state=0
71-
)
72-
73-
X_train, X_test, y_train, y_test = model_selection.train_test_split(
32+
X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)
33+
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
7434
X,
7535
y,
7636
random_state=1,
@@ -89,16 +49,23 @@ def get_search_space_updates():
8949
# Build and fit a regressor
9050
# ==========================
9151
api = TabularRegressionTask(
92-
delete_tmp_folder_after_terminate=False,
93-
search_space_updates=get_search_space_updates()
52+
temporary_directory='./tmp/autoPyTorch_example_tmp_02',
53+
output_directory='./tmp/autoPyTorch_example_out_02',
54+
# To maintain logs of the run, set the next two as False
55+
delete_tmp_folder_after_terminate=True,
56+
delete_output_folder_after_terminate=True
9457
)
58+
59+
############################################################################
60+
# Search for an ensemble of machine learning algorithms
61+
# =====================================================
9562
api.search(
9663
X_train=X_train,
9764
y_train=y_train_scaled,
9865
X_test=X_test.copy(),
9966
y_test=y_test_scaled.copy(),
10067
optimize_metric='r2',
101-
total_walltime_limit=500,
68+
total_walltime_limit=300,
10269
func_eval_time_limit=50,
10370
traditional_per_total_budget=0
10471
)
@@ -114,3 +81,5 @@ def get_search_space_updates():
11481
score = api.score(y_pred, y_test)
11582

11683
print(score)
84+
# Print the final ensemble built by AutoPyTorch
85+
print(api.show_models())

refactor_development/_downloads/306036486863b5329c4111d8adbaac63/example_tabular_regression.ipynb

Lines changed: 0 additions & 54 deletions
This file was deleted.
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# Tabular Classification with different resampling strategy\n\nThe following example shows how to fit a sample classification model\nwith different resampling strategies in AutoPyTorch\nBy default, AutoPyTorch uses Holdout Validation with\na 67% train size split.\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes\n\n\nif __name__ == '__main__':\n\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier with default resampling strategy\n # ===========================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_03',\n output_directory='./tmp/autoPyTorch_example_out_03',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33\n # is the default argument setting for TabularClassificationTask.\n # It is explicitly specified in this example for demonstrational\n # purpose.\n resampling_strategy=HoldoutValTypes.holdout_validation,\n resampling_strategy_args={'val_share': 0.33}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n 
X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())\n\n ############################################################################\n\n ############################################################################\n # Build and fit a classifier with Cross validation resampling strategy\n # ====================================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_04',\n output_directory='./tmp/autoPyTorch_example_out_04',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n resampling_strategy=CrossValTypes.k_fold_cross_validation,\n resampling_strategy_args={'num_splits': 3}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())\n\n 
############################################################################\n\n ############################################################################\n # Build and fit a classifier with Stratified resampling strategy\n # ==============================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_05',\n output_directory='./tmp/autoPyTorch_example_out_05',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n # For demonstration purposes, we use\n # Stratified hold out validation. However,\n # one can also use CrossValTypes.stratified_k_fold_cross_validation.\n resampling_strategy=HoldoutValTypes.stratified_holdout_validation,\n resampling_strategy_args={'val_share': 0.33}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())"
30+
]
31+
}
32+
],
33+
"metadata": {
34+
"kernelspec": {
35+
"display_name": "Python 3",
36+
"language": "python",
37+
"name": "python3"
38+
},
39+
"language_info": {
40+
"codemirror_mode": {
41+
"name": "ipython",
42+
"version": 3
43+
},
44+
"file_extension": ".py",
45+
"mimetype": "text/x-python",
46+
"name": "python",
47+
"nbconvert_exporter": "python",
48+
"pygments_lexer": "ipython3",
49+
"version": "3.8.8"
50+
}
51+
},
52+
"nbformat": 4,
53+
"nbformat_minor": 0
54+
}

refactor_development/_downloads/3a985c2d5cf88bfc51ae65d16b30f86c/example_image_classification.py

Lines changed: 0 additions & 54 deletions
This file was deleted.

0 commit comments

Comments
 (0)