Skip to content

Commit 2ded7ae

Browse files
author
Github Actions
committed
Francisco Rivera Valverde: [ADD] Extra visualization example (#189)
1 parent 6b2b6cc commit 2ded7ae

File tree

154 files changed

+44560
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

154 files changed

+44560
-0
lines changed

development/.buildinfo

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Sphinx build info version 1
2+
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3+
config: da1291ead51b7998a2311fe24055da4c
4+
tags: 645f666f9bcd5a90fca523b33c5a78b7
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""
2+
======================
3+
Tabular Regression
4+
======================
5+
6+
The following example shows how to fit a sample regression model
7+
with AutoPyTorch
8+
"""
9+
import os
import tempfile as tmp
import warnings

# Configure the process environment BEFORE the scientific stack is imported.
# The thread-count variables are consulted by the OpenMP / OpenBLAS / MKL
# runtimes when they are loaded, which happens as a side effect of importing
# scikit-learn; setting them afterwards may leave this process unrestricted.
# This ordering also matches the other AutoPyTorch examples.
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

# Silence UserWarning / FutureWarning so the example output stays readable.
# Installed before the imports below so import-time warnings are covered too.
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_regression import TabularRegressionTask
25+
26+
27+
if __name__ == '__main__':

    ############################################################################
    # Data Loading
    # ============
    # Fetch the OpenML dataset named 'boston' as pandas objects and hold out a
    # test split with a fixed random state.
    X, y = sklearn.datasets.fetch_openml(
        name='boston',
        return_X_y=True,
        as_frame=True,
    )
    split = sklearn.model_selection.train_test_split(X, y, random_state=1)
    X_train, X_test, y_train, y_test = split

    # Standardise the regression targets to zero mean and unit variance.
    # This matters for neural networks because predicting large target values
    # would require very large weights. Only statistics of the training
    # targets are used, and the same shift/scale is applied to the test
    # targets. Predictions are mapped back further below with:
    #     y_pred = y_pred_scaled * std + mean
    target_mean, target_std = y_train.mean(), y_train.std()
    y_train_scaled = (y_train - target_mean) / target_std
    y_test_scaled = (y_test - target_mean) / target_std

    ############################################################################
    # Build and fit a regressor
    # ==========================
    task_options = dict(
        temporary_directory='./tmp/autoPyTorch_example_tmp_02',
        output_directory='./tmp/autoPyTorch_example_out_02',
        # To maintain logs of the run, set the next two as False
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True,
    )
    api = TabularRegressionTask(**task_options)

    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    # The search is given the scaled targets; copies of the test data are
    # passed in while the originals are reused for prediction/scoring below.
    search_options = dict(
        X_train=X_train,
        y_train=y_train_scaled,
        X_test=X_test.copy(),
        y_test=y_test_scaled.copy(),
        optimize_metric='r2',
        total_walltime_limit=300,
        func_eval_time_limit_secs=50,
        enable_traditional_pipeline=False,
    )
    api.search(**search_options)

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)

    # Undo the standardisation so the predictions are back in the original
    # target range before they are scored against the unscaled test targets.
    y_pred = api.predict(X_test) * target_std + target_mean
    score = api.score(y_pred, y_test)
    print(score)

    # Print the final ensemble built by AutoPyTorch
    print(api.show_models())
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# Tabular Classification with different resampling strategy\n\nThe following example shows how to fit a sample classification model\nwith different resampling strategies in AutoPyTorch\nBy default, AutoPyTorch uses Holdout Validation with\na 67% train size split.\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes\n\n\nif __name__ == '__main__':\n\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier with default resampling strategy\n # ===========================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_03',\n output_directory='./tmp/autoPyTorch_example_out_03',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33\n # is the default argument setting for TabularClassificationTask.\n # It is explicitly specified in this example for demonstrational\n # purpose.\n resampling_strategy=HoldoutValTypes.holdout_validation,\n resampling_strategy_args={'val_share': 0.33}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n 
X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit_secs=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())\n\n ############################################################################\n\n ############################################################################\n # Build and fit a classifier with Cross validation resampling strategy\n # ====================================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_04',\n output_directory='./tmp/autoPyTorch_example_out_04',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n resampling_strategy=CrossValTypes.k_fold_cross_validation,\n resampling_strategy_args={'num_splits': 3}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit_secs=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())\n\n 
############################################################################\n\n ############################################################################\n # Build and fit a classifier with Stratified resampling strategy\n # ==============================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_05',\n output_directory='./tmp/autoPyTorch_example_out_05',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n # For demonstration purposes, we use\n # Stratified hold out validation. However,\n # one can also use CrossValTypes.stratified_k_fold_cross_validation.\n resampling_strategy=HoldoutValTypes.stratified_holdout_validation,\n resampling_strategy_args={'val_share': 0.33}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit_secs=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())"
30+
]
31+
}
32+
],
33+
"metadata": {
34+
"kernelspec": {
35+
"display_name": "Python 3",
36+
"language": "python",
37+
"name": "python3"
38+
},
39+
"language_info": {
40+
"codemirror_mode": {
41+
"name": "ipython",
42+
"version": 3
43+
},
44+
"file_extension": ".py",
45+
"mimetype": "text/x-python",
46+
"name": "python",
47+
"nbconvert_exporter": "python",
48+
"pygments_lexer": "ipython3",
49+
"version": "3.8.9"
50+
}
51+
},
52+
"nbformat": 4,
53+
"nbformat_minor": 0
54+
}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
"""
2+
=========================================================
3+
Tabular Classification with different resampling strategy
4+
=========================================================
5+
6+
The following example shows how to fit a sample classification model
7+
with different resampling strategies in AutoPyTorch.
8+
By default, AutoPyTorch uses Holdout Validation with
9+
a 67% train size split.
10+
"""
11+
import os
import tempfile as tmp
import warnings

# Environment setup is done ahead of the scientific imports below.
# NOTE(review): presumably so that the single-thread limits are already in
# place when the numeric back ends get loaded -- confirm.
os.environ.update({
    'JOBLIB_TEMP_FOLDER': tmp.gettempdir(),
    'OMP_NUM_THREADS': '1',
    'OPENBLAS_NUM_THREADS': '1',
    'MKL_NUM_THREADS': '1',
})

# Ignore UserWarning and FutureWarning for the remainder of the script.
warnings.simplefilter('ignore', UserWarning)
warnings.simplefilter('ignore', FutureWarning)

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
28+
29+
30+
def _search_and_report(api, X_train, X_test, y_train, y_test):
    """Run the ensemble search on ``api`` and print its results.

    This is the sequence every resampling variant in this example shares:
    search with the accuracy metric, print the run history and trajectory,
    score the predictions on the test split and print the final ensemble.

    Args:
        api: The ``TabularClassificationTask`` to fit.
        X_train: Training features.
        X_test: Test features; a copy is handed to ``search`` and the
            original is used for ``predict``.
        y_train: Training labels.
        y_test: Test labels; a copy is handed to ``search`` and the original
            is used for ``score``.

    Returns:
        Whatever ``api.score`` returns for the test-split predictions.
    """
    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    api.search(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test.copy(),
        y_test=y_test.copy(),
        optimize_metric='accuracy',
        total_walltime_limit=150,
        func_eval_time_limit_secs=30
    )

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)
    y_pred = api.predict(X_test)
    score = api.score(y_pred, y_test)
    print(score)
    # Print the final ensemble built by AutoPyTorch
    print(api.show_models())
    return score


if __name__ == '__main__':

    ############################################################################
    # Data Loading
    # ============
    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X,
        y,
        random_state=1,
    )

    ############################################################################
    # Build and fit a classifier with default resampling strategy
    # ===========================================================
    api = TabularClassificationTask(
        temporary_directory='./tmp/autoPyTorch_example_tmp_03',
        output_directory='./tmp/autoPyTorch_example_out_03',
        # To maintain logs of the run, set the next two as False
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True,
        # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33
        # is the default argument setting for TabularClassificationTask.
        # It is explicitly specified in this example for demonstrational
        # purpose.
        resampling_strategy=HoldoutValTypes.holdout_validation,
        resampling_strategy_args={'val_share': 0.33}
    )
    _search_and_report(api, X_train, X_test, y_train, y_test)

    ############################################################################
    # Build and fit a classifier with Cross validation resampling strategy
    # ====================================================================
    api = TabularClassificationTask(
        temporary_directory='./tmp/autoPyTorch_example_tmp_04',
        output_directory='./tmp/autoPyTorch_example_out_04',
        # To maintain logs of the run, set the next two as False
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True,
        resampling_strategy=CrossValTypes.k_fold_cross_validation,
        resampling_strategy_args={'num_splits': 3}
    )
    _search_and_report(api, X_train, X_test, y_train, y_test)

    ############################################################################
    # Build and fit a classifier with Stratified resampling strategy
    # ==============================================================
    api = TabularClassificationTask(
        temporary_directory='./tmp/autoPyTorch_example_tmp_05',
        output_directory='./tmp/autoPyTorch_example_out_05',
        # To maintain logs of the run, set the next two as False
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True,
        # For demonstration purposes, we use
        # Stratified hold out validation. However,
        # one can also use CrossValTypes.stratified_k_fold_cross_validation.
        resampling_strategy=HoldoutValTypes.stratified_holdout_validation,
        resampling_strategy_args={'val_share': 0.33}
    )
    _search_and_report(api, X_train, X_test, y_train, y_test)
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
======================
3+
Tabular Classification
4+
======================
5+
6+
The following example shows how to fit a sample classification model
7+
with AutoPyTorch
8+
"""
9+
import os
import tempfile as tmp
import warnings

# Environment setup is done ahead of the scientific imports below.
# NOTE(review): presumably so that the single-thread limits are already in
# place when the numeric back ends get loaded -- confirm.
os.environ.update({
    'JOBLIB_TEMP_FOLDER': tmp.gettempdir(),
    'OMP_NUM_THREADS': '1',
    'OPENBLAS_NUM_THREADS': '1',
    'MKL_NUM_THREADS': '1',
})

# Ignore UserWarning and FutureWarning for the remainder of the script.
warnings.simplefilter('ignore', UserWarning)
warnings.simplefilter('ignore', FutureWarning)

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask
25+
26+
27+
if __name__ == '__main__':

    ############################################################################
    # Data Loading
    # ============
    # Download OpenML dataset 40981 as pandas objects, then hold out a test
    # split with a fixed random state.
    X, y = sklearn.datasets.fetch_openml(
        data_id=40981,
        return_X_y=True,
        as_frame=True,
    )
    split = sklearn.model_selection.train_test_split(X, y, random_state=42)
    X_train, X_test, y_train, y_test = split

    ############################################################################
    # Build and fit a classifier
    # ==========================
    task_options = {
        'temporary_directory': './tmp/autoPyTorch_example_tmp_01',
        'output_directory': './tmp/autoPyTorch_example_out_01',
        # To maintain logs of the run, set the next two as False
        'delete_tmp_folder_after_terminate': True,
        'delete_output_folder_after_terminate': True,
        'seed': 42,
    }
    api = TabularClassificationTask(**task_options)

    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    # Copies of the test data are passed to the search; the original frames
    # are reused for prediction and scoring below.
    search_options = {
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test.copy(),
        'y_test': y_test.copy(),
        'optimize_metric': 'accuracy',
        'total_walltime_limit': 300,
        'func_eval_time_limit_secs': 50,
    }
    api.search(**search_options)

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)

    y_pred = api.predict(X_test)
    score = api.score(y_pred, y_test)
    print(score)

    # Print the final ensemble built by AutoPyTorch
    print(api.show_models())

0 commit comments

Comments
 (0)