Skip to content

Commit fe13421

Browse files
author
Github Actions
committed
Francisco Rivera Valverde: [ADD] Forkserver as default multiprocessing strategy (#223)
1 parent ba3571a commit fe13421

File tree

42 files changed

+3272
-1754
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+3272
-1754
lines changed

development/_downloads/307f532dbef0476f85afc6b64b65f087/example_resampling_strategy.py

Lines changed: 113 additions & 115 deletions
Original file line number | Diff line number | Diff line change
@@ -27,118 +27,116 @@
27 27
from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
28 28

29 29

30-
if __name__ == '__main__':
31-
32-
############################################################################
33-
# Data Loading
34-
# ============
35-
X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
36-
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
37-
X,
38-
y,
39-
random_state=1,
40-
)
41-
42-
############################################################################
43-
# Build and fit a classifier with default resampling strategy
44-
# ===========================================================
45-
api = TabularClassificationTask(
46-
# 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33
47-
# is the default argument setting for TabularClassificationTask.
48-
# It is explicitly specified in this example for demonstrational
49-
# purpose.
50-
resampling_strategy=HoldoutValTypes.holdout_validation,
51-
resampling_strategy_args={'val_share': 0.33}
52-
)
53-
54-
############################################################################
55-
# Search for an ensemble of machine learning algorithms
56-
# =====================================================
57-
api.search(
58-
X_train=X_train,
59-
y_train=y_train,
60-
X_test=X_test.copy(),
61-
y_test=y_test.copy(),
62-
optimize_metric='accuracy',
63-
total_walltime_limit=150,
64-
func_eval_time_limit_secs=30
65-
)
66-
67-
############################################################################
68-
# Print the final ensemble performance
69-
# ====================================
70-
print(api.run_history, api.trajectory)
71-
y_pred = api.predict(X_test)
72-
score = api.score(y_pred, y_test)
73-
print(score)
74-
# Print the final ensemble built by AutoPyTorch
75-
print(api.show_models())
76-
77-
############################################################################
78-
79-
############################################################################
80-
# Build and fit a classifier with Cross validation resampling strategy
81-
# ====================================================================
82-
api = TabularClassificationTask(
83-
resampling_strategy=CrossValTypes.k_fold_cross_validation,
84-
resampling_strategy_args={'num_splits': 3}
85-
)
86-
87-
############################################################################
88-
# Search for an ensemble of machine learning algorithms
89-
# =====================================================
90-
api.search(
91-
X_train=X_train,
92-
y_train=y_train,
93-
X_test=X_test.copy(),
94-
y_test=y_test.copy(),
95-
optimize_metric='accuracy',
96-
total_walltime_limit=150,
97-
func_eval_time_limit_secs=30
98-
)
99-
100-
############################################################################
101-
# Print the final ensemble performance
102-
# ====================================
103-
print(api.run_history, api.trajectory)
104-
y_pred = api.predict(X_test)
105-
score = api.score(y_pred, y_test)
106-
print(score)
107-
# Print the final ensemble built by AutoPyTorch
108-
print(api.show_models())
109-
110-
############################################################################
111-
112-
############################################################################
113-
# Build and fit a classifier with Stratified resampling strategy
114-
# ==============================================================
115-
api = TabularClassificationTask(
116-
# For demonstration purposes, we use
117-
# Stratified hold out validation. However,
118-
# one can also use CrossValTypes.stratified_k_fold_cross_validation.
119-
resampling_strategy=HoldoutValTypes.stratified_holdout_validation,
120-
resampling_strategy_args={'val_share': 0.33}
121-
)
122-
123-
############################################################################
124-
# Search for an ensemble of machine learning algorithms
125-
# =====================================================
126-
api.search(
127-
X_train=X_train,
128-
y_train=y_train,
129-
X_test=X_test.copy(),
130-
y_test=y_test.copy(),
131-
optimize_metric='accuracy',
132-
total_walltime_limit=150,
133-
func_eval_time_limit_secs=30
134-
)
135-
136-
############################################################################
137-
# Print the final ensemble performance
138-
# ====================================
139-
print(api.run_history, api.trajectory)
140-
y_pred = api.predict(X_test)
141-
score = api.score(y_pred, y_test)
142-
print(score)
143-
# Print the final ensemble built by AutoPyTorch
144-
print(api.show_models())
30+
############################################################################
31+
# Data Loading
32+
# ============
33+
X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
34+
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
35+
X,
36+
y,
37+
random_state=1,
38+
)
39+
40+
############################################################################
41+
# Build and fit a classifier with default resampling strategy
42+
# ===========================================================
43+
api = TabularClassificationTask(
44+
# 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33
45+
# is the default argument setting for TabularClassificationTask.
46+
# It is explicitly specified in this example for demonstrational
47+
# purpose.
48+
resampling_strategy=HoldoutValTypes.holdout_validation,
49+
resampling_strategy_args={'val_share': 0.33}
50+
)
51+
52+
############################################################################
53+
# Search for an ensemble of machine learning algorithms
54+
# =====================================================
55+
api.search(
56+
X_train=X_train,
57+
y_train=y_train,
58+
X_test=X_test.copy(),
59+
y_test=y_test.copy(),
60+
optimize_metric='accuracy',
61+
total_walltime_limit=150,
62+
func_eval_time_limit_secs=30
63+
)
64+
65+
############################################################################
66+
# Print the final ensemble performance
67+
# ====================================
68+
print(api.run_history, api.trajectory)
69+
y_pred = api.predict(X_test)
70+
score = api.score(y_pred, y_test)
71+
print(score)
72+
# Print the final ensemble built by AutoPyTorch
73+
print(api.show_models())
74+
75+
############################################################################
76+
77+
############################################################################
78+
# Build and fit a classifier with Cross validation resampling strategy
79+
# ====================================================================
80+
api = TabularClassificationTask(
81+
resampling_strategy=CrossValTypes.k_fold_cross_validation,
82+
resampling_strategy_args={'num_splits': 3}
83+
)
84+
85+
############################################################################
86+
# Search for an ensemble of machine learning algorithms
87+
# =====================================================
88+
api.search(
89+
X_train=X_train,
90+
y_train=y_train,
91+
X_test=X_test.copy(),
92+
y_test=y_test.copy(),
93+
optimize_metric='accuracy',
94+
total_walltime_limit=150,
95+
func_eval_time_limit_secs=30
96+
)
97+
98+
############################################################################
99+
# Print the final ensemble performance
100+
# ====================================
101+
print(api.run_history, api.trajectory)
102+
y_pred = api.predict(X_test)
103+
score = api.score(y_pred, y_test)
104+
print(score)
105+
# Print the final ensemble built by AutoPyTorch
106+
print(api.show_models())
107+
108+
############################################################################
109+
110+
############################################################################
111+
# Build and fit a classifier with Stratified resampling strategy
112+
# ==============================================================
113+
api = TabularClassificationTask(
114+
# For demonstration purposes, we use
115+
# Stratified hold out validation. However,
116+
# one can also use CrossValTypes.stratified_k_fold_cross_validation.
117+
resampling_strategy=HoldoutValTypes.stratified_holdout_validation,
118+
resampling_strategy_args={'val_share': 0.33}
119+
)
120+
121+
############################################################################
122+
# Search for an ensemble of machine learning algorithms
123+
# =====================================================
124+
api.search(
125+
X_train=X_train,
126+
y_train=y_train,
127+
X_test=X_test.copy(),
128+
y_test=y_test.copy(),
129+
optimize_metric='accuracy',
130+
total_walltime_limit=150,
131+
func_eval_time_limit_secs=30
132+
)
133+
134+
############################################################################
135+
# Print the final ensemble performance
136+
# ====================================
137+
print(api.run_history, api.trajectory)
138+
y_pred = api.predict(X_test)
139+
score = api.score(y_pred, y_test)
140+
print(score)
141+
# Print the final ensemble built by AutoPyTorch
142+
print(api.show_models())

development/_downloads/38ebc52de63d1626596d1647c695c721/example_tabular_regression.ipynb

Lines changed: 73 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,79 @@
26 26
},
27 27
"outputs": [],
28 28
"source": [
29-
"import os\nimport tempfile as tmp\nimport warnings\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\n\n\nif __name__ == '__main__':\n\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n # Scale the regression targets to have zero mean and unit variance.\n # This is important for Neural Networks since predicting large target values would require very large weights.\n # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean\n y_train_mean = y_train.mean()\n y_train_std = y_train.std()\n\n y_train_scaled = (y_train - y_train_mean) / y_train_std\n y_test_scaled = (y_test - y_train_mean) / y_train_std\n\n ############################################################################\n # Build and fit a regressor\n # ==========================\n api = TabularRegressionTask()\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train_scaled,\n X_test=X_test.copy(),\n y_test=y_test_scaled.copy(),\n optimize_metric='r2',\n total_walltime_limit=300,\n func_eval_time_limit_secs=50,\n enable_traditional_pipeline=False,\n )\n\n ############################################################################\n # 
Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred_scaled = api.predict(X_test)\n\n # Rescale the Neural Network predictions into the original target range\n y_pred = y_pred_scaled * y_train_std + y_train_mean\n score = api.score(y_pred, y_test)\n\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())"
29+
"import os\nimport tempfile as tmp\nimport warnings\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"## Data Loading\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n)"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"## Build and fit a regressor\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"api = TabularRegressionTask()"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"## Search for an ensemble of machine learning algorithms\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='r2',\n total_walltime_limit=300,\n func_eval_time_limit_secs=50,\n enable_traditional_pipeline=False,\n)"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"## Print the final ensemble performance\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"print(api.run_history, api.trajectory)\ny_pred = api.predict(X_test)\n\n# Rescale the Neural Network predictions into the original target range\nscore = api.score(y_pred, y_test)\n\nprint(score)\n# Print the final ensemble built by AutoPyTorch\nprint(api.show_models())"
30 102
]
31 103
}
32 104
],

0 commit comments

Comments
 (0)