automl
diff --git a/‎development/.buildinfo
Lines changed: 4 additions & 0 deletions b/‎development/.buildinfo
Lines changed: 4 additions & 0 deletions
diff --git a/‎development/_downloads/000ffe6d9d5014b4165debb6cbf446f8/example_tabular_regression.py
Lines changed: 85 additions & 0 deletions b/‎development/_downloads/000ffe6d9d5014b4165debb6cbf446f8/example_tabular_regression.py
Lines changed: 85 additions & 0 deletions
diff --git a/‎development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip
7.44 KB b/‎development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip
7.44 KB
diff --git a/‎development/_downloads/342871cbb8ddcf6157ab171f9b9eab25/example_resampling_strategy.ipynb
Lines changed: 54 additions & 0 deletions b/‎development/_downloads/342871cbb8ddcf6157ab171f9b9eab25/example_resampling_strategy.ipynb
Lines changed: 54 additions & 0 deletions
diff --git a/‎development/_downloads/62e4ad0741dcbe663bc5f60c078c950f/example_resampling_strategy.py
Lines changed: 159 additions & 0 deletions b/‎development/_downloads/62e4ad0741dcbe663bc5f60c078c950f/example_resampling_strategy.py
Lines changed: 159 additions & 0 deletions
diff --git a/‎development/_downloads/83c92a475e127571aad1d77baa11248d/example_tabular_classification.py
Lines changed: 72 additions & 0 deletions b/‎development/_downloads/83c92a475e127571aad1d77baa11248d/example_tabular_classification.py
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: da1291ead51b7998a2311fe24055da4c
+tags: 645f666f9bcd5a90fca523b33c5a78b7
@@ -0,0 +1,85 @@
+"""
+======================
+Tabular Regression
+======================
+
+The following example shows how to fit a sample regression model
+with AutoPyTorch
+"""
+import os
+import tempfile as tmp
+import warnings
+
+import sklearn.datasets
+import sklearn.model_selection
+
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+from autoPyTorch.api.tabular_regression import TabularRegressionTask
+
+
+if __name__ == '__main__':
+
+    ############################################################################
+    # Data Loading
+    # ============
+    # Fetch the OpenML dataset named 'boston' as pandas objects and hold out a
+    # test split; `random_state=1` keeps the split reproducible across runs.
+    X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=1,
+    )
+
+    # Scale the regression targets to have zero mean and unit variance.
+    # This is important for Neural Networks since predicting large target values would require very large weights.
+    # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean
+    y_train_mean = y_train.mean()
+    y_train_std = y_train.std()
+
+    # Both splits are standardised with the *training* statistics, so nothing
+    # about the test targets leaks into the scaling.
+    y_train_scaled = (y_train - y_train_mean) / y_train_std
+    y_test_scaled = (y_test - y_train_mean) / y_train_std
+
+    ############################################################################
+    # Build and fit a regressor
+    # ==========================
+    api = TabularRegressionTask(
+        temporary_directory='./tmp/autoPyTorch_example_tmp_02',
+        output_directory='./tmp/autoPyTorch_example_out_02',
+        # To keep the logs of the run, set the next two arguments to False
+        delete_tmp_folder_after_terminate=True,
+        delete_output_folder_after_terminate=True
+    )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
+    api.search(
+        X_train=X_train,
+        y_train=y_train_scaled,
+        # NOTE(review): copies of the test split are passed, presumably so the
+        # search cannot modify the data reused below -- confirm against the API.
+        X_test=X_test.copy(),
+        y_test=y_test_scaled.copy(),
+        optimize_metric='r2',
+        # NOTE(review): both limits are presumably in seconds (cf. the `_secs`
+        # suffix) -- confirm against the AutoPyTorch documentation.
+        total_walltime_limit=300,
+        func_eval_time_limit_secs=50,
+        enable_traditional_pipeline=False,
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    # The search was run on the scaled targets, so the predictions are in the
+    # scaled target space as well.
+    y_pred_scaled = api.predict(X_test)
+
+    # Rescale the Neural Network predictions into the original target range
+    # so they can be scored against the unscaled `y_test`.
+    y_pred = y_pred_scaled * y_train_std + y_train_mean
+    score = api.score(y_pred, y_test)
+
+    print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())
@@ -0,0 +1,54 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Tabular Classification with different resampling strategy\n\nThe following example shows how to fit a sample classification model\nwith different resampling strategies in AutoPyTorch.\nBy default, AutoPyTorch uses Holdout Validation with\na 67% train size split.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    ############################################################################\n    # Build and fit a classifier with default resampling strategy\n    # ===========================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_03',\n        output_directory='./tmp/autoPyTorch_example_out_03',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33\n        # is the default argument setting for TabularClassificationTask.\n        # It is explicitly specified in this example for demonstrational\n        # purpose.\n        resampling_strategy=HoldoutValTypes.holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Search for an ensemble of 
machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit_secs=30\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())\n\n    ############################################################################\n\n    ############################################################################\n    # Build and fit a classifier with Cross validation resampling strategy\n    # ====================================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_04',\n        output_directory='./tmp/autoPyTorch_example_out_04',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        resampling_strategy=CrossValTypes.k_fold_cross_validation,\n        resampling_strategy_args={'num_splits': 3}\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit_secs=30\n    )\n\n    
############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())\n\n    ############################################################################\n\n    ############################################################################\n    # Build and fit a classifier with Stratified resampling strategy\n    # ==============================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_05',\n        output_directory='./tmp/autoPyTorch_example_out_05',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        # For demonstration purposes, we use\n        # Stratified hold out validation. 
However,\n        # one can also use CrossValTypes.stratified_k_fold_cross_validation.\n        resampling_strategy=HoldoutValTypes.stratified_holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit_secs=30\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.9"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -0,0 +1,159 @@
+"""
+=========================================================
+Tabular Classification with different resampling strategy
+=========================================================
+
+The following example shows how to fit a sample classification model
+with different resampling strategies in AutoPyTorch.
+By default, AutoPyTorch uses Holdout Validation with
+a 67% train size split.
+"""
+import os
+import tempfile as tmp
+import warnings
+
+# NOTE: keep these assignments above the sklearn/autoPyTorch imports below --
+# thread-count variables are typically read when the native numerical
+# libraries are first loaded.
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+# Ignore every UserWarning and FutureWarning for the rest of the script.
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import sklearn.datasets
+import sklearn.model_selection
+
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
+
+
+if __name__ == '__main__':
+
+    ############################################################################
+    # Data Loading
+    # ============
+    # Fetch OpenML dataset 40981 as pandas objects and hold out a test split;
+    # the same split is reused by all three experiments below.
+    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=1,
+    )
+
+    ############################################################################
+    # Build and fit a classifier with default resampling strategy
+    # ===========================================================
+    api = TabularClassificationTask(
+        temporary_directory='./tmp/autoPyTorch_example_tmp_03',
+        output_directory='./tmp/autoPyTorch_example_out_03',
+        # To keep the logs of the run, set the next two arguments to False
+        delete_tmp_folder_after_terminate=True,
+        delete_output_folder_after_terminate=True,
+        # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33
+        # is the default argument setting for TabularClassificationTask.
+        # It is specified explicitly in this example for demonstration
+        # purposes.
+        resampling_strategy=HoldoutValTypes.holdout_validation,
+        resampling_strategy_args={'val_share': 0.33}
+    )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        # NOTE(review): copies of the test split are passed, presumably so the
+        # search cannot modify the data reused below -- confirm against the API.
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric='accuracy',
+        # NOTE(review): both limits are presumably in seconds (cf. the `_secs`
+        # suffix) -- confirm against the AutoPyTorch documentation.
+        total_walltime_limit=150,
+        func_eval_time_limit_secs=30
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred = api.predict(X_test)
+    score = api.score(y_pred, y_test)
+    print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())
+
+    ############################################################################
+
+    ############################################################################
+    # Build and fit a classifier with Cross validation resampling strategy
+    # ====================================================================
+    # `api` is rebound to a fresh task that uses its own tmp/output directories
+    # (suffix _04) and 3-fold cross-validation instead of a single holdout split.
+    api = TabularClassificationTask(
+        temporary_directory='./tmp/autoPyTorch_example_tmp_04',
+        output_directory='./tmp/autoPyTorch_example_out_04',
+        # To keep the logs of the run, set the next two arguments to False
+        delete_tmp_folder_after_terminate=True,
+        delete_output_folder_after_terminate=True,
+        resampling_strategy=CrossValTypes.k_fold_cross_validation,
+        resampling_strategy_args={'num_splits': 3}
+    )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
+    # Same data, metric and time limits as the first search; only the
+    # resampling strategy of the task differs.
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric='accuracy',
+        total_walltime_limit=150,
+        func_eval_time_limit_secs=30
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred = api.predict(X_test)
+    score = api.score(y_pred, y_test)
+    print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())
+
+    ############################################################################
+
+    ############################################################################
+    # Build and fit a classifier with Stratified resampling strategy
+    # ==============================================================
+    # `api` is rebound once more; this task uses the _05 directories.
+    api = TabularClassificationTask(
+        temporary_directory='./tmp/autoPyTorch_example_tmp_05',
+        output_directory='./tmp/autoPyTorch_example_out_05',
+        # To keep the logs of the run, set the next two arguments to False
+        delete_tmp_folder_after_terminate=True,
+        delete_output_folder_after_terminate=True,
+        # For demonstration purposes, we use
+        # stratified holdout validation. However,
+        # one can also use CrossValTypes.stratified_k_fold_cross_validation.
+        resampling_strategy=HoldoutValTypes.stratified_holdout_validation,
+        resampling_strategy_args={'val_share': 0.33}
+    )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
+    # Same data, metric and time limits as the previous two searches.
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric='accuracy',
+        total_walltime_limit=150,
+        func_eval_time_limit_secs=30
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred = api.predict(X_test)
+    score = api.score(y_pred, y_test)
+    print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())
@@ -0,0 +1,72 @@
+"""
+======================
+Tabular Classification
+======================
+
+The following example shows how to fit a sample classification model
+with AutoPyTorch
+"""
+import os
+import tempfile as tmp
+import warnings
+
+# NOTE: keep these assignments above the sklearn/autoPyTorch imports below --
+# thread-count variables are typically read when the native numerical
+# libraries are first loaded.
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+# Ignore every UserWarning and FutureWarning for the rest of the script.
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import sklearn.datasets
+import sklearn.model_selection
+
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+
+
+if __name__ == '__main__':
+
+    ############################################################################
+    # Data Loading
+    # ============
+    # Fetch OpenML dataset 40981 as pandas objects and hold out a test split;
+    # `random_state=42` keeps the split reproducible across runs.
+    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=42,
+    )
+
+    ############################################################################
+    # Build and fit a classifier
+    # ==========================
+    api = TabularClassificationTask(
+        temporary_directory='./tmp/autoPyTorch_example_tmp_01',
+        output_directory='./tmp/autoPyTorch_example_out_01',
+        # To keep the logs of the run, set the next two arguments to False
+        delete_tmp_folder_after_terminate=True,
+        delete_output_folder_after_terminate=True,
+        # NOTE(review): presumably seeds the search for reproducibility --
+        # confirm against the AutoPyTorch documentation.
+        seed=42,
+    )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        # NOTE(review): copies of the test split are passed, presumably so the
+        # search cannot modify the data reused below -- confirm against the API.
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric='accuracy',
+        # NOTE(review): both limits are presumably in seconds (cf. the `_secs`
+        # suffix) -- confirm against the AutoPyTorch documentation.
+        total_walltime_limit=300,
+        func_eval_time_limit_secs=50
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred = api.predict(X_test)
+    score = api.score(y_pred, y_test)
+    print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())