Commit 067d17f

Github Actions committed
Ravin Kohli: Additional metrics during train (#194)
1 parent 963e5f9 commit 067d17f

File tree: 150 files changed, +44288 -0 lines

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 28ba0151b4e1d0ebfa351d5c8fb73340
tags: 645f666f9bcd5a90fca523b33c5a78b7
@@ -0,0 +1,85 @@
"""
======================
Tabular Regression
======================

The following example shows how to fit a sample regression model
with AutoPyTorch
"""
import os
import tempfile as tmp
import warnings

import sklearn.datasets
import sklearn.model_selection

os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

from autoPyTorch.api.tabular_regression import TabularRegressionTask


if __name__ == '__main__':

    ############################################################################
    # Data Loading
    # ============
    X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X,
        y,
        random_state=1,
    )

    # Scale the regression targets to have zero mean and unit variance.
    # This is important for Neural Networks since predicting large target values would require very large weights.
    # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean
    y_train_mean = y_train.mean()
    y_train_std = y_train.std()

    y_train_scaled = (y_train - y_train_mean) / y_train_std
    y_test_scaled = (y_test - y_train_mean) / y_train_std

    ############################################################################
    # Build and fit a regressor
    # ==========================
    api = TabularRegressionTask(
        temporary_directory='./tmp/autoPyTorch_example_tmp_02',
        output_directory='./tmp/autoPyTorch_example_out_02',
        # To keep the logs of the run, set the next two flags to False
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True
    )

    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    api.search(
        X_train=X_train,
        y_train=y_train_scaled,
        X_test=X_test.copy(),
        y_test=y_test_scaled.copy(),
        optimize_metric='r2',
        total_walltime_limit=300,
        func_eval_time_limit_secs=50,
        enable_traditional_pipeline=False,
    )

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)
    y_pred_scaled = api.predict(X_test)

    # Rescale the Neural Network predictions into the original target range
    y_pred = y_pred_scaled * y_train_std + y_train_mean
    score = api.score(y_pred, y_test)

    print(score)
    # Print the final ensemble built by AutoPyTorch
    print(api.show_models())
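Note on the target scaling above (an aside, not part of the committed example): the same standardization can be done with scikit-learn's StandardScaler, which keeps the fitted mean and standard deviation and provides inverse_transform for mapping predictions back. A minimal sketch follows, assuming the y_train and y_test variables from the example; note that StandardScaler divides by the population standard deviation (ddof=0), while pandas' .std() uses ddof=1, so the scaled values differ slightly.

import numpy as np
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training targets only; StandardScaler expects 2D input.
target_scaler = StandardScaler()
y_train_scaled = target_scaler.fit_transform(np.asarray(y_train).reshape(-1, 1)).ravel()
y_test_scaled = target_scaler.transform(np.asarray(y_test).reshape(-1, 1)).ravel()

# After predicting on the scaled targets, map the predictions back to the
# original range (equivalent to y_pred_scaled * y_train_std + y_train_mean):
# y_pred = target_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()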
@@ -0,0 +1,54 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Fit a single configuration\n*Auto-PyTorch* searches for the best combination of machine learning algorithms\nand their hyper-parameter configuration for a given task.\n\nThis example shows how one can fit one of these pipelines, both with a user-defined\nconfiguration and with a randomly sampled one from the configuration space.\nThe pipelines that Auto-PyTorch fits are compatible with the Scikit-Learn API. You can\nfind further documentation about Scikit-Learn models here: <https://scikit-learn.org/stable/getting_started.html>\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.metrics\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import HoldoutValTypes\n\n\nif __name__ == '__main__':\n    ############################################################################\n    # Data Loading\n    # ============\n\n    X, y = sklearn.datasets.fetch_openml('iris', return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X, y, test_size=0.8, random_state=3\n    )\n\n    ############################################################################\n    # Define an estimator\n    # ============================\n\n    # Search for a good configuration\n    estimator = TabularClassificationTask(\n        resampling_strategy=HoldoutValTypes.holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Get the default configuration of the pipeline for the current dataset\n    # ===============================================================\n\n    dataset = estimator.get_dataset(X_train=X_train,\n                                    y_train=y_train,\n                                    X_test=X_test,\n                                    y_test=y_test)\n    configuration = estimator.get_search_space(dataset).get_default_configuration()\n\n    ###########################################################################\n    # Fit the configuration\n    # ==================================\n\n    pipeline, run_info, run_value, dataset = estimator.fit_pipeline(\n        X_train=X_train,\n        y_train=y_train,\n        dataset_name='kr-vs-kp',\n        run_time_limit_secs=100,\n        X_test=X_test,\n        y_test=y_test,\n        disable_file_output=False,\n        configuration=configuration,\n    )\n\n    # This object complies with the Scikit-Learn Pipeline API.\n    # https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html\n    print(pipeline.named_steps)\n\n    # The fit_pipeline command also returns a named tuple with the pipeline constraints\n    print(run_info)\n\n    # The fit_pipeline command also returns a named tuple with train/test performance\n    print(run_value)\n\n    print(\"Passed Configuration:\", pipeline.config)\n    print(\"Network:\", pipeline.named_steps['network'].network)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
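Aside (not part of the committed notebook): the cell above prints the whole run_value named tuple returned by fit_pipeline. A minimal sketch of reading individual fields is given below; the field names (cost, time, status, additional_info) are assumed from SMAC's RunValue and may differ between versions, and additional_info is typically where extra metrics recorded during training would appear.

# Hedged sketch: inspect the run_value returned by estimator.fit_pipeline above.
# Field names are assumed from SMAC's RunValue named tuple.
print("Optimized metric value (cost):", run_value.cost)
print("Runtime in seconds:", run_value.time)
print("Status:", run_value.status)
# additional_info is expected to carry any extra train/test metrics.
print("Additional info:", run_value.additional_info)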
@@ -0,0 +1,54 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Tabular Classification with different resampling strategies\n\nThe following example shows how to fit a sample classification model\nwith different resampling strategies in AutoPyTorch.\nBy default, AutoPyTorch uses Holdout Validation with\na 67% train size split.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    ############################################################################\n    # Build and fit a classifier with default resampling strategy\n    # ===========================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_03',\n        output_directory='./tmp/autoPyTorch_example_out_03',\n        # To keep the logs of the run, set the next two flags to False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33\n        # is the default argument setting for TabularClassificationTask.\n        # It is explicitly specified in this example for demonstration\n        # purposes.\n        resampling_strategy=HoldoutValTypes.holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit_secs=30\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())\n\n    ############################################################################\n\n    ############################################################################\n    # Build and fit a classifier with Cross validation resampling strategy\n    # ====================================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_04',\n        output_directory='./tmp/autoPyTorch_example_out_04',\n        # To keep the logs of the run, set the next two flags to False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        resampling_strategy=CrossValTypes.k_fold_cross_validation,\n        resampling_strategy_args={'num_splits': 3}\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit_secs=30\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())\n\n    ############################################################################\n\n    ############################################################################\n    # Build and fit a classifier with Stratified resampling strategy\n    # ==============================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_05',\n        output_directory='./tmp/autoPyTorch_example_out_05',\n        # To keep the logs of the run, set the next two flags to False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        # For demonstration purposes, we use\n        # Stratified hold out validation. However,\n        # one can also use CrossValTypes.stratified_k_fold_cross_validation.\n        resampling_strategy=HoldoutValTypes.stratified_holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit_secs=30\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
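Aside (not part of the committed notebook): the last section's comment notes that CrossValTypes.stratified_k_fold_cross_validation can be used instead of stratified holdout validation. A minimal sketch of that variant is given below, reusing the constructor arguments from the example; the directory names are placeholders chosen here for illustration.

from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.resampling_strategy import CrossValTypes

# Same setup as the stratified holdout example above, but with stratified k-fold CV.
api = TabularClassificationTask(
    temporary_directory='./tmp/autoPyTorch_example_tmp_stratified_cv',  # placeholder path
    output_directory='./tmp/autoPyTorch_example_out_stratified_cv',  # placeholder path
    delete_tmp_folder_after_terminate=True,
    delete_output_folder_after_terminate=True,
    resampling_strategy=CrossValTypes.stratified_k_fold_cross_validation,
    resampling_strategy_args={'num_splits': 3},
)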
