automl
diff --git a/development/_downloads/307f532dbef0476f85afc6b64b65f087/example_resampling_strategy.py b/development/_downloads/307f532dbef0476f85afc6b64b65f087/example_resampling_strategy.py
Lines changed: 113 additions & 115 deletions
diff --git a/development/_downloads/38ebc52de63d1626596d1647c695c721/example_tabular_regression.ipynb b/development/_downloads/38ebc52de63d1626596d1647c695c721/example_tabular_regression.ipynb
Lines changed: 73 additions & 1 deletion
@@ -27,118 +27,116 @@
 from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
 
 
-if __name__ == '__main__':
-
-    ############################################################################
-    # Data Loading
-    # ============
-    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
-    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
-        X,
-        y,
-        random_state=1,
-    )
-
-    ############################################################################
-    # Build and fit a classifier with default resampling strategy
-    # ===========================================================
-    api = TabularClassificationTask(
-        # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33
-        # is the default argument setting for TabularClassificationTask.
-        # It is explicitly specified in this example for demonstrational
-        # purpose.
-        resampling_strategy=HoldoutValTypes.holdout_validation,
-        resampling_strategy_args={'val_share': 0.33}
-    )
-
-    ############################################################################
-    # Search for an ensemble of machine learning algorithms
-    # =====================================================
-    api.search(
-        X_train=X_train,
-        y_train=y_train,
-        X_test=X_test.copy(),
-        y_test=y_test.copy(),
-        optimize_metric='accuracy',
-        total_walltime_limit=150,
-        func_eval_time_limit_secs=30
-    )
-
-    ############################################################################
-    # Print the final ensemble performance
-    # ====================================
-    print(api.run_history, api.trajectory)
-    y_pred = api.predict(X_test)
-    score = api.score(y_pred, y_test)
-    print(score)
-    # Print the final ensemble built by AutoPyTorch
-    print(api.show_models())
-
-    ############################################################################
-
-    ############################################################################
-    # Build and fit a classifier with Cross validation resampling strategy
-    # ====================================================================
-    api = TabularClassificationTask(
-        resampling_strategy=CrossValTypes.k_fold_cross_validation,
-        resampling_strategy_args={'num_splits': 3}
-    )
-
-    ############################################################################
-    # Search for an ensemble of machine learning algorithms
-    # =====================================================
-    api.search(
-        X_train=X_train,
-        y_train=y_train,
-        X_test=X_test.copy(),
-        y_test=y_test.copy(),
-        optimize_metric='accuracy',
-        total_walltime_limit=150,
-        func_eval_time_limit_secs=30
-    )
-
-    ############################################################################
-    # Print the final ensemble performance
-    # ====================================
-    print(api.run_history, api.trajectory)
-    y_pred = api.predict(X_test)
-    score = api.score(y_pred, y_test)
-    print(score)
-    # Print the final ensemble built by AutoPyTorch
-    print(api.show_models())
-
-    ############################################################################
-
-    ############################################################################
-    # Build and fit a classifier with Stratified resampling strategy
-    # ==============================================================
-    api = TabularClassificationTask(
-        # For demonstration purposes, we use
-        # Stratified hold out validation. However,
-        # one can also use CrossValTypes.stratified_k_fold_cross_validation.
-        resampling_strategy=HoldoutValTypes.stratified_holdout_validation,
-        resampling_strategy_args={'val_share': 0.33}
-    )
-
-    ############################################################################
-    # Search for an ensemble of machine learning algorithms
-    # =====================================================
-    api.search(
-        X_train=X_train,
-        y_train=y_train,
-        X_test=X_test.copy(),
-        y_test=y_test.copy(),
-        optimize_metric='accuracy',
-        total_walltime_limit=150,
-        func_eval_time_limit_secs=30
-    )
-
-    ############################################################################
-    # Print the final ensemble performance
-    # ====================================
-    print(api.run_history, api.trajectory)
-    y_pred = api.predict(X_test)
-    score = api.score(y_pred, y_test)
-    print(score)
-    # Print the final ensemble built by AutoPyTorch
-    print(api.show_models())
+############################################################################
+# Data Loading
+# ============
+X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
+X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+    X,
+    y,
+    random_state=1,
+)
+
+############################################################################
+# Build and fit a classifier with default resampling strategy
+# ===========================================================
+api = TabularClassificationTask(
+    # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33
+    # is the default argument setting for TabularClassificationTask.
+    # It is explicitly specified in this example for demonstrational
+    # purpose.
+    resampling_strategy=HoldoutValTypes.holdout_validation,
+    resampling_strategy_args={'val_share': 0.33}
+)
+
+############################################################################
+# Search for an ensemble of machine learning algorithms
+# =====================================================
+api.search(
+    X_train=X_train,
+    y_train=y_train,
+    X_test=X_test.copy(),
+    y_test=y_test.copy(),
+    optimize_metric='accuracy',
+    total_walltime_limit=150,
+    func_eval_time_limit_secs=30
+)
+
+############################################################################
+# Print the final ensemble performance
+# ====================================
+print(api.run_history, api.trajectory)
+y_pred = api.predict(X_test)
+score = api.score(y_pred, y_test)
+print(score)
+# Print the final ensemble built by AutoPyTorch
+print(api.show_models())
+
+############################################################################
+
+############################################################################
+# Build and fit a classifier with Cross validation resampling strategy
+# ====================================================================
+api = TabularClassificationTask(
+    resampling_strategy=CrossValTypes.k_fold_cross_validation,
+    resampling_strategy_args={'num_splits': 3}
+)
+
+############################################################################
+# Search for an ensemble of machine learning algorithms
+# =====================================================
+api.search(
+    X_train=X_train,
+    y_train=y_train,
+    X_test=X_test.copy(),
+    y_test=y_test.copy(),
+    optimize_metric='accuracy',
+    total_walltime_limit=150,
+    func_eval_time_limit_secs=30
+)
+
+############################################################################
+# Print the final ensemble performance
+# ====================================
+print(api.run_history, api.trajectory)
+y_pred = api.predict(X_test)
+score = api.score(y_pred, y_test)
+print(score)
+# Print the final ensemble built by AutoPyTorch
+print(api.show_models())
+
+############################################################################
+
+############################################################################
+# Build and fit a classifier with Stratified resampling strategy
+# ==============================================================
+api = TabularClassificationTask(
+    # For demonstration purposes, we use
+    # Stratified hold out validation. However,
+    # one can also use CrossValTypes.stratified_k_fold_cross_validation.
+    resampling_strategy=HoldoutValTypes.stratified_holdout_validation,
+    resampling_strategy_args={'val_share': 0.33}
+)
+
+############################################################################
+# Search for an ensemble of machine learning algorithms
+# =====================================================
+api.search(
+    X_train=X_train,
+    y_train=y_train,
+    X_test=X_test.copy(),
+    y_test=y_test.copy(),
+    optimize_metric='accuracy',
+    total_walltime_limit=150,
+    func_eval_time_limit_secs=30
+)
+
+############################################################################
+# Print the final ensemble performance
+# ====================================
+print(api.run_history, api.trajectory)
+y_pred = api.predict(X_test)
+score = api.score(y_pred, y_test)
+print(score)
+# Print the final ensemble built by AutoPyTorch
+print(api.show_models())
@@ -26,7 +26,79 @@
       },
       "outputs": [],
       "source": [
-        "import os\nimport tempfile as tmp\nimport warnings\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    # Scale the regression targets to have zero mean and unit variance.\n    # This is important for Neural Networks since predicting large target values would require very large weights.\n    # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean\n    y_train_mean = y_train.mean()\n    y_train_std = y_train.std()\n\n    y_train_scaled = (y_train - y_train_mean) / y_train_std\n    y_test_scaled = (y_test - y_train_mean) / y_train_std\n\n    ############################################################################\n    # Build and fit a regressor\n    # ==========================\n    api = TabularRegressionTask()\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train_scaled,\n        X_test=X_test.copy(),\n        y_test=y_test_scaled.copy(),\n        optimize_metric='r2',\n        total_walltime_limit=300,\n        func_eval_time_limit_secs=50,\n        enable_traditional_pipeline=False,\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred_scaled = api.predict(X_test)\n\n    # Rescale the Neural Network predictions into the original target range\n    y_pred = y_pred_scaled * y_train_std + y_train_mean\n    score = api.score(y_pred, y_test)\n\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
+        "import os\nimport tempfile as tmp\nimport warnings\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Data Loading\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n    X,\n    y,\n    random_state=1,\n)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Build and fit a regressor\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "api = TabularRegressionTask()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Search for an ensemble of machine learning algorithms\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "api.search(\n    X_train=X_train,\n    y_train=y_train,\n    X_test=X_test.copy(),\n    y_test=y_test.copy(),\n    optimize_metric='r2',\n    total_walltime_limit=300,\n    func_eval_time_limit_secs=50,\n    enable_traditional_pipeline=False,\n)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Print the final ensemble performance\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "print(api.run_history, api.trajectory)\ny_pred = api.predict(X_test)\n\n# Score the predictions directly against the test targets (no rescaling is applied here)\nscore = api.score(y_pred, y_test)\n\nprint(score)\n# Print the final ensemble built by AutoPyTorch\nprint(api.show_models())"
       ]
     }
   ],