|
15 | 15 | "cell_type": "markdown",
|
16 | 16 | "metadata": {},
|
17 | 17 | "source": [
|
18 |
| - "\n# Tabular Classification with Custom Configuration Space\n\nThe following example shows how adjust the configuration space of\nthe search. Currently, there are two changes that can be made to the space:-\n1. Adjust individual hyperparameters in the pipeline\n2. Include or exclude components:\n a) include: Dictionary containing components to include. Key is the node\n name and Value is an Iterable of the names of the components\n to include. Only these components will be present in the\n search space.\n b) exclude: Dictionary containing components to exclude. Key is the node\n name and Value is an Iterable of the names of the components\n to exclude. All except these components will be present in\n the search space.\n" |
| 18 | + "\n# Tabular Classification with Custom Configuration Space\n\nThe following example shows how adjust the configuration space of\nthe search. Currently, there are two changes that can be made to the space:-\n\n1. Adjust individual hyperparameters in the pipeline\n2. Include or exclude components:\n a) include: Dictionary containing components to include. Key is the node\n name and Value is an Iterable of the names of the components\n to include. Only these components will be present in the\n search space.\n b) exclude: Dictionary containing components to exclude. Key is the node\n name and Value is an Iterable of the names of the components\n to exclude. All except these components will be present in\n the search space.\n" |
19 | 19 | ]
|
20 | 20 | },
|
21 | 21 | {
|
|
26 | 26 | },
|
27 | 27 | "outputs": [],
|
28 | 28 | "source": [
|
29 |
| - "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier with include components\n # ==================================================\n api = TabularClassificationTask(\n search_space_updates=get_search_space_updates(),\n include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'],\n 'encoder': ['OneHotEncoder']}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train.copy(),\n y_train=y_train.copy(),\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit_secs=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n print(api.show_models())\n\n # Print statistics from search\n print(api.sprint_statistics())\n\n ############################################################################\n # Build and fit a classifier with exclude components\n # ==================================================\n api = TabularClassificationTask(\n search_space_updates=get_search_space_updates(),\n exclude_components={'network_backbone': ['MLPBackbone'],\n 'encoder': ['OneHotEncoder']}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit_secs=30\n )\n\n 
############################################################################\n # Print the final ensemble performance\n # ====================================\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n print(api.show_models())\n\n # Print statistics from search\n print(api.sprint_statistics())" |
| 29 | + "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates" |
| 30 | + ] |
| 31 | + }, |
| 32 | + { |
| 33 | + "cell_type": "markdown", |
| 34 | + "metadata": {}, |
| 35 | + "source": [ |
| 36 | + "## Data Loading\n\n" |
| 37 | + ] |
| 38 | + }, |
| 39 | + { |
| 40 | + "cell_type": "code", |
| 41 | + "execution_count": null, |
| 42 | + "metadata": { |
| 43 | + "collapsed": false |
| 44 | + }, |
| 45 | + "outputs": [], |
| 46 | + "source": [ |
| 47 | + "X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n)" |
| 48 | + ] |
| 49 | + }, |
| 50 | + { |
| 51 | + "cell_type": "markdown", |
| 52 | + "metadata": {}, |
| 53 | + "source": [ |
| 54 | + "## Build and fit a classifier with include components\n\n" |
| 55 | + ] |
| 56 | + }, |
| 57 | + { |
| 58 | + "cell_type": "code", |
| 59 | + "execution_count": null, |
| 60 | + "metadata": { |
| 61 | + "collapsed": false |
| 62 | + }, |
| 63 | + "outputs": [], |
| 64 | + "source": [ |
| 65 | + "api = TabularClassificationTask(\n search_space_updates=get_search_space_updates(),\n include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'],\n 'encoder': ['OneHotEncoder']}\n)" |
| 66 | + ] |
| 67 | + }, |
| 68 | + { |
| 69 | + "cell_type": "markdown", |
| 70 | + "metadata": {}, |
| 71 | + "source": [ |
| 72 | + "## Search for an ensemble of machine learning algorithms\n\n" |
| 73 | + ] |
| 74 | + }, |
| 75 | + { |
| 76 | + "cell_type": "code", |
| 77 | + "execution_count": null, |
| 78 | + "metadata": { |
| 79 | + "collapsed": false |
| 80 | + }, |
| 81 | + "outputs": [], |
| 82 | + "source": [ |
| 83 | + "api.search(\n X_train=X_train.copy(),\n y_train=y_train.copy(),\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit_secs=30\n)" |
| 84 | + ] |
| 85 | + }, |
| 86 | + { |
| 87 | + "cell_type": "markdown", |
| 88 | + "metadata": {}, |
| 89 | + "source": [ |
| 90 | + "## Print the final ensemble performance\n\n" |
| 91 | + ] |
| 92 | + }, |
| 93 | + { |
| 94 | + "cell_type": "code", |
| 95 | + "execution_count": null, |
| 96 | + "metadata": { |
| 97 | + "collapsed": false |
| 98 | + }, |
| 99 | + "outputs": [], |
| 100 | + "source": [ |
| 101 | + "y_pred = api.predict(X_test)\nscore = api.score(y_pred, y_test)\nprint(score)\nprint(api.show_models())\n\n# Print statistics from search\nprint(api.sprint_statistics())" |
| 102 | + ] |
| 103 | + }, |
| 104 | + { |
| 105 | + "cell_type": "markdown", |
| 106 | + "metadata": {}, |
| 107 | + "source": [ |
| 108 | + "## Build and fit a classifier with exclude components\n\n" |
| 109 | + ] |
| 110 | + }, |
| 111 | + { |
| 112 | + "cell_type": "code", |
| 113 | + "execution_count": null, |
| 114 | + "metadata": { |
| 115 | + "collapsed": false |
| 116 | + }, |
| 117 | + "outputs": [], |
| 118 | + "source": [ |
| 119 | + "api = TabularClassificationTask(\n search_space_updates=get_search_space_updates(),\n exclude_components={'network_backbone': ['MLPBackbone'],\n 'encoder': ['OneHotEncoder']}\n)" |
| 120 | + ] |
| 121 | + }, |
| 122 | + { |
| 123 | + "cell_type": "markdown", |
| 124 | + "metadata": {}, |
| 125 | + "source": [ |
| 126 | + "## Search for an ensemble of machine learning algorithms\n\n" |
| 127 | + ] |
| 128 | + }, |
| 129 | + { |
| 130 | + "cell_type": "code", |
| 131 | + "execution_count": null, |
| 132 | + "metadata": { |
| 133 | + "collapsed": false |
| 134 | + }, |
| 135 | + "outputs": [], |
| 136 | + "source": [ |
| 137 | + "api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit_secs=30\n)" |
| 138 | + ] |
| 139 | + }, |
| 140 | + { |
| 141 | + "cell_type": "markdown", |
| 142 | + "metadata": {}, |
| 143 | + "source": [ |
| 144 | + "## Print the final ensemble performance\n\n" |
| 145 | + ] |
| 146 | + }, |
| 147 | + { |
| 148 | + "cell_type": "code", |
| 149 | + "execution_count": null, |
| 150 | + "metadata": { |
| 151 | + "collapsed": false |
| 152 | + }, |
| 153 | + "outputs": [], |
| 154 | + "source": [ |
| 155 | + "y_pred = api.predict(X_test)\nscore = api.score(y_pred, y_test)\nprint(score)\nprint(api.show_models())\n\n# Print statistics from search\nprint(api.sprint_statistics())" |
30 | 156 | ]
|
31 | 157 | }
|
32 | 158 | ],
|
|