automl
diff --git a/‎refactor_development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip
20 Bytes b/‎refactor_development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip
20 Bytes
diff --git a/‎refactor_development/_downloads/83c92a475e127571aad1d77baa11248d/example_tabular_classification.py
+3-2 b/‎refactor_development/_downloads/83c92a475e127571aad1d77baa11248d/example_tabular_classification.py
+3-2
diff --git a/‎refactor_development/_downloads/8ef6602cf8ed40221edd89244fb031af/basics_tabular_python.zip
19 Bytes b/‎refactor_development/_downloads/8ef6602cf8ed40221edd89244fb031af/basics_tabular_python.zip
19 Bytes
diff --git a/‎refactor_development/_downloads/b38295cf56f02b1c4547385a8f389f90/advanced_tabular_python.zip
0 Bytes b/‎refactor_development/_downloads/b38295cf56f02b1c4547385a8f389f90/advanced_tabular_python.zip
0 Bytes
diff --git a/‎refactor_development/_downloads/df038041811db9bd567de83692a8f994/advanced_tabular_jupyter.zip
0 Bytes b/‎refactor_development/_downloads/df038041811db9bd567de83692a8f994/advanced_tabular_jupyter.zip
0 Bytes
diff --git a/‎refactor_development/_downloads/ff1caf117ce143ee4d6abd46896ef46a/example_tabular_classification.ipynb
+1-1 b/‎refactor_development/_downloads/ff1caf117ce143ee4d6abd46896ef46a/example_tabular_classification.ipynb
+1-1
diff --git a/‎refactor_development/_sources/advanced_tabular/example_custom_configuration_space.rst.txt
+78-26 b/‎refactor_development/_sources/advanced_tabular/example_custom_configuration_space.rst.txt
+78-26
@@ -33,7 +33,7 @@
     X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
         X,
         y,
-        random_state=1,
+        random_state=42,
     )
 
     ############################################################################
@@ -44,7 +44,8 @@
         output_directory='./tmp/autoPyTorch_example_out_01',
         # To maintain logs of the run, set the next two as False
         delete_tmp_folder_after_terminate=True,
-        delete_output_folder_after_terminate=True
+        delete_output_folder_after_terminate=True,
+        seed=42,
     )
 
     ############################################################################
 
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    ############################################################################\n    # Build and fit a classifier\n    # ==========================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_01',\n        output_directory='./tmp/autoPyTorch_example_out_01',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=300,\n        func_eval_time_limit_secs=50\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
+        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=42,\n    )\n\n    ############################################################################\n    # Build and fit a classifier\n    # ==========================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_01',\n        output_directory='./tmp/autoPyTorch_example_out_01',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        seed=42,\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=300,\n        func_eval_time_limit_secs=50\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
       ]
     }
   ],
 
@@ -46,7 +46,7 @@ the search. Currently, there are two changes that can be made to the space:-
 
  .. code-block:: none
 
-    <smac.runhistory.runhistory.RunHistory object at 0x7fd58f4d3d00> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
+    <smac.runhistory.runhistory.RunHistory object at 0x7f92f432d4f0> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 32
       encoder:__choice__, Value: 'OneHotEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -80,7 +80,7 @@ the search. Currently, there are two changes that can be made to the space:-
       scaler:__choice__, Value: 'StandardScaler'
       trainer:StandardTrainer:weighted_loss, Value: True
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.001336812973022461, budget=0), TrajEntry(train_perf=0.14619883040935677, incumbent_id=1, incumbent=Configuration:
+    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.0020940303802490234, budget=0), TrajEntry(train_perf=0.16374269005847952, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 32
       encoder:__choice__, Value: 'OneHotEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -114,19 +114,71 @@ the search. Currently, there are two changes that can be made to the space:-
       scaler:__choice__, Value: 'StandardScaler'
       trainer:StandardTrainer:weighted_loss, Value: True
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=1, ta_time_used=4.047108173370361, wallclock_time=5.4710657596588135, budget=5.555555555555555)]
-    {'accuracy': 0.8901734104046243}
+    , ta_runs=1, ta_time_used=5.8945159912109375, wallclock_time=7.446282625198364, budget=5.555555555555555), TrajEntry(train_perf=0.1578947368421053, incumbent_id=2, incumbent=Configuration:
+      data_loader:batch_size, Value: 475
+      encoder:__choice__, Value: 'OneHotEncoder'
+      feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
+      imputer:categorical_strategy, Value: 'most_frequent'
+      imputer:numerical_strategy, Value: 'constant_zero'
+      lr_scheduler:__choice__, Value: 'NoScheduler'
+      network_backbone:MLPBackbone:activation, Value: 'tanh'
+      network_backbone:MLPBackbone:dropout_1, Value: 0.579891279191762
+      network_backbone:MLPBackbone:dropout_2, Value: 0.43202885747368863
+      network_backbone:MLPBackbone:dropout_3, Value: 0.2053050533304992
+      network_backbone:MLPBackbone:dropout_4, Value: 0.3628626567848122
+      network_backbone:MLPBackbone:dropout_5, Value: 0.000687232634536894
+      network_backbone:MLPBackbone:dropout_6, Value: 0.30779918180581656
+      network_backbone:MLPBackbone:dropout_7, Value: 0.4566654226669556
+      network_backbone:MLPBackbone:num_groups, Value: 7
+      network_backbone:MLPBackbone:num_units_1, Value: 749
+      network_backbone:MLPBackbone:num_units_2, Value: 751
+      network_backbone:MLPBackbone:num_units_3, Value: 759
+      network_backbone:MLPBackbone:num_units_4, Value: 664
+      network_backbone:MLPBackbone:num_units_5, Value: 219
+      network_backbone:MLPBackbone:num_units_6, Value: 757
+      network_backbone:MLPBackbone:num_units_7, Value: 1005
+      network_backbone:MLPBackbone:use_dropout, Value: True
+      network_backbone:__choice__, Value: 'MLPBackbone'
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_0, Value: 0.9640640623783606
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_1, Value: 0.017233504391813814
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_2, Value: 0.24122690885917664
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_3, Value: 0.31247176333246596
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_4, Value: 0.41504826813841933
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_5, Value: 0.8395119637200936
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_6, Value: 0.8208414027523236
+      network_embedding:LearnedEntityEmbedding:dimension_reduction_7, Value: 0.4284420622613293
+      network_embedding:LearnedEntityEmbedding:min_unique_values_for_embedding, Value: 7
+      network_embedding:__choice__, Value: 'LearnedEntityEmbedding'
+      network_head:__choice__, Value: 'fully_connected'
+      network_head:fully_connected:num_layers, Value: 1
+      network_init:KaimingInit:bias_strategy, Value: 'Zero'
+      network_init:__choice__, Value: 'KaimingInit'
+      optimizer:AdamOptimizer:beta1, Value: 0.9770847327434384
+      optimizer:AdamOptimizer:beta2, Value: 0.9710627513919582
+      optimizer:AdamOptimizer:lr, Value: 0.00010844892447274338
+      optimizer:AdamOptimizer:weight_decay, Value: 0.05048412416506887
+      optimizer:__choice__, Value: 'AdamOptimizer'
+      scaler:Normalizer:norm, Value: 'max'
+      scaler:__choice__, Value: 'Normalizer'
+      trainer:StandardTrainer:weighted_loss, Value: False
+      trainer:__choice__, Value: 'StandardTrainer'
+    , ta_runs=12, ta_time_used=167.8572690486908, wallclock_time=197.65661692619324, budget=16.666666666666664)]
+    {'accuracy': 0.8554913294797688}
     |    | Preprocessing                                                     | Estimator                                                 |   Weight |
     |---:|:------------------------------------------------------------------|:----------------------------------------------------------|---------:|
-    |  0 | None                                                              | CatBoostClassifier                                        |     0.28 |
-    |  1 | SimpleImputer,OneHotEncoder,StandardScaler,NoFeaturePreprocessing | no embedding,MLPBackbone,FullyConnectedHead,nn.Sequential |     0.2  |
-    |  2 | None                                                              | ExtraTreesClassifier                                      |     0.18 |
-    |  3 | SimpleImputer,OneHotEncoder,Normalizer,KernelPCA                  | embedding,ResNetBackbone,FullyConnectedHead,nn.Sequential |     0.14 |
-    |  4 | None                                                              | KNNClassifier                                             |     0.06 |
-    |  5 | SimpleImputer,OneHotEncoder,StandardScaler,NoFeaturePreprocessing | no embedding,MLPBackbone,FullyConnectedHead,nn.Sequential |     0.06 |
-    |  6 | SimpleImputer,OneHotEncoder,Normalizer,KernelPCA                  | embedding,ResNetBackbone,FullyConnectedHead,nn.Sequential |     0.04 |
-    |  7 | None                                                              | RFClassifier                                              |     0.04 |
-    <smac.runhistory.runhistory.RunHistory object at 0x7fd57e5f4d00> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
+    |  0 | None                                                              | SVC                                                       |     0.18 |
+    |  1 | None                                                              | RFClassifier                                              |     0.16 |
+    |  2 | None                                                              | CatBoostClassifier                                        |     0.14 |
+    |  3 | SimpleImputer,OneHotEncoder,Normalizer,KernelPCA                  | embedding,ResNetBackbone,FullyConnectedHead,nn.Sequential |     0.12 |
+    |  4 | SimpleImputer,OneHotEncoder,Normalizer,KernelPCA                  | embedding,ResNetBackbone,FullyConnectedHead,nn.Sequential |     0.1  |
+    |  5 | SimpleImputer,OneHotEncoder,Normalizer,NoFeaturePreprocessing     | embedding,MLPBackbone,FullyConnectedHead,nn.Sequential    |     0.1  |
+    |  6 | None                                                              | ExtraTreesClassifier                                      |     0.08 |
+    |  7 | SimpleImputer,OneHotEncoder,StandardScaler,NoFeaturePreprocessing | no embedding,MLPBackbone,FullyConnectedHead,nn.Sequential |     0.04 |
+    |  8 | SimpleImputer,OneHotEncoder,Normalizer,NoFeaturePreprocessing     | embedding,MLPBackbone,FullyConnectedHead,nn.Sequential    |     0.02 |
+    |  9 | SimpleImputer,OneHotEncoder,Normalizer,NoFeaturePreprocessing     | embedding,MLPBackbone,FullyConnectedHead,nn.Sequential    |     0.02 |
+    | 10 | None                                                              | KNNClassifier                                             |     0.02 |
+    | 11 | SimpleImputer,OneHotEncoder,StandardScaler,NoFeaturePreprocessing | no embedding,MLPBackbone,FullyConnectedHead,nn.Sequential |     0.02 |
+    <smac.runhistory.runhistory.RunHistory object at 0x7f92c7b660d0> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 32
       encoder:__choice__, Value: 'NoEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -158,7 +210,7 @@ the search. Currently, there are two changes that can be made to the space:-
       scaler:__choice__, Value: 'StandardScaler'
       trainer:StandardTrainer:weighted_loss, Value: True
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.0012526512145996094, budget=0), TrajEntry(train_perf=0.19298245614035092, incumbent_id=1, incumbent=Configuration:
+    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.0016224384307861328, budget=0), TrajEntry(train_perf=0.16374269005847952, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 32
       encoder:__choice__, Value: 'NoEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -190,17 +242,17 @@ the search. Currently, there are two changes that can be made to the space:-
       scaler:__choice__, Value: 'StandardScaler'
       trainer:StandardTrainer:weighted_loss, Value: True
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=1, ta_time_used=3.548128843307495, wallclock_time=4.968382835388184, budget=5.555555555555555)]
-    {'accuracy': 0.8728323699421965}
-    |    | Preprocessing                                    | Estimator                                                          |   Weight |
-    |---:|:-------------------------------------------------|:-------------------------------------------------------------------|---------:|
-    |  0 | None                                             | CatBoostClassifier                                                 |     0.24 |
-    |  1 | None                                             | RFClassifier                                                       |     0.22 |
-    |  2 | None                                             | ExtraTreesClassifier                                               |     0.18 |
-    |  3 | None                                             | KNNClassifier                                                      |     0.14 |
-    |  4 | None                                             | LGBMClassifier                                                     |     0.1  |
-    |  5 | SimpleImputer,NoEncoder,MinMaxScaler,KitchenSink | no embedding,ShapedResNetBackbone,FullyConnectedHead,nn.Sequential |     0.06 |
-    |  6 | None                                             | SVC                                                                |     0.06 |
+    , ta_runs=1, ta_time_used=4.966748952865601, wallclock_time=6.570724964141846, budget=5.555555555555555)]
+    {'accuracy': 0.8554913294797688}
+    |    | Preprocessing                                                 | Estimator                                                          |   Weight |
+    |---:|:--------------------------------------------------------------|:-------------------------------------------------------------------|---------:|
+    |  0 | None                                                          | CatBoostClassifier                                                 |     0.74 |
+    |  1 | None                                                          | SVC                                                                |     0.08 |
+    |  2 | None                                                          | KNNClassifier                                                      |     0.06 |
+    |  3 | None                                                          | RFClassifier                                                       |     0.04 |
+    |  4 | None                                                          | ExtraTreesClassifier                                               |     0.04 |
+    |  5 | SimpleImputer,NoEncoder,MinMaxScaler,NoFeaturePreprocessing   | no embedding,ShapedResNetBackbone,FullyConnectedHead,nn.Sequential |     0.02 |
+    |  6 | SimpleImputer,NoEncoder,StandardScaler,NoFeaturePreprocessing | no embedding,ShapedMLPBackbone,FullyConnectedHead,nn.Sequential    |     0.02 |
 
 
 
@@ -329,7 +381,7 @@ the search. Currently, there are two changes that can be made to the space:-
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 11 minutes  24.080 seconds)
+   **Total running time of the script:** ( 11 minutes  43.212 seconds)
 
 
 .. _sphx_glr_download_advanced_tabular_example_custom_configuration_space.py:
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\n\n\nif __name__ == '__main__':\n\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier\n # ==========================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_01',\n output_directory='./tmp/autoPyTorch_example_out_01',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=300,\n func_eval_time_limit_secs=50\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())"
	`29`	+ "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\n\n\nif __name__ == '__main__':\n\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=42,\n )\n\n ############################################################################\n # Build and fit a classifier\n # ==========================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_01',\n output_directory='./tmp/autoPyTorch_example_out_01',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n seed=42,\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=300,\n func_eval_time_limit_secs=50\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`