Hyperparameter Search Space updates now with constant and include ability #146


25 changes: 24 additions & 1 deletion autoPyTorch/pipeline/base_pipeline.py
@@ -398,24 +398,47 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
raise ValueError("Unknown node name. Expected update node name to be in {} "
"got {}".format(self.named_steps.keys(), update.node_name))
node = self.named_steps[update.node_name]
# if node is a choice module
if hasattr(node, 'get_components'):
split_hyperparameter = update.hyperparameter.split(':')

# check if component is not present in include
if include is not None and update.node_name in include.keys():
if split_hyperparameter[0] not in include[update.node_name]:
raise ValueError("Not found {} in include".format(split_hyperparameter[0]))

# check if component is present in exclude
if exclude is not None and update.node_name in exclude.keys():
if split_hyperparameter[0] in exclude[update.node_name]:
raise ValueError("Found {} in exclude".format(split_hyperparameter[0]))

components = node.get_components()
if split_hyperparameter[0] not in components.keys():
# if hyperparameter is __choice__, check if
# the components in the value range of search space update
# are in components of the choice module
if split_hyperparameter[0] == '__choice__':
for choice in update.value_range:
if include is not None and update.node_name in include.keys():
if choice not in include[update.node_name]:
raise ValueError("Not found {} in include".format(choice))
if exclude is not None and update.node_name in exclude.keys():
if choice in exclude[update.node_name]:
raise ValueError("Found {} in exclude".format(choice))
if choice not in components.keys():
raise ValueError("Unknown hyperparameter for choice {}. "
"Expected update hyperparameter "
"to be in {} got {}".format(node.__class__.__name__,
components.keys(), choice))
# check if the component whose hyperparameter
# needs to be updated is in components of the
# choice module
elif split_hyperparameter[0] not in components.keys():
raise ValueError("Unknown hyperparameter for choice {}. "
"Expected update hyperparameter "
"to be in {} got {}".format(node.__class__.__name__,
components.keys(), split_hyperparameter[0]))
else:
# check if hyperparameter is in the search space of the component
component = components[split_hyperparameter[0]]
if split_hyperparameter[1] not in component.get_hyperparameter_search_space(
dataset_properties=self.dataset_properties):
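
To see how these checks compose, here is a minimal, self-contained sketch of the __choice__ validation path. SearchSpaceUpdate is a stripped-down stand-in for HyperparameterSearchSpaceUpdate, and the component names in the usage (PowerTransformer, Nystroem) are made up for illustration:

from dataclasses import dataclass
from typing import Dict, List, Optional, Sequence


@dataclass
class SearchSpaceUpdate:
    # Stand-in for HyperparameterSearchSpaceUpdate: a node name, a
    # (possibly colon-prefixed) hyperparameter name and a value range.
    node_name: str
    hyperparameter: str
    value_range: Sequence


def check_choice_update(update: SearchSpaceUpdate,
                        components: Dict[str, object],
                        include: Optional[Dict[str, List[str]]] = None,
                        exclude: Optional[Dict[str, List[str]]] = None) -> None:
    # Mirrors the diff: every requested choice must pass the include
    # filter, must not be excluded, and must be an available component.
    if update.hyperparameter.split(':')[0] != '__choice__':
        return
    for choice in update.value_range:
        if include is not None and update.node_name in include:
            if choice not in include[update.node_name]:
                raise ValueError("Could not find {} in include".format(choice))
        if exclude is not None and update.node_name in exclude:
            if choice in exclude[update.node_name]:
                raise ValueError("Found {} in exclude".format(choice))
        if choice not in components:
            raise ValueError("Unknown component choice {}, expected one "
                             "of {}".format(choice, list(components.keys())))


# PowerTransformer passes; Nystroem would raise because it is excluded.
check_choice_update(
    SearchSpaceUpdate('feature_preprocessor', '__choice__', ['PowerTransformer']),
    components={'PowerTransformer': object(), 'Nystroem': object()},
    exclude={'feature_preprocessor': ['Nystroem']},
)
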
48 changes: 25 additions & 23 deletions autoPyTorch/pipeline/components/base_choice.py
@@ -1,6 +1,7 @@
import re
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional

from ConfigSpace.configuration_space import Configuration, ConfigurationSpace

@@ -9,7 +10,8 @@
from sklearn.utils import check_random_state

from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent
from autoPyTorch.utils.common import FitRequirement
from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate


class autoPyTorchChoice(object):
@@ -49,7 +51,7 @@ def __init__(self,
# self.set_hyperparameters(self.configuration)
self.choice: Optional[autoPyTorchComponent] = None

self._cs_updates: Dict[str, Tuple] = dict()
self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()

def get_fit_requirements(self) -> Optional[List[FitRequirement]]:
if self.choice is not None:
@@ -247,35 +249,35 @@ def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None:
"""
assert isinstance(dataset_properties, dict), "dataset_properties must be a dictionary"

def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
default_value: Union[int, float, str], log: bool = False) -> None:
"""Allows the user to update a hyperparameter
def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
"""
Applies search space update to the class

Arguments:
name {string} -- name of hyperparameter
new_value_range {List[?] -- value range can be either lower, upper or a list of possible conditionals
log {bool} -- is hyperparameter logscale
Args:
hyperparameter_search_space_update (HyperparameterSearchSpaceUpdate):
Search Space update for the current autoPyTorchChoice module
"""

if len(new_value_range) == 0:
raise ValueError("The new value range needs at least one value")
self._cs_updates[name] = tuple([new_value_range, default_value, log])
self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update

def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, HyperparameterSearchSpace]:
"""Get the search space updates with the given prefix

Keyword Arguments:
prefix {str} -- Only return search space updates with given prefix (default: {None})
Args:
prefix (str):
Only return search space updates with given prefix (default: {None})

Returns:
dict -- Mapping of search space updates. Keys don't contain the prefix.
Dict[str, HyperparameterSearchSpace]:
Mapping of search space updates. Keys don't contain the prefix.
"""
if prefix is None:
return self._cs_updates
result: Dict[str, Tuple] = dict()

# iterate over all search space updates of this node and filter the ones out, that have the given prefix
result: Dict[str, HyperparameterSearchSpace] = dict()

# iterate over all search space updates of this node and keep the ones that have the given prefix
for key in self._cs_updates.keys():
if key.startswith(prefix):
result[key[len(prefix) + 1:]] = self._cs_updates[key]
if prefix is None:
result[key] = self._cs_updates[key].get_search_space()
elif re.search(f'^{prefix}', key) is not None:
result[key[len(prefix) + 1:]] = self._cs_updates[key].get_search_space(remove_prefix=prefix)
return result
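
The prefix handling is the subtle part of the retrieval above: keys are matched with a regex anchored at the start of the key, and len(prefix) + 1 characters are stripped so that the prefix disappears together with its ':' separator. A small sketch under that reading, with plain strings standing in for the HyperparameterSearchSpace objects and hypothetical adam/sgd component keys:

import re
from typing import Dict, Optional


def filter_updates(cs_updates: Dict[str, str],
                   prefix: Optional[str] = None) -> Dict[str, str]:
    # Without a prefix, every stored update is returned under its full
    # key; with a prefix, only matching keys survive and the prefix
    # plus the ':' separator (len(prefix) + 1 characters) is stripped.
    result: Dict[str, str] = dict()
    for key, value in cs_updates.items():
        if prefix is None:
            result[key] = value
        elif re.search(f'^{prefix}', key) is not None:
            result[key[len(prefix) + 1:]] = value
    return result


updates = {'adam:lr': 'lr_space', 'adam:beta1': 'beta1_space', 'sgd:lr': 'sgd_lr_space'}
assert filter_updates(updates, prefix='adam') == {'lr': 'lr_space', 'beta1': 'beta1_space'}
assert filter_updates(updates) == updates
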
41 changes: 16 additions & 25 deletions autoPyTorch/pipeline/components/base_component.py
@@ -4,19 +4,20 @@
import sys
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional

from ConfigSpace.configuration_space import Configuration, ConfigurationSpace

from sklearn.base import BaseEstimator

from autoPyTorch.utils.common import FitRequirement
from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate


def find_components(
package: str,
directory: str,
base_class: BaseEstimator
package: str,
directory: str,
base_class: BaseEstimator
) -> Dict[str, BaseEstimator]:
"""Utility to find component on a given directory,
that inherit from base_class
@@ -34,8 +35,7 @@ def find_components(
module = importlib.import_module(full_module_name)

for member_name, obj in inspect.getmembers(module):
if inspect.isclass(obj) and issubclass(obj, base_class) and \
obj != base_class:
if inspect.isclass(obj) and issubclass(obj, base_class) and obj != base_class:
# TODO test if the obj implements the interface
# Keep in mind that this only instantiates the ensemble_wrapper,
# but not the real target classifier
@@ -96,7 +96,7 @@ class autoPyTorchComponent(BaseEstimator):
def __init__(self) -> None:
super().__init__()
self._fit_requirements: List[FitRequirement] = list()
self._cs_updates: Dict[str, Tuple] = dict()
self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()

@classmethod
def get_required_properties(cls) -> Optional[List[str]]:
@@ -140,7 +140,7 @@ def get_properties(dataset_properties: Optional[Dict[str, str]] = None

@staticmethod
def get_hyperparameter_search_space(
dataset_properties: Optional[Dict[str, str]] = None
dataset_properties: Optional[Dict[str, str]] = None
) -> ConfigurationSpace:
"""Return the configuration space of this classification algorithm.

@@ -253,8 +253,7 @@ def __str__(self) -> str:
name = self.get_properties()['name']
return "autoPyTorch.pipeline %s" % name

def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
default_value: Union[int, float, str], log: bool = False) -> None:
def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
"""Allows the user to update a hyperparameter

Arguments:
@@ -263,26 +262,18 @@ def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
log {bool} -- is hyperparameter logscale
"""

if len(new_value_range) == 0:
raise ValueError("The new value range needs at least one value")
self._cs_updates[name] = tuple([new_value_range, default_value, log])
self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update

def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
"""Get the search space updates with the given prefix

Keyword Arguments:
prefix {str} -- Only return search space updates with given prefix (default: {None})
def _get_search_space_updates(self) -> Dict[str, HyperparameterSearchSpace]:
"""Get the search space updates

Returns:
dict -- Mapping of search space updates. Keys don't contain the prefix.
"""
if prefix is None:
return self._cs_updates
result: Dict[str, Tuple] = dict()

result: Dict[str, HyperparameterSearchSpace] = dict()

# iterate over all search space updates of this node and resolve each to its search space
for key in self._cs_updates.keys():
if key.startswith(prefix):
# different for autopytorch component as the hyperparameter
result[key[len(prefix):]] = self._cs_updates[key]
result[key] = self._cs_updates[key].get_search_space()
return result
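
In autoPyTorchComponent, by contrast, updates are keyed directly by hyperparameter name, so retrieval needs no prefix handling at all: each stored update is simply resolved to its search space. A rough sketch of that store-then-resolve round trip; the Update class is a hypothetical stand-in whose get_search_space() just packages its fields, where the real HyperparameterSearchSpaceUpdate returns a HyperparameterSearchSpace:

from dataclasses import dataclass
from typing import Dict, Sequence, Union


@dataclass
class Update:
    # Hypothetical stand-in for HyperparameterSearchSpaceUpdate.
    hyperparameter: str
    value_range: Sequence
    default_value: Union[int, float, str]
    log: bool = False

    def get_search_space(self) -> dict:
        # A plain dict is enough to show the shape of the round trip.
        return {'name': self.hyperparameter, 'value_range': self.value_range,
                'default_value': self.default_value, 'log': self.log}


class Component:
    def __init__(self) -> None:
        self._cs_updates: Dict[str, Update] = dict()

    def _apply_search_space_update(self, update: Update) -> None:
        # Keyed by the bare hyperparameter name, exactly as in the diff.
        self._cs_updates[update.hyperparameter] = update

    def _get_search_space_updates(self) -> Dict[str, dict]:
        return {key: upd.get_search_space() for key, upd in self._cs_updates.items()}


comp = Component()
comp._apply_search_space_update(Update('num_layers', (1, 4), 2))
assert comp._get_search_space_updates()['num_layers']['default_value'] == 2
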
@@ -76,10 +76,21 @@ def get_hyperparameter_search_space(self,
default = default_
break

preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)

updates = self._get_search_space_updates()
if '__choice__' in updates.keys():
choice_hyperparameter = updates['__choice__']
if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
raise ValueError("Expected given update for {} to have "
"choices in {} got {}".format(self.__class__.__name__,
available_preprocessors,
choice_hyperparameter.value_range))
preprocessor = CSH.CategoricalHyperparameter('__choice__',
choice_hyperparameter.value_range,
default_value=choice_hyperparameter.default_value)
else:
preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)
cs.add_hyperparameter(preprocessor)

# add only child hyperparameters of early_preprocessor choices
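
Both choice modules now follow the same pattern for a __choice__ update: validate the requested value range as a subset of the available components, then build the categorical hyperparameter from the update instead of from the full component list. A condensed sketch of just that branch, assuming ConfigSpace is installed; the scaler names in the usage are invented, and CSH is the usual ConfigSpace.hyperparameters alias:

from collections import namedtuple

import ConfigSpace.hyperparameters as CSH


def build_choice_hyperparameter(available, updates, default):
    # 'updates' maps hyperparameter names to objects exposing
    # .value_range and .default_value, mirroring _get_search_space_updates().
    if '__choice__' in updates:
        update = updates['__choice__']
        if not set(update.value_range).issubset(available):
            raise ValueError("Expected choices in {} got {}".format(
                sorted(available), update.value_range))
        return CSH.CategoricalHyperparameter('__choice__',
                                             update.value_range,
                                             default_value=update.default_value)
    return CSH.CategoricalHyperparameter('__choice__',
                                         list(available),
                                         default_value=default)


Update = namedtuple('Update', ['value_range', 'default_value'])
hp = build_choice_hyperparameter(
    available={'NoScaler', 'MinMaxScaler'},
    updates={'__choice__': Update(['MinMaxScaler'], 'MinMaxScaler')},
    default='NoScaler')
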
@@ -75,19 +75,37 @@ def get_hyperparameter_search_space(self,
default = default_
break

# add only no encoder to choice hyperparameters in case the dataset is only numerical
if len(dataset_properties['categorical_columns']) == 0:
default = 'NoEncoder'
if include is not None and default not in include:
raise ValueError("Provided {} in include, however, the dataset "
"is incompatible with it".format(include))
updates = self._get_search_space_updates()
if '__choice__' in updates.keys():
choice_hyperparameter = updates['__choice__']
if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
raise ValueError("Expected given update for {} to have "
"choices in {} got {}".format(self.__class__.__name__,
available_preprocessors,
choice_hyperparameter.value_range))
if len(dataset_properties['categorical_columns']) == 0:
assert len(choice_hyperparameter.value_range) == 1
assert 'NoEncoder' in choice_hyperparameter.value_range, \
"Provided {} in choices, however, the dataset " \
"is incompatible with it".format(choice_hyperparameter.value_range)

preprocessor = CSH.CategoricalHyperparameter('__choice__',
['NoEncoder'],
default_value=default)
choice_hyperparameter.value_range,
default_value=choice_hyperparameter.default_value)
else:
preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)
# add only no encoder to choice hyperparameters in case the dataset is only numerical
if len(dataset_properties['categorical_columns']) == 0:
default = 'NoEncoder'
if include is not None and default not in include:
raise ValueError("Provided {} in include, however, the dataset "
"is incompatible with it".format(include))
preprocessor = CSH.CategoricalHyperparameter('__choice__',
['NoEncoder'],
default_value=default)
else:
preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)

cs.add_hyperparameter(preprocessor)

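
The encoder choice adds one wrinkle on top of that shared pattern: with no categorical columns, NoEncoder is the only valid choice, so a user-supplied __choice__ update must collapse to exactly that single value. A ConfigSpace-free sketch of the guard; OneHotEncoder in the usage is an assumed encoder name, and the fallback list is illustrative only:

from typing import List, Optional, Sequence


def validate_encoder_choices(n_categorical_columns: int,
                             requested: Optional[Sequence[str]] = None) -> List[str]:
    # Mirrors the diff: on a purely numerical dataset the only legal
    # encoder is 'NoEncoder', whether the choices come from a
    # __choice__ update (requested) or from the fallback default.
    if n_categorical_columns == 0:
        if requested is not None:
            assert len(requested) == 1 and 'NoEncoder' in requested, \
                "Provided {} in choices, however, the dataset " \
                "is incompatible with it".format(requested)
        return ['NoEncoder']
    return list(requested) if requested is not None else ['NoEncoder', 'OneHotEncoder']


assert validate_encoder_choices(0) == ['NoEncoder']
assert validate_encoder_choices(3, ['OneHotEncoder']) == ['OneHotEncoder']
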