Hyperparameter Search Space updates now with constant and include ability #146

25 changes: 24 additions & 1 deletion autoPyTorch/pipeline/base_pipeline.py
@@ -398,24 +398,47 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
                 raise ValueError("Unknown node name. Expected update node name to be in {} "
                                  "got {}".format(self.named_steps.keys(), update.node_name))
             node = self.named_steps[update.node_name]
             # if node is a choice module
             if hasattr(node, 'get_components'):
                 split_hyperparameter = update.hyperparameter.split(':')
+
+                # check if component is not present in include
+                if include is not None and update.node_name in include.keys():
+                    if split_hyperparameter[0] not in include[update.node_name]:
+                        raise ValueError("Not found {} in include".format(split_hyperparameter[0]))
+
+                # check if component is present in exclude
+                if exclude is not None and update.node_name in exclude.keys():
+                    if split_hyperparameter[0] in exclude[update.node_name]:
+                        raise ValueError("Found {} in exclude".format(split_hyperparameter[0]))
+
                 components = node.get_components()
-                if split_hyperparameter[0] not in components.keys():
+                # if hyperparameter is __choice__, check if
+                # the components in the value range of search space update
+                # are in components of the choice module
+                if split_hyperparameter[0] == '__choice__':
+                    for choice in update.value_range:
+                        if include is not None and update.node_name in include.keys():
+                            if choice not in include[update.node_name]:
+                                raise ValueError("Not found {} in include".format(choice))
+                        if exclude is not None and update.node_name in exclude.keys():
+                            if choice in exclude[update.node_name]:
+                                raise ValueError("Found {} in exclude".format(choice))
+                        if choice not in components.keys():
+                            raise ValueError("Unknown hyperparameter for choice {}. "
+                                             "Expected update hyperparameter "
+                                             "to be in {} got {}".format(node.__class__.__name__,
+                                                                         components.keys(), choice))
+                # check if the component whose hyperparameter
+                # needs to be updated is in components of the
+                # choice module
+                elif split_hyperparameter[0] not in components.keys():
                     raise ValueError("Unknown hyperparameter for choice {}. "
                                      "Expected update hyperparameter "
                                      "to be in {} got {}".format(node.__class__.__name__,
                                                                  components.keys(), split_hyperparameter[0]))
                 else:
                     # check if hyperparameter is in the search space of the component
                     component = components[split_hyperparameter[0]]
                     if split_hyperparameter[1] not in component. \
                             get_hyperparameter_search_space(dataset_properties=self.dataset_properties):
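
For context, a rough sketch of how an update flows into this validation. The HyperparameterSearchSpaceUpdates container and its append() signature are part of autoPyTorch; the node name 'encoder' and the hyperparameter 'OneHotEncoder:use_dense' are made-up illustrations:

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

updates = HyperparameterSearchSpaceUpdates()
# hypothetical update for one component of a choice node
updates.append(node_name='encoder',
               hyperparameter='OneHotEncoder:use_dense',
               value_range=[True, False],
               default_value=True)

# _check_search_space_updates splits the hyperparameter on ':' and checks
# the component part against include/exclude and the node's components, so
# e.g. include={'encoder': ['NoEncoder']} rejects this update with
# ValueError("Not found OneHotEncoder in include").
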
36 changes: 20 additions & 16 deletions autoPyTorch/pipeline/components/base_choice.py
@@ -1,6 +1,7 @@
+import re
 import warnings
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional
 
 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace

@@ -9,7 +10,8 @@
 from sklearn.utils import check_random_state
 
 from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent
-from autoPyTorch.utils.common import FitRequirement
+from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
+from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate


 class autoPyTorchChoice(object):
@@ -49,7 +51,7 @@ def __init__(self,
         # self.set_hyperparameters(self.configuration)
         self.choice: Optional[autoPyTorchComponent] = None
 
-        self._cs_updates: Dict[str, Tuple] = dict()
+        self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()
 
     def get_fit_requirements(self) -> Optional[List[FitRequirement]]:
         if self.choice is not None:
@@ -247,35 +249,37 @@ def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None:
         """
         assert isinstance(dataset_properties, dict), "dataset_properties must be a dictionary"
 
-    def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
-                                   default_value: Union[int, float, str], log: bool = False) -> None:
+    def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
         """Allows the user to update a hyperparameter
[Review comment, Contributor]: I think this doc-string can be improved: it really updates the hyperparameter search space, I think.


-        Arguments:
+        Args:
             name {string} -- name of hyperparameter
             new_value_range {List[?] -- value range can be either lower, upper or a list of possible conditionals
             log {bool} -- is hyperparameter logscale
         """
 
-        if len(new_value_range) == 0:
-            raise ValueError("The new value range needs at least one value")
-        self._cs_updates[name] = tuple([new_value_range, default_value, log])
+        self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update

-    def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
+    def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, HyperparameterSearchSpace]:
         """Get the search space updates with the given prefix
 
-        Keyword Arguments:
+        Args:
             prefix {str} -- Only return search space updates with given prefix (default: {None})
 
         Returns:
             dict -- Mapping of search space updates. Keys don't contain the prefix.
         """
+        RETURN_ALL = False
         if prefix is None:
-            return self._cs_updates
-        result: Dict[str, Tuple] = dict()
+            RETURN_ALL = True
+
+        result: Dict[str, HyperparameterSearchSpace] = dict()
 
-        # iterate over all search space updates of this node and filter the ones out, that have the given prefix
+        # iterate over all search space updates of this node and keep the ones that have the given prefix
         for key in self._cs_updates.keys():
-            if key.startswith(prefix):
-                result[key[len(prefix) + 1:]] = self._cs_updates[key]
+            if RETURN_ALL:
+                result[key] = self._cs_updates[key].get_search_space()
+            elif re.search(f'^{prefix}', key) is not None:
+                assert isinstance(prefix, str)
+                result[key[len(prefix) + 1:]] = self._cs_updates[key].get_search_space(remove_prefix=prefix)
         return result
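
To illustrate the two retrieval modes of _get_search_space_updates, a sketch (not runnable as-is: `choice` stands for any instantiated choice module, and the backbone and hyperparameter names are invented):

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate

update = HyperparameterSearchSpaceUpdate(node_name='network_backbone',
                                         hyperparameter='SomeBackbone:num_layers',
                                         value_range=[2, 8],
                                         default_value=4)
choice._apply_search_space_update(update)

choice._get_search_space_updates()
# -> {'SomeBackbone:num_layers': HyperparameterSearchSpace(...)}, full keys

choice._get_search_space_updates(prefix='SomeBackbone')
# -> {'num_layers': HyperparameterSearchSpace(...)}; len(prefix) + 1 strips
# the prefix plus the ':' separator from the key, and remove_prefix strips
# it from the hyperparameter name inside the returned object as well.
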
41 changes: 16 additions & 25 deletions autoPyTorch/pipeline/components/base_component.py
@@ -4,19 +4,20 @@
 import sys
 import warnings
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional
 
 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
 
 from sklearn.base import BaseEstimator
 
-from autoPyTorch.utils.common import FitRequirement
+from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
+from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate
 
 
 def find_components(
-    package: str,
-    directory: str,
-    base_class: BaseEstimator
+        package: str,
+        directory: str,
+        base_class: BaseEstimator
 ) -> Dict[str, BaseEstimator]:
     """Utility to find component on a given directory,
     that inherit from base_class
@@ -34,8 +35,7 @@ def find_components(
         module = importlib.import_module(full_module_name)
 
         for member_name, obj in inspect.getmembers(module):
-            if inspect.isclass(obj) and issubclass(obj, base_class) and \
-                    obj != base_class:
+            if inspect.isclass(obj) and issubclass(obj, base_class) and obj != base_class:
                 # TODO test if the obj implements the interface
                 # Keep in mind that this only instantiates the ensemble_wrapper,
                 # but not the real target classifier
@@ -96,7 +96,7 @@ class autoPyTorchComponent(BaseEstimator):
     def __init__(self) -> None:
         super().__init__()
         self._fit_requirements: List[FitRequirement] = list()
-        self._cs_updates: Dict[str, Tuple] = dict()
+        self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()
 
     @classmethod
     def get_required_properties(cls) -> Optional[List[str]]:
@@ -140,7 +140,7 @@ def get_properties(dataset_properties: Optional[Dict[str, str]] = None
 
     @staticmethod
     def get_hyperparameter_search_space(
-        dataset_properties: Optional[Dict[str, str]] = None
+            dataset_properties: Optional[Dict[str, str]] = None
     ) -> ConfigurationSpace:
         """Return the configuration space of this classification algorithm.

@@ -253,8 +253,7 @@ def __str__(self) -> str:
         name = self.get_properties()['name']
         return "autoPyTorch.pipeline %s" % name
 
-    def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
-                                   default_value: Union[int, float, str], log: bool = False) -> None:
+    def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
         """Allows the user to update a hyperparameter
 
         Arguments:
@@ -263,26 +262,18 @@ def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tup
             log {bool} -- is hyperparameter logscale
         """
 
-        if len(new_value_range) == 0:
-            raise ValueError("The new value range needs at least one value")
-        self._cs_updates[name] = tuple([new_value_range, default_value, log])
+        self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update

-    def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
-        """Get the search space updates with the given prefix
-
-        Keyword Arguments:
-            prefix {str} -- Only return search space updates with given prefix (default: {None})
+    def _get_search_space_updates(self) -> Dict[str, HyperparameterSearchSpace]:
+        """Get the search space updates
 
         Returns:
             dict -- Mapping of search space updates. Keys don't contain the prefix.
         """
-        if prefix is None:
-            return self._cs_updates
-        result: Dict[str, Tuple] = dict()
+
+        result: Dict[str, HyperparameterSearchSpace] = dict()
 
         # iterate over all search space updates of this node and keep the ones that have the given prefix
         for key in self._cs_updates.keys():
-            if key.startswith(prefix):
-                # different for autopytorch component as the hyperparameter
-                result[key[len(prefix):]] = self._cs_updates[key]
+            result[key] = self._cs_updates[key].get_search_space()
         return result
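
The component-level variant no longer filters by prefix: it returns every stored update, converted on the fly. A minimal sketch, assuming some concrete autoPyTorchComponent subclass instance `component`; the node and hyperparameter names are invented:

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate

update = HyperparameterSearchSpaceUpdate(node_name='network_head',
                                         hyperparameter='num_units',
                                         value_range=[16, 512],
                                         default_value=64,
                                         log=True)
component._apply_search_space_update(update)

# the raw update object is stored under its hyperparameter name ...
assert component._cs_updates['num_units'] is update
# ... and only turned into a HyperparameterSearchSpace on retrieval
component._get_search_space_updates()
# -> {'num_units': HyperparameterSearchSpace(hyperparameter='num_units',
#                                            value_range=[16, 512],
#                                            default_value=64, log=True)}
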
@@ -76,10 +76,21 @@ def get_hyperparameter_search_space(self,
                 default = default_
                 break
 
-        preprocessor = CSH.CategoricalHyperparameter('__choice__',
-                                                     list(available_preprocessors.keys()),
-                                                     default_value=default)
+        updates = self._get_search_space_updates()
+        if '__choice__' in updates.keys():
+            choice_hyperparameter = updates['__choice__']
+            if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
+                raise ValueError("Expected given update for {} to have "
+                                 "choices in {} got {}".format(self.__class__.__name__,
+                                                               available_preprocessors,
+                                                               choice_hyperparameter.value_range))
+            preprocessor = CSH.CategoricalHyperparameter('__choice__',
+                                                         choice_hyperparameter.value_range,
+                                                         default_value=choice_hyperparameter.default_value)
+        else:
+            preprocessor = CSH.CategoricalHyperparameter('__choice__',
+                                                         list(available_preprocessors.keys()),
+                                                         default_value=default)
         cs.add_hyperparameter(preprocessor)
 
         # add only child hyperparameters of early_preprocessor choices
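
What the new branch buys: the '__choice__' categorical of this node can now be narrowed through a search space update rather than include/exclude. A sketch with invented node and preprocessor names (the real choices depend on the installed components):

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

updates = HyperparameterSearchSpaceUpdates()
updates.append(node_name='feature_preprocessor',  # hypothetical node name
               hyperparameter='__choice__',
               value_range=['PowerTransformer', 'Nystroem'],
               default_value='PowerTransformer')

# get_hyperparameter_search_space() then builds
# CategoricalHyperparameter('__choice__', ['PowerTransformer', 'Nystroem'], ...)
# and any value outside available_preprocessors fails fast with the
# "Expected given update for ... to have choices in ..." ValueError.
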
@@ -75,19 +75,37 @@ def get_hyperparameter_search_space(self,
                 default = default_
                 break
 
-        # add only no encoder to choice hyperparameters in case the dataset is only numerical
-        if len(dataset_properties['categorical_columns']) == 0:
-            default = 'NoEncoder'
-            if include is not None and default not in include:
-                raise ValueError("Provided {} in include, however, the dataset "
-                                 "is incompatible with it".format(include))
+        updates = self._get_search_space_updates()
+        if '__choice__' in updates.keys():
+            choice_hyperparameter = updates['__choice__']
+            if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
+                raise ValueError("Expected given update for {} to have "
+                                 "choices in {} got {}".format(self.__class__.__name__,
+                                                               available_preprocessors,
+                                                               choice_hyperparameter.value_range))
+            if len(dataset_properties['categorical_columns']) == 0:
+                assert len(choice_hyperparameter.value_range) == 1
+                assert 'NoEncoder' in choice_hyperparameter.value_range, \
+                    "Provided {} in choices, however, the dataset " \
+                    "is incompatible with it".format(choice_hyperparameter.value_range)
 
             preprocessor = CSH.CategoricalHyperparameter('__choice__',
-                                                         ['NoEncoder'],
-                                                         default_value=default)
+                                                         choice_hyperparameter.value_range,
+                                                         default_value=choice_hyperparameter.default_value)
         else:
-            preprocessor = CSH.CategoricalHyperparameter('__choice__',
-                                                         list(available_preprocessors.keys()),
-                                                         default_value=default)
+            # add only no encoder to choice hyperparameters in case the dataset is only numerical
+            if len(dataset_properties['categorical_columns']) == 0:
+                default = 'NoEncoder'
+                if include is not None and default not in include:
+                    raise ValueError("Provided {} in include, however, the dataset "
+                                     "is incompatible with it".format(include))
+                preprocessor = CSH.CategoricalHyperparameter('__choice__',
+                                                             ['NoEncoder'],
+                                                             default_value=default)
+            else:
+                preprocessor = CSH.CategoricalHyperparameter('__choice__',
+                                                             list(available_preprocessors.keys()),
+                                                             default_value=default)
 
         cs.add_hyperparameter(preprocessor)

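
A consequence of the new assertions, sketched with an assumed node name of 'encoder': on a dataset with no categorical columns, a '__choice__' update for this node must contain exactly one value, 'NoEncoder':

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

updates = HyperparameterSearchSpaceUpdates()
# valid on a purely numerical dataset
updates.append(node_name='encoder',
               hyperparameter='__choice__',
               value_range=['NoEncoder'],
               default_value='NoEncoder')

# value_range=['OneHotEncoder'], or any list longer than one element,
# would instead trip the asserts above, since a dataset without
# categorical columns is incompatible with any real encoder.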