diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
index 40f10317f..344556dd3 100644
--- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
+++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
@@ -45,8 +45,17 @@ def __init__(self,
         An object for tracking when to stop the network training.
         It handles epoch based criteria as well as training based criteria.

-        It also allows to define a 'epoch_or_time' budget type, which means,
-        the first of them both which is exhausted, is honored
+        It also allows defining an 'epoch_or_time' budget type, which means that training stops as soon as
+        the first of the two budgets is exhausted.
+
+        Args:
+            budget_type (str):
+                Type of budget to be used when fitting the pipeline.
+                Possible values are 'epochs', 'runtime', or 'epoch_or_time'
+            max_epochs (Optional[int], default=None):
+                Maximum number of epochs to train the pipeline for
+            max_runtime (Optional[int], default=None):
+                Maximum number of seconds to train the pipeline for
         """
         self.start_time = time.time()
         self.budget_type = budget_type
@@ -54,8 +63,19 @@ def __init__(self,
         self.max_runtime = max_runtime

     def is_max_epoch_reached(self, epoch: int) -> bool:
+        """
+        For budget type 'epochs' or 'epoch_or_time', return True if the maximum number of epochs is reached.
+
+        Args:
+            epoch (int):
+                the current epoch
-        # Make None a method to run without this constrain
+        Returns:
+            bool:
+                True if the current epoch is larger than the maximum epochs, False otherwise.
+                Additionally, returns False if the run is not constrained by a maximum number of epochs.
+        """
+        # Make None a method to run without this constraint
         if self.max_epochs is None:
             return False
         if self.budget_type in ['epochs', 'epoch_or_time'] and epoch > self.max_epochs:
@@ -63,7 +83,15 @@ def is_max_epoch_reached(self, epoch: int) -> bool:
         return False

     def is_max_time_reached(self) -> bool:
-        # Make None a method to run without this constrain
+        """
+        For budget type 'runtime' or 'epoch_or_time', return True if the maximum runtime is reached.
+
+        Returns:
+            bool:
+                True if the maximum runtime is reached, False otherwise.
+                Additionally, returns False if the run is not constrained by a maximum runtime.
+        """
+        # Make None a method to run without this constraint
         if self.max_runtime is None:
             return False
         elapsed_time = time.time() - self.start_time
@@ -78,14 +106,22 @@ def __init__(
         total_parameter_count: float,
         trainable_parameter_count: float,
         optimize_metric: Optional[str] = None,
-    ):
+    ) -> None:
         """
         A useful object to track performance per epoch.

-        It allows to track train, validation and test information not only for
-        debug, but for research purposes (Like understanding overfit).
+        It allows tracking train, validation and test information not only for debugging, but also for research
+        purposes (like understanding overfitting).

         It does so by tracking a metric/loss at the end of each epoch.
+
+        Args:
+            total_parameter_count (float):
+                the total number of parameters of the model
+            trainable_parameter_count (float):
+                the number of trainable parameters (those being optimized)
+            optimize_metric (Optional[str], default=None):
+                name of the metric that is used to evaluate a pipeline.
         """
         self.performance_tracker: Dict[str, Dict] = {
             'start_time': {},
@@ -121,8 +157,30 @@ def add_performance(self,
                         test_loss: Optional[float] = None,
                         ) -> None:
         """
-        Tracks performance information about the run, useful for
-        plotting individual runs
+        Tracks performance information about the run, useful for plotting individual runs.
+
+        Args:
+            epoch (int):
+                the current epoch
+            start_time (float):
+                timestamp at the beginning of the current epoch
+            end_time (float):
+                timestamp when gathering the information after the current epoch
+            train_loss (float):
+                the training loss
+            train_metrics (Dict[str, float]):
+                training scores for each desired metric
+            val_metrics (Dict[str, float]):
+                validation scores for each desired metric
+            test_metrics (Dict[str, float]):
+                test scores for each desired metric
+            val_loss (Optional[float], default=None):
+                the validation loss
+            test_loss (Optional[float], default=None):
+                the test loss
+
+        Returns:
+            None
         """
         self.performance_tracker['train_loss'][epoch] = train_loss
         self.performance_tracker['val_loss'][epoch] = val_loss
@@ -134,6 +192,18 @@ def add_performance(self,
         self.performance_tracker['test_metrics'][epoch] = test_metrics

     def get_best_epoch(self, split_type: str = 'val') -> int:
+        """
+        Get the epoch with the best metric.
+
+        Args:
+            split_type (str, default='val'):
+                Which split's metric to consider.
+                Possible values are 'train' or 'val'
+
+        Returns:
+            int:
+                the epoch with the best metric
+        """
         # If we compute for optimization, prefer the performance
         # metric to the loss
         if self.optimize_metric is not None:
@@ -159,6 +229,13 @@ def get_best_epoch(self, split_type: str = 'val') -> int:
         )) + 1  # Epochs start at 1

     def get_last_epoch(self) -> int:
+        """
+        Get the last epoch.
+
+        Returns:
+            int:
+                the last epoch
+        """
         if 'train_loss' not in self.performance_tracker:
             return 0
         else:
@@ -170,7 +247,8 @@ def repr_last_epoch(self) -> str:
         performance

         Returns:
-            str: A nice representation of the last epoch
+            str:
+                A nice representation of the last epoch
         """
         last_epoch = len(self.performance_tracker['train_loss'])
         string = "\n"
@@ -202,7 +280,8 @@ def is_empty(self) -> bool:
         Checks if the object is empty or not

         Returns:
-            bool
+            bool:
+                True if the object is empty, False otherwise
         """
         # if train_loss is empty, we can be sure that RunSummary is empty.
         return not bool(self.performance_tracker['train_loss'])
@@ -210,22 +289,34 @@ def is_empty(self) -> bool:

 class BaseTrainerComponent(autoPyTorchTrainingComponent):
     """
-    Base class for training
+    Base class for training.
+
     Args:
-        weighted_loss (int, default=0): In case for classification, whether to weight
-            the loss function according to the distribution of classes in the target
-        use_stochastic_weight_averaging (bool, default=True): whether to use stochastic
-            weight averaging. Stochastic weight averaging is a simple average of
-            multiple points(model parameters) along the trajectory of SGD. SWA
-            has been proposed in
+        weighted_loss (int, default=0):
+            In case of classification, whether to weight the loss function according to the distribution of classes
+            in the target
+        use_stochastic_weight_averaging (bool, default=True):
+            whether to use stochastic weight averaging. Stochastic weight averaging is a simple average of
+            multiple points (model parameters) along the trajectory of SGD.
+            SWA has been proposed in
             [Averaging Weights Leads to Wider Optima and Better Generalization](https://arxiv.org/abs/1803.05407)
-        use_snapshot_ensemble (bool, default=True): whether to use snapshot
-            ensemble
-        se_lastk (int, default=3): Number of snapshots of the network to maintain
-        use_lookahead_optimizer (bool, default=True): whether to use lookahead
-            optimizer
-        random_state:
-        **lookahead_config:
+        use_snapshot_ensemble (bool, default=True):
+            whether to use snapshot ensemble
+        se_lastk (int, default=3):
+            Number of snapshots of the network to maintain
+        use_lookahead_optimizer (bool, default=True):
+            whether to use lookahead optimizer
+        random_state (Optional[np.random.RandomState]):
+            Object that contains a seed and allows for reproducible results
+        swa_model (Optional[torch.nn.Module], default=None):
+            Averaged model used for Stochastic Weight Averaging
+        model_snapshots (Optional[List[torch.nn.Module]], default=None):
+            List of model snapshots in case snapshot ensemble is used
+        **lookahead_config (Any):
+            keyword arguments for the lookahead optimizer including:
+            la_steps (int):
+                number of lookahead steps
+            la_alpha (float):
+                linear interpolation factor. 1.0 recovers the inner optimizer.
     """
     def __init__(self, weighted_loss: int = 0,
                  use_stochastic_weight_averaging: bool = True,
@@ -336,15 +427,21 @@ def prepare(

     def on_epoch_start(self, X: Dict[str, Any], epoch: int) -> None:
         """
-        Optional place holder for AutoPytorch Extensions.
+        Optional placeholder for AutoPyTorch Extensions.
+        A user can define what happens on every epoch start or every epoch end.

-        An user can define what happens on every epoch start or every epoch end.
+        Args:
+            X (Dict[str, Any]):
+                Dictionary with fitted parameters. It is a message passing mechanism, in which during a transform,
+                a component adds relevant information so that further stages can be properly fitted
+            epoch (int):
+                the current epoch
         """
         pass

     def _swa_update(self) -> None:
         """
-        perform swa model update
+        Perform Stochastic Weight Averaging model update
         """
         if self.swa_model is None:
             raise ValueError("SWA model cannot be none when stochastic weight averaging is enabled")
@@ -354,6 +451,7 @@ def _swa_update(self) -> None:
         """
         Add latest model or swa_model to model snapshot ensemble
+
         Args:
             epoch (int):
                 current epoch
@@ -373,9 +471,16 @@ def _se_update(self, epoch: int) -> None:

     def on_epoch_end(self, X: Dict[str, Any], epoch: int) -> bool:
         """
-        Optional place holder for AutoPytorch Extensions.
-        An user can define what happens on every epoch start or every epoch end.
-        If returns True, the training is stopped
+        Optional placeholder for AutoPyTorch Extensions.
+        A user can define what happens on every epoch start or every epoch end.
+        If it returns True, the training is stopped.
+
+        Args:
+            X (Dict[str, Any]):
+                Dictionary with fitted parameters. It is a message passing mechanism, in which during a transform,
+                a component adds relevant information so that further stages can be properly fitted
+            epoch (int):
+                the current epoch
         """

         if X['is_cyclic_scheduler']:
@@ -421,12 +526,18 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int,
         Train the model for a single epoch.

         Args:
-            train_loader (torch.utils.data.DataLoader): generator of features/label
-            epoch (int): The current epoch used solely for tracking purposes
+            train_loader (torch.utils.data.DataLoader):
+                generator of features/labels
+            epoch (int):
+                The current epoch used solely for tracking purposes
+            writer (Optional[SummaryWriter]):
+                Object to keep track of the training loss in an event file

         Returns:
-            float: training loss
-            Dict[str, float]: scores for each desired metric
+            float:
+                training loss
+            Dict[str, float]:
+                scores for each desired metric
         """

         loss_sum = 0.0
@@ -482,12 +593,16 @@ def train_step(self, data: torch.Tensor, targets: torch.Tensor) -> Tuple[float,
         Allows to train 1 step of gradient descent, given a batch of train/labels

         Args:
-            data (torch.Tensor): input features to the network
-            targets (torch.Tensor): ground truth to calculate loss
+            data (torch.Tensor):
+                input features to the network
+            targets (torch.Tensor):
+                ground truth to calculate loss

         Returns:
-            torch.Tensor: The predictions of the network
-            float: the loss incurred in the prediction
+            torch.Tensor:
+                The predictions of the network
+            float:
+                the loss incurred in the prediction
         """
         # prepare
         data = data.float().to(self.device)
@@ -513,12 +628,18 @@ def evaluate(self, test_loader: torch.utils.data.DataLoader, epoch: int,
         Evaluate the model in both metrics and criterion

         Args:
-            test_loader (torch.utils.data.DataLoader): generator of features/label
-            epoch (int): the current epoch for tracking purposes
+            test_loader (torch.utils.data.DataLoader):
+                generator of features/labels
+            epoch (int):
+                the current epoch for tracking purposes
+            writer (Optional[SummaryWriter]):
+                Object to keep track of the test loss in an event file

         Returns:
-            float: test loss
-            Dict[str, float]: scores for each desired metric
+            float:
+                test loss
+            Dict[str, float]:
+                scores for each desired metric
         """

         self.model.eval()
@@ -576,14 +697,15 @@ def get_class_weights(self, criterion: Type[torch.nn.Module], labels: Union[np.n
     def data_preparation(self, X: torch.Tensor, y: torch.Tensor,
                          ) -> Tuple[torch.Tensor, Dict[str, np.ndarray]]:
         """
-        Depending on the trainer choice, data fed to the network might be pre-processed
-        on a different way. That is, in standard training we provide the data to the
-        network as we receive it to the loader. Some regularization techniques, like mixup
-        alter the data.
+        Depending on the trainer choice, data fed to the network might be pre-processed in a different way. That is,
+        in standard training we provide the data to the network as we receive it from the loader. Some regularization
+        techniques, like mixup, alter the data.

         Args:
-            X (torch.Tensor): The batch training features
-            y (torch.Tensor): The batch training labels
+            X (torch.Tensor):
+                The batch training features
+            y (torch.Tensor):
+                The batch training labels

         Returns:
             torch.Tensor: that processes data
@@ -595,16 +717,21 @@ def data_preparation(self, X: torch.Tensor, y: torch.Tensor,
     def criterion_preparation(self, y_a: torch.Tensor, y_b: torch.Tensor = None, lam: float = 1.0
                               ) -> Callable:  # type: ignore
         """
-        Depending on the trainer choice, the criterion is not directly applied to the
-        traditional y_pred/y_ground_truth pairs, but rather it might have a slight transformation.
+        Depending on the trainer choice, the criterion is not directly applied to the traditional
+        y_pred/y_ground_truth pairs, but rather it might have a slight transformation.
         For example, in the case of mixup training, we need to account for the lambda mixup

         Args:
-            kwargs (Dict): an expanded dictionary with modifiers to the
-                criterion calculation
+            y_a (torch.Tensor):
+                the batch label of the first training example used by the trainer
+            y_b (torch.Tensor, default=None):
+                if applicable, the batch label of the second training example used by the trainer
+            lam (float):
+                interpolation coefficient used by the trainer (e.g. the mixup lambda)

         Returns:
-            Callable: a lambda function that contains the new criterion calculation recipe
+            Callable:
+                a lambda function that contains the new criterion calculation recipe
         """
         raise NotImplementedError()
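Reviewer note: the sketch below is a minimal usage example of the two helpers whose docstrings are extended above (BudgetTracker and RunSummary), meant only to show how the documented signatures fit together. It assumes the import path implied by the file header, a metric named 'accuracy', and placeholder loss/metric values instead of real training output.

import time

from autoPyTorch.pipeline.components.training.trainer.base_trainer import (
    BudgetTracker,
    RunSummary,
)

# Stop when either 10 epochs or 60 seconds are exhausted ('epoch_or_time' budget).
budget_tracker = BudgetTracker(budget_type='epoch_or_time', max_epochs=10, max_runtime=60)

# Track per-epoch performance; 'accuracy' is assumed to be the metric being optimized.
run_summary = RunSummary(
    total_parameter_count=1000.0,
    trainable_parameter_count=800.0,
    optimize_metric='accuracy',
)

epoch = 1
while not (budget_tracker.is_max_epoch_reached(epoch) or budget_tracker.is_max_time_reached()):
    start_time = time.time()
    # ... one epoch of training and evaluation would happen here ...
    run_summary.add_performance(
        epoch=epoch,
        start_time=start_time,
        end_time=time.time(),
        train_loss=0.5,  # placeholder values, not real results
        train_metrics={'accuracy': 0.80},
        val_metrics={'accuracy': 0.75},
        test_metrics={},
        val_loss=0.6,
    )
    epoch += 1

print(run_summary.get_best_epoch(split_type='val'))
print(run_summary.get_last_epoch())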