Reproducibility in cocktail #204

Merged
8 changes: 5 additions & 3 deletions autoPyTorch/pipeline/components/setup/base_setup.py
@@ -1,4 +1,6 @@
-from typing import Any, Dict
+from typing import Any, Dict, Optional

+import numpy as np
+
from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent
@@ -7,8 +9,8 @@ class autoPyTorchSetupComponent(autoPyTorchComponent):
    """Provide an abstract interface for schedulers
    in Auto-Pytorch"""

-    def __init__(self) -> None:
-        super(autoPyTorchSetupComponent, self).__init__()
+    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
+        super(autoPyTorchSetupComponent, self).__init__(random_state=random_state)
Reviewer:

    ah interesting, did we miss this when we made the changes in development?

Contributor Author:

    i don't think it affects the code at all.


    def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
        """
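A minimal sketch of what threading random_state through the constructor buys (ExampleSetupComponent below is a hypothetical stand-in, not one of the classes touched by this PR): every draw goes through one seedable np.random.RandomState, so two runs constructed with the same seed produce identical results.

    import numpy as np

    class ExampleSetupComponent:  # hypothetical stand-in for an autoPyTorchSetupComponent subclass
        def __init__(self, random_state=None):
            # Same pattern as the diff: reuse the caller's RandomState, or fall back to a fresh one.
            self.random_state = random_state if random_state is not None else np.random.RandomState()

        def sample(self):
            # All randomness is drawn from self.random_state rather than the global np.random.
            return self.random_state.rand(1)

    a = ExampleSetupComponent(np.random.RandomState(42)).sample()
    b = ExampleSetupComponent(np.random.RandomState(42)).sample()
    assert np.allclose(a, b)  # identical seeds give identical draws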
@@ -89,7 +89,7 @@ class LearnedEntityEmbedding(NetworkEmbeddingComponent):
    Class to learn an embedding for categorical hyperparameters.
    """

-    def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None, **kwargs: Any):
+    def __init__(self, random_state: Optional[np.random.RandomState] = None, **kwargs: Any):
        super().__init__(random_state=random_state)
        self.config = kwargs
@@ -20,7 +20,7 @@ class NoEmbedding(NetworkEmbeddingComponent):
    Class to learn an embedding for categorical hyperparameters.
    """

-    def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None):
+    def __init__(self, random_state: Optional[np.random.RandomState] = None):
        super().__init__(random_state=random_state)

    def build_embedding(self, num_input_features: np.ndarray, num_numerical_features: int) -> nn.Module:
@@ -1,5 +1,5 @@
import copy
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Tuple

import numpy as np

@@ -11,10 +11,9 @@


class NetworkEmbeddingComponent(autoPyTorchSetupComponent):
-    def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None):
-        super().__init__()
+    def __init__(self, random_state: Optional[np.random.RandomState] = None):
+        super().__init__(random_state=random_state)
        self.embedding: Optional[nn.Module] = None
-        self.random_state = random_state

    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
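A small aside on the narrowed annotation above: with the Union[..., int] gone, an integer seed would now be converted by the caller before being passed in. A sketch using sklearn's helper, assuming only a RandomState is accepted as the new signature states (the commented call is hypothetical):

    import numpy as np
    from sklearn.utils import check_random_state

    rs = check_random_state(42)  # turns an int seed into an np.random.RandomState
    assert isinstance(rs, np.random.RandomState)

    # embedding = NetworkEmbeddingComponent(random_state=rs)  # hypothetical call site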
@@ -27,11 +27,11 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
            typing.Dict[str, np.ndarray]: arguments to the criterion function
        """
        beta = 1.0
-        lam = np.random.beta(beta, beta)
+        lam = self.random_state.beta(beta, beta)
        batch_size, channel, W, H = X.size()
        index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size)

-        r = np.random.rand(1)
+        r = self.random_state.rand(1)
        if beta <= 0 or r > self.alpha:
            return X, {'y_a': y, 'y_b': y[index], 'lam': 1}

@@ -40,8 +40,8 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
        cut_rat = np.sqrt(1. - lam)
        cut_w = np.int(W * cut_rat)
        cut_h = np.int(H * cut_rat)
-        cx = np.random.randint(W)
-        cy = np.random.randint(H)
+        cx = self.random_state.randint(W)
+        cy = self.random_state.randint(H)
        bbx1 = np.clip(cx - cut_w // 2, 0, W)
        bby1 = np.clip(cy - cut_h // 2, 0, H)
        bbx2 = np.clip(cx + cut_w // 2, 0, W)
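The CutMix change above is easier to see in isolation. A rough, self-contained sketch of the patch sampling (an illustrative helper, not the actual trainer class), showing that identically seeded RandomState objects cut the same box:

    import numpy as np

    def sample_cut_box(W, H, patch_ratio, random_state):
        # Same box arithmetic as the diff, with every draw taken from the passed RandomState.
        cut_rat = np.sqrt(1. - patch_ratio)
        cut_w, cut_h = int(W * cut_rat), int(H * cut_rat)
        cx, cy = random_state.randint(W), random_state.randint(H)
        bbx1, bby1 = np.clip(cx - cut_w // 2, 0, W), np.clip(cy - cut_h // 2, 0, H)
        bbx2, bby2 = np.clip(cx + cut_w // 2, 0, W), np.clip(cy + cut_h // 2, 0, H)
        return bbx1, bby1, bbx2, bby2

    # Identical seeds -> identical patches, which is what replacing np.random
    # with self.random_state is meant to guarantee.
    assert sample_cut_box(32, 32, 0.5, np.random.RandomState(0)) == \
           sample_cut_box(32, 32, 0.5, np.random.RandomState(0))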
@@ -24,7 +24,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
            np.ndarray: that processes data
            typing.Dict[str, np.ndarray]: arguments to the criterion function
        """
-        r = np.random.rand(1)
+        r = self.random_state.rand(1)
        batch_size, channel, W, H = X.size()
        if r > self.cutout_prob:
            return X, {'y_a': y, 'y_b': y, 'lam': 1}

@@ -34,8 +34,8 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
        cut_rat = np.sqrt(1. - self.patch_ratio)
        cut_w = np.int(W * cut_rat)
        cut_h = np.int(H * cut_rat)
-        cx = np.random.randint(W)
-        cy = np.random.randint(H)
+        cx = self.random_state.randint(W)
+        cy = self.random_state.randint(H)
        bbx1 = np.clip(cx - cut_w // 2, 0, W)
        bby1 = np.clip(cy - cut_h // 2, 0, H)
        bbx2 = np.clip(cx + cut_w // 2, 0, W)
@@ -1,4 +1,3 @@
-import random
import typing

import numpy as np

@@ -28,19 +27,19 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
            typing.Dict[str, np.ndarray]: arguments to the criterion function
        """
        beta = 1.0
-        lam = np.random.beta(beta, beta)
+        lam = self.random_state.beta(beta, beta)
        batch_size = X.size()[0]
        index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size)

-        r = np.random.rand(1)
+        r = self.random_state.rand(1)
        if beta <= 0 or r > self.alpha:
            return X, {'y_a': y, 'y_b': y[index], 'lam': 1}

        # The mixup component mixes up also on the batch dimension
        # It is unlikely that the batch size is lower than the number of features, but
        # be safe
        size = min(X.shape[0], X.shape[1])
-        indices = torch.tensor(random.sample(range(1, size), max(1, np.int(size * lam))))
+        indices = torch.tensor(self.random_state.choice(range(1, size), max(1, np.int(size * lam))))

        X[:, indices] = X[index, :][:, indices]
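The swap from random.sample to self.random_state.choice replaces the stdlib random module with the component's seedable RandomState in the tabular trainers. One detail worth noting, sketched below with illustrative values: random.sample draws without replacement, while RandomState.choice defaults to replace=True, so duplicate column indices become possible unless replace=False is passed.

    import numpy as np
    import torch

    size, lam = 8, 0.4
    n_cols = max(1, int(size * lam))

    # Seeded equivalent of the new line: column indices come from the component's RandomState.
    indices = torch.tensor(np.random.RandomState(1).choice(range(1, size), n_cols))

    # Closest drop-in for the old random.sample semantics (no repeated indices):
    unique_indices = torch.tensor(np.random.RandomState(1).choice(range(1, size), n_cols, replace=False))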
@@ -1,4 +1,3 @@
-import random
import typing

import numpy as np

@@ -28,7 +27,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
            typing.Dict[str, np.ndarray]: arguments to the criterion function
        """

-        r = np.random.rand(1)
+        r = self.random_state.rand(1)
        if r > self.cutout_prob:
            y_a = y
            y_b = y

@@ -39,7 +38,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
        # It is unlikely that the batch size is lower than the number of features, but
        # be safe
        size = min(X.shape[0], X.shape[1])
-        indices = torch.tensor(random.sample(range(1, size), max(1, np.int(size * self.patch_ratio))))
+        indices = torch.tensor(self.random_state.choice(range(1, size), max(1, np.int(size * self.patch_ratio))))

        # We use an ordinal encoder on the tabular data
        # -1 is the conceptual equivalent to 0 in a image, that does not