Bug fixes #249
@@ -261,18 +261,19 @@ def __init__(
         # if the shortcut needs a layer we apply batchnorm and activation to the shortcut
         # as well (start_norm)
         if in_features != out_features:
-            self.shortcut = nn.Linear(in_features, out_features)
-            initial_normalization = list()
-            if self.config['use_batch_norm']:
-                initial_normalization.append(
-                    nn.BatchNorm1d(in_features)
-                )
-            initial_normalization.append(
-                self.activation()
-            )
-            self.start_norm = nn.Sequential(
-                *initial_normalization
-            )
+            if self.config["use_skip_connection"]:
+                self.shortcut = nn.Linear(in_features, out_features)
+                initial_normalization = list()
+                if self.config['use_batch_norm']:
+                    initial_normalization.append(
+                        nn.BatchNorm1d(in_features)
+                    )
+                initial_normalization.append(
+                    self.activation()
+                )
+                self.start_norm = nn.Sequential(
+                    *initial_normalization
+                )

         self.block_index = block_index
         self.num_blocks = blocks_per_group * self.config["num_groups"]
@@ -290,14 +291,6 @@ def _build_block(self, in_features: int, out_features: int) -> nn.Module:
             if self.config['use_batch_norm']:
                 layers.append(nn.BatchNorm1d(in_features))
             layers.append(self.activation())
-        else:
-            # if start norm is not None and skip connection is None
-            # we will never apply the start_norm for the first layer in the block,
-            # which is why we should account for this case.
-            if not self.config['use_skip_connection']:
-                if self.config['use_batch_norm']:
-                    layers.append(nn.BatchNorm1d(in_features))
-                layers.append(self.activation())

         layers.append(nn.Linear(in_features, out_features))
@@ -327,8 +320,7 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
             # if in_features != out_features
             # -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x)))))
             x = self.start_norm(x)
-            if self.config["use_skip_connection"]:
-                residual = self.shortcut(x)
+            residual = self.shortcut(x)

         # TODO make the below code better
         if self.config["use_skip_connection"]:
@@ -337,13 +329,8 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
                 x2 = self.shake_shake_layers(x)
                 alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda)
                 x = shake_shake(x1, x2, alpha, beta)
-            else:
+            elif self.config["multi_branch_choice"] == 'shake-drop':
                 x = self.layers(x)
-        else:
-            x = self.layers(x)
-
-        if self.config["use_skip_connection"]:
-            if self.config["multi_branch_choice"] == 'shake-drop':
                 alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda)
                 bl = shake_drop_get_bl(
                     self.block_index,
@@ -353,8 +340,11 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
                     x.is_cuda,
                 )
                 x = shake_drop(x, alpha, beta, bl)
+            else:
+                x = self.layers(x)

-        if self.config["use_skip_connection"]:
             x = x + residual
+        else:
+            x = self.layers(x)

         return x
@@ -72,6 +72,7 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
             )
         if self.config['use_batch_norm']:
             layers.append(torch.nn.BatchNorm1d(self.config["num_units_%i" % self.config['num_groups']]))
+        layers.append(_activations[self.config["activation"]]())

         backbone = torch.nn.Sequential(*layers)
         self.backbone = backbone
         return backbone

Review discussion on the added activation line:
Comment: We need it, right? (this change originates in this PR, right?)
Reply: Yes, makes it consistent with the …
@@ -23,7 +23,6 @@ def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]
         layers = []
         in_features = np.prod(input_shape).item()
         out_features = np.prod(output_shape).item()
-        layers.append(_activations[self.config["activation"]]())

         layers.append(nn.Linear(in_features=in_features,
                                 out_features=out_features))
         return nn.Sequential(*layers)

Review discussion on the removed activation line:
Comment: Why deleted?
Reply: If I am not mistaken, that is because, firstly, we do not add an extra layer here and, secondly, this last nonlinearity layer was for the residual block and it is now added there already. I will double check.
Reply: So, I confirmed it, it is consistent: the last activation layer for the last layer of the last block is in every …
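To make the consistency argument in this thread concrete, here is a small sketch of the resulting split of responsibilities (hypothetical sizes, with `nn.ReLU` standing in for `_activations[self.config["activation"]]`): the backbone now ends in batch norm plus activation, so the fully connected head reduces to a single linear layer.

```python
import torch
import torch.nn as nn

# Hypothetical sizes for illustration only.
in_features, hidden, out_features = 16, 32, 10
activation = nn.ReLU  # stand-in for _activations[self.config["activation"]]

# Backbone after this PR: the final BatchNorm1d + activation live here ...
backbone = nn.Sequential(
    nn.Linear(in_features, hidden),
    nn.BatchNorm1d(hidden),
    activation(),
)

# ... so the fully connected head no longer prepends an activation and is just a linear map.
head = nn.Sequential(
    nn.Linear(hidden, out_features),
)

x = torch.randn(4, in_features)
print(head(backbone(x)).shape)  # torch.Size([4, 10])
```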
@@ -24,7 +24,7 @@ class AdversarialTrainer(BaseTrainerComponent):
     def __init__(
         self,
         epsilon: float,
-        weighted_loss: bool = False,
+        weighted_loss: int = 0,
         random_state: Optional[np.random.RandomState] = None,
         use_stochastic_weight_averaging: bool = False,
         use_snapshot_ensemble: bool = False,
@@ -159,8 +159,8 @@ def get_hyperparameter_search_space(
         dataset_properties: Optional[Dict] = None,
         weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
+            default_value=1),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
             value_range=(5, 10),
@@ -226,9 +226,17 @@ def get_hyperparameter_search_space(
                 parent_hyperparameter=parent_hyperparameter
             )

+        """
+        # TODO, decouple the weighted loss from the trainer
+        if dataset_properties is not None:
+            if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
+                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+        """
+        # TODO, decouple the weighted loss from the trainer. Uncomment the code above and
+        # remove the code below. Also update the method signature, so the weighted loss
+        # is not a constant.
         if dataset_properties is not None:
             if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
-                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+                add_hyperparameter(cs, weighted_loss, Constant)

         return cs
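As background for the `weighted_loss` flag itself, here is a hypothetical illustration of what weighting the classification loss by the class distribution can look like (inverse-frequency weights passed to `CrossEntropyLoss`; this is not necessarily the exact scheme this trainer uses).

```python
import numpy as np
import torch
import torch.nn as nn

# Imbalanced integer targets (hypothetical data).
y = np.array([0, 0, 0, 0, 1, 1, 2])

# Inverse-frequency class weights: rarer classes receive larger weights.
counts = np.bincount(y)
weights = counts.sum() / (len(counts) * counts)

criterion = nn.CrossEntropyLoss(weight=torch.as_tensor(weights, dtype=torch.float32))

# Dummy logits for a batch of 7 samples and 3 classes.
logits = torch.randn(len(y), len(counts))
loss = criterion(logits, torch.as_tensor(y, dtype=torch.long))
print(loss.item())
```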
@@ -175,7 +175,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
     """
     Base class for training
     Args:
-        weighted_loss (bool, default=False): In case for classification, whether to weight
+        weighted_loss (int, default=0): In case for classification, whether to weight
             the loss function according to the distribution of classes in the target
         use_stochastic_weight_averaging (bool, default=True): whether to use stochastic
             weight averaging. Stochastic weight averaging is a simple average of
@@ -190,7 +190,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
         random_state:
         **lookahead_config:
     """
-    def __init__(self, weighted_loss: bool = False,
+    def __init__(self, weighted_loss: int = 0,
                  use_stochastic_weight_averaging: bool = True,
                  use_snapshot_ensemble: bool = True,
                  se_lastk: int = 3,
@@ -537,8 +537,8 @@ def get_hyperparameter_search_space(
         dataset_properties: Optional[Dict] = None,
         weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
+            default_value=1),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
             value_range=(5, 10),

Review discussion on the new value_range:
Comment: To check, if passing a tuple is what is expected.
Reply: It expects an …
Reply: True, but I guess we can be consistent with the other hyperparameter spaces too, so I will do the change.
@@ -599,9 +599,17 @@ def get_hyperparameter_search_space(
                 parent_hyperparameter=parent_hyperparameter
             )

+        """
+        # TODO, decouple the weighted loss from the trainer
+        if dataset_properties is not None:
+            if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
+                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+        """
+        # TODO, decouple the weighted loss from the trainer. Uncomment the code above and
+        # remove the code below. Also update the method signature, so the weighted loss
+        # is not a constant.
         if dataset_properties is not None:
             if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
-                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+                add_hyperparameter(cs, weighted_loss, Constant)

         return cs
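For reference, a minimal standalone sketch of the old versus new registration of `weighted_loss`, written against ConfigSpace directly rather than the `HyperparameterSearchSpace`/`add_hyperparameter` helpers used in the diff:

```python
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, Constant

# Before this PR: weighted_loss was a searchable boolean categorical.
cs_before = ConfigurationSpace()
cs_before.add_hyperparameter(
    CategoricalHyperparameter("weighted_loss", choices=[True, False], default_value=True)
)

# After this PR: weighted_loss is pinned to a single value until it is decoupled
# from the trainer (see the TODO above), so it is registered as a Constant.
cs_after = ConfigurationSpace()
cs_after.add_hyperparameter(Constant("weighted_loss", 1))

print(cs_before.sample_configuration())
print(cs_after.sample_configuration())
```

A `Constant` keeps the hyperparameter in the configuration space, so downstream code can still read it, while removing it from the search; that matches the TODO above about decoupling it from the trainer later.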
@@ -20,7 +20,7 @@
 class CutOut:
     def __init__(self, patch_ratio: float,
                  cutout_prob: float,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
                  random_state: Optional[np.random.RandomState] = None,
                  use_stochastic_weight_averaging: bool = False,
                  use_snapshot_ensemble: bool = False,
@@ -63,8 +63,8 @@ def get_hyperparameter_search_space(
         dataset_properties: Optional[Dict] = None,
         weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
+            default_value=1),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
             value_range=(5, 10),
@@ -136,9 +136,17 @@ def get_hyperparameter_search_space(
             parent_hyperparameter=parent_hyperparameter
         )

+        """
+        # TODO, decouple the weighted loss from the trainer
+        if dataset_properties is not None:
+            if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
+                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+        """
+        # TODO, decouple the weighted loss from the trainer. Uncomment the code above and
+        # remove the code below. Also update the method signature, so the weighted loss
+        # is not a constant.
         if dataset_properties is not None:
             if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
-                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+                add_hyperparameter(cs, weighted_loss, Constant)

         return cs
@@ -19,7 +19,7 @@

 class MixUp:
     def __init__(self, alpha: float,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
                  random_state: Optional[np.random.RandomState] = None,
                  use_stochastic_weight_averaging: bool = False,
                  use_snapshot_ensemble: bool = False,
@@ -61,8 +61,8 @@ def get_hyperparameter_search_space(
         dataset_properties: Optional[Dict] = None,
         weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
+            default_value=1),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
             value_range=(5, 10),
@@ -127,9 +127,18 @@ def get_hyperparameter_search_space(
             la_config_space,
             parent_hyperparameter=parent_hyperparameter
         )

+        """
+        # TODO, decouple the weighted loss from the trainer
+        if dataset_properties is not None:
+            if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
+                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+        """
+        # TODO, decouple the weighted loss from the trainer. Uncomment the code above and
+        # remove the code below. Also update the method signature, so the weighted loss
+        # is not a constant.
         if dataset_properties is not None:
             if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
-                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
+                add_hyperparameter(cs, weighted_loss, Constant)

         return cs