From a44194c5cb235d9bc2bd6753484bd1fb9865e3f7 Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Wed, 26 May 2021 11:56:06 +0630 Subject: [PATCH 1/8] fix: address comments from #136 --- src/metadata/metadata.json | 2 +- .../template-text-classification/main.py | 13 +++---------- .../template-text-classification/trainers.py | 17 ++++++++++++++--- .../template-vision-classification/main.py | 15 ++++----------- .../template-vision-classification/trainers.py | 17 ++++++++++++++--- src/templates/template-vision-dcgan/main.py | 9 +-------- src/templates/template-vision-dcgan/trainers.py | 16 +++++++++++++--- .../template-vision-segmentation/main.py | 12 +++--------- .../template-vision-segmentation/trainers.py | 17 ++++++++++++++--- 9 files changed, 67 insertions(+), 51 deletions(-) diff --git a/src/metadata/metadata.json b/src/metadata/metadata.json index 135c65fc..cacc63b5 100644 --- a/src/metadata/metadata.json +++ b/src/metadata/metadata.json @@ -47,7 +47,7 @@ "save_training": { "name": "save_training", "type": "checkbox", - "description": "Save the training state (models, optimizers, trainers, ...) by every save_every_iters." + "description": "Save the training state (models, optimizers, trainers, ...)." }, "save_evaluation": { "name": "save_evaluation", diff --git a/src/templates/template-text-classification/main.py b/src/templates/template-text-classification/main.py index 991f37f2..db73777d 100644 --- a/src/templates/template-text-classification/main.py +++ b/src/templates/template-text-classification/main.py @@ -12,7 +12,6 @@ from model import TransformerModel from torch import nn, optim from torch.optim.lr_scheduler import _LRScheduler -from torch.utils.data.distributed import DistributedSampler from trainers import setup_evaluator, setup_trainer from utils import * @@ -72,7 +71,9 @@ def run(local_rank: int, config: Any): } # trainer and evaluator - trainer = setup_trainer(config, model, optimizer, loss_fn, device) + trainer = setup_trainer( + config, model, optimizer, loss_fn, device, dataloader_train.sampler + ) evaluator = setup_evaluator(config, model, metrics, device) # setup engines logger with python logging @@ -82,14 +83,6 @@ def run(local_rank: int, config: Any): (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) trainer.logger = evaluator.logger = logger - # set epoch for distributed sampler - @trainer.on(Events.EPOCH_STARTED) - def set_epoch(): - if idist.get_world_size() > 1 and isinstance( - dataloader_train.sampler, DistributedSampler - ): - dataloader_train.sampler.set_epoch(trainer.state.epoch - 1) - if isinstance(lr_scheduler, _LRScheduler): trainer.add_event_handler( Events.ITERATION_COMPLETED, diff --git a/src/templates/template-text-classification/trainers.py b/src/templates/template-text-classification/trainers.py index 20bdfd68..e73ff605 100644 --- a/src/templates/template-text-classification/trainers.py +++ b/src/templates/template-text-classification/trainers.py @@ -1,11 +1,13 @@ from typing import Any, Dict, Union +import ignite.distributed as idist import torch -from ignite.engine import DeterministicEngine, Engine +from ignite.engine import DeterministicEngine, Engine, Events from ignite.metrics.metric import Metric from torch import nn from torch.cuda.amp import GradScaler, autocast from torch.optim.optimizer import Optimizer +from torch.utils.data import DistributedSampler, Sampler def setup_trainer( @@ -14,6 +16,7 @@ def setup_trainer( optimizer: Optimizer, loss_fn: nn.Module, device: Union[str, 
torch.device], + train_sampler: Sampler, ) -> Union[Engine, DeterministicEngine]: scaler = GradScaler(enabled=config.use_amp) @@ -50,11 +53,19 @@ def train_function(engine: Union[Engine, DeterministicEngine], batch: Any): return metric #::: if(it.deterministic) { :::# - return DeterministicEngine(train_function) + trainer = DeterministicEngine(train_function) #::: } else { :::# - return Engine(train_function) + trainer = Engine(train_function) #::: } :::# + # set epoch for distributed sampler + @trainer.on(Events.EPOCH_STARTED) + def set_epoch(): + if idist.get_world_size() > 1 and isinstance( + train_sampler, DistributedSampler + ): + train_sampler.set_epoch(trainer.state.epoch - 1) + def setup_evaluator( config: Any, diff --git a/src/templates/template-vision-classification/main.py b/src/templates/template-vision-classification/main.py index eb161f42..d50227be 100644 --- a/src/templates/template-vision-classification/main.py +++ b/src/templates/template-vision-classification/main.py @@ -9,7 +9,6 @@ from ignite.utils import manual_seed from model import Net from torch import nn, optim -from torch.utils.data.distributed import DistributedSampler from trainers import setup_evaluator, setup_trainer from utils import * @@ -33,7 +32,9 @@ def run(local_rank: int, config: Any): loss_fn = nn.CrossEntropyLoss().to(device=device) # trainer and evaluator - trainer = setup_trainer(config, model, optimizer, loss_fn, device) + trainer = setup_trainer( + config, model, optimizer, loss_fn, device, dataloader_train.sampler + ) evaluator = setup_evaluator(config, model, device) # attach metrics to evaluator @@ -51,15 +52,7 @@ def run(local_rank: int, config: Any): logger = setup_logging(config) logger.info("Configuration: \n%s", pformat(vars(config))) (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) - trainer.logger = evaluator.logger = logger - - # set epoch for distributed sampler - @trainer.on(Events.EPOCH_STARTED) - def set_epoch(): - if idist.get_world_size() > 1 and isinstance( - dataloader_train.sampler, DistributedSampler - ): - dataloader_train.sampler.set_epoch(trainer.state.epoch - 1) + trainer.logger = evaluator.logger = loggerw # setup ignite handlers #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# diff --git a/src/templates/template-vision-classification/trainers.py b/src/templates/template-vision-classification/trainers.py index 9bbc4084..95e7f0c1 100644 --- a/src/templates/template-vision-classification/trainers.py +++ b/src/templates/template-vision-classification/trainers.py @@ -1,10 +1,12 @@ from typing import Any, Union +import ignite.distributed as idist import torch -from ignite.engine import DeterministicEngine, Engine +from ignite.engine import DeterministicEngine, Engine, Events from torch.cuda.amp import autocast from torch.nn import Module from torch.optim import Optimizer +from torch.utils.data import DistributedSampler, Sampler def setup_trainer( @@ -13,6 +15,7 @@ def setup_trainer( optimizer: Optimizer, loss_fn: Module, device: Union[str, torch.device], + train_sampler: Sampler, ) -> Union[Engine, DeterministicEngine]: def train_function(engine: Union[Engine, DeterministicEngine], batch: Any): model.train() @@ -36,11 +39,19 @@ def train_function(engine: Union[Engine, DeterministicEngine], batch: Any): return {"train_loss": train_loss} #::: if(it.deterministic) { :::# - return DeterministicEngine(train_function) + trainer = DeterministicEngine(train_function) #::: } else { :::# - 
return Engine(train_function) + trainer = Engine(train_function) #::: } :::# + # set epoch for distributed sampler + @trainer.on(Events.EPOCH_STARTED) + def set_epoch(): + if idist.get_world_size() > 1 and isinstance( + train_sampler, DistributedSampler + ): + train_sampler.set_epoch(trainer.state.epoch - 1) + def setup_evaluator( config: Any, diff --git a/src/templates/template-vision-dcgan/main.py b/src/templates/template-vision-dcgan/main.py index 6c6005d9..170500ce 100644 --- a/src/templates/template-vision-dcgan/main.py +++ b/src/templates/template-vision-dcgan/main.py @@ -67,6 +67,7 @@ def run(local_rank: int, config: Any): optimizer_g=optimizer_g, loss_fn=loss_fn, device=device, + train_sampler=dataloader_train.sampler, ) evaluator = setup_evaluator( config=config, @@ -83,14 +84,6 @@ def run(local_rank: int, config: Any): (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) trainer.logger = evaluator.logger = logger - # set epoch for distributed sampler - @trainer.on(Events.EPOCH_STARTED) - def set_epoch(): - if idist.get_world_size() > 1 and isinstance( - dataloader_train.sampler, DistributedSampler - ): - dataloader_train.sampler.set_epoch(trainer.state.epoch - 1) - # setup ignite handlers #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# diff --git a/src/templates/template-vision-dcgan/trainers.py b/src/templates/template-vision-dcgan/trainers.py index 7801aad4..9dfe6bd2 100644 --- a/src/templates/template-vision-dcgan/trainers.py +++ b/src/templates/template-vision-dcgan/trainers.py @@ -2,10 +2,11 @@ import ignite.distributed as idist import torch -from ignite.engine import DeterministicEngine, Engine +from ignite.engine import DeterministicEngine, Engine, Events from torch.cuda.amp import autocast from torch.nn import Module from torch.optim import Optimizer +from torch.utils.data import DistributedSampler, Sampler def setup_trainer( @@ -16,6 +17,7 @@ def setup_trainer( optimizer_g: Optimizer, loss_fn: Module, device: Union[str, torch.device], + train_sampler: Sampler, ) -> Union[Engine, DeterministicEngine]: ws = idist.get_world_size() @@ -87,11 +89,19 @@ def train_function(engine: Union[Engine, DeterministicEngine], batch: Any): return metrics #::: if(it.deterministic) { :::# - return DeterministicEngine(train_function) + trainer = DeterministicEngine(train_function) #::: } else { :::# - return Engine(train_function) + trainer = Engine(train_function) #::: } :::# + # set epoch for distributed sampler + @trainer.on(Events.EPOCH_STARTED) + def set_epoch(): + if idist.get_world_size() > 1 and isinstance( + train_sampler, DistributedSampler + ): + train_sampler.set_epoch(trainer.state.epoch - 1) + def setup_evaluator( config: Any, diff --git a/src/templates/template-vision-segmentation/main.py b/src/templates/template-vision-segmentation/main.py index 7133ad28..87cf74c8 100644 --- a/src/templates/template-vision-segmentation/main.py +++ b/src/templates/template-vision-segmentation/main.py @@ -63,7 +63,9 @@ def run(local_rank: int, config: Any): metrics = {"IoU": IoU(cm_metric), "mIoU_bg": mIoU(cm_metric)} # trainer and evaluator - trainer = setup_trainer(config, model, optimizer, loss_fn, device) + trainer = setup_trainer( + config, model, optimizer, loss_fn, device, dataloader_train.sampler + ) evaluator = setup_evaluator(config, model, metrics, device) # setup engines logger with python logging @@ -73,14 +75,6 @@ def run(local_rank: int, config: Any): (config.output_dir / 
"config-lock.yaml").write_text(yaml.dump(config)) trainer.logger = evaluator.logger = logger - # set epoch for distributed sampler - @trainer.on(Events.EPOCH_STARTED) - def set_epoch(): - if idist.get_world_size() > 1 and isinstance( - dataloader_train.sampler, DistributedSampler - ): - dataloader_train.sampler.set_epoch(trainer.state.epoch - 1) - if isinstance(lr_scheduler, _LRScheduler): trainer.add_event_handler( Events.ITERATION_COMPLETED, diff --git a/src/templates/template-vision-segmentation/trainers.py b/src/templates/template-vision-segmentation/trainers.py index 99eb680f..3296f7ac 100644 --- a/src/templates/template-vision-segmentation/trainers.py +++ b/src/templates/template-vision-segmentation/trainers.py @@ -1,12 +1,14 @@ from typing import Any, Dict, Union +import ignite.distributed as idist import torch from data import prepare_image_mask -from ignite.engine import DeterministicEngine, Engine +from ignite.engine import DeterministicEngine, Engine, Events from ignite.metrics import Metric from torch.cuda.amp import GradScaler, autocast from torch.nn import Module from torch.optim import Optimizer +from torch.utils.data import DistributedSampler, Sampler from utils import model_output_transform @@ -16,6 +18,7 @@ def setup_trainer( optimizer: Optimizer, loss_fn: Module, device: Union[str, torch.device], + train_sampler: Sampler, ): prepare_batch = prepare_image_mask @@ -41,11 +44,19 @@ def train_function(engine: Engine, batch: Any): return metric #::: if(it.deterministic) { :::# - return DeterministicEngine(train_function) + trainer = DeterministicEngine(train_function) #::: } else { :::# - return Engine(train_function) + trainer = Engine(train_function) #::: } :::# + # set epoch for distributed sampler + @trainer.on(Events.EPOCH_STARTED) + def set_epoch(): + if idist.get_world_size() > 1 and isinstance( + train_sampler, DistributedSampler + ): + train_sampler.set_epoch(trainer.state.epoch - 1) + def setup_evaluator( config: Any, From db7df83f751a58f7236e22cdbfdcae25181d6f37 Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Wed, 26 May 2021 13:18:12 +0630 Subject: [PATCH 2/8] fix: return trainer, regex replace --- src/store.js | 3 ++- src/templates/template-text-classification/trainers.py | 2 ++ src/templates/template-text-classification/utils.py | 2 -- src/templates/template-vision-classification/main.py | 2 +- src/templates/template-vision-classification/trainers.py | 2 ++ src/templates/template-vision-classification/utils.py | 2 -- src/templates/template-vision-dcgan/trainers.py | 2 ++ src/templates/template-vision-dcgan/utils.py | 2 -- src/templates/template-vision-segmentation/trainers.py | 2 ++ src/templates/template-vision-segmentation/utils.py | 2 -- 10 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/store.js b/src/store.js index 4804dcd2..930c7b59 100644 --- a/src/store.js +++ b/src/store.js @@ -68,7 +68,8 @@ export function genCode() { } store.code[file] = ejs .render(currentFiles[file], store.config) - .replaceAll(/(\n\n\n\n)+/gi, '\n') + .replaceAll(/\s{4}\n/gi, '') + .replaceAll(/(\n{3,})/gi, '\n\n') } if (isDev) { store.code[__DEV_CONFIG_FILE__] = JSON.stringify(store.config, null, 2) diff --git a/src/templates/template-text-classification/trainers.py b/src/templates/template-text-classification/trainers.py index e73ff605..7cd8cf86 100644 --- a/src/templates/template-text-classification/trainers.py +++ b/src/templates/template-text-classification/trainers.py @@ -66,6 +66,8 @@ def set_epoch(): ): 
train_sampler.set_epoch(trainer.state.epoch - 1) + return trainer + def setup_evaluator( config: Any, diff --git a/src/templates/template-text-classification/utils.py b/src/templates/template-text-classification/utils.py index 3dae2325..39974ba4 100644 --- a/src/templates/template-text-classification/utils.py +++ b/src/templates/template-text-classification/utils.py @@ -133,8 +133,6 @@ def setup_logging(config: Any) -> Logger: #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - - def setup_handlers( trainer: Engine, evaluator: Engine, diff --git a/src/templates/template-vision-classification/main.py b/src/templates/template-vision-classification/main.py index d50227be..736f6b4b 100644 --- a/src/templates/template-vision-classification/main.py +++ b/src/templates/template-vision-classification/main.py @@ -52,7 +52,7 @@ def run(local_rank: int, config: Any): logger = setup_logging(config) logger.info("Configuration: \n%s", pformat(vars(config))) (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) - trainer.logger = evaluator.logger = loggerw + trainer.logger = evaluator.logger = logger # setup ignite handlers #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# diff --git a/src/templates/template-vision-classification/trainers.py b/src/templates/template-vision-classification/trainers.py index 95e7f0c1..b25905dc 100644 --- a/src/templates/template-vision-classification/trainers.py +++ b/src/templates/template-vision-classification/trainers.py @@ -52,6 +52,8 @@ def set_epoch(): ): train_sampler.set_epoch(trainer.state.epoch - 1) + return trainer + def setup_evaluator( config: Any, diff --git a/src/templates/template-vision-classification/utils.py b/src/templates/template-vision-classification/utils.py index 4f2992bd..66a5c280 100644 --- a/src/templates/template-vision-classification/utils.py +++ b/src/templates/template-vision-classification/utils.py @@ -133,8 +133,6 @@ def setup_logging(config: Any) -> Logger: #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - - def setup_handlers( trainer: Engine, evaluator: Engine, diff --git a/src/templates/template-vision-dcgan/trainers.py b/src/templates/template-vision-dcgan/trainers.py index 9dfe6bd2..b99834c3 100644 --- a/src/templates/template-vision-dcgan/trainers.py +++ b/src/templates/template-vision-dcgan/trainers.py @@ -102,6 +102,8 @@ def set_epoch(): ): train_sampler.set_epoch(trainer.state.epoch - 1) + return trainer + def setup_evaluator( config: Any, diff --git a/src/templates/template-vision-dcgan/utils.py b/src/templates/template-vision-dcgan/utils.py index 4f2992bd..66a5c280 100644 --- a/src/templates/template-vision-dcgan/utils.py +++ b/src/templates/template-vision-dcgan/utils.py @@ -133,8 +133,6 @@ def setup_logging(config: Any) -> Logger: #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - - def setup_handlers( trainer: Engine, evaluator: Engine, diff --git a/src/templates/template-vision-segmentation/trainers.py b/src/templates/template-vision-segmentation/trainers.py index 3296f7ac..e262c864 100644 --- a/src/templates/template-vision-segmentation/trainers.py +++ b/src/templates/template-vision-segmentation/trainers.py @@ -57,6 +57,8 @@ def set_epoch(): ): train_sampler.set_epoch(trainer.state.epoch - 1) + return trainer + def 
setup_evaluator( config: Any, diff --git a/src/templates/template-vision-segmentation/utils.py b/src/templates/template-vision-segmentation/utils.py index d6fe2034..15b10023 100644 --- a/src/templates/template-vision-segmentation/utils.py +++ b/src/templates/template-vision-segmentation/utils.py @@ -133,8 +133,6 @@ def setup_logging(config: Any) -> Logger: #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - - def setup_handlers( trainer: Engine, evaluator: Engine, From 27512d7a6183d52261409f64c5a30612966407c3 Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Wed, 26 May 2021 13:38:18 +0630 Subject: [PATCH 3/8] fix: move comments inside ejs conditions --- src/templates/template-text-classification/main.py | 10 +++++----- src/templates/template-vision-classification/main.py | 10 +++++----- src/templates/template-vision-dcgan/main.py | 7 +++++-- src/templates/template-vision-segmentation/main.py | 10 +++++----- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/templates/template-text-classification/main.py b/src/templates/template-text-classification/main.py index db73777d..2aba9e4f 100644 --- a/src/templates/template-text-classification/main.py +++ b/src/templates/template-text-classification/main.py @@ -93,8 +93,8 @@ def run(local_rank: int, config: Any): else: trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler) - # setup ignite handlers #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # setup ignite handlers #::: if (it.save_training) { :::# to_save_train = { @@ -118,8 +118,8 @@ def run(local_rank: int, config: Any): ) #::: } :::# - # experiment tracking #::: if (it.logger) { :::# + # experiment tracking if rank == 0: exp_logger = setup_exp_logging(config, trainer, optimizer, evaluator) #::: } :::# @@ -140,8 +140,8 @@ def run(local_rank: int, config: Any): # for evaluation stats @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): - # show timer #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show timer if timer is not None: logger.info("Time per batch: %.4f seconds", timer.value()) timer.reset() @@ -162,8 +162,8 @@ def _(): epoch_length=config.train_epoch_length, ) - # close logger #::: if (it.logger) { :::# + # close logger if rank == 0: from ignite.contrib.handlers.wandb_logger import WandBLogger @@ -175,8 +175,8 @@ def _(): exp_logger.close() #::: } :::# - # show the last checkpoint filename #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show the last checkpoint filename if ckpt_handler_train is not None: logger.info( "Last training checkpoint name - %s", diff --git a/src/templates/template-vision-classification/main.py b/src/templates/template-vision-classification/main.py index 736f6b4b..684f17ad 100644 --- a/src/templates/template-vision-classification/main.py +++ b/src/templates/template-vision-classification/main.py @@ -54,8 +54,8 @@ def run(local_rank: int, config: Any): (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) trainer.logger = evaluator.logger = logger - # setup ignite handlers #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # setup ignite handlers #::: if (it.save_training) { :::# to_save_train = 
{"model": model, "optimizer": optimizer, "trainer": trainer} @@ -74,8 +74,8 @@ def run(local_rank: int, config: Any): ) #::: } :::# - # experiment tracking #::: if (it.logger) { :::# + # experiment tracking if rank == 0: exp_logger = setup_exp_logging(config, trainer, optimizer, evaluator) #::: } :::# @@ -96,8 +96,8 @@ def run(local_rank: int, config: Any): # for evaluation stats @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): - # show timer #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show timer if timer is not None: logger.info("Time per batch: %.4f seconds", timer.value()) timer.reset() @@ -118,8 +118,8 @@ def _(): epoch_length=config.train_epoch_length, ) - # close logger #::: if (it.logger) { :::# + # close logger if rank == 0: from ignite.contrib.handlers.wandb_logger import WandBLogger @@ -131,8 +131,8 @@ def _(): exp_logger.close() #::: } :::# - # show the last checkpoint filename #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show the last checkpoint filename if ckpt_handler_train is not None: logger.info( "Last training checkpoint name - %s", diff --git a/src/templates/template-vision-dcgan/main.py b/src/templates/template-vision-dcgan/main.py index 170500ce..5db5fcfd 100644 --- a/src/templates/template-vision-dcgan/main.py +++ b/src/templates/template-vision-dcgan/main.py @@ -84,8 +84,8 @@ def run(local_rank: int, config: Any): (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) trainer.logger = evaluator.logger = logger - # setup ignite handlers #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # setup ignite handlers #::: if (it.save_training) { :::# to_save_train = { @@ -110,8 +110,8 @@ def run(local_rank: int, config: Any): ) #::: } :::# - # experiment tracking #::: if (it.logger) { :::# + # experiment tracking if rank == 0: exp_logger = setup_exp_logging( config, @@ -152,6 +152,7 @@ def save_real_example(engine): @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show timer if timer is not None: logger.info("Time per batch: %.4f seconds", timer.value()) timer.reset() @@ -173,6 +174,7 @@ def _(): ) #::: if (it.logger) { :::# + # close logger if rank == 0: from ignite.contrib.handlers.wandb_logger import WandBLogger @@ -185,6 +187,7 @@ def _(): #::: } :::# #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show last checkpoint names if ckpt_handler_train is not None: logger.info( "Last training checkpoint name - %s", diff --git a/src/templates/template-vision-segmentation/main.py b/src/templates/template-vision-segmentation/main.py index 87cf74c8..e72440c6 100644 --- a/src/templates/template-vision-segmentation/main.py +++ b/src/templates/template-vision-segmentation/main.py @@ -85,8 +85,8 @@ def run(local_rank: int, config: Any): else: trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler) - # setup ignite handlers #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # setup ignite handlers #::: if (it.save_training) { :::# to_save_train = { @@ -110,8 +110,8 @@ def run(local_rank: int, config: Any): ) #::: } :::# - # experiment tracking 
#::: if (it.logger) { :::# + # experiment tracking if rank == 0: exp_logger = setup_exp_logging(config, trainer, optimizer, evaluator) @@ -162,8 +162,8 @@ def custom_event_filter(_, val_iteration): # for evaluation stats @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): - # show timer #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show timer if timer is not None: logger.info("Time per batch: %.4f seconds", timer.value()) timer.reset() @@ -184,8 +184,8 @@ def _(): epoch_length=config.train_epoch_length, ) - # close logger #::: if (it.logger) { :::# + # close logger if rank == 0: from ignite.contrib.handlers.wandb_logger import WandBLogger @@ -197,8 +197,8 @@ def _(): exp_logger.close() #::: } :::# - # show the last checkpoint filename #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + # show the last checkpoint filename if ckpt_handler_train is not None: logger.info( "Last training checkpoint name - %s", From 8ecbaea83d78d0d7cccb126c87e9067a4b65b141 Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Wed, 26 May 2021 13:46:53 +0630 Subject: [PATCH 4/8] fix: replacw with \n --- src/store.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/store.js b/src/store.js index 1db1e5bc..582b0514 100644 --- a/src/store.js +++ b/src/store.js @@ -69,7 +69,7 @@ export function genCode() { } store.code[file] = ejs .render(currentFiles[file], store.config) - .replaceAll(/\s{4}\n/gi, '') + .replaceAll(/\s{4}\n/gi, '\n') .replaceAll(/(\n{3,})/gi, '\n\n') } if (isDev) { From 9fb70ff7cc4bc56b3336fa039bb68e20812611f3 Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Wed, 26 May 2021 14:14:49 +0630 Subject: [PATCH 5/8] fix: add sampler argument --- src/templates/template-vision-dcgan/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/templates/template-vision-dcgan/test_all.py b/src/templates/template-vision-dcgan/test_all.py index 8214f397..a99233e9 100644 --- a/src/templates/template-vision-dcgan/test_all.py +++ b/src/templates/template-vision-dcgan/test_all.py @@ -65,7 +65,7 @@ def test_setup_trainer(): model, optimizer, device, loss_fn, batch = set_up() config = Namespace(use_amp=False, train_batch_size=2, z_dim=100) trainer = setup_trainer( - config, model, model, optimizer, optimizer, loss_fn, device + config, model, model, optimizer, optimizer, loss_fn, device, None ) trainer.run([batch, batch]) assert isinstance(trainer.state.output, dict) From f2d3e7cc360ed1c46a6b8ea8606dd0ef84d42c81 Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Thu, 27 May 2021 17:02:37 +0630 Subject: [PATCH 6/8] fix: drop timer, show more options for saving when checked, separate model file for segmentation - drop timer handlers - show more options that are needed for saving checkpoints if user wants to save checkpoints - put a separate model file in segmentation template - more simple conditions for setup_handlers --- __tests__/text-classification.spec.js | 3 -- __tests__/vision-classification.spec.js | 3 -- __tests__/vision-dcgan.spec.js | 3 -- __tests__/vision-segmentation.spec.js | 3 -- src/components/TabHandlers.vue | 8 ++-- src/templates/template-common/main.py | 30 +++++------- src/templates/template-common/utils.py | 22 ++------- .../template-text-classification/main.py | 39 ++++++---------- 
.../template-text-classification/utils.py | 20 ++------ .../template-vision-classification/main.py | 41 ++++++----------- .../template-vision-classification/utils.py | 20 ++------ src/templates/template-vision-dcgan/main.py | 38 ++++++--------- src/templates/template-vision-dcgan/utils.py | 20 ++------ .../template-vision-segmentation/main.py | 46 +++++++------------ .../template-vision-segmentation/model.py | 5 ++ .../template-vision-segmentation/utils.py | 20 ++------ 16 files changed, 105 insertions(+), 216 deletions(-) create mode 100644 src/templates/template-vision-segmentation/model.py diff --git a/__tests__/text-classification.spec.js b/__tests__/text-classification.spec.js index 844167d1..5e6ddf4d 100644 --- a/__tests__/text-classification.spec.js +++ b/__tests__/text-classification.spec.js @@ -76,9 +76,6 @@ test('text classification all', async () => { await page.check('#terminate_on_nan-checkbox') expect(await page.isChecked('#terminate_on_nan-checkbox')).toBeTruthy() - await page.check('#timer-checkbox') - expect(await page.isChecked('#timer-checkbox')).toBeTruthy() - await page.fill('#patience-input-number', '2') expect(await page.$eval('#patience-input-number', (e) => e.value)).toBe('2') diff --git a/__tests__/vision-classification.spec.js b/__tests__/vision-classification.spec.js index e4b1f366..d6907363 100644 --- a/__tests__/vision-classification.spec.js +++ b/__tests__/vision-classification.spec.js @@ -76,9 +76,6 @@ test('vision classification all', async () => { await page.check('#terminate_on_nan-checkbox') expect(await page.isChecked('#terminate_on_nan-checkbox')).toBeTruthy() - await page.check('#timer-checkbox') - expect(await page.isChecked('#timer-checkbox')).toBeTruthy() - await page.fill('#patience-input-number', '2') expect(await page.$eval('#patience-input-number', (e) => e.value)).toBe('2') diff --git a/__tests__/vision-dcgan.spec.js b/__tests__/vision-dcgan.spec.js index 6d903f7e..7b3dc05f 100644 --- a/__tests__/vision-dcgan.spec.js +++ b/__tests__/vision-dcgan.spec.js @@ -76,9 +76,6 @@ test('vision dcgan all', async () => { await page.check('#terminate_on_nan-checkbox') expect(await page.isChecked('#terminate_on_nan-checkbox')).toBeTruthy() - await page.check('#timer-checkbox') - expect(await page.isChecked('#timer-checkbox')).toBeTruthy() - await page.fill('#patience-input-number', '2') expect(await page.$eval('#patience-input-number', (e) => e.value)).toBe('2') diff --git a/__tests__/vision-segmentation.spec.js b/__tests__/vision-segmentation.spec.js index b14fc443..320c3efe 100644 --- a/__tests__/vision-segmentation.spec.js +++ b/__tests__/vision-segmentation.spec.js @@ -76,9 +76,6 @@ test('vision segmentation all', async () => { await page.check('#terminate_on_nan-checkbox') expect(await page.isChecked('#terminate_on_nan-checkbox')).toBeTruthy() - await page.check('#timer-checkbox') - expect(await page.isChecked('#timer-checkbox')).toBeTruthy() - await page.fill('#patience-input-number', '2') expect(await page.$eval('#patience-input-number', (e) => e.value)).toBe('2') diff --git a/src/components/TabHandlers.vue b/src/components/TabHandlers.vue index ba4ea14a..780d0fc9 100644 --- a/src/components/TabHandlers.vue +++ b/src/components/TabHandlers.vue @@ -11,16 +11,19 @@ :saveKey="save_evaluation.name" /> <FormInput + v-if="store.config.save_training" :label="filename_prefix.description" :saveKey="filename_prefix.name" :type="filename_prefix.type" /> <FormInput + v-if="store.config.save_training" :label="save_every_iters.description" 
:saveKey="save_every_iters.name" :type="save_every_iters.type" /> <FormInput + v-if="store.config.save_training || store.config.save_evaluation" :label="n_saved.description" :saveKey="n_saved.name" :type="n_saved.type" @@ -30,8 +33,6 @@ :label="terminate_on_nan.description" :saveKey="terminate_on_nan.name" /> - <h2>Events Timer</h2> - <FormCheckbox :label="timer.description" :saveKey="timer.name" /> <h2>Early Stopping</h2> <FormInput :label="patience.description" @@ -51,11 +52,12 @@ import { handlers } from '../metadata/metadata.json' import FormInput from './FormInput.vue' import FormCheckbox from './FormCheckbox.vue' +import { store } from '../store.js' export default { components: { FormInput, FormCheckbox }, setup() { - return { ...handlers } + return { ...handlers, store } } } </script> diff --git a/src/templates/template-common/main.py b/src/templates/template-common/main.py index 7704cb97..c2a0d38a 100644 --- a/src/templates/template-common/main.py +++ b/src/templates/template-common/main.py @@ -1,13 +1,7 @@ -ckpt_handler_train, ckpt_handler_eval, timer = setup_handlers( +ckpt_handler_train, ckpt_handler_eval = setup_handlers( trainer, evaluator, config, to_save_train, to_save_eval ) -#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# -if timer is not None: - logger.info("Time per batch: %.4f seconds", timer.value()) - timer.reset() -#::: } :::# - #::: if (it.logger) { :::# if rank == 0: from ignite.contrib.handlers.wandb_logger import WandBLogger @@ -20,19 +14,17 @@ exp_logger.close() #::: } :::# -#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# -if ckpt_handler_train is not None: - logger.info( - "Last training checkpoint name - %s", - ckpt_handler_train.last_checkpoint, - ) +#::: if (it.save_training || it.save_evaluation) { :::# +# show last checkpoint names +logger.info( + "Last training checkpoint name - %s", + ckpt_handler_train.last_checkpoint, +) -if ckpt_handler_eval is not None: - logger.info( - "Last evaluation checkpoint name - %s", - ckpt_handler_eval.last_checkpoint, - ) -#::: } :::# +logger.info( + "Last evaluation checkpoint name - %s", + ckpt_handler_eval.last_checkpoint, +) # main entrypoint def main(): diff --git a/src/templates/template-common/utils.py b/src/templates/template-common/utils.py index cd2a3310..01d39e85 100644 --- a/src/templates/template-common/utils.py +++ b/src/templates/template-common/utils.py @@ -132,9 +132,7 @@ def setup_logging(config: Any) -> Logger: return logger -#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - - +#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.limit_sec) { :::# def setup_handlers( trainer: Engine, evaluator: Engine, @@ -144,7 +142,7 @@ def setup_handlers( ): """Setup Ignite handlers.""" - ckpt_handler_train = ckpt_handler_eval = timer = None + ckpt_handler_train = ckpt_handler_eval = None #::: if (it.save_training || it.save_evaluation) { :::# # checkpointing saver = DiskSaver(config.output_dir / "checkpoints", require_empty=False) @@ -191,25 +189,15 @@ def setup_handlers( trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan()) #::: } :::# - #::: if (it.timer) { :::# - # timer - timer = Timer(average=True) - timer.attach( - trainer, - start=Events.EPOCH_STARTED, - resume=Events.ITERATION_STARTED, - pause=Events.ITERATION_COMPLETED, - 
step=Events.ITERATION_COMPLETED, - ) - #::: } :::# - #::: if (it.limit_sec) { :::# # time limit trainer.add_event_handler( Events.ITERATION_COMPLETED, TimeLimit(config.limit_sec) ) #::: } :::# - return ckpt_handler_train, ckpt_handler_eval, timer + #::: if (it.save_training || it.save_evaluation) { :::# + return ckpt_handler_train, ckpt_handler_eval + #::: } :::# #::: } :::# diff --git a/src/templates/template-text-classification/main.py b/src/templates/template-text-classification/main.py index 2aba9e4f..899b8fd8 100644 --- a/src/templates/template-text-classification/main.py +++ b/src/templates/template-text-classification/main.py @@ -93,9 +93,8 @@ def run(local_rank: int, config: Any): else: trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler) - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# # setup ignite handlers - + #::: if (it.save_training || it.save_evaluation) { :::# #::: if (it.save_training) { :::# to_save_train = { "model": model, @@ -106,16 +105,16 @@ def run(local_rank: int, config: Any): #::: } else { :::# to_save_train = None #::: } :::# - #::: if (it.save_evaluation) { :::# to_save_eval = {"model": model} #::: } else { :::# to_save_eval = None #::: } :::# - - ckpt_handler_train, ckpt_handler_eval, timer = setup_handlers( + ckpt_handler_train, ckpt_handler_eval = setup_handlers( trainer, evaluator, config, to_save_train, to_save_eval ) + #::: } else if (it.patience || it.terminate_on_nan || it.limit_sec) { :::# + setup_handlers(trainer, evaluator, config) #::: } :::# #::: if (it.logger) { :::# @@ -140,13 +139,6 @@ def run(local_rank: int, config: Any): # for evaluation stats @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - # show timer - if timer is not None: - logger.info("Time per batch: %.4f seconds", timer.value()) - timer.reset() - #::: } :::# - evaluator.run(dataloader_eval, epoch_length=config.eval_epoch_length) log_metrics(evaluator, "eval") @@ -175,20 +167,17 @@ def _(): exp_logger.close() #::: } :::# - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - # show the last checkpoint filename - if ckpt_handler_train is not None: - logger.info( - "Last training checkpoint name - %s", - ckpt_handler_train.last_checkpoint, - ) + #::: if (it.save_training || it.save_evaluation) { :::# + # show last checkpoint names + logger.info( + "Last training checkpoint name - %s", + ckpt_handler_train.last_checkpoint, + ) - if ckpt_handler_eval is not None: - logger.info( - "Last evaluation checkpoint name - %s", - ckpt_handler_eval.last_checkpoint, - ) - #::: } :::# + logger.info( + "Last evaluation checkpoint name - %s", + ckpt_handler_eval.last_checkpoint, + ) # main entrypoint diff --git a/src/templates/template-text-classification/utils.py b/src/templates/template-text-classification/utils.py index 39974ba4..36d2d3d2 100644 --- a/src/templates/template-text-classification/utils.py +++ b/src/templates/template-text-classification/utils.py @@ -132,7 +132,7 @@ def setup_logging(config: Any) -> Logger: return logger -#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# +#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.limit_sec) { :::# def setup_handlers( trainer: Engine, evaluator: 
Engine, @@ -142,7 +142,7 @@ def setup_handlers( ): """Setup Ignite handlers.""" - ckpt_handler_train = ckpt_handler_eval = timer = None + ckpt_handler_train = ckpt_handler_eval = None #::: if (it.save_training || it.save_evaluation) { :::# # checkpointing saver = DiskSaver(config.output_dir / "checkpoints", require_empty=False) @@ -193,25 +193,15 @@ def score_fn(engine: Engine): trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan()) #::: } :::# - #::: if (it.timer) { :::# - # timer - timer = Timer(average=True) - timer.attach( - trainer, - start=Events.EPOCH_STARTED, - resume=Events.ITERATION_STARTED, - pause=Events.ITERATION_COMPLETED, - step=Events.ITERATION_COMPLETED, - ) - #::: } :::# - #::: if (it.limit_sec) { :::# # time limit trainer.add_event_handler( Events.ITERATION_COMPLETED, TimeLimit(config.limit_sec) ) #::: } :::# - return ckpt_handler_train, ckpt_handler_eval, timer + #::: if (it.save_training || it.save_evaluation) { :::# + return ckpt_handler_train, ckpt_handler_eval + #::: } :::# #::: } :::# diff --git a/src/templates/template-vision-classification/main.py b/src/templates/template-vision-classification/main.py index 684f17ad..98634cdf 100644 --- a/src/templates/template-vision-classification/main.py +++ b/src/templates/template-vision-classification/main.py @@ -54,24 +54,23 @@ def run(local_rank: int, config: Any): (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) trainer.logger = evaluator.logger = logger - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# # setup ignite handlers - + #::: if (it.save_training || it.save_evaluation) { :::# #::: if (it.save_training) { :::# to_save_train = {"model": model, "optimizer": optimizer, "trainer": trainer} #::: } else { :::# to_save_train = None #::: } :::# - #::: if (it.save_evaluation) { :::# to_save_eval = {"model": model} #::: } else { :::# to_save_eval = None #::: } :::# - - ckpt_handler_train, ckpt_handler_eval, timer = setup_handlers( + ckpt_handler_train, ckpt_handler_eval = setup_handlers( trainer, evaluator, config, to_save_train, to_save_eval ) + #::: } else if (it.patience || it.terminate_on_nan || it.limit_sec) { :::# + setup_handlers(trainer, evaluator, config) #::: } :::# #::: if (it.logger) { :::# @@ -96,13 +95,6 @@ def run(local_rank: int, config: Any): # for evaluation stats @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - # show timer - if timer is not None: - logger.info("Time per batch: %.4f seconds", timer.value()) - timer.reset() - #::: } :::# - evaluator.run(dataloader_eval, epoch_length=config.eval_epoch_length) log_metrics(evaluator, "eval") @@ -131,20 +123,17 @@ def _(): exp_logger.close() #::: } :::# - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - # show the last checkpoint filename - if ckpt_handler_train is not None: - logger.info( - "Last training checkpoint name - %s", - ckpt_handler_train.last_checkpoint, - ) - - if ckpt_handler_eval is not None: - logger.info( - "Last evaluation checkpoint name - %s", - ckpt_handler_eval.last_checkpoint, - ) - #::: } :::# + #::: if (it.save_training || it.save_evaluation) { :::# + # show last checkpoint names + logger.info( + "Last training checkpoint name - %s", + ckpt_handler_train.last_checkpoint, + ) + + logger.info( + "Last evaluation checkpoint 
name - %s", + ckpt_handler_eval.last_checkpoint, + ) # main entrypoint diff --git a/src/templates/template-vision-classification/utils.py b/src/templates/template-vision-classification/utils.py index 66a5c280..9fe87c0a 100644 --- a/src/templates/template-vision-classification/utils.py +++ b/src/templates/template-vision-classification/utils.py @@ -132,7 +132,7 @@ def setup_logging(config: Any) -> Logger: return logger -#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# +#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.limit_sec) { :::# def setup_handlers( trainer: Engine, evaluator: Engine, @@ -142,7 +142,7 @@ def setup_handlers( ): """Setup Ignite handlers.""" - ckpt_handler_train = ckpt_handler_eval = timer = None + ckpt_handler_train = ckpt_handler_eval = None #::: if (it.save_training || it.save_evaluation) { :::# # checkpointing saver = DiskSaver(config.output_dir / "checkpoints", require_empty=False) @@ -191,25 +191,15 @@ def score_fn(engine: Engine): trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan()) #::: } :::# - #::: if (it.timer) { :::# - # timer - timer = Timer(average=True) - timer.attach( - trainer, - start=Events.EPOCH_STARTED, - resume=Events.ITERATION_STARTED, - pause=Events.ITERATION_COMPLETED, - step=Events.ITERATION_COMPLETED, - ) - #::: } :::# - #::: if (it.limit_sec) { :::# # time limit trainer.add_event_handler( Events.ITERATION_COMPLETED, TimeLimit(config.limit_sec) ) #::: } :::# - return ckpt_handler_train, ckpt_handler_eval, timer + #::: if (it.save_training || it.save_evaluation) { :::# + return ckpt_handler_train, ckpt_handler_eval + #::: } :::# #::: } :::# diff --git a/src/templates/template-vision-dcgan/main.py b/src/templates/template-vision-dcgan/main.py index 5db5fcfd..3e0f0c84 100644 --- a/src/templates/template-vision-dcgan/main.py +++ b/src/templates/template-vision-dcgan/main.py @@ -84,9 +84,8 @@ def run(local_rank: int, config: Any): (config.output_dir / "config-lock.yaml").write_text(yaml.dump(config)) trainer.logger = evaluator.logger = logger - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# # setup ignite handlers - + #::: if (it.save_training || it.save_evaluation) { :::# #::: if (it.save_training) { :::# to_save_train = { "model_d": model_d, @@ -98,16 +97,16 @@ def run(local_rank: int, config: Any): #::: } else { :::# to_save_train = None #::: } :::# - #::: if (it.save_evaluation) { :::# to_save_eval = {"model_d": model_d, "model_g": model_g} #::: } else { :::# - to_save_eval = None + to_save_train = None #::: } :::# - - ckpt_handler_train, ckpt_handler_eval, timer = setup_handlers( + ckpt_handler_train, ckpt_handler_eval = setup_handlers( trainer, evaluator, config, to_save_train, to_save_eval ) + #::: } else if (it.patience || it.terminate_on_nan || it.limit_sec) { :::# + setup_handlers(trainer, evaluator, config) #::: } :::# #::: if (it.logger) { :::# @@ -151,13 +150,6 @@ def save_real_example(engine): # for evaluation stats @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - # show timer - if timer is not None: - logger.info("Time per batch: %.4f seconds", timer.value()) - timer.reset() - #::: } :::# - evaluator.run(dataloader_eval, epoch_length=config.eval_epoch_length) log_metrics(evaluator, "eval") @@ -186,19 
+178,17 @@ def _(): exp_logger.close() #::: } :::# - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# + #::: if (it.save_training || it.save_evaluation) { :::# # show last checkpoint names - if ckpt_handler_train is not None: - logger.info( - "Last training checkpoint name - %s", - ckpt_handler_train.last_checkpoint, - ) + logger.info( + "Last training checkpoint name - %s", + ckpt_handler_train.last_checkpoint, + ) - if ckpt_handler_eval is not None: - logger.info( - "Last evaluation checkpoint name - %s", - ckpt_handler_eval.last_checkpoint, - ) + logger.info( + "Last evaluation checkpoint name - %s", + ckpt_handler_eval.last_checkpoint, + ) #::: } :::# diff --git a/src/templates/template-vision-dcgan/utils.py b/src/templates/template-vision-dcgan/utils.py index 66a5c280..9fe87c0a 100644 --- a/src/templates/template-vision-dcgan/utils.py +++ b/src/templates/template-vision-dcgan/utils.py @@ -132,7 +132,7 @@ def setup_logging(config: Any) -> Logger: return logger -#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# +#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.limit_sec) { :::# def setup_handlers( trainer: Engine, evaluator: Engine, @@ -142,7 +142,7 @@ def setup_handlers( ): """Setup Ignite handlers.""" - ckpt_handler_train = ckpt_handler_eval = timer = None + ckpt_handler_train = ckpt_handler_eval = None #::: if (it.save_training || it.save_evaluation) { :::# # checkpointing saver = DiskSaver(config.output_dir / "checkpoints", require_empty=False) @@ -191,25 +191,15 @@ def score_fn(engine: Engine): trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan()) #::: } :::# - #::: if (it.timer) { :::# - # timer - timer = Timer(average=True) - timer.attach( - trainer, - start=Events.EPOCH_STARTED, - resume=Events.ITERATION_STARTED, - pause=Events.ITERATION_COMPLETED, - step=Events.ITERATION_COMPLETED, - ) - #::: } :::# - #::: if (it.limit_sec) { :::# # time limit trainer.add_event_handler( Events.ITERATION_COMPLETED, TimeLimit(config.limit_sec) ) #::: } :::# - return ckpt_handler_train, ckpt_handler_eval, timer + #::: if (it.save_training || it.save_evaluation) { :::# + return ckpt_handler_train, ckpt_handler_eval + #::: } :::# #::: } :::# diff --git a/src/templates/template-vision-segmentation/main.py b/src/templates/template-vision-segmentation/main.py index e72440c6..c303f329 100644 --- a/src/templates/template-vision-segmentation/main.py +++ b/src/templates/template-vision-segmentation/main.py @@ -9,10 +9,9 @@ from ignite.engine import Events from ignite.metrics import ConfusionMatrix, IoU, mIoU from ignite.utils import manual_seed +from model import setup_model from torch import nn, optim from torch.optim.lr_scheduler import LambdaLR, _LRScheduler -from torch.utils.data.distributed import DistributedSampler -from torchvision.models.segmentation import deeplabv3_resnet101 from trainers import setup_evaluator, setup_trainer from utils import * from vis import predictions_gt_images_handler @@ -33,9 +32,7 @@ def run(local_rank: int, config: Any): # model, optimizer, loss function, device device = idist.device() - model = idist.auto_model( - deeplabv3_resnet101(num_classes=config.num_classes) - ) + model = idist.auto_model(setup_model(config)) optimizer = idist.auto_optim( optim.SGD( model.parameters(), @@ -85,9 +82,8 @@ def run(local_rank: int, config: Any): else: 
trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler) - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# # setup ignite handlers - + #::: if (it.save_training || it.save_evaluation) { :::# #::: if (it.save_training) { :::# to_save_train = { "model": model, @@ -98,16 +94,16 @@ def run(local_rank: int, config: Any): #::: } else { :::# to_save_train = None #::: } :::# - #::: if (it.save_evaluation) { :::# to_save_eval = {"model": model} #::: } else { :::# to_save_eval = None #::: } :::# - - ckpt_handler_train, ckpt_handler_eval, timer = setup_handlers( + ckpt_handler_train, ckpt_handler_eval = setup_handlers( trainer, evaluator, config, to_save_train, to_save_eval ) + #::: } else if (it.patience || it.terminate_on_nan || it.limit_sec) { :::# + setup_handlers(trainer, evaluator, config) #::: } :::# #::: if (it.logger) { :::# @@ -162,13 +158,6 @@ def custom_event_filter(_, val_iteration): # for evaluation stats @trainer.on(Events.EPOCH_COMPLETED(every=1)) def _(): - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - # show timer - if timer is not None: - logger.info("Time per batch: %.4f seconds", timer.value()) - timer.reset() - #::: } :::# - evaluator.run(dataloader_eval, epoch_length=config.eval_epoch_length) log_metrics(evaluator, "eval") @@ -197,20 +186,17 @@ def _(): exp_logger.close() #::: } :::# - #::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# - # show the last checkpoint filename - if ckpt_handler_train is not None: - logger.info( - "Last training checkpoint name - %s", - ckpt_handler_train.last_checkpoint, - ) + #::: if (it.save_training || it.save_evaluation) { :::# + # show last checkpoint names + logger.info( + "Last training checkpoint name - %s", + ckpt_handler_train.last_checkpoint, + ) - if ckpt_handler_eval is not None: - logger.info( - "Last evaluation checkpoint name - %s", - ckpt_handler_eval.last_checkpoint, - ) - #::: } :::# + logger.info( + "Last evaluation checkpoint name - %s", + ckpt_handler_eval.last_checkpoint, + ) # main entrypoint diff --git a/src/templates/template-vision-segmentation/model.py b/src/templates/template-vision-segmentation/model.py new file mode 100644 index 00000000..ee5c8119 --- /dev/null +++ b/src/templates/template-vision-segmentation/model.py @@ -0,0 +1,5 @@ +from torchvision.models.segmentation import deeplabv3_resnet101 + + +def setup_model(config): + return deeplabv3_resnet101(num_classes=config.num_classes) diff --git a/src/templates/template-vision-segmentation/utils.py b/src/templates/template-vision-segmentation/utils.py index 15b10023..93f8b96c 100644 --- a/src/templates/template-vision-segmentation/utils.py +++ b/src/templates/template-vision-segmentation/utils.py @@ -132,7 +132,7 @@ def setup_logging(config: Any) -> Logger: return logger -#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.timer || it.limit_sec) { :::# +#::: if (it.save_training || it.save_evaluation || it.patience || it.terminate_on_nan || it.limit_sec) { :::# def setup_handlers( trainer: Engine, evaluator: Engine, @@ -142,7 +142,7 @@ def setup_handlers( ): """Setup Ignite handlers.""" - ckpt_handler_train = ckpt_handler_eval = timer = None + ckpt_handler_train = ckpt_handler_eval = None #::: if (it.save_training || it.save_evaluation) { :::# # checkpointing saver = DiskSaver(config.output_dir / 
"checkpoints", require_empty=False) @@ -194,25 +194,15 @@ def score_fn(engine: Engine): trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan()) #::: } :::# - #::: if (it.timer) { :::# - # timer - timer = Timer(average=True) - timer.attach( - trainer, - start=Events.EPOCH_STARTED, - resume=Events.ITERATION_STARTED, - pause=Events.ITERATION_COMPLETED, - step=Events.ITERATION_COMPLETED, - ) - #::: } :::# - #::: if (it.limit_sec) { :::# # time limit trainer.add_event_handler( Events.ITERATION_COMPLETED, TimeLimit(config.limit_sec) ) #::: } :::# - return ckpt_handler_train, ckpt_handler_eval, timer + #::: if (it.save_training || it.save_evaluation) { :::# + return ckpt_handler_train, ckpt_handler_eval + #::: } :::# #::: } :::# From 1abb22858a2074ce3232f0b095a86028406edbd1 Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Thu, 27 May 2021 18:11:29 +0630 Subject: [PATCH 7/8] fix: put model.py in templates.jons --- src/templates/templates.json | 1 + 1 file changed, 1 insertion(+) diff --git a/src/templates/templates.json b/src/templates/templates.json index 41f6efa8..cff8910a 100644 --- a/src/templates/templates.json +++ b/src/templates/templates.json @@ -26,6 +26,7 @@ "config.yaml", "data.py", "main.py", + "model.py", "trainers.py", "utils.py", "vis.py", From a5148cd3d1d55a21a3f5ed74d55c23e753b597ac Mon Sep 17 00:00:00 2001 From: ydcjeff <32727188+ydcjeff@users.noreply.github.com> Date: Thu, 27 May 2021 19:08:33 +0630 Subject: [PATCH 8/8] fix: add missing } --- src/templates/template-common/main.py | 2 ++ src/templates/template-text-classification/main.py | 1 + src/templates/template-vision-classification/main.py | 1 + src/templates/template-vision-segmentation/main.py | 1 + 4 files changed, 5 insertions(+) diff --git a/src/templates/template-common/main.py b/src/templates/template-common/main.py index c2a0d38a..5893fb83 100644 --- a/src/templates/template-common/main.py +++ b/src/templates/template-common/main.py @@ -25,6 +25,8 @@ "Last evaluation checkpoint name - %s", ckpt_handler_eval.last_checkpoint, ) +#::: } :::# + # main entrypoint def main(): diff --git a/src/templates/template-text-classification/main.py b/src/templates/template-text-classification/main.py index 899b8fd8..f6f43cbb 100644 --- a/src/templates/template-text-classification/main.py +++ b/src/templates/template-text-classification/main.py @@ -178,6 +178,7 @@ def _(): "Last evaluation checkpoint name - %s", ckpt_handler_eval.last_checkpoint, ) + #::: } :::# # main entrypoint diff --git a/src/templates/template-vision-classification/main.py b/src/templates/template-vision-classification/main.py index 98634cdf..9e1a5776 100644 --- a/src/templates/template-vision-classification/main.py +++ b/src/templates/template-vision-classification/main.py @@ -134,6 +134,7 @@ def _(): "Last evaluation checkpoint name - %s", ckpt_handler_eval.last_checkpoint, ) + #::: } :::# # main entrypoint diff --git a/src/templates/template-vision-segmentation/main.py b/src/templates/template-vision-segmentation/main.py index c303f329..885c2031 100644 --- a/src/templates/template-vision-segmentation/main.py +++ b/src/templates/template-vision-segmentation/main.py @@ -197,6 +197,7 @@ def _(): "Last evaluation checkpoint name - %s", ckpt_handler_eval.last_checkpoint, ) + #::: } :::# # main entrypoint