Separate the output_dir and sub_output_dir to make reproducing results easier #306

Closed · wants to merge 4 commits
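The change below moves the per-run directory from config.output_dir into a new config.sub_output_dir key, and save_config now drops that key before writing config-lock.yaml, so the lock file keeps the user's original output_dir. A self-contained sketch of the idea (illustrative only, not code from this PR; the directory naming mirrors setup_output_dir in the diff below, and the timestamp format is an assumption):

# Illustrative sketch: with the old behaviour, config.output_dir itself was overwritten
# with the timestamped run directory and that path ended up in config-lock.yaml, so
# reproducing from the lock file nested new runs inside the old one. Keeping the run
# directory in sub_output_dir and stripping it from the lock file avoids that.
from datetime import datetime
from pathlib import Path

from omegaconf import OmegaConf

config = OmegaConf.create({"output_dir": "./logs", "backend": None, "lr": 0.001})

# per-run directory, named like setup_output_dir in the diff below (timestamp format assumed)
now = datetime.now().strftime("%Y%m%d-%H%M%S")
run_dir = Path(config.output_dir, f"{now}-backend-{config.backend}-lr-{config.lr}")
run_dir.mkdir(parents=True, exist_ok=True)
config.sub_output_dir = run_dir.as_posix()

# config-lock.yaml keeps output_dir=./logs rather than the run directory, so re-running
# a template with the lock file creates a fresh timestamped sub-directory under ./logs
saved = config.copy()
del saved["sub_output_dir"]
OmegaConf.save(saved, run_dir / "config-lock.yaml")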
4 changes: 4 additions & 0 deletions src/templates/template-common/test_all.py
@@ -2,8 +2,12 @@ def test_save_config():
     with open("./config.yaml", "r") as f:
         config = OmegaConf.load(f)

+    config.sub_output_dir = "job-dir"
+
     save_config(config, "./")

+    del config["sub_output_dir"]
+
     with open("./config-lock.yaml", "r") as f:
         test_config = OmegaConf.load(f)

21 changes: 13 additions & 8 deletions src/templates/template-common/utils.py
@@ -154,15 +154,20 @@ def setup_output_dir(config: Any, rank: int) -> Path:
         name = f"{now}-backend-{config.backend}-lr-{config.lr}"
         path = Path(config.output_dir, name)
         path.mkdir(parents=True, exist_ok=True)
-        config.output_dir = path.as_posix()
+        config.sub_output_dir = path.as_posix()

-    return Path(idist.broadcast(config.output_dir, src=0))
+    return Path(idist.broadcast(config.sub_output_dir, src=0))


-def save_config(config, output_dir):
+def save_config(config: Any, sub_output_dir: Path):
     """Save configuration to config-lock.yaml for result reproducibility."""
-    with open(f"{output_dir}/config-lock.yaml", "w") as f:
-        OmegaConf.save(config, f)
+    saved_config = config.copy()
+
+    # Delete the sub_output_dir from saved_config for config-lock.yaml
+    del saved_config["sub_output_dir"]
+
+    with open(f"{sub_output_dir}/config-lock.yaml", "w") as f:
+        OmegaConf.save(saved_config, f)


 def setup_logging(config: Any) -> Logger:
@@ -171,7 +176,7 @@ def setup_logging(config: Any) -> Logger:
     Parameters
     ----------
     config
-        config object. config has to contain `verbose` and `sub_output_dir` attribute.
+        config object. config has to contain `verbose` and `sub_output_dir` attribute.

     Returns
     -------
@@ -183,7 +188,7 @@ def setup_logging(config: Any) -> Logger:
     logger = setup_logger(
         name=f"{green}[ignite]{reset}",
         level=logging.DEBUG if config.debug else logging.INFO,
-        filepath=config.output_dir / "training-info.log",
+        filepath=config.sub_output_dir / "training-info.log",
     )
     return logger

@@ -204,7 +209,7 @@ def setup_exp_logging(config, trainer, optimizers, evaluators):
     logger = common.setup_plx_logging(trainer, optimizers, evaluators, config.log_every_iters)
     #::: } else if (it.logger === 'tensorboard') { :::#
     logger = common.setup_tb_logging(
-        config.output_dir,
+        config.sub_output_dir,
         trainer,
         optimizers,
         evaluators,
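A hedged usage sketch for the updated save_config above; the import line is an assumption about the generated project's layout (in these templates utils.py sits next to main.py):

# Usage sketch (assumed import path; adjust to the generated project's layout)
from pathlib import Path

from omegaconf import OmegaConf

from utils import save_config

config = OmegaConf.create({"output_dir": "./logs", "lr": 0.001})
config.sub_output_dir = "./logs/example-run"  # normally set via setup_output_dir
Path(config.sub_output_dir).mkdir(parents=True, exist_ok=True)

save_config(config, Path(config.sub_output_dir))

lock = OmegaConf.load(f"{config.sub_output_dir}/config-lock.yaml")
assert "sub_output_dir" not in lock  # the run directory is not persisted
assert lock.output_dir == config.output_dir  # the user-facing output_dir is kept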
4 changes: 2 additions & 2 deletions src/templates/template-text-classification/main.py
@@ -27,9 +27,9 @@ def run(local_rank: int, config: Any):
     manual_seed(config.seed + rank)

     # create output folder and copy config file to output dir
-    config.output_dir = setup_output_dir(config, rank)
+    config.sub_output_dir = setup_output_dir(config, rank)
     if rank == 0:
-        save_config(config, config.output_dir)
+        save_config(config, config.sub_output_dir)

     # donwload datasets and create dataloaders
     dataloader_train, dataloader_eval = setup_data(config)
4 changes: 2 additions & 2 deletions src/templates/template-vision-classification/main.py
@@ -24,9 +24,9 @@ def run(local_rank: int, config: Any):
     manual_seed(config.seed + rank)

     # create output folder and copy config file to output dir
-    config.output_dir = setup_output_dir(config, rank)
+    config.sub_output_dir = setup_output_dir(config, rank)
     if rank == 0:
-        save_config(config, config.output_dir)
+        save_config(config, config.sub_output_dir)

     # donwload datasets and create dataloaders
     dataloader_train, dataloader_eval = setup_data(config)
8 changes: 4 additions & 4 deletions src/templates/template-vision-dcgan/main.py
@@ -28,9 +28,9 @@ def run(local_rank: int, config: Any):
     manual_seed(config.seed + rank)

     # create output folder and copy config file to output dir
-    config.output_dir = setup_output_dir(config, rank)
+    config.sub_output_dir = setup_output_dir(config, rank)
     if rank == 0:
-        save_config(config, config.output_dir)
+        save_config(config, config.sub_output_dir)

     # donwload datasets and create dataloaders
     dataloader_train, dataloader_eval, num_channels = setup_data(config)
@@ -131,14 +131,14 @@ def run(local_rank: int, config: Any):
     @trainer.on(Events.EPOCH_COMPLETED)
     def save_fake_example(engine):
         fake = model_g(fixed_noise)
-        path = config.output_dir / FAKE_IMG_FNAME.format(engine.state.epoch)
+        path = config.sub_output_dir / FAKE_IMG_FNAME.format(engine.state.epoch)
         vutils.save_image(fake.detach(), path, normalize=True)

     # adding handlers using `trainer.on` decorator API
     @trainer.on(Events.EPOCH_COMPLETED)
     def save_real_example(engine):
         img, y = engine.state.batch
-        path = config.output_dir / REAL_IMG_FNAME.format(engine.state.epoch)
+        path = config.sub_output_dir / REAL_IMG_FNAME.format(engine.state.epoch)
         vutils.save_image(img, path, normalize=True)

     # run evaluation at every training epoch end
2 changes: 1 addition & 1 deletion src/templates/template-vision-dcgan/utils.py
@@ -15,7 +15,7 @@ def setup_handlers(
     ckpt_handler_train = ckpt_handler_eval = None
     #::: if (it.save_training || it.save_evaluation) { :::#
     # checkpointing
-    saver = DiskSaver(config.output_dir / "checkpoints", require_empty=False)
+    saver = DiskSaver(config.sub_output_dir / "checkpoints", require_empty=False)
     #::: if (it.save_training) { :::#
     ckpt_handler_train = Checkpoint(
         to_save_train,
4 changes: 2 additions & 2 deletions src/templates/template-vision-segmentation/main.py
@@ -34,9 +34,9 @@ def run(local_rank: int, config: Any):
     manual_seed(config.seed + rank)

     # create output folder and copy config file to output dir
-    config.output_dir = setup_output_dir(config, rank)
+    config.sub_output_dir = setup_output_dir(config, rank)
     if rank == 0:
-        save_config(config, config.output_dir)
+        save_config(config, config.sub_output_dir)

     # donwload datasets and create dataloaders
     dataloader_train, dataloader_eval = setup_data(config)
2 changes: 1 addition & 1 deletion src/templates/template-vision-segmentation/utils.py
@@ -16,7 +16,7 @@ def setup_handlers(
     ckpt_handler_train = ckpt_handler_eval = None
     #::: if (it.save_training || it.save_evaluation) { :::#
     # checkpointing
-    saver = DiskSaver(config.output_dir / "checkpoints", require_empty=False)
+    saver = DiskSaver(config.sub_output_dir / "checkpoints", require_empty=False)
     #::: if (it.save_training) { :::#
     ckpt_handler_train = Checkpoint(
         to_save_train,