From fa56b67b46a30342c47c83d7f72199d7ae1c2a88 Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Sun, 4 Jun 2023 22:47:08 +0530 Subject: [PATCH 01/11] Restructured config - Restructured config so that arguments are defined in config.yaml --- .../template-vision-segmentation/utils.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/templates/template-vision-segmentation/utils.py b/src/templates/template-vision-segmentation/utils.py index 53c03940..6df43d06 100644 --- a/src/templates/template-vision-segmentation/utils.py +++ b/src/templates/template-vision-segmentation/utils.py @@ -18,19 +18,33 @@ from ignite.utils import setup_logger -def setup_parser(): - with open("config.yaml", "r") as f: +def get_default_parser(): + parser = ArgumentParser() + parser.add_argument("config", type=Path, help="Config file path") + parser.add_argument( + "--backend", + default=None, + choices=["nccl", "gloo"], + type=str, + help="DDP backend", + ) + return parser + + +def setup_config(parser=None): + if parser is None: + parser = get_default_parser() + + args = parser.parse_args() + config_path = args.config + + with open(config_path, "r") as f: config = yaml.safe_load(f.read()) - parser = ArgumentParser() - parser.add_argument("--backend", default=None, type=str) for k, v in config.items(): - if isinstance(v, bool): - parser.add_argument(f"--{k}", action="store_true") - else: - parser.add_argument(f"--{k}", default=v, type=type(v)) + setattr(args, k, v) - return parser + return args def log_metrics(engine: Engine, tag: str) -> None: From f12634e436b1a579b5f485080dc7472113365f6f Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 5 Jun 2023 15:18:35 +0530 Subject: [PATCH 02/11] Fix main.py --- src/templates/template-vision-segmentation/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/templates/template-vision-segmentation/main.py b/src/templates/template-vision-segmentation/main.py index 
6ee1c906..0ef48744 100644 --- a/src/templates/template-vision-segmentation/main.py +++ b/src/templates/template-vision-segmentation/main.py @@ -197,7 +197,7 @@ def _(): # main entrypoint def main(): - config = setup_parser().parse_args() + config = setup_config() #::: if (it.dist === 'spawn') { :::# #::: if (it.nproc_per_node && it.nnodes > 1 && it.master_addr && it.master_port) { :::# kwargs = { From 91bb7ff66569bcba01916c9e44cae57ed8c5fd73 Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Sat, 17 Jun 2023 01:44:37 +0530 Subject: [PATCH 03/11] Modified tests according to new config restructuring --- scripts/run_tests.sh | 8 +------- .../ci-configs/vision-segmentation-simple.yaml | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 src/tests/ci-configs/vision-segmentation-simple.yaml diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index a63f49cf..8943097c 100644 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -18,13 +18,7 @@ run_simple() { for dir in $(find ./dist-tests/$1-simple -type d) do cd $dir - python main.py --data_path ~/data \ - --train_batch_size 2 \ - --eval_batch_size 2 \ - --num_workers 2 \ - --max_epochs 2 \ - --train_epoch_length 4 \ - --eval_epoch_length 4 + python main.py ../../src/tests/ci-configs/$1-simple.yaml cd $CWD done } diff --git a/src/tests/ci-configs/vision-segmentation-simple.yaml b/src/tests/ci-configs/vision-segmentation-simple.yaml new file mode 100644 index 00000000..96668425 --- /dev/null +++ b/src/tests/ci-configs/vision-segmentation-simple.yaml @@ -0,0 +1,15 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 2 +eval_epoch_length: 2 +lr: 0.007 +use_amp: false +debug: false +accumulation_steps: 4 +num_classes: 21 +output_dir: ./logs +log_every_iters: 2 \ No newline at end of file From 8f3978b4a59648519bbb8fbf6dfa799e60e9a74c Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 19 Jun 2023 
11:43:09 +0530 Subject: [PATCH 04/11] Updating remaining templates with restructured config --- .../template-text-classification/main.py | 2 +- .../template-text-classification/utils.py | 32 +++++++++++++------ .../template-vision-classification/main.py | 2 +- .../template-vision-classification/utils.py | 32 +++++++++++++------ src/templates/template-vision-dcgan/main.py | 2 +- src/templates/template-vision-dcgan/utils.py | 32 +++++++++++++------ 6 files changed, 72 insertions(+), 30 deletions(-) diff --git a/src/templates/template-text-classification/main.py b/src/templates/template-text-classification/main.py index a526ee5f..d6c3487a 100644 --- a/src/templates/template-text-classification/main.py +++ b/src/templates/template-text-classification/main.py @@ -175,7 +175,7 @@ def _(): # main entrypoint def main(): - config = setup_parser().parse_args() + config = setup_config() #::: if (it.dist === 'spawn') { :::# #::: if (it.nproc_per_node && it.nnodes > 1 && it.master_addr && it.master_port) { :::# kwargs = { diff --git a/src/templates/template-text-classification/utils.py b/src/templates/template-text-classification/utils.py index 038c954e..9a2b79dd 100644 --- a/src/templates/template-text-classification/utils.py +++ b/src/templates/template-text-classification/utils.py @@ -18,19 +18,33 @@ from ignite.utils import setup_logger -def setup_parser(): - with open("config.yaml", "r") as f: +def get_default_parser(): + parser = ArgumentParser() + parser.add_argument("config", type=Path, help="Config file path") + parser.add_argument( + "--backend", + default=None, + choices=["nccl", "gloo"], + type=str, + help="DDP backend", + ) + return parser + + +def setup_config(parser=None): + if parser is None: + parser = get_default_parser() + + args = parser.parse_args() + config_path = args.config + + with open(config_path, "r") as f: config = yaml.safe_load(f.read()) - parser = ArgumentParser() - parser.add_argument("--backend", default=None, type=str) for k, v in 
config.items(): - if isinstance(v, bool): - parser.add_argument(f"--{k}", action="store_true") - else: - parser.add_argument(f"--{k}", default=v, type=type(v)) + setattr(args, k, v) - return parser + return args def log_metrics(engine: Engine, tag: str) -> None: diff --git a/src/templates/template-vision-classification/main.py b/src/templates/template-vision-classification/main.py index 04b16e39..9f3e0033 100644 --- a/src/templates/template-vision-classification/main.py +++ b/src/templates/template-vision-classification/main.py @@ -131,7 +131,7 @@ def _(): # main entrypoint def main(): - config = setup_parser().parse_args() + config = setup_config() #::: if (it.dist === 'spawn') { :::# #::: if (it.nproc_per_node && it.nnodes > 1 && it.master_addr && it.master_port) { :::# kwargs = { diff --git a/src/templates/template-vision-classification/utils.py b/src/templates/template-vision-classification/utils.py index 85a5f440..e1055bdb 100644 --- a/src/templates/template-vision-classification/utils.py +++ b/src/templates/template-vision-classification/utils.py @@ -18,19 +18,33 @@ from ignite.utils import setup_logger -def setup_parser(): - with open("config.yaml", "r") as f: +def get_default_parser(): + parser = ArgumentParser() + parser.add_argument("config", type=Path, help="Config file path") + parser.add_argument( + "--backend", + default=None, + choices=["nccl", "gloo"], + type=str, + help="DDP backend", + ) + return parser + + +def setup_config(parser=None): + if parser is None: + parser = get_default_parser() + + args = parser.parse_args() + config_path = args.config + + with open(config_path, "r") as f: config = yaml.safe_load(f.read()) - parser = ArgumentParser() - parser.add_argument("--backend", default=None, type=str) for k, v in config.items(): - if isinstance(v, bool): - parser.add_argument(f"--{k}", action="store_true") - else: - parser.add_argument(f"--{k}", default=v, type=type(v)) + setattr(args, k, v) - return parser + return args def log_metrics(engine: 
Engine, tag: str) -> None: diff --git a/src/templates/template-vision-dcgan/main.py b/src/templates/template-vision-dcgan/main.py index 420875c0..6ac8af8b 100644 --- a/src/templates/template-vision-dcgan/main.py +++ b/src/templates/template-vision-dcgan/main.py @@ -183,7 +183,7 @@ def _(): # main entrypoint def main(): - config = setup_parser().parse_args() + config = setup_config() #::: if (it.dist === 'spawn') { :::# #::: if (it.nproc_per_node && it.nnodes > 1 && it.master_addr && it.master_port) { :::# kwargs = { diff --git a/src/templates/template-vision-dcgan/utils.py b/src/templates/template-vision-dcgan/utils.py index 85e3108b..00efb724 100644 --- a/src/templates/template-vision-dcgan/utils.py +++ b/src/templates/template-vision-dcgan/utils.py @@ -18,19 +18,33 @@ from ignite.utils import setup_logger -def setup_parser(): - with open("config.yaml", "r") as f: +def get_default_parser(): + parser = ArgumentParser() + parser.add_argument("config", type=Path, help="Config file path") + parser.add_argument( + "--backend", + default=None, + choices=["nccl", "gloo"], + type=str, + help="DDP backend", + ) + return parser + + +def setup_config(parser=None): + if parser is None: + parser = get_default_parser() + + args = parser.parse_args() + config_path = args.config + + with open(config_path, "r") as f: config = yaml.safe_load(f.read()) - parser = ArgumentParser() - parser.add_argument("--backend", default=None, type=str) for k, v in config.items(): - if isinstance(v, bool): - parser.add_argument(f"--{k}", action="store_true") - else: - parser.add_argument(f"--{k}", default=v, type=type(v)) + setattr(args, k, v) - return parser + return args def log_metrics(engine: Engine, tag: str) -> None: From 4b4d118b03d77c225886cff0f681a3f4018c5387 Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 19 Jun 2023 11:47:18 +0530 Subject: [PATCH 05/11] Update according to original config args --- src/tests/ci-configs/vision-segmentation-simple.yaml | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/src/tests/ci-configs/vision-segmentation-simple.yaml b/src/tests/ci-configs/vision-segmentation-simple.yaml index 96668425..dece00d4 100644 --- a/src/tests/ci-configs/vision-segmentation-simple.yaml +++ b/src/tests/ci-configs/vision-segmentation-simple.yaml @@ -4,8 +4,8 @@ train_batch_size: 2 eval_batch_size: 2 num_workers: 2 max_epochs: 2 -train_epoch_length: 2 -eval_epoch_length: 2 +train_epoch_length: 4 +eval_epoch_length: 4 lr: 0.007 use_amp: false debug: false From 0214a084cef0baffeaec859a37442d0cb5022553 Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 19 Jun 2023 12:30:20 +0530 Subject: [PATCH 06/11] Configs for all the templates - Created new yaml files for testing the code - These are the test args that will be run when we run the tests --- .../ci-configs/text-classification-all.yaml | 27 +++++++++++++++++++ .../text-classification-launch.yaml | 24 +++++++++++++++++ .../text-classification-simple.yaml | 22 +++++++++++++++ .../ci-configs/text-classification-spawn.yaml | 27 +++++++++++++++++++ .../ci-configs/vision-classification-all.yaml | 19 +++++++++++++ .../vision-classification-launch.yaml | 16 +++++++++++ .../vision-classification-simple.yaml | 14 ++++++++++ .../vision-classification-spawn.yaml | 19 +++++++++++++ src/tests/ci-configs/vision-dcgan-all.yaml | 21 +++++++++++++++ src/tests/ci-configs/vision-dcgan-launch.yaml | 18 +++++++++++++ src/tests/ci-configs/vision-dcgan-simple.yaml | 16 +++++++++++ src/tests/ci-configs/vision-dcgan-spawn.yaml | 21 +++++++++++++++ .../ci-configs/vision-segmentation-all.yaml | 20 ++++++++++++++ .../vision-segmentation-launch.yaml | 17 ++++++++++++ .../vision-segmentation-simple.yaml | 2 +- .../ci-configs/vision-segmentation-spawn.yaml | 20 ++++++++++++++ 16 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 src/tests/ci-configs/text-classification-all.yaml create mode 100644 src/tests/ci-configs/text-classification-launch.yaml create mode 100644 
src/tests/ci-configs/text-classification-simple.yaml create mode 100644 src/tests/ci-configs/text-classification-spawn.yaml create mode 100644 src/tests/ci-configs/vision-classification-all.yaml create mode 100644 src/tests/ci-configs/vision-classification-launch.yaml create mode 100644 src/tests/ci-configs/vision-classification-simple.yaml create mode 100644 src/tests/ci-configs/vision-classification-spawn.yaml create mode 100644 src/tests/ci-configs/vision-dcgan-all.yaml create mode 100644 src/tests/ci-configs/vision-dcgan-launch.yaml create mode 100644 src/tests/ci-configs/vision-dcgan-simple.yaml create mode 100644 src/tests/ci-configs/vision-dcgan-spawn.yaml create mode 100644 src/tests/ci-configs/vision-segmentation-all.yaml create mode 100644 src/tests/ci-configs/vision-segmentation-launch.yaml create mode 100644 src/tests/ci-configs/vision-segmentation-spawn.yaml diff --git a/src/tests/ci-configs/text-classification-all.yaml b/src/tests/ci-configs/text-classification-all.yaml new file mode 100644 index 00000000..208f352e --- /dev/null +++ b/src/tests/ci-configs/text-classification-all.yaml @@ -0,0 +1,27 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +use_amp: false +debug: false +model: bert-base-uncased +model_dir: /tmp/model +tokenizer_dir: /tmp/tokenizer +num_classes: 1 +drop_out: .3 +n_fc: 768 +weight_decay: 0.01 +num_warmup_epochs: 0 +max_length: 256 +lr: 0.00005 +filename_prefix: training +n_saved: 2 +save_every_iters: 2 +patience: 2 +limit_sec: 60 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/text-classification-launch.yaml b/src/tests/ci-configs/text-classification-launch.yaml new file mode 100644 index 00000000..25b37c26 --- /dev/null +++ b/src/tests/ci-configs/text-classification-launch.yaml @@ -0,0 +1,24 @@ +seed: 666 +backend: gloo +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 1 
+max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +use_amp: false +debug: false +model: bert-base-uncased +model_dir: /tmp/model +tokenizer_dir: /tmp/tokenizer +num_classes: 1 +nproc_per_node: 2 +drop_out: .3 +n_fc: 768 +weight_decay: 0.01 +num_warmup_epochs: 0 +max_length: 256 +lr: 0.00005 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/text-classification-simple.yaml b/src/tests/ci-configs/text-classification-simple.yaml new file mode 100644 index 00000000..ffaba468 --- /dev/null +++ b/src/tests/ci-configs/text-classification-simple.yaml @@ -0,0 +1,22 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +use_amp: false +debug: false +model: bert-base-uncased +model_dir: /tmp/model +tokenizer_dir: /tmp/tokenizer +num_classes: 1 +drop_out: .3 +n_fc: 768 +weight_decay: 0.01 +num_warmup_epochs: 0 +max_length: 256 +lr: 0.00005 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/text-classification-spawn.yaml b/src/tests/ci-configs/text-classification-spawn.yaml new file mode 100644 index 00000000..09391012 --- /dev/null +++ b/src/tests/ci-configs/text-classification-spawn.yaml @@ -0,0 +1,27 @@ +seed: 666 +data_path: ~/data +ackend: gloo +train_batch_size: 4 +eval_batch_size: 4 +num_workers: 1 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +use_amp: false +debug: false +model: bert-base-uncased +model_dir: /tmp/model +tokenizer_dir: /tmp/tokenizer +num_classes: 1 +drop_out: .3 +n_fc: 768 +weight_decay: 0.01 +num_warmup_epochs: 0 +max_length: 256 +lr: 0.00005 +# distributed spawn +nproc_per_node: 2 +# distributed multi node spawn +nnodes: 1 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-classification-all.yaml b/src/tests/ci-configs/vision-classification-all.yaml new file mode 100644 index 00000000..b5939c13 --- /dev/null +++ 
b/src/tests/ci-configs/vision-classification-all.yaml @@ -0,0 +1,19 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +use_amp: false +debug: false +model: resnet18 +filename_prefix: training +n_saved: 2 +save_every_iters: 2 +patience: 2 +limit_sec: 60 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-classification-launch.yaml b/src/tests/ci-configs/vision-classification-launch.yaml new file mode 100644 index 00000000..6b2b51c7 --- /dev/null +++ b/src/tests/ci-configs/vision-classification-launch.yaml @@ -0,0 +1,16 @@ +seed: 666 +data_path: ~/data +backend: gloo +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 1 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +nproc_per_node: 2 +use_amp: false +debug: false +model: resnet18 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-classification-simple.yaml b/src/tests/ci-configs/vision-classification-simple.yaml new file mode 100644 index 00000000..4f044ea7 --- /dev/null +++ b/src/tests/ci-configs/vision-classification-simple.yaml @@ -0,0 +1,14 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +use_amp: false +debug: false +model: resnet18 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-classification-spawn.yaml b/src/tests/ci-configs/vision-classification-spawn.yaml new file mode 100644 index 00000000..7315ecda --- /dev/null +++ b/src/tests/ci-configs/vision-classification-spawn.yaml @@ -0,0 +1,19 @@ +seed: 666 +data_path: ~/data +backend: gloo +train_batch_size: 4 +eval_batch_size: 4 +num_workers: 1 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +use_amp: false +debug: false +model: resnet18 +# distributed spawn +nproc_per_node: 2 +# distributed multi 
node spawn +nnodes: 1 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-dcgan-all.yaml b/src/tests/ci-configs/vision-dcgan-all.yaml new file mode 100644 index 00000000..908b6224 --- /dev/null +++ b/src/tests/ci-configs/vision-dcgan-all.yaml @@ -0,0 +1,21 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +use_amp: false +debug: false +z_dim: 100 +d_filters: 64 +g_filters: 64 +filename_prefix: training +n_saved: 2 +save_every_iters: 2 +patience: 2 +limit_sec: 60 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-dcgan-launch.yaml b/src/tests/ci-configs/vision-dcgan-launch.yaml new file mode 100644 index 00000000..eac759c6 --- /dev/null +++ b/src/tests/ci-configs/vision-dcgan-launch.yaml @@ -0,0 +1,18 @@ +seed: 666 +data_path: ~/data +backend: gloo +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 1 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +use_amp: false +debug: false +z_dim: 100 +d_filters: 64 +g_filters: 64 +nproc_per_node: 2 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-dcgan-simple.yaml b/src/tests/ci-configs/vision-dcgan-simple.yaml new file mode 100644 index 00000000..fe64b1c7 --- /dev/null +++ b/src/tests/ci-configs/vision-dcgan-simple.yaml @@ -0,0 +1,16 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +use_amp: false +debug: false +z_dim: 100 +d_filters: 64 +g_filters: 64 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-dcgan-spawn.yaml b/src/tests/ci-configs/vision-dcgan-spawn.yaml new file mode 100644 index 00000000..48046f65 --- /dev/null +++ b/src/tests/ci-configs/vision-dcgan-spawn.yaml @@ -0,0 +1,21 @@ +seed: 666 +data_path: ~/data +backend: gloo +train_batch_size: 4 
+eval_batch_size: 4 +num_workers: 1 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.0001 +use_amp: false +debug: false +z_dim: 100 +d_filters: 64 +g_filters: 64 +# distributed spawn +nproc_per_node: 2 +# distributed multi node spawn +nnodes: 1 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-segmentation-all.yaml b/src/tests/ci-configs/vision-segmentation-all.yaml new file mode 100644 index 00000000..a27e2cd1 --- /dev/null +++ b/src/tests/ci-configs/vision-segmentation-all.yaml @@ -0,0 +1,20 @@ +seed: 666 +data_path: ~/data +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 2 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.007 +use_amp: false +debug: false +accumulation_steps: 4 +num_classes: 21 +filename_prefix: training +n_saved: 2 +save_every_iters: 2 +patience: 2 +limit_sec: 60 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-segmentation-launch.yaml b/src/tests/ci-configs/vision-segmentation-launch.yaml new file mode 100644 index 00000000..05b0bebe --- /dev/null +++ b/src/tests/ci-configs/vision-segmentation-launch.yaml @@ -0,0 +1,17 @@ +seed: 666 +data_path: ~/data +backend: gloo +train_batch_size: 2 +eval_batch_size: 2 +num_workers: 1 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.007 +use_amp: false +debug: false +accumulation_steps: 4 +nproc_per_node: 2 +num_classes: 21 +output_dir: ./logs +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-segmentation-simple.yaml b/src/tests/ci-configs/vision-segmentation-simple.yaml index dece00d4..09856b5f 100644 --- a/src/tests/ci-configs/vision-segmentation-simple.yaml +++ b/src/tests/ci-configs/vision-segmentation-simple.yaml @@ -12,4 +12,4 @@ debug: false accumulation_steps: 4 num_classes: 21 output_dir: ./logs -log_every_iters: 2 \ No newline at end of file +log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-segmentation-spawn.yaml 
b/src/tests/ci-configs/vision-segmentation-spawn.yaml new file mode 100644 index 00000000..2baf5c9f --- /dev/null +++ b/src/tests/ci-configs/vision-segmentation-spawn.yaml @@ -0,0 +1,20 @@ +seed: 666 +data_path: ~/data +backend: gloo +train_batch_size: 4 +eval_batch_size: 4 +num_workers: 1 +max_epochs: 2 +train_epoch_length: 4 +eval_epoch_length: 4 +lr: 0.007 +use_amp: false +debug: false +accumulation_steps: 4 +num_classes: 21 +# distributed spawn +nproc_per_node: 2 +# distributed multi node spawn +nnodes: 1 +output_dir: ./logs +log_every_iters: 2 From 9205719419cb0989f2d47d30cd67dc4be0b9a1ab Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 19 Jun 2023 12:33:24 +0530 Subject: [PATCH 07/11] Modified tests according to new config structure --- scripts/run_tests.sh | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 8943097c..0569be26 100644 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -28,13 +28,7 @@ run_all() { do cd $dir pytest -vra --color=yes --tb=short test_*.py - python main.py --data_path ~/data \ - --train_batch_size 2 \ - --eval_batch_size 2 \ - --num_workers 2 \ - --max_epochs 2 \ - --train_epoch_length 4 \ - --eval_epoch_length 4 + python main.py ../../src/tests/ci-configs/$1-all.yaml cd $CWD done } @@ -43,15 +37,7 @@ run_launch() { for dir in $(find ./dist-tests/$1-launch -type d) do cd $dir - torchrun \ - --nproc_per_node 2 \ - main.py --backend gloo --data_path ~/data \ - --train_batch_size 2 \ - --eval_batch_size 2 \ - --num_workers 1 \ - --max_epochs 2 \ - --train_epoch_length 4 \ - --eval_epoch_length 4 + torchrun main.py ../../src/tests/ci-configs/$1-launch.yaml cd $CWD done } @@ -60,14 +46,7 @@ run_spawn() { for dir in $(find ./dist-tests/$1-spawn -type d) do cd $dir - python main.py --data_path ~/data \ - --nproc_per_node 2 --backend gloo \ - --train_batch_size 4 \ - --eval_batch_size 4 \ - --num_workers 1 \ - --max_epochs 2 \ - 
--train_epoch_length 4 \ - --eval_epoch_length 4 + python main.py ../../src/tests/ci-configs/$1-spawn.yaml cd $CWD done } From eaf86b2ebbaa4313a7d09943534e23bf37630023 Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 19 Jun 2023 12:44:59 +0530 Subject: [PATCH 08/11] Fix typo --- src/tests/ci-configs/text-classification-spawn.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ci-configs/text-classification-spawn.yaml b/src/tests/ci-configs/text-classification-spawn.yaml index 09391012..7c2b3b2a 100644 --- a/src/tests/ci-configs/text-classification-spawn.yaml +++ b/src/tests/ci-configs/text-classification-spawn.yaml @@ -1,6 +1,6 @@ seed: 666 data_path: ~/data -ackend: gloo +backend: gloo train_batch_size: 4 eval_batch_size: 4 num_workers: 1 From f3ef211f8ed0d16dc0d92bdd0ee9f90cc4e2680f Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 19 Jun 2023 13:06:05 +0530 Subject: [PATCH 09/11] Correct backend argument to be passed in command line --- src/tests/ci-configs/text-classification-launch.yaml | 2 -- src/tests/ci-configs/text-classification-spawn.yaml | 1 - src/tests/ci-configs/vision-classification-launch.yaml | 2 -- src/tests/ci-configs/vision-classification-spawn.yaml | 1 - src/tests/ci-configs/vision-dcgan-launch.yaml | 2 -- src/tests/ci-configs/vision-dcgan-spawn.yaml | 1 - src/tests/ci-configs/vision-segmentation-launch.yaml | 2 -- src/tests/ci-configs/vision-segmentation-spawn.yaml | 1 - 8 files changed, 12 deletions(-) diff --git a/src/tests/ci-configs/text-classification-launch.yaml b/src/tests/ci-configs/text-classification-launch.yaml index 25b37c26..843c63c6 100644 --- a/src/tests/ci-configs/text-classification-launch.yaml +++ b/src/tests/ci-configs/text-classification-launch.yaml @@ -1,5 +1,4 @@ seed: 666 -backend: gloo data_path: ~/data train_batch_size: 2 eval_batch_size: 2 @@ -13,7 +12,6 @@ model: bert-base-uncased model_dir: /tmp/model tokenizer_dir: /tmp/tokenizer num_classes: 1 -nproc_per_node: 2 
drop_out: .3 n_fc: 768 weight_decay: 0.01 diff --git a/src/tests/ci-configs/text-classification-spawn.yaml b/src/tests/ci-configs/text-classification-spawn.yaml index 7c2b3b2a..b9029559 100644 --- a/src/tests/ci-configs/text-classification-spawn.yaml +++ b/src/tests/ci-configs/text-classification-spawn.yaml @@ -1,6 +1,5 @@ seed: 666 data_path: ~/data -backend: gloo train_batch_size: 4 eval_batch_size: 4 num_workers: 1 diff --git a/src/tests/ci-configs/vision-classification-launch.yaml b/src/tests/ci-configs/vision-classification-launch.yaml index 6b2b51c7..ea660894 100644 --- a/src/tests/ci-configs/vision-classification-launch.yaml +++ b/src/tests/ci-configs/vision-classification-launch.yaml @@ -1,6 +1,5 @@ seed: 666 data_path: ~/data -backend: gloo train_batch_size: 2 eval_batch_size: 2 num_workers: 1 @@ -8,7 +7,6 @@ max_epochs: 2 train_epoch_length: 4 eval_epoch_length: 4 lr: 0.0001 -nproc_per_node: 2 use_amp: false debug: false model: resnet18 diff --git a/src/tests/ci-configs/vision-classification-spawn.yaml b/src/tests/ci-configs/vision-classification-spawn.yaml index 7315ecda..259694f4 100644 --- a/src/tests/ci-configs/vision-classification-spawn.yaml +++ b/src/tests/ci-configs/vision-classification-spawn.yaml @@ -1,6 +1,5 @@ seed: 666 data_path: ~/data -backend: gloo train_batch_size: 4 eval_batch_size: 4 num_workers: 1 diff --git a/src/tests/ci-configs/vision-dcgan-launch.yaml b/src/tests/ci-configs/vision-dcgan-launch.yaml index eac759c6..33e58a02 100644 --- a/src/tests/ci-configs/vision-dcgan-launch.yaml +++ b/src/tests/ci-configs/vision-dcgan-launch.yaml @@ -1,6 +1,5 @@ seed: 666 data_path: ~/data -backend: gloo train_batch_size: 2 eval_batch_size: 2 num_workers: 1 @@ -13,6 +12,5 @@ debug: false z_dim: 100 d_filters: 64 g_filters: 64 -nproc_per_node: 2 output_dir: ./logs log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-dcgan-spawn.yaml b/src/tests/ci-configs/vision-dcgan-spawn.yaml index 48046f65..b76c65c9 100644 --- 
a/src/tests/ci-configs/vision-dcgan-spawn.yaml +++ b/src/tests/ci-configs/vision-dcgan-spawn.yaml @@ -1,6 +1,5 @@ seed: 666 data_path: ~/data -backend: gloo train_batch_size: 4 eval_batch_size: 4 num_workers: 1 diff --git a/src/tests/ci-configs/vision-segmentation-launch.yaml b/src/tests/ci-configs/vision-segmentation-launch.yaml index 05b0bebe..36ab69eb 100644 --- a/src/tests/ci-configs/vision-segmentation-launch.yaml +++ b/src/tests/ci-configs/vision-segmentation-launch.yaml @@ -1,6 +1,5 @@ seed: 666 data_path: ~/data -backend: gloo train_batch_size: 2 eval_batch_size: 2 num_workers: 1 @@ -11,7 +10,6 @@ lr: 0.007 use_amp: false debug: false accumulation_steps: 4 -nproc_per_node: 2 num_classes: 21 output_dir: ./logs log_every_iters: 2 diff --git a/src/tests/ci-configs/vision-segmentation-spawn.yaml b/src/tests/ci-configs/vision-segmentation-spawn.yaml index 2baf5c9f..7ef6bb1b 100644 --- a/src/tests/ci-configs/vision-segmentation-spawn.yaml +++ b/src/tests/ci-configs/vision-segmentation-spawn.yaml @@ -1,6 +1,5 @@ seed: 666 data_path: ~/data -backend: gloo train_batch_size: 4 eval_batch_size: 4 num_workers: 1 From 34709216279e8cfb7c30a76cd12e7db6cf62bc70 Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Mon, 19 Jun 2023 13:49:11 +0530 Subject: [PATCH 10/11] Pass backend argument as a command line argument --- scripts/run_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 0569be26..564a031c 100644 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -37,7 +37,7 @@ run_launch() { for dir in $(find ./dist-tests/$1-launch -type d) do cd $dir - torchrun main.py ../../src/tests/ci-configs/$1-launch.yaml + torchrun --nproc_per_node 2 main.py ../../src/tests/ci-configs/$1-launch.yaml --backend gloo cd $CWD done } @@ -46,7 +46,7 @@ run_spawn() { for dir in $(find ./dist-tests/$1-spawn -type d) do cd $dir - python main.py ../../src/tests/ci-configs/$1-spawn.yaml + python main.py 
../../src/tests/ci-configs/$1-spawn.yaml --backend gloo cd $CWD done } From 0dd2b4b864d6f7a31112bd9aee43029146488f3c Mon Sep 17 00:00:00 2001 From: Jyotirmay Khavasi Date: Tue, 20 Jun 2023 22:14:49 +0530 Subject: [PATCH 11/11] Modifying the config structure in template-common --- src/templates/template-common/main.py | 2 +- src/templates/template-common/utils.py | 32 ++++++++++++++++++-------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/templates/template-common/main.py b/src/templates/template-common/main.py index 959cc8de..07e91314 100644 --- a/src/templates/template-common/main.py +++ b/src/templates/template-common/main.py @@ -23,7 +23,7 @@ # main entrypoint def main(): - config = setup_parser().parse_args() + config = setup_config() #::: if (it.dist === 'spawn') { :::# #::: if (it.nproc_per_node && it.nnodes > 1 && it.master_addr && it.master_port) { :::# kwargs = { diff --git a/src/templates/template-common/utils.py b/src/templates/template-common/utils.py index 26b06836..137a6199 100644 --- a/src/templates/template-common/utils.py +++ b/src/templates/template-common/utils.py @@ -18,19 +18,33 @@ from ignite.utils import setup_logger -def setup_parser(): - with open("config.yaml", "r") as f: +def get_default_parser(): + parser = ArgumentParser() + parser.add_argument("config", type=Path, help="Config file path") + parser.add_argument( + "--backend", + default=None, + choices=["nccl", "gloo"], + type=str, + help="DDP backend", + ) + return parser + + +def setup_config(parser=None): + if parser is None: + parser = get_default_parser() + + args = parser.parse_args() + config_path = args.config + + with open(config_path, "r") as f: config = yaml.safe_load(f.read()) - parser = ArgumentParser() - parser.add_argument("--backend", default=None, type=str) for k, v in config.items(): - if isinstance(v, bool): - parser.add_argument(f"--{k}", action="store_true") - else: - parser.add_argument(f"--{k}", default=v, type=type(v)) + setattr(args, k, v) 
- return parser + return args def log_metrics(engine: Engine, tag: str) -> None: