We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 714c826 · commit c253bb4 (Copy full SHA for c253bb4)
tests/collections/llm/megatron_t5_pretraining.py
@@ -59,16 +59,16 @@ def get_args():
59
paths=args.data_path,
60
seq_length=512,
61
seq_length_dec=128,
62
- micro_batch_size=args.devices,
63
- global_batch_size=2 * args.devices,
+ micro_batch_size=64,
+ global_batch_size=512,
64
seed=1234,
65
tokenizer=tokenizer,
66
split="99982,9,9",
67
index_mapping_dir=args.index_mapping_dir,
68
)
69
t5_config = llm.t5.model.t5.T5Config(
70
- num_layers=args.devices,
71
- encoder_num_layers=args.devices,
+ num_layers=12,
+ encoder_num_layers=12,
72
hidden_size=768,
73
ffn_hidden_size=3072,
74
num_attention_heads=12,
0 commit comments