We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 714c826 · commit c253bb4 (Copy full SHA for c253bb4)
tests/collections/llm/megatron_t5_pretraining.py
@@ -59,16 +59,16 @@ def get_args():
59
paths=args.data_path,
60
seq_length=512,
61
seq_length_dec=128,
62
- micro_batch_size=args.devices,
63
- global_batch_size=2 * args.devices,
+ micro_batch_size=64,
+ global_batch_size=512,
64
seed=1234,
65
tokenizer=tokenizer,
66
split="99982,9,9",
67
index_mapping_dir=args.index_mapping_dir,
68
)
69
t5_config = llm.t5.model.t5.T5Config(
70
- num_layers=args.devices,
71
- encoder_num_layers=args.devices,
+ num_layers=12,
+ encoder_num_layers=12,
72
hidden_size=768,
73
ffn_hidden_size=3072,
74
num_attention_heads=12,
0 commit comments