diff --git a/launcher_scripts/conf/peft/llama/sft.yaml b/launcher_scripts/conf/peft/llama/sft.yaml
index fecab19b2b..6549a90539 100644
--- a/launcher_scripts/conf/peft/llama/sft.yaml
+++ b/launcher_scripts/conf/peft/llama/sft.yaml
@@ -120,6 +120,8 @@ model:
   attention_dropout: 0.0
   ffn_dropout: 0.0
 
+  gc_interval: 100
+
   peft:
     peft_scheme: null # null (SFT, no PEFT), ptuning, lora
     restore_from_path: null
diff --git a/launcher_scripts/conf/peft/nemotron/sft.yaml b/launcher_scripts/conf/peft/nemotron/sft.yaml
index d534162afd..9f93d22069 100644
--- a/launcher_scripts/conf/peft/nemotron/sft.yaml
+++ b/launcher_scripts/conf/peft/nemotron/sft.yaml
@@ -120,6 +120,8 @@ model:
   attention_dropout: 0.0
   ffn_dropout: 0.0
 
+  gc_interval: 100
+
   peft:
     peft_scheme: null # null (SFT, no PEFT), ptuning, lora
     restore_from_path: null
diff --git a/launcher_scripts/conf/training/llama/llama2_13b.yaml b/launcher_scripts/conf/training/llama/llama2_13b.yaml
index 121e370e55..fcddf113f9 100644
--- a/launcher_scripts/conf/training/llama/llama2_13b.yaml
+++ b/launcher_scripts/conf/training/llama/llama2_13b.yaml
@@ -139,6 +139,7 @@ model:
   tp_comm_atomic_ag: False
   tp_comm_atomic_rs: False
   use_flash_attention: true
+  gc_interval: 100
   nsys_profile:
     enabled: False
     trace: [nvtx,cuda]
diff --git a/launcher_scripts/conf/training/llama/llama2_7b.yaml b/launcher_scripts/conf/training/llama/llama2_7b.yaml
index 00a4ec0fee..9ebc546494 100644
--- a/launcher_scripts/conf/training/llama/llama2_7b.yaml
+++ b/launcher_scripts/conf/training/llama/llama2_7b.yaml
@@ -139,6 +139,7 @@ model:
   tp_comm_atomic_ag: False
   tp_comm_atomic_rs: False
   use_flash_attention: true
+  gc_interval: 100
   nsys_profile:
     enabled: False
     trace: [nvtx,cuda]
diff --git a/launcher_scripts/conf/training/nemotron/nemotron_15b.yaml b/launcher_scripts/conf/training/nemotron/nemotron_15b.yaml
index 2e9305d010..80fb0a97bc 100644
--- a/launcher_scripts/conf/training/nemotron/nemotron_15b.yaml
+++ b/launcher_scripts/conf/training/nemotron/nemotron_15b.yaml
@@ -158,6 +158,7 @@ model:
   ub_tp_comm_overlap: True
   tp_comm_atomic_ag: False
   tp_comm_atomic_rs: False
+  gc_interval: 100
 
   nsys_profile:
     enabled: False
diff --git a/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml b/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml
index ea7c39eeeb..68eefad395 100644
--- a/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml
+++ b/launcher_scripts/conf/training/nemotron/nemotron_340b.yaml
@@ -154,6 +154,7 @@ model:
   fp8_amax_compute_algo: max # 'most_recent' or 'max'. Algorithm for computing amax from history
   fp8_wgrad: True
   ub_tp_comm_overlap: False
+  gc_interval: 100
 
   optim:
     name: distributed_fused_adam
@@ -188,4 +189,4 @@ model:
     - .0333
     - ${data_dir}/my-nemotron_00_text_document
     - .0333
-    - ${data_dir}/my-nemotron_00_text_document
\ No newline at end of file
+    - ${data_dir}/my-nemotron_00_text_document
diff --git a/launcher_scripts/conf/training/nemotron/nemotron_4b.yaml b/launcher_scripts/conf/training/nemotron/nemotron_4b.yaml
index 4e529cb65b..f7996084bd 100644
--- a/launcher_scripts/conf/training/nemotron/nemotron_4b.yaml
+++ b/launcher_scripts/conf/training/nemotron/nemotron_4b.yaml
@@ -158,6 +158,7 @@ model:
   ub_tp_comm_overlap: true
   tp_comm_atomic_ag: False
   tp_comm_atomic_rs: False
+  gc_interval: 100
 
   nsys_profile:
     enabled: False
diff --git a/launcher_scripts/conf/training/nemotron/nemotron_8b.yaml b/launcher_scripts/conf/training/nemotron/nemotron_8b.yaml
index 69c5e64efa..00516cccf4 100644
--- a/launcher_scripts/conf/training/nemotron/nemotron_8b.yaml
+++ b/launcher_scripts/conf/training/nemotron/nemotron_8b.yaml
@@ -158,6 +158,7 @@ model:
   ub_tp_comm_overlap: true
   tp_comm_atomic_ag: False
   tp_comm_atomic_rs: False
+  gc_interval: 100
 
   nsys_profile:
     enabled: False