-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: config.json
More file actions
53 lines (53 loc) · 1.2 KB
/
config.json
File metadata and controls
53 lines (53 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
{
"model_type": "FlashSTU",
"n_embd": 1536,
"n_heads": 8,
"n_layers": 26,
"seq_len": 8192,
"window_size": 1024,
"vocab_size": 200064,
"mlp_scale": 12,
"bias": false,
"dropout": 0.0,
"num_eigh": 24,
"use_hankel_L": false,
"num_epochs": 1,
"global_bsz": 524288,
"bsz": 1,
"warmup_steps": 19073,
"eval_period": 25,
"save_period": 19000,
"max_lr": 3.0e-4,
"min_lr": 3.0e-5,
"max_norm": 1.0,
"dilation": 1,
"fsdp": true,
"ddp": false,
"mixed_precision": true,
"torch_dtype": "bfloat16",
"use_cpu_offload": false,
"sharding_strategy": "full_shard",
"state_dict_type": "full",
"auto_wrap_policy": "partial",
"backward_prefetch": "backward_pre",
"forward_prefetch": false,
"sync_module_states": true,
"use_orig_params": true,
"device_id": null,
"precision": {
"param": "bfloat16",
"reduce": "bfloat16",
"buffer": "bfloat16"
},
"fsdp_modules": [
"STU",
"Attention",
"MLP"
],
"use_activation_checkpointing": true,
"use_flash_fft": true,
"use_approx": true,
"use_attn": true,
"softcap": 50.0,
"torch_compile": false
}