File tree 2 files changed +13
-2
lines changed
2 files changed +13
-2
lines changed Original file line number Diff line number Diff line change @@ -72,7 +72,7 @@ uint32_t llama_hparams::n_embd_v_s() const {
72
72
73
73
bool llama_hparams::is_swa (uint32_t il) const {
74
74
if (il < n_layer) {
75
- return n_swa > 0 && n_swa_pattern > 0 && il % n_swa_pattern < (n_swa_pattern - 1 );
75
+ return n_swa_pattern == 0 || ( il % n_swa_pattern < (n_swa_pattern - 1 ) );
76
76
}
77
77
78
78
GGML_ABORT (" fatal error" );
Original file line number Diff line number Diff line change @@ -104,7 +104,18 @@ struct llama_hparams {
104
104
llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
105
105
106
106
uint32_t n_swa = 0 ; // the size of the sliding window (0 - no SWA)
107
- uint32_t n_swa_pattern = 1 ; // by default, all layers use non-sliding-window attention
107
+ uint32_t n_swa_pattern = 1 ; // a value of n means that every n-th layer (il % n == n - 1) is dense (i.e. non-SWA)
108
+ // by default n == 1, all layers are dense
109
+ // note that if n_swa_pattern == 0, all layers are SWA
110
+ // example: n_swa_pattern = 3
111
+ // il == 0: swa
112
+ // il == 1: swa
113
+ // il == 2: dense
114
+ // il == 3: swa
115
+ // il == 4: swa
116
+ // il == 5: dense
117
+ // il == 6: swa
118
+ // etc ...
108
119
109
120
// for State Space Models
110
121
uint32_t ssm_d_conv = 0 ;
You can’t perform that action at this time.
0 commit comments