1 parent 333eeed commit 454d7b7
src/llama-model.cpp
@@ -714,13 +714,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_MODERN_BERT:
             {
-                hparams.rope_freq_base_train = 160000.0f;
-                hparams.rope_freq_base_train_swa = 10000.0f;
-                hparams.n_swa = 128;
-
                 hparams.swa_type = LLAMA_SWA_TYPE_SYMMETRIC;
                 hparams.set_swa_pattern(3, 0);
+                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
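For readers skimming the hunk: the ModernBERT sliding-window size is no longer hardcoded to 128; it is now read from the model's GGUF metadata via ml.get_key(), following the same required/optional convention as the calls below it (a missing required key fails the load, while an optional key, like the pooling type with its trailing false, keeps the default). Below is a minimal, self-contained sketch of that lookup pattern; gguf_kv_store and the key string are hypothetical stand-ins for illustration, not llama.cpp's actual internals.

```cpp
// Sketch of a get_key(key, result, required = true) lookup against a
// hypothetical flat key/value view of a model file's metadata.
#include <cstdint>
#include <map>
#include <stdexcept>
#include <string>

struct gguf_kv_store {
    std::map<std::string, uint32_t> kv;  // hypothetical metadata store

    void get_key(const std::string & key, uint32_t & result, bool required = true) const {
        auto it = kv.find(key);
        if (it != kv.end()) {
            result = it->second;  // key present: overwrite the caller's default
        } else if (required) {
            // required key absent: abort the load with a descriptive error
            throw std::runtime_error("key not found in model file: " + key);
        }
        // optional key absent: silently keep the caller's default
    }
};

int main() {
    gguf_kv_store ml;
    ml.kv["modern-bert.attention.sliding_window"] = 128;  // illustrative key name

    uint32_t n_swa = 0;
    ml.get_key("modern-bert.attention.sliding_window", n_swa);  // required, found: n_swa == 128

    uint32_t pooling_type = 1;
    ml.get_key("modern-bert.pooling_type", pooling_type, false); // optional, absent: stays 1
    return 0;
}
```

The practical effect of the change is that converters can export models with a different window size and the loader will honor it, instead of silently assuming 128 tokens.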