Skip to content

Commit 50d1a03

Browse files
committed
convert_hf : fix Gemma v1 not setting BOS and EOS tokens
1 parent 5a9cb57 commit 50d1a03

File tree

1 file changed

+3
-7
lines changed

1 file changed

+3
-7
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1310,6 +1310,7 @@ def set_vocab(self):
1310 1310   special_vocab._set_special_token("prefix", 1)
1311 1311   special_vocab._set_special_token("suffix", 3)
1312 1312   special_vocab._set_special_token("middle", 2)
     1313 + special_vocab.chat_template = None # do not add it twice
1313 1314   special_vocab.add_to_gguf(self.gguf_writer)
1314 1315
1315 1316   def set_gguf_parameters(self):
@@ -2466,13 +2467,7 @@ class GemmaModel(Model):
2466 2467   model_arch = gguf.MODEL_ARCH.GEMMA
2467 2468
2468 2469   def set_vocab(self):
2469      - tokens, scores, toktypes = self._create_vocab_sentencepiece()
2470      -
2471      - self.gguf_writer.add_tokenizer_model("llama")
2472      - self.gguf_writer.add_tokenizer_pre("default")
2473      - self.gguf_writer.add_token_list(tokens)
2474      - self.gguf_writer.add_token_scores(scores)
2475      - self.gguf_writer.add_token_types(toktypes)
     2470 + self._set_vocab_sentencepiece()
2476 2471
2477 2472   # TODO: these special tokens should be exported only for the CodeGemma family
2478 2473   special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
@@ -2482,6 +2477,7 @@ def set_vocab(self):
2482 2477   special_vocab._set_special_token("middle", 68)
2483 2478   special_vocab._set_special_token("fsep", 70)
2484 2479   special_vocab._set_special_token("eot", 107)
     2480 + special_vocab.chat_template = None # do not add it twice
2485 2481   special_vocab.add_to_gguf(self.gguf_writer)
2486 2482
2487 2483   self.gguf_writer.add_add_space_prefix(False)

0 commit comments

Comments
 (0)