
Commit 1a0fcb1

mokeddembillel authored and mglambda committed
llama : add Falcon3 support (ggml-org#10883)
* Add Falcon3 model support
* Add fix for adding bos to added special tokens
* Add comment explaining the logic behind the if statement
* Add a log message to better track when the following line of code is triggered
* Update log to only print when input and output characters are different
* Fix handling of pre-normalized tokens
* Refactoring
1 parent 6a8c1fc · commit 1a0fcb1

File tree: 3 files changed, +29 -1 lines

convert_hf_to_gguf.py

Lines changed: 13 additions & 0 deletions
@@ -529,9 +529,19 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
             else:
                 token: str = reverse_vocab[i]
                 if token in added_vocab:
+                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
+                    # To avoid unexpected issues - we make sure to normalize non-normalized tokens
+                    if not tokenizer.added_tokens_decoder[i].normalized:
+                        previous_token = token
+                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
+                        if previous_token != token:
+                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
+
                     if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
+                        # NOTE: this was added for Gemma.
+                        # Encoding and decoding the tokens above isn't sufficient for this case.
                         token = token.replace(b"\xe2\x96\x81".decode("utf-8"), " ")  # pre-normalize user-defined spaces
                         toktypes.append(gguf.TokenType.USER_DEFINED)
                 else:
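For context, the round-trip in the added lines can be reproduced standalone. The sketch below is illustrative and not part of the commit: it assumes the Hugging Face transformers API and the tiiuae/Falcon3-7B-Base tokenizer, and the variable names are made up.

# Illustrative sketch of the normalization round-trip added above (assumption:
# transformers is installed; not code from this commit).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon3-7B-Base")

for token_id, added in tokenizer.added_tokens_decoder.items():
    if not added.normalized:
        # Encode the raw token and decode it back, so the stored string matches
        # the pre-normalized form llama.cpp expects for CONTROL/USER_DEFINED tokens.
        raw = added.content
        round_tripped = tokenizer.decode(tokenizer.encode(raw, add_special_tokens=False))
        if round_tripped != raw:
            print(f"{raw!r} normalized to {round_tripped!r}")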
@@ -575,6 +585,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
             # ref: https://huggingface.co/tiiuae/falcon-7b
             res = "falcon"
+        if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
+            # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
+            res = "falcon3"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -72,6 +72,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
     {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
     {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "falcon3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
     {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
     {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
     {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },

src/llama.cpp

Lines changed: 15 additions & 1 deletion
@@ -1673,6 +1673,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
     LLM_CHAT_TEMPLATE_MISTRAL_V7,
     LLM_CHAT_TEMPLATE_PHI_3,
+    LLM_CHAT_TEMPLATE_FALCON_3,
     LLM_CHAT_TEMPLATE_ZEPHYR,
     LLM_CHAT_TEMPLATE_MONARCH,
     LLM_CHAT_TEMPLATE_GEMMA,
@@ -1705,6 +1706,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
     { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
     { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
+    { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
     { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
     { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
     { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
@@ -6562,7 +6564,8 @@ static void llm_load_vocab(
         } else if (
                 tokenizer_pre == "llama3" ||
                 tokenizer_pre == "llama-v3" ||
-                tokenizer_pre == "llama-bpe") {
+                tokenizer_pre == "llama-bpe" ||
+                tokenizer_pre == "falcon3") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
             vocab.tokenizer_ignore_merges = true;
             vocab.tokenizer_add_bos = true;
@@ -22615,6 +22618,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
         }
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
         return LLM_CHAT_TEMPLATE_PHI_3;
+    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
+        return LLM_CHAT_TEMPLATE_FALCON_3;
     } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
         return LLM_CHAT_TEMPLATE_ZEPHYR;
     } else if (tmpl_contains("bos_token + message['role']")) {
@@ -22767,6 +22772,15 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|assistant|>\n";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
+        // Falcon 3
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>\n" << message->content << "\n";
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
         // zephyr template
         for (auto message : chat) {
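The new branch renders each turn as a <|role|> header on its own line followed by the content, and optionally opens an assistant turn for generation. A Python rendering of the same format, purely for illustration (the function name and defaults are made up):

# Illustrative Python rendering of the Falcon3 template built by the C++
# branch above; not code from this commit.
def falcon3_prompt(messages: list[dict[str, str]], add_assistant: bool = True) -> str:
    out = ""
    for m in messages:
        out += f"<|{m['role']}|>\n{m['content']}\n"  # one <|role|> header per turn
    if add_assistant:
        out += "<|assistant|>\n"  # open the assistant turn for generation
    return out

print(falcon3_prompt([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]))

Note that the detection branch keys on <|assistant|> plus <|user|> and is deliberately placed after the Phi-3 check: Phi-3 templates contain both of those markers too, but are caught first by the more specific <|assistant|> plus <|end|> test.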
