From 417bd2d6772a3424a4ec9a6ce8375828eedf6dae Mon Sep 17 00:00:00 2001 From: Jakub Horak Date: Sun, 26 Mar 2023 16:23:11 +0200 Subject: [PATCH] Add backwards-compatibility for older model format --- llama.cpp | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/llama.cpp b/llama.cpp index 2bd520353efda..83699509c2247 100644 --- a/llama.cpp +++ b/llama.cpp @@ -314,27 +314,26 @@ static bool llama_model_load( fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } + uint32_t format_version = 0; // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic == LLAMA_FILE_MAGIC_UNVERSIONED) { - fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", + fprintf(stderr, "%s: model '%s' is too old, continuing in compatibility mode with degraded performance\n", __func__, fname.c_str()); - return false; - } - if (magic != LLAMA_FILE_MAGIC) { - fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); - return false; - } - - uint32_t format_version; - fin.read((char *) &format_version, sizeof(format_version)); + format_version = 0; + } else if (magic == LLAMA_FILE_MAGIC) { + fin.read((char *) &format_version, sizeof(format_version)); - if (format_version != LLAMA_FILE_VERSION) { - fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", - __func__, fname.c_str(), format_version, LLAMA_FILE_VERSION); + if (format_version > LLAMA_FILE_VERSION) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", + __func__, fname.c_str(), format_version, LLAMA_FILE_VERSION); + return false; + } + } else { + fprintf(stderr, "%s: invalid model file '%s' (bad magic %08x)\n", __func__, fname.c_str(), magic); return false; } } @@ -417,7 +416,14 @@ static bool llama_model_load( } float score; - fin.read((char *) &score, sizeof(score)); + if (format_version == 0) { + // Older version doesn't have embedded token score, use approximation: length^2 + // TODO: Maybe read it from tokenizer.model as a fallback? + score = word.length(); + score *= score; + } else { + fin.read((char *) &score, sizeof(score)); + } vocab.token_to_id[word] = i;