From d3a936fd0ebe3aaea299b0be4c87bfc598af4cc3 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 28 May 2024 12:44:56 +1000 Subject: [PATCH 01/65] convert-*.py: licence -> license --- examples/convert_legacy_llama.py | 2 +- gguf-py/gguf/gguf_writer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index c2c73e8ad39ec..65c66e5fff7fe 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -829,7 +829,7 @@ def add_meta_model(self, params: Params, metadata: Metadata | None) -> None: if metadata.description is not None: self.gguf.add_description(metadata.description) if metadata.license is not None: - self.gguf.add_licence(metadata.license) + self.gguf.add_license(metadata.license) if metadata.source_url is not None: self.gguf.add_source_url(metadata.source_url) if metadata.source_hf_repo is not None: diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index b0197961d46a8..3b94af25ed98c 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -445,8 +445,8 @@ def add_url(self, url: str) -> None: def add_description(self, description: str) -> None: self.add_string(Keys.General.DESCRIPTION, description) - def add_licence(self, licence: str) -> None: - self.add_string(Keys.General.LICENSE, licence) + def add_license(self, license: str) -> None: + self.add_string(Keys.General.LICENSE, license) def add_source_url(self, url: str) -> None: self.add_string(Keys.General.SOURCE_URL, url) From dbb1b471e475631693109a74ac3f13f1dca1bb9c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Fri, 24 May 2024 03:48:00 +1000 Subject: [PATCH 02/65] convert-*.py: add --get-outfile command and refactor --- convert_hf_to_gguf.py | 209 +++++++++++++++++++++++++------ examples/convert_legacy_llama.py | 86 +++++-------- gguf-py/gguf/__init__.py | 1 + gguf-py/gguf/utility.py | 35 ++++++ 4 files changed, 242 insertions(+), 89 deletions(-) create mode 100644 gguf-py/gguf/utility.py diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index a755b0a60bf0a..7ecedfcde4d06 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -13,7 +13,8 @@ from enum import IntEnum from pathlib import Path from hashlib import sha256 -from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast +from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast, Optional +from dataclasses import dataclass import math import numpy as np @@ -29,6 +30,42 @@ logger = logging.getLogger("hf-to-gguf") +@dataclass +class Metadata: + name: Optional[str] = None + author: Optional[str] = None + version: Optional[str] = None + url: Optional[str] = None + description: Optional[str] = None + licence: Optional[str] = None + source_url: Optional[str] = None + source_hf_repo: Optional[str] = None + + @staticmethod + def load(metadata_path: Path) -> Metadata: + if metadata_path is None or not metadata_path.exists(): + return Metadata() + + with open(metadata_path, 'r') as file: + data = json.load(file) + + # Create a new Metadata instance + metadata = Metadata() + + # Assigning values to Metadata attributes if they exist in the JSON file + # This is based on LLM_KV_NAMES mapping in llama.cpp + metadata.name = data.get("general.name") + metadata.author = data.get("general.author") + metadata.version = data.get("general.version") + metadata.url = data.get("general.url") + 
metadata.description = data.get("general.description") + metadata.license = data.get("general.license") + metadata.source_url = data.get("general.source.url") + metadata.source_hf_repo = data.get("general.source.huggingface.repository") + + return metadata + + ###### MODEL DEFINITIONS ###### class SentencePieceTokenTypes(IntEnum): @@ -46,13 +83,13 @@ class SentencePieceTokenTypes(IntEnum): class Model: _model_classes: dict[str, type[Model]] = {} + model_name: str dir_model: Path ftype: gguf.LlamaFileType is_big_endian: bool endianess: gguf.GGUFEndian use_temp_file: bool lazy: bool - model_name: str | None part_names: list[str] is_safetensors: bool hparams: dict[str, Any] @@ -60,12 +97,14 @@ class Model: tensor_map: gguf.TensorNameMap tensor_names: set[str] | None fname_out: Path + fname_default: Path gguf_writer: gguf.GGUFWriter + metadata: Metadata # subclasses should define this! model_arch: gguf.MODEL_ARCH - def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, + def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: Metadata, model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") @@ -84,15 +123,20 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) self.tensor_names = None + self.metadata = metadata + + model_tensors = self.get_tensors() + if self.ftype == gguf.LlamaFileType.GUESSED: # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie. 
- _, first_tensor = next(self.get_tensors()) + _, first_tensor = next(model_tensors) if first_tensor.dtype == torch.float16: logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_F16 else: logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_BF16 + ftype_up: str = self.ftype.name.partition("_")[2].upper() ftype_lw: str = ftype_up.lower() # allow templating the file name with the output ftype, useful with the "auto" ftype @@ -100,6 +144,84 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) + # Update any missing authorship metadata with huggingface_parameters + if self.metadata is not None and self.metadata.source_hf_repo is None: + if self.hparams is not None and "_name_or_path" in self.hparams: + self.metadata.source_hf_repo = self.hparams["_name_or_path"] + + # Set model name based on latest metadata either provided or calculated from environment + def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): + if metadata is not None and metadata.name is not None: + # Explicit Metadata Was Provided By User + return metadata.name + elif huggingface_parameters is not None and "_name_or_path" in huggingface_parameters: + # Hugging Face Parameters Model Name or Model Folder Name is Provided + return huggingface_parameters["_name_or_path"] + elif huggingface_parameters is not None and "model_type" in huggingface_parameters: + # Hugging Face Parameters Model Type is Provided + return huggingface_parameters["model_type"] + elif dir_model is not None and dir_model.name is not None: + # Use directory folder name + return dir_model.name + else: + return gguf.MODEL_ARCH_NAMES[model_arch] + self.model_name = get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch) + + # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' + encodingScheme = self.ftype.name.partition("_")[2] + + # Get Expert Count From huggingface_parameters + expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None + + def per_model_weight_count_estimation(tensors, expert_count): + # TODO: Ensure parameter count is accurate throughout various model type + # May currently overestimate parameter count in Mamba model because + # output weights is tied with token embeddings. 
+ sum_weight_estimate = 0 + for name, data_torch in tensors: + # Got A Tensor + + # We don't need these + if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): + continue + + # Calculate Tensor Volume + sum_weights_in_tensor = 1 + for dim in data_torch.shape: + sum_weights_in_tensor *= dim + + # Add Tensor Volume To Running Count + sum_weight_estimate += sum_weights_in_tensor + + # Calculate weight estimate per model + per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate + + return per_model_weight_estimate + + weight_estimate = per_model_weight_count_estimation(model_tensors, expert_count) + + # Generate default filename based on model specification and available metadata + self.fname_default = gguf.naming_convention(self.model_name, self.metadata.version, expert_count, weight_estimate, encodingScheme) + + # Filename Output + if fname_out is not None: + # custom defined filename and path was provided + def fill_templated_filename(filename: str, encodingScheme: str): + # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' + ftype_uppercase: str = encodingScheme.upper() + ftype_lowercase: str = encodingScheme.lower() + return filename.format(ftype_lowercase, + outtype=ftype_lowercase, ftype=ftype_lowercase, + OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) + + self.fname_out = fname_out.parent / fill_templated_filename(fname_out.name, encodingScheme) + else: + # output in the same directory as the model by default + self.fname_out = dir_model.parent / self.fname_default + + # Configure GGUF Writer + self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) + @classmethod def __init_subclass__(cls): # can't use an abstract property, because overriding it without type errors @@ -185,8 +307,26 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", " raise ValueError(f"Can not map tensor {name!r}") return new_name + def set_gguf_meta_model(self): + self.gguf_writer.add_name(self.model_name) + + if self.metadata is not None: + if self.metadata.author is not None: + self.gguf_writer.add_author(self.metadata.author) + if self.metadata.version is not None: + self.gguf_writer.add_version(self.metadata.version) + if self.metadata.url is not None: + self.gguf_writer.add_url(self.metadata.url) + if self.metadata.description is not None: + self.gguf_writer.add_description(self.metadata.description) + if self.metadata.licence is not None: + self.gguf_writer.add_licence(self.metadata.licence) + if self.metadata.source_url is not None: + self.gguf_writer.add_source_url(self.metadata.source_url) + if self.metadata.source_hf_repo is not None: + self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) + def set_gguf_parameters(self): - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_block_count(self.block_count) if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None: @@ -773,7 +913,6 @@ class GPTNeoXModel(Model): def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) 
self.gguf_writer.add_block_count(block_count) @@ -829,7 +968,6 @@ class BloomModel(Model): model_arch = gguf.MODEL_ARCH.BLOOM def set_gguf_parameters(self): - self.gguf_writer.add_name("Bloom") n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed")) n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads")) self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed)) @@ -906,7 +1044,6 @@ def set_vocab(self): def set_gguf_parameters(self): block_count = self.hparams["n_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(self.hparams["max_seq_len"]) self.gguf_writer.add_embedding_length(self.hparams["d_model"]) self.gguf_writer.add_block_count(block_count) @@ -945,7 +1082,6 @@ def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] head_count = self.hparams["num_attention_heads"] head_count_kv = self.hparams.get("num_key_value_heads", head_count) - hf_repo = self.hparams.get("_name_or_path", "") ctx_length = 0 if "max_sequence_length" in self.hparams: @@ -958,8 +1094,6 @@ def set_gguf_parameters(self): raise ValueError("gguf: can not find ctx length parameter.") self.gguf_writer.add_file_type(self.ftype) - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) - self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) @@ -983,7 +1117,6 @@ def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] head_count = self.hparams["num_attention_heads"] head_count_kv = self.hparams.get("num_key_value_heads", head_count) - hf_repo = self.hparams.get("_name_or_path", "") ctx_length = 0 if "max_sequence_length" in self.hparams: @@ -995,8 +1128,6 @@ def set_gguf_parameters(self): else: raise ValueError("gguf: can not find ctx length parameter.") - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) - self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) @@ -1110,7 +1241,6 @@ def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] head_count = self.hparams["num_attention_heads"] head_count_kv = self.hparams.get("num_key_value_heads", head_count) - hf_repo = self.hparams.get("_name_or_path", "") ctx_length = 0 if "max_sequence_length" in self.hparams: @@ -1122,8 +1252,6 @@ def set_gguf_parameters(self): else: raise ValueError("gguf: can not find ctx length parameter.") - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) - self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) @@ -1182,7 +1310,6 @@ def set_gguf_parameters(self): if n_head_kv is None: n_head_kv = self.hparams.get("n_head_kv", 1) # old name - self.gguf_writer.add_name("Falcon") self.gguf_writer.add_context_length(2048) # not in config.json self.gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) @@ -1227,7 +1354,6 @@ class 
StarCoderModel(Model): def set_gguf_parameters(self): block_count = self.hparams["n_layer"] - self.gguf_writer.add_name("StarCoder") self.gguf_writer.add_context_length(self.hparams["n_positions"]) self.gguf_writer.add_embedding_length(self.hparams["n_embd"]) self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"]) @@ -1262,7 +1388,6 @@ def set_gguf_parameters(self): block_count = self.hparams["n_layer"] - self.gguf_writer.add_name("Refact") # refact uses Alibi. So this is from config.json which might be used by training. self.gguf_writer.add_context_length(self.hparams["n_positions"]) self.gguf_writer.add_embedding_length(self.hparams["n_embd"]) @@ -1317,7 +1442,6 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -1560,7 +1684,6 @@ def __init__(self, *args, **kwargs): def set_gguf_parameters(self): super().set_gguf_parameters() - self.gguf_writer.add_name("Grok") _experts: list[dict[str, Tensor]] | None = None @@ -1609,7 +1732,6 @@ class DbrxModel(Model): def set_gguf_parameters(self): ffn_config = self.hparams["ffn_config"] attn_config = self.hparams["attn_config"] - self.gguf_writer.add_name(self.hparams["model_type"]) self.gguf_writer.add_block_count(self.hparams["n_layers"]) self.gguf_writer.add_context_length(self.hparams["max_seq_len"]) @@ -1678,7 +1800,6 @@ class MiniCPMModel(Model): def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] - self.gguf_writer.add_name("MiniCPM") self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -1748,7 +1869,6 @@ def set_vocab(self): self._set_vocab_qwen() def set_gguf_parameters(self): - self.gguf_writer.add_name("Qwen") self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"]) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) @@ -1839,7 +1959,6 @@ class GPT2Model(Model): model_arch = gguf.MODEL_ARCH.GPT2 def set_gguf_parameters(self): - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_block_count(self.hparams["n_layer"]) self.gguf_writer.add_context_length(self.hparams["n_ctx"]) self.gguf_writer.add_embedding_length(self.hparams["n_embd"]) @@ -1882,7 +2001,6 @@ def set_gguf_parameters(self): n_embd = self.find_hparam(["hidden_size", "n_embd"]) n_head = self.find_hparam(["num_attention_heads", "n_head"]) - self.gguf_writer.add_name("Phi2") self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"])) self.gguf_writer.add_embedding_length(n_embd) @@ -2004,7 +2122,6 @@ def set_gguf_parameters(self): orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"]) rope_dims = n_embd // n_head - self.gguf_writer.add_name("Phi3") self.gguf_writer.add_context_length(max_pos_embds) self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds) self.gguf_writer.add_embedding_length(n_embd) @@ -2061,7 +2178,6 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - 
self.gguf_writer.add_name("PLaMo") self.gguf_writer.add_context_length(4096) # not in config.json self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"]) @@ -2106,7 +2222,6 @@ class CodeShellModel(Model): def set_gguf_parameters(self): block_count = self.hparams["n_layer"] - self.gguf_writer.add_name("CodeShell") self.gguf_writer.add_context_length(self.hparams["n_positions"]) self.gguf_writer.add_embedding_length(self.hparams["n_embd"]) self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"]) @@ -2265,7 +2380,6 @@ def set_vocab(self): special_vocab.add_to_gguf(self.gguf_writer) def set_gguf_parameters(self): - self.gguf_writer.add_name("InternLM2") self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"]) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) @@ -2433,7 +2547,6 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -2549,7 +2662,6 @@ def set_gguf_parameters(self): # Fail early for models which don't have a block expansion factor of 2 assert d_inner == 2 * d_model - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default self.gguf_writer.add_embedding_length(d_model) self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading @@ -3505,6 +3617,14 @@ def parse_args() -> argparse.Namespace: "--no-tensor-first-split", action="store_true", help="do not add tensors to the first split (disabled by default)" ) + parser.add_argument( + "--metadata", type=Path, + help="Specify the path for a metadata file" + ) + parser.add_argument( + "--get-outfile", action="store_true", + help="get calculated default outfile name" + ) return parser.parse_args() @@ -3530,8 +3650,15 @@ def split_str_to_n_bytes(split_str: str) -> int: def main() -> None: args = parse_args() - logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + elif args.get_outfile: + # Avoid printing anything besides the dump output + logging.basicConfig(level=logging.WARNING) + else: + logging.basicConfig(level=logging.INFO) + metadata = Metadata.load(args.metadata) dir_model = args.model if not dir_model.is_dir(): @@ -3562,17 +3689,27 @@ def main() -> None: hparams = Model.load_hparams(dir_model) with torch.inference_mode(): + encodingScheme = ftype_map[args.outtype] + model_architecture = hparams["architectures"][0] + try: - model_class = Model.from_model_architecture(hparams["architectures"][0]) + model_class = Model.from_model_architecture(model_architecture) except NotImplementedError: logger.error(f"Model {hparams['architectures'][0]} is not supported") sys.exit(1) - model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, + model_instance = model_class(dir_model, encodingScheme, fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors, 
split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, small_first_shard=args.no_tensor_first_split) + if args.get_outfile: + print(f"{model_instance.fname_default}") # noqa: NP100 + return + + logger.info("Set meta model") + model_instance.set_gguf_meta_model() + logger.info("Set model parameters") model_instance.gguf_writer.add_type(gguf.GGUFType.MODEL) model_instance.set_gguf_parameters() diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 65c66e5fff7fe..eee8f1fee853c 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1021,35 +1021,28 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT raise ValueError(f"Unexpected combination of types: {name_to_type}") -def model_parameter_count(model: LazyModel) -> int: - total_model_parameters = 0 - for i, (name, lazy_tensor) in enumerate(model.items()): +def per_model_weight_count_estimation(model: LazyModel, expert_count:int) -> int: + # TODO: Ensure parameter count is accurate throughout various model type + sum_weight_estimate = 0 + for name, lazy_tensor in model.items(): + # We don't need these + if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): + continue + + # Got A Tensor sum_weights_in_tensor = 1 + + # Tensor Volume for dim in lazy_tensor.shape: sum_weights_in_tensor *= dim - total_model_parameters += sum_weights_in_tensor - return total_model_parameters - - -def model_parameter_count_rounded_notation(model_params_count: int) -> str: - if model_params_count > 1e12 : - # Trillions Of Parameters - scaled_model_params = model_params_count * 1e-12 - scale_suffix = "T" - elif model_params_count > 1e9 : - # Billions Of Parameters - scaled_model_params = model_params_count * 1e-9 - scale_suffix = "B" - elif model_params_count > 1e6 : - # Millions Of Parameters - scaled_model_params = model_params_count * 1e-6 - scale_suffix = "M" - else: - # Thousands Of Parameters - scaled_model_params = model_params_count * 1e-3 - scale_suffix = "K" - return f"{round(scaled_model_params)}{scale_suffix}" + # Add Tensor Volume To Running Count + sum_weight_estimate += sum_weights_in_tensor + + # Calculate weight estimate per model + per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate + + return per_model_weight_estimate def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel: @@ -1231,34 +1224,21 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> return vocab, special_vocab -def default_convention_outfile(file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> str: - quantization = { +def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> str: + name = metadata.name if metadata is not None and metadata.name is not None else model_name + version = metadata.version if metadata is not None and metadata.version is not None else None + + encodingScheme = { GGMLFileType.AllF32: "F32", GGMLFileType.MostlyF16: "F16", GGMLFileType.MostlyQ8_0: "Q8_0", }[file_type] - parameters = model_parameter_count_rounded_notation(model_params_count) - - expert_count = "" - if params.n_experts is not None: - expert_count = f"{params.n_experts}x" - - version = "" - if metadata is not None and metadata.version is not None: - version = f"-{metadata.version}" - - name = "ggml-model" - if 
metadata is not None and metadata.name is not None: - name = metadata.name - elif params.path_model is not None: - name = params.path_model.name - - return f"{name}{version}-{expert_count}{parameters}-{quantization}" + return gguf.naming_convention(name, version, expert_count, model_params_count, encodingScheme) -def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path: - default_filename = default_convention_outfile(file_type, params, model_params_count, metadata) +def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> Path: + default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata) ret = model_paths[0].parent / f"{default_filename}.gguf" if ret in model_paths: logger.error( @@ -1315,10 +1295,10 @@ def main(args_in: list[str] | None = None) -> None: if args.get_outfile: model_plus = load_some_model(args.model) params = Params.load(model_plus) - model = convert_model_names(model_plus.model, params, args.skip_unknown) - model_params_count = model_parameter_count(model_plus.model) - ftype = pick_output_type(model, args.outtype) - print(f"{default_convention_outfile(ftype, params, model_params_count, metadata)}") # noqa: NP100 + model = convert_model_names(model_plus.model, params, args.skip_unknown) + model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts) + ftype = pick_output_type(model, args.outtype) + print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100 return if args.no_vocab and args.vocab_only: @@ -1334,8 +1314,8 @@ def main(args_in: list[str] | None = None) -> None: else: model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None) - model_params_count = model_parameter_count(model_plus.model) - logger.info(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})") + model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts) + logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})") if args.dump: do_dump_model(model_plus) @@ -1405,7 +1385,7 @@ def main(args_in: list[str] | None = None) -> None: model = convert_model_names(model, params, args.skip_unknown) ftype = pick_output_type(model, args.outtype) model = convert_to_output_type(model, ftype) - outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata) + outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata) params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/__init__.py b/gguf-py/gguf/__init__.py index ea5146b161bc8..a07b8ff0ddbc7 100644 --- a/gguf-py/gguf/__init__.py +++ b/gguf-py/gguf/__init__.py @@ -5,3 +5,4 @@ from .quants import * from .tensor_mapping import * from .vocab import * +from .utility import * diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py new file mode 100644 index 0000000000000..e375b687d2da1 --- /dev/null +++ b/gguf-py/gguf/utility.py @@ -0,0 +1,35 @@ +from __future__ import annotations + + +def model_weight_count_rounded_notation(model_params_count: int) -> str: + if model_params_count > 1e15 
: + # Quadrillion Of Parameters + scaled_model_params = model_params_count * 1e-15 + scale_suffix = "Q" + elif model_params_count > 1e12 : + # Trillions Of Parameters + scaled_model_params = model_params_count * 1e-12 + scale_suffix = "T" + elif model_params_count > 1e9 : + # Billions Of Parameters + scaled_model_params = model_params_count * 1e-9 + scale_suffix = "B" + elif model_params_count > 1e6 : + # Millions Of Parameters + scaled_model_params = model_params_count * 1e-6 + scale_suffix = "M" + else: + # Thousands Of Parameters + scaled_model_params = model_params_count * 1e-3 + scale_suffix = "K" + return f"{round(scaled_model_params)}{scale_suffix}" + + +def naming_convention(model_name: str, version_string:str, expert_count_int:int, model_params_count: int, encodingScheme: str) -> str: + # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention + name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model" + version = f"-{version_string}" if version_string is not None else "" + expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None and expert_count_int > 0 else "" + parameters = model_weight_count_rounded_notation(model_params_count) + encodingScheme = encodingScheme.upper() + return f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}" From a42c2b7efc720d4d36a258d7430b33c82ec3bfd0 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Fri, 31 May 2024 03:14:11 +1000 Subject: [PATCH 03/65] convert-*.py: add basename and finetune metadata --- convert_hf_to_gguf.py | 10 +++++++++- examples/convert_legacy_llama.py | 12 +++++++++++- gguf-py/gguf/constants.py | 2 ++ gguf-py/gguf/gguf_writer.py | 6 ++++++ gguf-py/gguf/utility.py | 29 ++++++++++++++++++++++------- 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7ecedfcde4d06..a79898350eb52 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -33,6 +33,8 @@ @dataclass class Metadata: name: Optional[str] = None + basename: Optional[str] = None + finetune: Optional[str] = None author: Optional[str] = None version: Optional[str] = None url: Optional[str] = None @@ -55,6 +57,8 @@ def load(metadata_path: Path) -> Metadata: # Assigning values to Metadata attributes if they exist in the JSON file # This is based on LLM_KV_NAMES mapping in llama.cpp metadata.name = data.get("general.name") + metadata.basename = data.get("general.basename") + metadata.finetune = data.get("general.finetune") metadata.author = data.get("general.author") metadata.version = data.get("general.version") metadata.url = data.get("general.url") @@ -201,7 +205,7 @@ def per_model_weight_count_estimation(tensors, expert_count): weight_estimate = per_model_weight_count_estimation(model_tensors, expert_count) # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.model_name, self.metadata.version, expert_count, weight_estimate, encodingScheme) + self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, encodingScheme) # Filename Output if fname_out is not None: @@ -311,6 +315,10 @@ def set_gguf_meta_model(self): self.gguf_writer.add_name(self.model_name) if self.metadata is not None: + if self.metadata.basename is not None: + self.gguf_writer.add_basename(self.metadata.basename) + if self.metadata.finetune is not 
None: + self.gguf_writer.add_finetune(self.metadata.finetune) if self.metadata.author is not None: self.gguf_writer.add_author(self.metadata.author) if self.metadata.version is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index eee8f1fee853c..874f8f8e6f302 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -349,6 +349,8 @@ def load(model_plus: ModelPlus) -> Params: @dataclass class Metadata: name: Optional[str] = None + basename: Optional[str] = None + finetune: Optional[str] = None author: Optional[str] = None version: Optional[str] = None url: Optional[str] = None @@ -371,6 +373,8 @@ def load(metadata_path: Path) -> Metadata: # Assigning values to Metadata attributes if they exist in the JSON file # This is based on LLM_KV_NAMES mapping in llama.cpp metadata.name = data.get("general.name") + metadata.basename = data.get("general.basename") + metadata.finetune = data.get("general.finetune") metadata.author = data.get("general.author") metadata.version = data.get("general.version") metadata.url = data.get("general.url") @@ -820,6 +824,10 @@ def add_meta_model(self, params: Params, metadata: Metadata | None) -> None: self.gguf.add_name(name) if metadata is not None: + if metadata.basename is not None: + self.gguf.add_basename(metadata.basename) + if metadata.finetune is not None: + self.gguf.add_finetune(metadata.finetune) if metadata.author is not None: self.gguf.add_author(metadata.author) if metadata.version is not None: @@ -1226,6 +1234,8 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> str: name = metadata.name if metadata is not None and metadata.name is not None else model_name + basename = metadata.basename if metadata is not None and metadata.basename is not None else None + finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None version = metadata.version if metadata is not None and metadata.version is not None else None encodingScheme = { @@ -1234,7 +1244,7 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c GGMLFileType.MostlyQ8_0: "Q8_0", }[file_type] - return gguf.naming_convention(name, version, expert_count, model_params_count, encodingScheme) + return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, encodingScheme) def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> Path: diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 5eb3df706e6e2..69360a4fb322d 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -24,6 +24,8 @@ class General: QUANTIZATION_VERSION = "general.quantization_version" ALIGNMENT = "general.alignment" NAME = "general.name" + BASENAME = "general.basename" + FINETUNE = "general.finetune" AUTHOR = "general.author" VERSION = "general.version" URL = "general.url" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 3b94af25ed98c..a02882f8fd503 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -430,6 +430,12 @@ def add_type(self, type_name: str) -> None: def add_architecture(self) -> None: self.add_string(Keys.General.ARCHITECTURE, self.arch) + def add_basename(self, basename: str) -> None: + 
self.add_string(Keys.General.BASENAME, basename) + + def add_finetune(self, finetune: str) -> None: + self.add_string(Keys.General.FINETUNE, finetune) + def add_author(self, author: str) -> None: self.add_string(Keys.General.AUTHOR, author) diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index e375b687d2da1..dd08a36d27511 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -25,11 +25,26 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" -def naming_convention(model_name: str, version_string:str, expert_count_int:int, model_params_count: int, encodingScheme: str) -> str: +def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, encoding_scheme: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention - name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model" - version = f"-{version_string}" if version_string is not None else "" - expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None and expert_count_int > 0 else "" - parameters = model_weight_count_rounded_notation(model_params_count) - encodingScheme = encodingScheme.upper() - return f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}" + + if base_name is not None: + name = base_name.strip().title().replace(' ', '_') + elif model_name is not None: + name = model_name.strip().title().replace(' ', '_') + else: + name = "ggml-model" + + per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) + if expert_count_int is not None and expert_count_int > 0: + parameters = f"-{expert_count_int}x{per_model_rounded_weight_estimate}" + else: + parameters = f"-{per_model_rounded_weight_estimate}" + + finetune = f"-{finetune_string.strip().title().replace(' ', '_')}" if finetune_string is not None else "" + + version = f"-{version_string.strip().replace(' ', '_')}" if version_string is not None else "" + + encoding = f"-{encoding_scheme.strip().replace(' ', '_').upper()}" + + return f"{name}{parameters}{finetune}{version}{encoding}" From 916872f72f19912582297dd4d3da41f027504b82 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Fri, 31 May 2024 14:19:53 +1000 Subject: [PATCH 04/65] convert-*.py: model card metadata --- convert_hf_to_gguf.py | 24 ++++++++++++++++++++---- gguf-py/gguf/utility.py | 10 +++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index a79898350eb52..9fe81d1a200eb 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -10,6 +10,7 @@ import os import re import sys +import frontmatter from enum import IntEnum from pathlib import Path from hashlib import sha256 @@ -96,6 +97,7 @@ class Model: lazy: bool part_names: list[str] is_safetensors: bool + model_card: dict[str, Any] hparams: dict[str, Any] block_count: int tensor_map: gguf.TensorNameMap @@ -123,6 +125,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.is_safetensors = len(self.part_names) > 0 if not self.is_safetensors: self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") + self.model_card = Model.load_model_card(dir_model) self.hparams = Model.load_hparams(self.dir_model) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) 
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @@ -148,10 +151,18 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) - # Update any missing authorship metadata with huggingface_parameters - if self.metadata is not None and self.metadata.source_hf_repo is None: - if self.hparams is not None and "_name_or_path" in self.hparams: - self.metadata.source_hf_repo = self.hparams["_name_or_path"] + # Update any missing authorship metadata with HuggingFace parameters or model card frontmatter + if self.metadata is not None: + + # Source Hugging Face Repository + if self.metadata.source_hf_repo is None: + if self.hparams is not None and "_name_or_path" in self.hparams: + self.metadata.source_hf_repo = self.hparams["_name_or_path"] + + # Model License + if self.metadata.license is None: + if self.model_card is not None and "license" in self.model_card: + self.metadata.source_hf_repo = self.model_card["license"] # Set model name based on latest metadata either provided or calculated from environment def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): @@ -499,6 +510,11 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str] return part_names + @staticmethod + def load_model_card(dir_model: Path): + with open(dir_model / "README.md", "r", encoding="utf-8") as f: + return frontmatter.load(f) + @staticmethod def load_hparams(dir_model: Path): with open(dir_model / "config.json", "r", encoding="utf-8") as f: diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index dd08a36d27511..0919a744ee762 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -29,9 +29,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: - name = base_name.strip().title().replace(' ', '_') + name = base_name.strip().title().replace(' ', '-') elif model_name is not None: - name = model_name.strip().title().replace(' ', '_') + name = model_name.strip().title().replace(' ', '-') else: name = "ggml-model" @@ -41,10 +41,10 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers else: parameters = f"-{per_model_rounded_weight_estimate}" - finetune = f"-{finetune_string.strip().title().replace(' ', '_')}" if finetune_string is not None else "" + finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else "" - version = f"-{version_string.strip().replace(' ', '_')}" if version_string is not None else "" + version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" - encoding = f"-{encoding_scheme.strip().replace(' ', '_').upper()}" + encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}" return f"{name}{parameters}{finetune}{version}{encoding}" From 4d5f18a0e6b540b126ec0ef53af125b2e08916cd Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 01:49:58 +1000 Subject: [PATCH 05/65] convert-*.py: metadata class moved to utility --- convert_hf_to_gguf.py | 125 +++++++------------------------ examples/convert_legacy_llama.py | 62 +++------------ 
gguf-py/gguf/__init__.py | 1 + gguf-py/gguf/metadata.py | 49 ++++++++++++ gguf-py/gguf/utility.py | 40 ++++++++++ 5 files changed, 128 insertions(+), 149 deletions(-) create mode 100644 gguf-py/gguf/metadata.py diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 9fe81d1a200eb..d3c1e4c0c7758 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -31,46 +31,6 @@ logger = logging.getLogger("hf-to-gguf") -@dataclass -class Metadata: - name: Optional[str] = None - basename: Optional[str] = None - finetune: Optional[str] = None - author: Optional[str] = None - version: Optional[str] = None - url: Optional[str] = None - description: Optional[str] = None - licence: Optional[str] = None - source_url: Optional[str] = None - source_hf_repo: Optional[str] = None - - @staticmethod - def load(metadata_path: Path) -> Metadata: - if metadata_path is None or not metadata_path.exists(): - return Metadata() - - with open(metadata_path, 'r') as file: - data = json.load(file) - - # Create a new Metadata instance - metadata = Metadata() - - # Assigning values to Metadata attributes if they exist in the JSON file - # This is based on LLM_KV_NAMES mapping in llama.cpp - metadata.name = data.get("general.name") - metadata.basename = data.get("general.basename") - metadata.finetune = data.get("general.finetune") - metadata.author = data.get("general.author") - metadata.version = data.get("general.version") - metadata.url = data.get("general.url") - metadata.description = data.get("general.description") - metadata.license = data.get("general.license") - metadata.source_url = data.get("general.source.url") - metadata.source_hf_repo = data.get("general.source.huggingface.repository") - - return metadata - - ###### MODEL DEFINITIONS ###### class SentencePieceTokenTypes(IntEnum): @@ -105,12 +65,12 @@ class Model: fname_out: Path fname_default: Path gguf_writer: gguf.GGUFWriter - metadata: Metadata + metadata: gguf.Metadata # subclasses should define this! 
model_arch: gguf.MODEL_ARCH - def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: Metadata, + def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: gguf.Metadata, model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") @@ -164,72 +124,23 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, if self.model_card is not None and "license" in self.model_card: self.metadata.source_hf_repo = self.model_card["license"] - # Set model name based on latest metadata either provided or calculated from environment - def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): - if metadata is not None and metadata.name is not None: - # Explicit Metadata Was Provided By User - return metadata.name - elif huggingface_parameters is not None and "_name_or_path" in huggingface_parameters: - # Hugging Face Parameters Model Name or Model Folder Name is Provided - return huggingface_parameters["_name_or_path"] - elif huggingface_parameters is not None and "model_type" in huggingface_parameters: - # Hugging Face Parameters Model Type is Provided - return huggingface_parameters["model_type"] - elif dir_model is not None and dir_model.name is not None: - # Use directory folder name - return dir_model.name - else: - return gguf.MODEL_ARCH_NAMES[model_arch] - self.model_name = get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch) + self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch) # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' - encodingScheme = self.ftype.name.partition("_")[2] + encoding_scheme = self.ftype.name.partition("_")[2] # Get Expert Count From huggingface_parameters expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - def per_model_weight_count_estimation(tensors, expert_count): - # TODO: Ensure parameter count is accurate throughout various model type - # May currently overestimate parameter count in Mamba model because - # output weights is tied with token embeddings. 
- sum_weight_estimate = 0 - for name, data_torch in tensors: - # Got A Tensor - - # We don't need these - if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): - continue - - # Calculate Tensor Volume - sum_weights_in_tensor = 1 - for dim in data_torch.shape: - sum_weights_in_tensor *= dim - - # Add Tensor Volume To Running Count - sum_weight_estimate += sum_weights_in_tensor - - # Calculate weight estimate per model - per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate - - return per_model_weight_estimate - - weight_estimate = per_model_weight_count_estimation(model_tensors, expert_count) + weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count) # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, encodingScheme) + self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, encoding_scheme) # Filename Output if fname_out is not None: # custom defined filename and path was provided - def fill_templated_filename(filename: str, encodingScheme: str): - # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' - ftype_uppercase: str = encodingScheme.upper() - ftype_lowercase: str = encodingScheme.lower() - return filename.format(ftype_lowercase, - outtype=ftype_lowercase, ftype=ftype_lowercase, - OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) - - self.fname_out = fname_out.parent / fill_templated_filename(fname_out.name, encodingScheme) + self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, encoding_scheme) else: # output in the same directory as the model by default self.fname_out = dir_model.parent / self.fname_default @@ -499,6 +410,24 @@ def write_vocab(self): self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() + # Set model name based on latest metadata either provided or calculated from environment + @staticmethod + def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): + if metadata is not None and metadata.name is not None: + # Explicit Metadata Was Provided By User + return metadata.name + elif huggingface_parameters is not None and "_name_or_path" in huggingface_parameters: + # Hugging Face Parameters Model Name or Model Folder Name is Provided + return huggingface_parameters["_name_or_path"] + elif huggingface_parameters is not None and "model_type" in huggingface_parameters: + # Hugging Face Parameters Model Type is Provided + return huggingface_parameters["model_type"] + elif dir_model is not None and dir_model.name is not None: + # Use directory folder name + return dir_model.name + else: + return gguf.MODEL_ARCH_NAMES[model_arch] + @staticmethod def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]: part_names: list[str] = [] @@ -3682,7 +3611,7 @@ def main() -> None: else: logging.basicConfig(level=logging.INFO) - metadata = Metadata.load(args.metadata) + metadata = gguf.Metadata.load(args.metadata) dir_model = args.model if not dir_model.is_dir(): @@ -3713,7 +3642,7 @@ def main() -> None: hparams = Model.load_hparams(dir_model) with torch.inference_mode(): - encodingScheme = ftype_map[args.outtype] + encoding_scheme = ftype_map[args.outtype] 
model_architecture = hparams["architectures"][0] try: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 874f8f8e6f302..38d1745f5e2aa 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -24,7 +24,7 @@ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, IO, Iterable, Literal, TypeVar, Optional +from typing import TYPE_CHECKING, Any, Callable, IO, Iterable, Literal, TypeVar import numpy as np @@ -346,46 +346,6 @@ def load(model_plus: ModelPlus) -> Params: return params -@dataclass -class Metadata: - name: Optional[str] = None - basename: Optional[str] = None - finetune: Optional[str] = None - author: Optional[str] = None - version: Optional[str] = None - url: Optional[str] = None - description: Optional[str] = None - license: Optional[str] = None - source_url: Optional[str] = None - source_hf_repo: Optional[str] = None - - @staticmethod - def load(metadata_path: Path) -> Metadata: - if metadata_path is None or not metadata_path.exists(): - return Metadata() - - with open(metadata_path, 'r') as file: - data = json.load(file) - - # Create a new Metadata instance - metadata = Metadata() - - # Assigning values to Metadata attributes if they exist in the JSON file - # This is based on LLM_KV_NAMES mapping in llama.cpp - metadata.name = data.get("general.name") - metadata.basename = data.get("general.basename") - metadata.finetune = data.get("general.finetune") - metadata.author = data.get("general.author") - metadata.version = data.get("general.version") - metadata.url = data.get("general.url") - metadata.description = data.get("general.description") - metadata.license = data.get("general.license") - metadata.source_url = data.get("general.source.url") - metadata.source_hf_repo = data.get("general.source.huggingface.repository") - - return metadata - - # # data loading # TODO: reuse (probably move to gguf.py?) 
@@ -810,7 +770,7 @@ class OutputFile: def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE): self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess) - def add_meta_model(self, params: Params, metadata: Metadata | None) -> None: + def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None: # Metadata About The Model And Its Provenence name = "LLaMA" if metadata is not None and metadata.name is not None: @@ -952,7 +912,7 @@ def close(self) -> None: @staticmethod def write_vocab_only( fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, - endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata | None = None, + endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: gguf.Metadata | None = None, ) -> None: check_vocab_size(params, vocab, pad_vocab=pad_vocab) @@ -986,7 +946,7 @@ def write_all( fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, - metadata: Metadata | None = None, + metadata: gguf.Metadata | None = None, ) -> None: check_vocab_size(params, vocab, pad_vocab=pad_vocab) @@ -1029,10 +989,10 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT raise ValueError(f"Unexpected combination of types: {name_to_type}") -def per_model_weight_count_estimation(model: LazyModel, expert_count:int) -> int: +def per_model_weight_count_estimation(tensors: dict[str, LazyTensor], expert_count:int) -> int: # TODO: Ensure parameter count is accurate throughout various model type sum_weight_estimate = 0 - for name, lazy_tensor in model.items(): + for name, lazy_tensor in tensors: # We don't need these if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): continue @@ -1232,7 +1192,7 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> return vocab, special_vocab -def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> str: +def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str: name = metadata.name if metadata is not None and metadata.name is not None else model_name basename = metadata.basename if metadata is not None and metadata.basename is not None else None finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None @@ -1247,7 +1207,7 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, encodingScheme) -def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> Path: +def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata) ret = model_paths[0].parent / f"{default_filename}.gguf" if ret in model_paths: @@ -1300,13 +1260,13 @@ def main(args_in: list[str] | None = None) -> None: else: 
logging.basicConfig(level=logging.INFO) - metadata = Metadata.load(args.metadata) + metadata = gguf.Metadata.load(args.metadata) if args.get_outfile: model_plus = load_some_model(args.model) params = Params.load(model_plus) model = convert_model_names(model_plus.model, params, args.skip_unknown) - model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts) + model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) ftype = pick_output_type(model, args.outtype) print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100 return @@ -1324,7 +1284,7 @@ def main(args_in: list[str] | None = None) -> None: else: model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None) - model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts) + model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})") if args.dump: diff --git a/gguf-py/gguf/__init__.py b/gguf-py/gguf/__init__.py index a07b8ff0ddbc7..243defc4c1ca4 100644 --- a/gguf-py/gguf/__init__.py +++ b/gguf-py/gguf/__init__.py @@ -6,3 +6,4 @@ from .tensor_mapping import * from .vocab import * from .utility import * +from .metadata import * diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py new file mode 100644 index 0000000000000..0d175605ae160 --- /dev/null +++ b/gguf-py/gguf/metadata.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from typing import Optional +from dataclasses import dataclass + +from .constants import Keys + + +@dataclass +class Metadata: + name: Optional[str] = None + basename: Optional[str] = None + finetune: Optional[str] = None + author: Optional[str] = None + version: Optional[str] = None + url: Optional[str] = None + description: Optional[str] = None + licence: Optional[str] = None + source_url: Optional[str] = None + source_hf_repo: Optional[str] = None + + @staticmethod + def load(metadata_path: Path) -> Metadata: + if metadata_path is None or not metadata_path.exists(): + return Metadata() + + with open(metadata_path, 'r') as file: + data = json.load(file) + + # Create a new Metadata instance + metadata = Metadata() + + # Assigning values to Metadata attributes if they exist in the JSON file + # This is based on LLM_KV_NAMES mapping in llama.cpp + metadata.name = data.get(Keys.General.NAME) + metadata.basename = data.get(Keys.General.BASENAME) + metadata.finetune = data.get(Keys.General.FINETUNE) + metadata.author = data.get(Keys.General.AUTHOR) + metadata.version = data.get(Keys.General.VERSION) + metadata.url = data.get(Keys.General.URL) + metadata.description = data.get(Keys.General.DESCRIPTION) + metadata.license = data.get(Keys.General.LICENSE) + metadata.source_url = data.get(Keys.General.SOURCE_URL) + metadata.source_hf_repo = data.get(Keys.General.SOURCE_HF_REPO) + + return metadata diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 0919a744ee762..3a6046277120b 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -1,5 +1,45 @@ from __future__ import annotations +from typing import TYPE_CHECKING, Iterator + +if TYPE_CHECKING: + from torch import Tensor + + +def fill_templated_filename(filename: str, encoding_scheme: str): + # Given a file name 
fill in any type templates e.g. 'some-model-name.{ftype}.gguf' + ftype_uppercase: str = encoding_scheme.upper() + ftype_lowercase: str = encoding_scheme.lower() + return filename.format(ftype_lowercase, + outtype=ftype_lowercase, ftype=ftype_lowercase, + OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) + + +def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int: + # TODO: Ensure parameter count is accurate throughout various model type + # May currently overestimate parameter count in Mamba model because + # output weights is tied with token embeddings. + sum_weight_estimate = 0 + for name, data_torch in tensors: + # Got A Tensor + + # We don't need these + if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): + continue + + # Calculate Tensor Volume + sum_weights_in_tensor = 1 + for dim in data_torch.shape: + sum_weights_in_tensor *= dim + + # Add Tensor Volume To Running Count + sum_weight_estimate += sum_weights_in_tensor + + # Calculate weight estimate per model + per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate + + return per_model_weight_estimate + def model_weight_count_rounded_notation(model_params_count: int) -> str: if model_params_count > 1e15 : From 5c263cb257b7f3132c7b46bc36182a6a29ad6e13 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 01:58:47 +1000 Subject: [PATCH 06/65] convert-*.py: encoding_scheme --> output_type --- convert_hf_to_gguf.py | 12 ++++++------ examples/convert_legacy_llama.py | 4 ++-- gguf-py/gguf/utility.py | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index d3c1e4c0c7758..2968954499a0f 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -127,7 +127,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch) # Extracts and converts the encoding scheme from the given file type name. e.g. 
'gguf.LlamaFileType.ALL_F32' --> 'F32' - encoding_scheme = self.ftype.name.partition("_")[2] + output_type = self.ftype.name.partition("_")[2] # Get Expert Count From huggingface_parameters expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None @@ -135,12 +135,12 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count) # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, encoding_scheme) + self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) # Filename Output if fname_out is not None: # custom defined filename and path was provided - self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, encoding_scheme) + self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type) else: # output in the same directory as the model by default self.fname_out = dir_model.parent / self.fname_default @@ -3642,7 +3642,7 @@ def main() -> None: hparams = Model.load_hparams(dir_model) with torch.inference_mode(): - encoding_scheme = ftype_map[args.outtype] + output_type = ftype_map[args.outtype] model_architecture = hparams["architectures"][0] try: @@ -3651,8 +3651,8 @@ def main() -> None: logger.error(f"Model {hparams['architectures'][0]} is not supported") sys.exit(1) - model_instance = model_class(dir_model, encodingScheme, fname_out, args.bigendian, args.use_temp_file, - args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors, + model_instance = model_class(dir_model, output_type, fname_out, args.bigendian, args.use_temp_file, args.no_lazy, + metadata, args.model_name, split_max_tensors=args.split_max_tensors, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, small_first_shard=args.no_tensor_first_split) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 38d1745f5e2aa..fe18d59704daa 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1198,13 +1198,13 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None version = metadata.version if metadata is not None and metadata.version is not None else None - encodingScheme = { + output_type = { GGMLFileType.AllF32: "F32", GGMLFileType.MostlyF16: "F16", GGMLFileType.MostlyQ8_0: "Q8_0", }[file_type] - return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, encodingScheme) + return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type) def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 3a6046277120b..2a52d12736a34 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -6,10 +6,10 @@ from torch import Tensor -def fill_templated_filename(filename: str, encoding_scheme: str): +def fill_templated_filename(filename: str, output_type: 
str): # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' - ftype_uppercase: str = encoding_scheme.upper() - ftype_lowercase: str = encoding_scheme.lower() + ftype_uppercase: str = output_type.upper() + ftype_lowercase: str = output_type.lower() return filename.format(ftype_lowercase, outtype=ftype_lowercase, ftype=ftype_lowercase, OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) @@ -65,7 +65,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" -def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, encoding_scheme: str) -> str: +def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: @@ -85,6 +85,6 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" - encoding = f"-{encoding_scheme.strip().replace(' ', '-').upper()}" + precision = f"-{output_type.strip().replace(' ', '-').upper()}" - return f"{name}{parameters}{finetune}{version}{encoding}" + return f"{name}{parameters}{finetune}{version}{precision}" From b36e391b872c1f8ec8c430fec75c93e4baca725a Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 12:27:28 +1000 Subject: [PATCH 07/65] convert-*.py: parse model card in metadata util. Add license_link and license_name to kv store --- convert_hf_to_gguf.py | 14 ++--- examples/convert_legacy_llama.py | 12 ++-- gguf-py/gguf/constants.py | 2 + gguf-py/gguf/gguf_writer.py | 6 ++ gguf-py/gguf/metadata.py | 104 +++++++++++++++++++++++++------ 5 files changed, 105 insertions(+), 33 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 2968954499a0f..7cfcb61064077 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -57,7 +57,6 @@ class Model: lazy: bool part_names: list[str] is_safetensors: bool - model_card: dict[str, Any] hparams: dict[str, Any] block_count: int tensor_map: gguf.TensorNameMap @@ -85,7 +84,6 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.is_safetensors = len(self.part_names) > 0 if not self.is_safetensors: self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") - self.model_card = Model.load_model_card(dir_model) self.hparams = Model.load_hparams(self.dir_model) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @@ -249,8 +247,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_url(self.metadata.url) if self.metadata.description is not None: self.gguf_writer.add_description(self.metadata.description) - if self.metadata.licence is not None: - self.gguf_writer.add_licence(self.metadata.licence) + if self.metadata.license is not None: + self.gguf_writer.add_license(self.metadata.license) if self.metadata.source_url is not None: self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: @@ -439,11 +437,6 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str] return part_names - @staticmethod - def 
load_model_card(dir_model: Path): - with open(dir_model / "README.md", "r", encoding="utf-8") as f: - return frontmatter.load(f) - @staticmethod def load_hparams(dir_model: Path): with open(dir_model / "config.json", "r", encoding="utf-8") as f: @@ -3611,9 +3604,10 @@ def main() -> None: else: logging.basicConfig(level=logging.INFO) - metadata = gguf.Metadata.load(args.metadata) dir_model = args.model + metadata = gguf.Metadata.load(args.metadata, dir_model) + if not dir_model.is_dir(): logger.error(f'Error: {args.model} is not a directory') sys.exit(1) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index fe18d59704daa..5827da5306524 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1260,10 +1260,12 @@ def main(args_in: list[str] | None = None) -> None: else: logging.basicConfig(level=logging.INFO) + dir_model = args.model + metadata = gguf.Metadata.load(args.metadata) if args.get_outfile: - model_plus = load_some_model(args.model) + model_plus = load_some_model(dir_model) params = Params.load(model_plus) model = convert_model_names(model_plus.model, params, args.skip_unknown) model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) @@ -1275,14 +1277,14 @@ def main(args_in: list[str] | None = None) -> None: raise ValueError("--vocab-only does not make sense with --no-vocab") if args.dump_single: - model_plus = lazy_load_file(args.model) + model_plus = lazy_load_file(dir_model) do_dump_model(model_plus) return if not args.vocab_only: - model_plus = load_some_model(args.model) + model_plus = load_some_model(dir_model) else: - model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None) + model_plus = ModelPlus(model = {}, paths = [dir_model / 'dummy'], format = 'none', vocab = None) model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})") @@ -1318,7 +1320,7 @@ def main(args_in: list[str] | None = None) -> None: logger.info(f"params = {params}") model_parent_path = model_plus.paths[0].parent - vocab_path = Path(args.vocab_dir or args.model or model_parent_path) + vocab_path = Path(args.vocab_dir or dir_model or model_parent_path) vocab_factory = VocabFactory(vocab_path) vocab_types = None if args.no_vocab else args.vocab_type.split(",") vocab, special_vocab = vocab_factory.load_vocab(vocab_types, model_parent_path) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 69360a4fb322d..a0d94a8f6c363 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -31,6 +31,8 @@ class General: URL = "general.url" DESCRIPTION = "general.description" LICENSE = "general.license" + LICENSE_NAME = "general.license.name" + LICENSE_LINK = "general.license.link" SOURCE_URL = "general.source.url" SOURCE_HF_REPO = "general.source.huggingface.repository" FILE_TYPE = "general.file_type" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index a02882f8fd503..5429d5c5585cc 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -454,6 +454,12 @@ def add_description(self, description: str) -> None: def add_license(self, license: str) -> None: self.add_string(Keys.General.LICENSE, license) + def add_license_name(self, license: str) -> None: + self.add_string(Keys.General.LICENSE_NAME, license) + + def add_license_link(self, 
license: str) -> None: + self.add_string(Keys.General.LICENSE_LINK, license) + def add_source_url(self, url: str) -> None: self.add_string(Keys.General.SOURCE_URL, url) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 0d175605ae160..a20d2fde81e1d 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import frontmatter from pathlib import Path from typing import Optional @@ -11,6 +12,7 @@ @dataclass class Metadata: + # Authorship Metadata to be written to GGUF KV Store name: Optional[str] = None basename: Optional[str] = None finetune: Optional[str] = None @@ -18,32 +20,98 @@ class Metadata: version: Optional[str] = None url: Optional[str] = None description: Optional[str] = None - licence: Optional[str] = None + license: Optional[str] = None + license_name: Optional[str] = None + license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None @staticmethod - def load(metadata_path: Path) -> Metadata: - if metadata_path is None or not metadata_path.exists(): - return Metadata() - - with open(metadata_path, 'r') as file: - data = json.load(file) + def load(metadata_override_path: Path, model_path: Path) -> Metadata: + # This grabs as many contextual authorship metadata as possible from the model repository + # making any conversion as required to match the gguf kv store metadata format + # as well as giving users the ability to override any authorship metadata that may be incorrect # Create a new Metadata instance metadata = Metadata() - # Assigning values to Metadata attributes if they exist in the JSON file + # load model folder model card if available + # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 + model_card = Metadata.load_model_card(model_path) + if metadata.name is None: + if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]: + metadata.name = model_card["model_name"][0].get("name") + elif "model_name" in model_card: + # non huggingface model card standard but notice some model creator using it + metadata.name = model_card.get("model_name") + if metadata.license is None: + metadata.license = model_card.get("license") + if metadata.license_name is None: + metadata.license_name = model_card.get("license_name") + if metadata.license_link is None: + metadata.license_link = model_card.get("license_link") + + # load huggingface parameters if available + hf_params = Metadata.load_huggingface_parameters(model_path) + hf_name_or_path = hf_params.get("_name_or_path") + if metadata.name is None and hf_name_or_path is not None: + metadata.name = Path(hf_name_or_path).name + if metadata.source_hf_repo is None and hf_name_or_path is not None: + metadata.source_hf_repo = Path(hf_name_or_path).name + + # Use Directory Folder Name As Fallback Name + if metadata.name is None: + if model_path is not None and model_path.exists(): + metadata.name = model_path.name + + # Metadata Override # This is based on LLM_KV_NAMES mapping in llama.cpp - metadata.name = data.get(Keys.General.NAME) - metadata.basename = data.get(Keys.General.BASENAME) - metadata.finetune = data.get(Keys.General.FINETUNE) - metadata.author = data.get(Keys.General.AUTHOR) - metadata.version = data.get(Keys.General.VERSION) - metadata.url = data.get(Keys.General.URL) - metadata.description = data.get(Keys.General.DESCRIPTION) - metadata.license = 
data.get(Keys.General.LICENSE) - metadata.source_url = data.get(Keys.General.SOURCE_URL) - metadata.source_hf_repo = data.get(Keys.General.SOURCE_HF_REPO) + metadata_override = Metadata.load_metadata_override(metadata_override_path) + metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 + metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 + metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO, metadata.source_hf_repo) # noqa: E202 return metadata + + @staticmethod + def load_metadata_override(metadata_override_path: Path): + if metadata_override_path is None or not metadata_override_path.exists(): + return {} + + with open(metadata_override_path, "r", encoding="utf-8") as f: + return json.load(f) + + @staticmethod + def load_model_card(model_path: Path): + if model_path is None or not model_path.exists(): + return {} + + model_card_path = model_path / "README.md" + + if not model_card_path.exists(): + return {} + + with open(model_card_path, "r", encoding="utf-8") as f: + return frontmatter.load(f) + + @staticmethod + def load_huggingface_parameters(model_path: Path): + if model_path is None or not model_path.exists(): + return {} + + config_path = model_path / "config.json" + + if not config_path.exists(): + return {} + + with open(config_path, "r", encoding="utf-8") as f: + return json.load(f) From 8f734083ddb0126933bf064520f06c6ddaee3ad2 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 15:11:52 +1000 Subject: [PATCH 08/65] convert-*.py: add base_version and add tags --- convert_hf_to_gguf.py | 4 ++++ examples/convert_legacy_llama.py | 4 ++++ gguf-py/gguf/constants.py | 2 ++ gguf-py/gguf/gguf_writer.py | 6 ++++++ gguf-py/gguf/metadata.py | 11 +++++++++++ 5 files changed, 27 insertions(+) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7cfcb61064077..fc07226e4afcd 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -243,6 +243,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_author(self.metadata.author) if self.metadata.version is not None: self.gguf_writer.add_version(self.metadata.version) + if self.metadata.base_version is not None: + self.gguf_writer.add_base_version(self.metadata.base_version) if self.metadata.url is not None: self.gguf_writer.add_url(self.metadata.url) if self.metadata.description is not None: @@ -253,6 +255,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: 
self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) + if self.metadata.tags is not None: + self.gguf_writer.add_tags(self.metadata.tags) def set_gguf_parameters(self): self.gguf_writer.add_block_count(self.block_count) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 5827da5306524..6e17cb3bdfbaf 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -792,6 +792,8 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_author(metadata.author) if metadata.version is not None: self.gguf.add_version(metadata.version) + if metadata.base_version is not None: + self.gguf.add_base_version(metadata.base_version) if metadata.url is not None: self.gguf.add_url(metadata.url) if metadata.description is not None: @@ -802,6 +804,8 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_source_url(metadata.source_url) if metadata.source_hf_repo is not None: self.gguf.add_source_hf_repo(metadata.source_hf_repo) + if metadata.tags is not None: + self.gguf_writer.add_tags(metadata.tags) def add_meta_arch(self, params: Params) -> None: # Metadata About The Neural Architecture Itself diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index a0d94a8f6c363..907d781b06891 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -28,6 +28,7 @@ class General: FINETUNE = "general.finetune" AUTHOR = "general.author" VERSION = "general.version" + BASE_VERSION = "general.base_version" URL = "general.url" DESCRIPTION = "general.description" LICENSE = "general.license" @@ -36,6 +37,7 @@ class General: SOURCE_URL = "general.source.url" SOURCE_HF_REPO = "general.source.huggingface.repository" FILE_TYPE = "general.file_type" + TAGS = "general.tags" class LLM: VOCAB_SIZE = "{arch}.vocab_size" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 5429d5c5585cc..619220d44a6ea 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -442,6 +442,9 @@ def add_author(self, author: str) -> None: def add_version(self, version: str) -> None: self.add_string(Keys.General.VERSION, version) + def add_base_version(self, version: str) -> None: + self.add_string(Keys.General.BASE_VERSION, version) + def add_tensor_data_layout(self, layout: str) -> None: self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout) @@ -469,6 +472,9 @@ def add_source_hf_repo(self, repo: str) -> None: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) + def add_tags(self, tags: Sequence[str]) -> None: + self.add_array(Keys.Tokenizer.TAGS, tags) + def add_name(self, name: str) -> None: self.add_string(Keys.General.NAME, name) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index a20d2fde81e1d..b481dd93de7f4 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -18,6 +18,7 @@ class Metadata: finetune: Optional[str] = None author: Optional[str] = None version: Optional[str] = None + base_version: Optional[str] = None url: Optional[str] = None description: Optional[str] = None license: Optional[str] = None @@ -25,6 +26,7 @@ class Metadata: license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None + tags: Optional[List[str]] = None @staticmethod def load(metadata_override_path: Path, model_path: Path) -> Metadata: @@ -40,6 +42,8 @@ def load(metadata_override_path: Path, model_path: Path) 
-> Metadata: model_card = Metadata.load_model_card(model_path) if metadata.name is None: if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]: + # We check if there is only one model information in the model-index + # (This is a safe choice in case there is multiple models in one repo in the future) metadata.name = model_card["model_name"][0].get("name") elif "model_name" in model_card: # non huggingface model card standard but notice some model creator using it @@ -50,6 +54,11 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: metadata.license_name = model_card.get("license_name") if metadata.license_link is None: metadata.license_link = model_card.get("license_link") + if metadata.author is None: + # non huggingface model card standard but notice some model creator using it + metadata.author = model_card.get("model_creator") + if metadata.tags is None: + metadata.tags = model_card.get("tags", []) # load huggingface parameters if available hf_params = Metadata.load_huggingface_parameters(model_path) @@ -72,6 +81,7 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 @@ -79,6 +89,7 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO, metadata.source_hf_repo) # noqa: E202 + metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 return metadata From 0f1d50fab7c17fcaf53b8bb2101c07eca5fd1360 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 15:40:31 +1000 Subject: [PATCH 09/65] convert-*.py: add parameter size class --- convert_hf_to_gguf.py | 13 ++++--- examples/convert_legacy_llama.py | 2 ++ gguf-py/gguf/constants.py | 1 + gguf-py/gguf/gguf_writer.py | 3 ++ gguf-py/gguf/metadata.py | 34 ++++++++++--------- gguf-py/gguf/utility.py | 11 ++++++ .../requirements-convert_legacy_llama.txt | 1 + 7 files changed, 42 insertions(+), 23 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index fc07226e4afcd..ab2f39e27ebde 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -10,7 +10,6 @@ import os import re import sys -import frontmatter from enum import IntEnum from pathlib import Path from hashlib import sha256 @@ -90,11 +89,9 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.tensor_names = None self.metadata = metadata - model_tensors = self.get_tensors() - if self.ftype == gguf.LlamaFileType.GUESSED: # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie. 
- _, first_tensor = next(model_tensors) + _, first_tensor = next(self.get_tensors()) if first_tensor.dtype == torch.float16: logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_F16 @@ -127,10 +124,10 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] - # Get Expert Count From huggingface_parameters + # Update authorship metadata class with parameter size class (useful for leader boards) expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - - weight_estimate = gguf.per_model_weight_count_estimation(model_tensors, expert_count) + weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count) + self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate) # Generate default filename based on model specification and available metadata self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) @@ -255,6 +252,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) + if self.metadata.parameter_size_class is not None: + self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 6e17cb3bdfbaf..2bf008a6b29df 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1363,6 +1363,8 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata) + metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count) + params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 907d781b06891..561e082aa5bce 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -37,6 +37,7 @@ class General: SOURCE_URL = "general.source.url" SOURCE_HF_REPO = "general.source.huggingface.repository" FILE_TYPE = "general.file_type" + PARAMETER_SIZE_CLASS = "general.parameter_size_class" TAGS = "general.tags" class LLM: diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 619220d44a6ea..fdac3455efcf7 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -472,6 +472,9 @@ def add_source_hf_repo(self, repo: str) -> None: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) + def add_parameter_size_class(self, parameter_size_class: str) -> None: + self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class) + def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index b481dd93de7f4..8bed1a3df5edf 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -26,7 +26,8 @@ class 
Metadata: license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None - tags: Optional[List[str]] = None + parameter_size_class: Optional[str] = None + tags: Optional[list[str]] = None @staticmethod def load(metadata_override_path: Path, model_path: Path) -> Metadata: @@ -56,7 +57,7 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: metadata.license_link = model_card.get("license_link") if metadata.author is None: # non huggingface model card standard but notice some model creator using it - metadata.author = model_card.get("model_creator") + metadata.author = model_card.get("model_creator") if metadata.tags is None: metadata.tags = model_card.get("tags", []) @@ -76,20 +77,21 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: # Metadata Override # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 - metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 - metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 - metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 - metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 - metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 - metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 - metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 - metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 - metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO, metadata.source_hf_repo) # noqa: E202 - metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 + metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 + metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 + metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + metadata.source_url = 
metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 + metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 + metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 return metadata diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 2a52d12736a34..0ee3499e234a8 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -65,6 +65,17 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" +def parameter_size_class(expert_count_int:int, model_params_count: int) -> str: + per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) + + if expert_count_int is not None and expert_count_int > 0: + size_class = f"{expert_count_int}x{per_model_rounded_weight_estimate}" + else: + size_class = f"{per_model_rounded_weight_estimate}" + + return size_class + + def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention diff --git a/requirements/requirements-convert_legacy_llama.txt b/requirements/requirements-convert_legacy_llama.txt index 1d07b09522f61..493dbe18c5a17 100644 --- a/requirements/requirements-convert_legacy_llama.txt +++ b/requirements/requirements-convert_legacy_llama.txt @@ -3,3 +3,4 @@ sentencepiece~=0.2.0 transformers>=4.40.1,<5.0.0 gguf>=0.1.0 protobuf>=4.21.0,<5.0.0 +python-frontmatter~=1.0.1 From 684c604ecaf83578d18510d2cf14775225128cb4 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 17:17:56 +1000 Subject: [PATCH 10/65] convert-*.py: add datasets and language to KV store --- convert_hf_to_gguf.py | 4 ++++ gguf-py/gguf/constants.py | 2 ++ gguf-py/gguf/gguf_writer.py | 6 ++++++ gguf-py/gguf/metadata.py | 8 ++++++++ 4 files changed, 20 insertions(+) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index ab2f39e27ebde..245ae94b808d8 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -256,6 +256,10 @@ def set_gguf_meta_model(self): self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) + if self.metadata.languages is not None: + self.gguf_writer.add_languages(self.metadata.languages) + if self.metadata.datasets is not None: + self.gguf_writer.add_datasets(self.metadata.datasets) def set_gguf_parameters(self): self.gguf_writer.add_block_count(self.block_count) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 561e082aa5bce..bd6fffd31b0ff 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -39,6 +39,8 @@ class General: FILE_TYPE = "general.file_type" PARAMETER_SIZE_CLASS = "general.parameter_size_class" TAGS = "general.tags" + LANGUAGE = "general.language" + DATASETS = "general.datasets" class LLM: VOCAB_SIZE = "{arch}.vocab_size" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index fdac3455efcf7..c0d553d786d35 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -478,6 +478,12 @@ def add_parameter_size_class(self, parameter_size_class: str) -> None: 
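To illustrate the size-class strings that the parameter_size_class() helper added to gguf-py/gguf/utility.py above is expected to produce, here is a small self-contained sketch. rounded_notation() is a simplified stand-in for model_weight_count_rounded_notation() (whose full body is not reproduced in this series excerpt), so the exact rounding may differ:

    # Sketch only: shows the "NxM" vs "M" formatting of the size-class string.
    def rounded_notation(params):
        # Simplified stand-in using K/M/B/T suffixes for the rounded parameter count.
        for scale, suffix in ((1e12, "T"), (1e9, "B"), (1e6, "M"), (1e3, "K")):
            if params >= scale:
                return f"{round(params / scale)}{suffix}"
        return str(round(params))

    def size_class(expert_count, per_model_params):
        # Mirrors the branching in parameter_size_class(): prefix the expert count
        # for mixture-of-experts models, otherwise just the rounded size.
        rounded = rounded_notation(per_model_params)
        return f"{expert_count}x{rounded}" if expert_count else rounded

    print(size_class(None, 7_241_732_096))  # e.g. "7B"
    print(size_class(8, 7_241_732_096))     # e.g. "8x7B"
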
def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.TAGS, tags) + def add_languages(self, languages: Sequence[str]) -> None: + self.add_array(Keys.Tokenizer.LANGUAGE, languages) + + def add_datasets(self, datasets: Sequence[str]) -> None: + self.add_array(Keys.Tokenizer.DATASETS, datasets) + def add_name(self, name: str) -> None: self.add_string(Keys.General.NAME, name) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 8bed1a3df5edf..4144874198beb 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -28,6 +28,8 @@ class Metadata: source_hf_repo: Optional[str] = None parameter_size_class: Optional[str] = None tags: Optional[list[str]] = None + language: Optional[list[str]] = None + datasets: Optional[list[str]] = None @staticmethod def load(metadata_override_path: Path, model_path: Path) -> Metadata: @@ -60,6 +62,10 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: metadata.author = model_card.get("model_creator") if metadata.tags is None: metadata.tags = model_card.get("tags", []) + if metadata.languages is None: + metadata.languages = model_card.get("languages", []) + if metadata.datasets is None: + metadata.datasets = model_card.get("datasets", []) # load huggingface parameters if available hf_params = Metadata.load_huggingface_parameters(model_path) @@ -92,6 +98,8 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 + metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 + metadata.datasets = metadata_override.get(Keys.General.datasets , metadata.datasets ) # noqa: E202 return metadata From b1927eed82be06994cb76fb54b3ab3120356a049 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 17:44:53 +1000 Subject: [PATCH 11/65] convert-*.py: move per model weight estimation away from util back to main script plus some refactoring --- convert_hf_to_gguf.py | 115 +++++++++++++++++-------------- examples/convert_legacy_llama.py | 55 ++++++++------- gguf-py/gguf/utility.py | 31 --------- 3 files changed, 91 insertions(+), 110 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 245ae94b808d8..8ff5b067a3f90 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -47,7 +47,6 @@ class SentencePieceTokenTypes(IntEnum): class Model: _model_classes: dict[str, type[Model]] = {} - model_name: str dir_model: Path ftype: gguf.LlamaFileType is_big_endian: bool @@ -72,6 +71,10 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") + + if metadata is None: + raise TypeError("authorship metadata must be provided") + self.dir_model = dir_model self.ftype = ftype self.is_big_endian = is_big_endian @@ -121,16 +124,20 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch) + # Fallback to model 
architecture name if metadata name is still missing + if self.metadata.name is None: + self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch] + # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] # Update authorship metadata class with parameter size class (useful for leader boards) expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - weight_estimate = gguf.per_model_weight_count_estimation(self.get_tensors(), expert_count) + weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate) # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.model_name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) + self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) # Filename Output if fname_out is not None: @@ -229,37 +236,36 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", " return new_name def set_gguf_meta_model(self): - self.gguf_writer.add_name(self.model_name) - - if self.metadata is not None: - if self.metadata.basename is not None: - self.gguf_writer.add_basename(self.metadata.basename) - if self.metadata.finetune is not None: - self.gguf_writer.add_finetune(self.metadata.finetune) - if self.metadata.author is not None: - self.gguf_writer.add_author(self.metadata.author) - if self.metadata.version is not None: - self.gguf_writer.add_version(self.metadata.version) - if self.metadata.base_version is not None: - self.gguf_writer.add_base_version(self.metadata.base_version) - if self.metadata.url is not None: - self.gguf_writer.add_url(self.metadata.url) - if self.metadata.description is not None: - self.gguf_writer.add_description(self.metadata.description) - if self.metadata.license is not None: - self.gguf_writer.add_license(self.metadata.license) - if self.metadata.source_url is not None: - self.gguf_writer.add_source_url(self.metadata.source_url) - if self.metadata.source_hf_repo is not None: - self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) - if self.metadata.parameter_size_class is not None: - self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) - if self.metadata.tags is not None: - self.gguf_writer.add_tags(self.metadata.tags) - if self.metadata.languages is not None: - self.gguf_writer.add_languages(self.metadata.languages) - if self.metadata.datasets is not None: - self.gguf_writer.add_datasets(self.metadata.datasets) + self.gguf_writer.add_name(self.metadata.name) + + if self.metadata.basename is not None: + self.gguf_writer.add_basename(self.metadata.basename) + if self.metadata.finetune is not None: + self.gguf_writer.add_finetune(self.metadata.finetune) + if self.metadata.author is not None: + self.gguf_writer.add_author(self.metadata.author) + if self.metadata.version is not None: + self.gguf_writer.add_version(self.metadata.version) + if self.metadata.base_version is not None: + self.gguf_writer.add_base_version(self.metadata.base_version) + if self.metadata.url is not None: + self.gguf_writer.add_url(self.metadata.url) + if self.metadata.description is not 
None: + self.gguf_writer.add_description(self.metadata.description) + if self.metadata.license is not None: + self.gguf_writer.add_license(self.metadata.license) + if self.metadata.source_url is not None: + self.gguf_writer.add_source_url(self.metadata.source_url) + if self.metadata.source_hf_repo is not None: + self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) + if self.metadata.parameter_size_class is not None: + self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) + if self.metadata.tags is not None: + self.gguf_writer.add_tags(self.metadata.tags) + if self.metadata.languages is not None: + self.gguf_writer.add_languages(self.metadata.languages) + if self.metadata.datasets is not None: + self.gguf_writer.add_datasets(self.metadata.datasets) def set_gguf_parameters(self): self.gguf_writer.add_block_count(self.block_count) @@ -415,23 +421,30 @@ def write_vocab(self): self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() - # Set model name based on latest metadata either provided or calculated from environment - @staticmethod - def get_model_name(metadata, huggingface_parameters, dir_model, model_arch): - if metadata is not None and metadata.name is not None: - # Explicit Metadata Was Provided By User - return metadata.name - elif huggingface_parameters is not None and "_name_or_path" in huggingface_parameters: - # Hugging Face Parameters Model Name or Model Folder Name is Provided - return huggingface_parameters["_name_or_path"] - elif huggingface_parameters is not None and "model_type" in huggingface_parameters: - # Hugging Face Parameters Model Type is Provided - return huggingface_parameters["model_type"] - elif dir_model is not None and dir_model.name is not None: - # Use directory folder name - return dir_model.name - else: - return gguf.MODEL_ARCH_NAMES[model_arch] + def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int: + # TODO: Ensure parameter count is accurate throughout various model type + # May currently overestimate parameter count in Mamba model because + # output weights is tied with token embeddings. + sum_weight_estimate = 0 + for name, data_torch in tensors: + # Got A Tensor + + # We don't need these + if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): + continue + + # Calculate Tensor Volume + sum_weights_in_tensor = 1 + for dim in data_torch.shape: + sum_weights_in_tensor *= dim + + # Add Tensor Volume To Running Count + sum_weight_estimate += sum_weights_in_tensor + + # Calculate weight estimate per model + per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate + + return per_model_weight_estimate @staticmethod def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 2bf008a6b29df..7bbd77e47ba8c 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -773,7 +773,7 @@ def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian. 
def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None: # Metadata About The Model And Its Provenence name = "LLaMA" - if metadata is not None and metadata.name is not None: + if metadata.name is not None: name = metadata.name elif params.path_model is not None: name = params.path_model.name @@ -783,29 +783,28 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_name(name) - if metadata is not None: - if metadata.basename is not None: - self.gguf.add_basename(metadata.basename) - if metadata.finetune is not None: - self.gguf.add_finetune(metadata.finetune) - if metadata.author is not None: - self.gguf.add_author(metadata.author) - if metadata.version is not None: - self.gguf.add_version(metadata.version) - if metadata.base_version is not None: - self.gguf.add_base_version(metadata.base_version) - if metadata.url is not None: - self.gguf.add_url(metadata.url) - if metadata.description is not None: - self.gguf.add_description(metadata.description) - if metadata.license is not None: - self.gguf.add_license(metadata.license) - if metadata.source_url is not None: - self.gguf.add_source_url(metadata.source_url) - if metadata.source_hf_repo is not None: - self.gguf.add_source_hf_repo(metadata.source_hf_repo) - if metadata.tags is not None: - self.gguf_writer.add_tags(metadata.tags) + if metadata.basename is not None: + self.gguf.add_basename(metadata.basename) + if metadata.finetune is not None: + self.gguf.add_finetune(metadata.finetune) + if metadata.author is not None: + self.gguf.add_author(metadata.author) + if metadata.version is not None: + self.gguf.add_version(metadata.version) + if metadata.base_version is not None: + self.gguf.add_base_version(metadata.base_version) + if metadata.url is not None: + self.gguf.add_url(metadata.url) + if metadata.description is not None: + self.gguf.add_description(metadata.description) + if metadata.license is not None: + self.gguf.add_license(metadata.license) + if metadata.source_url is not None: + self.gguf.add_source_url(metadata.source_url) + if metadata.source_hf_repo is not None: + self.gguf.add_source_hf_repo(metadata.source_hf_repo) + if metadata.tags is not None: + self.gguf_writer.add_tags(metadata.tags) def add_meta_arch(self, params: Params) -> None: # Metadata About The Neural Architecture Itself @@ -1197,10 +1196,10 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str: - name = metadata.name if metadata is not None and metadata.name is not None else model_name - basename = metadata.basename if metadata is not None and metadata.basename is not None else None - finetune = metadata.finetune if metadata is not None and metadata.finetune is not None else None - version = metadata.version if metadata is not None and metadata.version is not None else None + name = metadata.name if metadata.name is not None else model_name + basename = metadata.basename if metadata.basename is not None else None + finetune = metadata.finetune if metadata.finetune is not None else None + version = metadata.version if metadata.version is not None else None output_type = { GGMLFileType.AllF32: "F32", diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 0ee3499e234a8..429a590b26a9e 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -1,10 +1,5 @@ from __future__ import annotations -from 
typing import TYPE_CHECKING, Iterator - -if TYPE_CHECKING: - from torch import Tensor - def fill_templated_filename(filename: str, output_type: str): # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' @@ -15,32 +10,6 @@ def fill_templated_filename(filename: str, output_type: str): OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) -def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int: - # TODO: Ensure parameter count is accurate throughout various model type - # May currently overestimate parameter count in Mamba model because - # output weights is tied with token embeddings. - sum_weight_estimate = 0 - for name, data_torch in tensors: - # Got A Tensor - - # We don't need these - if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): - continue - - # Calculate Tensor Volume - sum_weights_in_tensor = 1 - for dim in data_torch.shape: - sum_weights_in_tensor *= dim - - # Add Tensor Volume To Running Count - sum_weight_estimate += sum_weights_in_tensor - - # Calculate weight estimate per model - per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate - - return per_model_weight_estimate - - def model_weight_count_rounded_notation(model_params_count: int) -> str: if model_params_count > 1e15 : # Quadrillion Of Parameters From f7c20793b904e64800d4cf665fcd2ea4390add7d Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 2 Jun 2024 23:56:04 +1000 Subject: [PATCH 12/65] convert-*.py: enable --model-name direct metadata override --- convert_hf_to_gguf.py | 3 ++- examples/convert_legacy_llama.py | 23 ++++++++++++++++------- gguf-py/gguf/metadata.py | 16 +++++++++------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 8ff5b067a3f90..697238ab01806 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3624,9 +3624,10 @@ def main() -> None: else: logging.basicConfig(level=logging.INFO) + model_name = args.model_name dir_model = args.model - metadata = gguf.Metadata.load(args.metadata, dir_model) + metadata = gguf.Metadata.load(args.metadata, dir_model, model_name) if not dir_model.is_dir(): logger.error(f'Error: {args.model} is not a directory') diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 7bbd77e47ba8c..16175cde5c278 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1195,8 +1195,8 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> return vocab, special_vocab -def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str: - name = metadata.name if metadata.name is not None else model_name +def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str: + name = metadata.name if metadata.name is not None else None basename = metadata.basename if metadata.basename is not None else None finetune = metadata.finetune if metadata.finetune is not None else None version = metadata.version if metadata.version is not None else None @@ -1210,8 +1210,8 @@ def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_c return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type) -def default_outfile(model_paths: list[Path], file_type: 
GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: - default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata) +def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: + default_filename = default_convention_outfile(file_type, expert_count, model_params_count, metadata) ret = model_paths[0].parent / f"{default_filename}.gguf" if ret in model_paths: logger.error( @@ -1252,6 +1252,7 @@ def main(args_in: list[str] | None = None) -> None: parser.add_argument("--verbose", action="store_true", help="increase output verbosity") parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file") parser.add_argument("--get-outfile", action="store_true", help="get calculated default outfile name") + parser.add_argument("--model-name", type=str, default=None, help="name of the model") args = parser.parse_args(args_in) @@ -1263,9 +1264,10 @@ def main(args_in: list[str] | None = None) -> None: else: logging.basicConfig(level=logging.INFO) + model_name = args.model_name dir_model = args.model - metadata = gguf.Metadata.load(args.metadata) + metadata = gguf.Metadata.load(args.metadata, dir_model, model_name) if args.get_outfile: model_plus = load_some_model(dir_model) @@ -1273,7 +1275,11 @@ def main(args_in: list[str] | None = None) -> None: model = convert_model_names(model_plus.model, params, args.skip_unknown) model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) ftype = pick_output_type(model, args.outtype) - print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100 + + if metadata.name is None: + metadata.name = params.path_model.name + + print(f"{default_convention_outfile(ftype, params.n_experts, model_params_count, metadata)}") # noqa: NP100 return if args.no_vocab and args.vocab_only: @@ -1354,13 +1360,16 @@ def main(args_in: list[str] | None = None) -> None: assert params is not None + if metadata.name is None: + metadata.name = params.path_model.name + logger.info(f"Vocab info: {vocab}") logger.info(f"Special vocab info: {special_vocab}") model = model_plus.model model = convert_model_names(model, params, args.skip_unknown) ftype = pick_output_type(model, args.outtype) model = convert_to_output_type(model, ftype) - outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata) + outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata) metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 4144874198beb..13b97ccd66a50 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -3,10 +3,8 @@ import json import frontmatter from pathlib import Path - from typing import Optional from dataclasses import dataclass - from .constants import Keys @@ -32,7 +30,7 @@ class Metadata: datasets: Optional[list[str]] = None @staticmethod - def load(metadata_override_path: Path, model_path: Path) -> Metadata: + def load(metadata_override_path: Optional[Path], model_path: Optional[Path], model_name: Optional[str]) -> Metadata: # This grabs as many contextual authorship metadata as possible from the model repository 
# making any conversion as required to match the gguf kv store metadata format # as well as giving users the ability to override any authorship metadata that may be incorrect @@ -80,7 +78,7 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: if model_path is not None and model_path.exists(): metadata.name = model_path.name - # Metadata Override + # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 @@ -101,10 +99,14 @@ def load(metadata_override_path: Path, model_path: Path) -> Metadata: metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 metadata.datasets = metadata_override.get(Keys.General.datasets , metadata.datasets ) # noqa: E202 + # Direct Metadata Override (via direct cli argument) + if model_name is not None: + metadata.name = model_name + return metadata @staticmethod - def load_metadata_override(metadata_override_path: Path): + def load_metadata_override(metadata_override_path: Optional[Path]): if metadata_override_path is None or not metadata_override_path.exists(): return {} @@ -112,7 +114,7 @@ def load_metadata_override(metadata_override_path: Path): return json.load(f) @staticmethod - def load_model_card(model_path: Path): + def load_model_card(model_path: Optional[Path]): if model_path is None or not model_path.exists(): return {} @@ -125,7 +127,7 @@ def load_model_card(model_path: Path): return frontmatter.load(f) @staticmethod - def load_huggingface_parameters(model_path: Path): + def load_huggingface_parameters(model_path: Optional[Path]): if model_path is None or not model_path.exists(): return {} From 5a86dfaa1c7a934da3a7a29a22aa0b6eab15615d Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 3 Jun 2024 00:57:37 +1000 Subject: [PATCH 13/65] convert-*.py: add general.organization to kv store --- convert_hf_to_gguf.py | 2 ++ examples/convert_legacy_llama.py | 6 ++++++ gguf-py/gguf/constants.py | 3 ++- gguf-py/gguf/gguf_writer.py | 5 ++++- gguf-py/gguf/metadata.py | 12 +++++++----- 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 697238ab01806..429b83dbeb965 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -244,6 +244,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_finetune(self.metadata.finetune) if self.metadata.author is not None: self.gguf_writer.add_author(self.metadata.author) + if self.metadata.organization is not None: + self.gguf_writer.add_organization(self.metadata.organization) if self.metadata.version is not None: self.gguf_writer.add_version(self.metadata.version) if self.metadata.base_version is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 16175cde5c278..286fa36370682 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -789,6 +789,8 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_finetune(metadata.finetune) if metadata.author is not None: self.gguf.add_author(metadata.author) + if metadata.organization is not None: + self.add_organization(metadata.organization) if metadata.version is not None: self.gguf.add_version(metadata.version) if metadata.base_version is not None: @@ -805,6 +807,10 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> 
None self.gguf.add_source_hf_repo(metadata.source_hf_repo) if metadata.tags is not None: self.gguf_writer.add_tags(metadata.tags) + if metadata.languages is not None: + self.gguf_writer.add_languages(metadata.languages) + if metadata.datasets is not None: + self.gguf_writer.add_datasets(metadata.datasets) def add_meta_arch(self, params: Params) -> None: # Metadata About The Neural Architecture Itself diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index bd6fffd31b0ff..a965b37a3e455 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -27,6 +27,7 @@ class General: BASENAME = "general.basename" FINETUNE = "general.finetune" AUTHOR = "general.author" + ORGANIZATION = "general.organization" VERSION = "general.version" BASE_VERSION = "general.base_version" URL = "general.url" @@ -39,7 +40,7 @@ class General: FILE_TYPE = "general.file_type" PARAMETER_SIZE_CLASS = "general.parameter_size_class" TAGS = "general.tags" - LANGUAGE = "general.language" + LANGUAGES = "general.languages" DATASETS = "general.datasets" class LLM: diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index c0d553d786d35..8f87e446e123d 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -439,6 +439,9 @@ def add_finetune(self, finetune: str) -> None: def add_author(self, author: str) -> None: self.add_string(Keys.General.AUTHOR, author) + def add_organization(self, organization: str) -> None: + self.add_string(Keys.General.ORGANIZATION, organization) + def add_version(self, version: str) -> None: self.add_string(Keys.General.VERSION, version) @@ -479,7 +482,7 @@ def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.TAGS, tags) def add_languages(self, languages: Sequence[str]) -> None: - self.add_array(Keys.Tokenizer.LANGUAGE, languages) + self.add_array(Keys.Tokenizer.LANGUAGES, languages) def add_datasets(self, datasets: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.DATASETS, datasets) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 13b97ccd66a50..475c99f58c5a0 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -15,6 +15,7 @@ class Metadata: basename: Optional[str] = None finetune: Optional[str] = None author: Optional[str] = None + organization: Optional[str] = None version: Optional[str] = None base_version: Optional[str] = None url: Optional[str] = None @@ -26,7 +27,7 @@ class Metadata: source_hf_repo: Optional[str] = None parameter_size_class: Optional[str] = None tags: Optional[list[str]] = None - language: Optional[list[str]] = None + languages: Optional[list[str]] = None datasets: Optional[list[str]] = None @staticmethod @@ -38,7 +39,7 @@ def load(metadata_override_path: Optional[Path], model_path: Optional[Path], mod # Create a new Metadata instance metadata = Metadata() - # load model folder model card if available + # load huggingface model card if available # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 model_card = Metadata.load_model_card(model_path) if metadata.name is None: @@ -61,9 +62,9 @@ def load(metadata_override_path: Optional[Path], model_path: Optional[Path], mod if metadata.tags is None: metadata.tags = model_card.get("tags", []) if metadata.languages is None: - metadata.languages = model_card.get("languages", []) + metadata.languages = model_card.get("language", model_card.get("languages", [])) if metadata.datasets is None: - metadata.datasets = 
model_card.get("datasets", []) + metadata.datasets = model_card.get("datasets", model_card.get("dataset", [])) # load huggingface parameters if available hf_params = Metadata.load_huggingface_parameters(model_path) @@ -85,6 +86,7 @@ def load(metadata_override_path: Optional[Path], model_path: Optional[Path], mod metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 @@ -97,7 +99,7 @@ def load(metadata_override_path: Optional[Path], model_path: Optional[Path], mod metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 - metadata.datasets = metadata_override.get(Keys.General.datasets , metadata.datasets ) # noqa: E202 + metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 # Direct Metadata Override (via direct cli argument) if model_name is not None: From dd1571211e0184f348e06b04094b384c5d8a67bc Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 3 Jun 2024 23:52:46 +1000 Subject: [PATCH 14/65] convert-*.py: add quantized_by and enhance heuristics --- convert_hf_to_gguf.py | 8 +- examples/convert_legacy_llama.py | 2 +- gguf-py/gguf/constants.py | 47 +++++----- gguf-py/gguf/gguf_writer.py | 7 +- gguf-py/gguf/metadata.py | 152 ++++++++++++++++++++++++------- gguf-py/gguf/utility.py | 6 +- 6 files changed, 159 insertions(+), 63 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 429b83dbeb965..43936352b54e6 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -134,7 +134,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, # Update authorship metadata class with parameter size class (useful for leader boards) expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) - self.metadata.parameter_size_class = gguf.parameter_size_class(expert_count, weight_estimate) + self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate) # Generate default filename based on model specification and available metadata self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) @@ -244,6 +244,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_finetune(self.metadata.finetune) if self.metadata.author is not None: self.gguf_writer.add_author(self.metadata.author) + if self.metadata.quantized_by is not None: + self.gguf_writer.add_quantized_by(self.metadata.quantized_by) if self.metadata.organization is not None: 
self.gguf_writer.add_organization(self.metadata.organization) if self.metadata.version is not None: @@ -260,8 +262,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) - if self.metadata.parameter_size_class is not None: - self.gguf_writer.add_parameter_size_class(self.metadata.parameter_size_class) + if self.metadata.parameter_weight_class is not None: + self.gguf_writer.add_parameter_weight_class(self.metadata.parameter_weight_class) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) if self.metadata.languages is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 286fa36370682..f3839861c708a 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1377,7 +1377,7 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata) - metadata.parameter_size_class = gguf.parameter_size_class(params.n_experts, model_params_count) + metadata.parameter_weight_class = gguf.parameter_weight_class(params.n_experts, model_params_count) params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index a965b37a3e455..21d9e84b3d00c 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -19,29 +19,30 @@ class Keys: class General: - TYPE = "general.type" - ARCHITECTURE = "general.architecture" - QUANTIZATION_VERSION = "general.quantization_version" - ALIGNMENT = "general.alignment" - NAME = "general.name" - BASENAME = "general.basename" - FINETUNE = "general.finetune" - AUTHOR = "general.author" - ORGANIZATION = "general.organization" - VERSION = "general.version" - BASE_VERSION = "general.base_version" - URL = "general.url" - DESCRIPTION = "general.description" - LICENSE = "general.license" - LICENSE_NAME = "general.license.name" - LICENSE_LINK = "general.license.link" - SOURCE_URL = "general.source.url" - SOURCE_HF_REPO = "general.source.huggingface.repository" - FILE_TYPE = "general.file_type" - PARAMETER_SIZE_CLASS = "general.parameter_size_class" - TAGS = "general.tags" - LANGUAGES = "general.languages" - DATASETS = "general.datasets" + TYPE = "general.type" + ARCHITECTURE = "general.architecture" + QUANTIZATION_VERSION = "general.quantization_version" + ALIGNMENT = "general.alignment" + NAME = "general.name" + BASENAME = "general.basename" + FINETUNE = "general.finetune" + AUTHOR = "general.author" + QUANTIZED_BY = "general.quantized_by" + ORGANIZATION = "general.organization" + VERSION = "general.version" + BASE_VERSION = "general.base_version" + URL = "general.url" + DESCRIPTION = "general.description" + LICENSE = "general.license" + LICENSE_NAME = "general.license.name" + LICENSE_LINK = "general.license.link" + SOURCE_URL = "general.source.url" + SOURCE_HF_REPO = "general.source.huggingface.repository" + FILE_TYPE = "general.file_type" + PARAMETER_WEIGHT_CLASS = "general.parameter_weight_class" + TAGS = "general.tags" + LANGUAGES = "general.languages" + DATASETS = "general.datasets" class LLM: VOCAB_SIZE = "{arch}.vocab_size" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 8f87e446e123d..b17112beb2e37 100644 --- a/gguf-py/gguf/gguf_writer.py +++ 
b/gguf-py/gguf/gguf_writer.py @@ -439,6 +439,9 @@ def add_finetune(self, finetune: str) -> None: def add_author(self, author: str) -> None: self.add_string(Keys.General.AUTHOR, author) + def add_quantized_by(self, quantized: str) -> None: + self.add_string(Keys.General.QUANTIZED_BY, quantized) + def add_organization(self, organization: str) -> None: self.add_string(Keys.General.ORGANIZATION, organization) @@ -475,8 +478,8 @@ def add_source_hf_repo(self, repo: str) -> None: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) - def add_parameter_size_class(self, parameter_size_class: str) -> None: - self.add_string(Keys.General.PARAMETER_SIZE_CLASS, parameter_size_class) + def add_parameter_weight_class(self, parameter_weight_class: str) -> None: + self.add_string(Keys.General.PARAMETER_WEIGHT_CLASS, parameter_weight_class) def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.Tokenizer.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 475c99f58c5a0..aa8a2383077b6 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re import json import frontmatter from pathlib import Path @@ -15,6 +16,7 @@ class Metadata: basename: Optional[str] = None finetune: Optional[str] = None author: Optional[str] = None + quantized_by: Optional[str] = None organization: Optional[str] = None version: Optional[str] = None base_version: Optional[str] = None @@ -25,13 +27,13 @@ class Metadata: license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None - parameter_size_class: Optional[str] = None + parameter_weight_class: Optional[str] = None tags: Optional[list[str]] = None languages: Optional[list[str]] = None datasets: Optional[list[str]] = None @staticmethod - def load(metadata_override_path: Optional[Path], model_path: Optional[Path], model_name: Optional[str]) -> Metadata: + def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None) -> Metadata: # This grabs as many contextual authorship metadata as possible from the model repository # making any conversion as required to match the gguf kv store metadata format # as well as giving users the ability to override any authorship metadata that may be incorrect @@ -42,14 +44,54 @@ def load(metadata_override_path: Optional[Path], model_path: Optional[Path], mod # load huggingface model card if available # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 model_card = Metadata.load_model_card(model_path) - if metadata.name is None: - if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]: - # We check if there is only one model information in the model-index - # (This is a safe choice in case there is multiple models in one repo in the future) - metadata.name = model_card["model_name"][0].get("name") - elif "model_name" in model_card: - # non huggingface model card standard but notice some model creator using it - metadata.name = model_card.get("model_name") + + if "model_name" in model_card: + # Not part of huggingface model card standard but notice some model creator using it + # such as TheBloke who would encode 'Mixtral 8X7B Instruct v0.1' into model_name + metadata.name = model_card.get("model_name") + + if "base_model" in model_card: + # Not part of huggingface model 
card standard but notice some model creator using it + # such as TheBloke who would encode 'mistralai/Mixtral-8x7B-Instruct-v0.1' into base_model + model_id = model_card.get("base_model") + model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) + + if metadata.name is None and model_name_normal is not None: + metadata.name = model_name_normal + if metadata.organization is None and organization_name is not None: + metadata.organization = organization_name + if metadata.basename is None and base_name is not None: + metadata.basename = base_name + if metadata.finetune is None and fine_tune is not None: + metadata.finetune = fine_tune + if metadata.version is None and version_string is not None: + metadata.version = version_string + if metadata.parameter_weight_class is None and parameter_weight_class is not None: + metadata.parameter_weight_class = parameter_weight_class + + if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]: + # This is a model index which has model id that can be extracted into organization and model name + # if so then we can safely extract organization and name + # (This is a safe choice in case there is multiple models in one repo in the future) + model_id = model_card["model-index"][0].get("name") + model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) + + if metadata.name is None and model_name_normal is not None: + metadata.name = model_name_normal + if metadata.organization is None and organization_name is not None: + metadata.organization = organization_name + if metadata.basename is None and base_name is not None: + metadata.basename = base_name + if metadata.finetune is None and fine_tune is not None: + metadata.finetune = fine_tune + if metadata.version is None and version_string is not None: + metadata.version = version_string + if metadata.parameter_weight_class is None and parameter_weight_class is not None: + metadata.parameter_weight_class = parameter_weight_class + + if metadata.quantized_by is None: + # Not part of hugging face model card standard, but is used by TheBloke to credit them self for quantizing 3rd party models + metadata.quantized_by = model_card.get("quantized_by") if metadata.license is None: metadata.license = model_card.get("license") if metadata.license_name is None: @@ -82,24 +124,25 @@ def load(metadata_override_path: Optional[Path], model_path: Optional[Path], mod # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 - metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 - metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 - metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 - metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 - metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 - metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 - metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # 
noqa: E202 - metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 - metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 - metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 - metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 - metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 - metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 - metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 + metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by ) # noqa: E202 + metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 + metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 + metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 + metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 + metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + metadata.parameter_weight_class = metadata_override.get(Keys.General.PARAMETER_WEIGHT_CLASS, metadata.parameter_weight_class) # noqa: E202 + metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 + metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 + metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 # Direct Metadata Override (via direct cli argument) if model_name is not None: @@ -108,7 +151,7 @@ def load(metadata_override_path: Optional[Path], model_path: Optional[Path], mod return metadata @staticmethod - def load_metadata_override(metadata_override_path: Optional[Path]): + def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, object]: if metadata_override_path is None or not 
metadata_override_path.exists(): return {} @@ -116,7 +159,7 @@ def load_metadata_override(metadata_override_path: Optional[Path]): return json.load(f) @staticmethod - def load_model_card(model_path: Optional[Path]): + def load_model_card(model_path: Optional[Path] = None) -> dict[str, object]: if model_path is None or not model_path.exists(): return {} @@ -129,7 +172,7 @@ def load_model_card(model_path: Optional[Path]): return frontmatter.load(f) @staticmethod - def load_huggingface_parameters(model_path: Optional[Path]): + def load_huggingface_parameters(model_path: Optional[Path] = None) -> dict[str, object]: if model_path is None or not model_path.exists(): return {} @@ -140,3 +183,50 @@ def load_huggingface_parameters(model_path: Optional[Path]): with open(config_path, "r", encoding="utf-8") as f: return json.load(f) + + @staticmethod + def get_model_name_components(model_identifier: Optional[str] = None) -> dict[str, object]: + # Huggingface often store model id + + if model_identifier is None: + # model ID missing + return None, None, None, None, None, None + + if ' ' in model_identifier: + # model ID is actually a normal human sentence + # which means its most likely a normal model name only + # not part of the hugging face naming standard, but whatever + return model_identifier, None, None, None, None, None + + if '/' in model_identifier: + # model ID (huggingface style) + organization, model = model_identifier.split('/', 1) + else: + # model ID but missing org components + model = model_identifier + organization = None + + # Apply formatting to organization and model_name + # 'stable-diffusion-xl-base-1.0' --> 'Stable Diffusion Xl Base 1.0' + + organization_name = organization.strip().replace('-', ' ').title() if organization is not None else None + model_name_normal = model.strip().replace('-', ' ').title() if model is not None else None + + # Regular expression to extract model name components + # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1' + + regex_match = re.compile(r'^(?P[A-Za-z0-9\s]*(?:(?:-[A-Za-z\s][A-Za-z0-9\s]*)*))' + r'(?:-(?P(?:\d+x)?\d+[A-Za-z]+))?' + r'(?:-(?P[A-Za-z0-9\s-]+))?' 
+ r'(?:-(?Pv\d+(?:\.\d+)*))?$').match(model) + + if not regex_match: + return model_name_normal, organization_name, None, None, None, None + + components = regex_match.groupdict() + base_name = components.get("base_name") + fine_tune = components.get("fine_tune") + version_string = components.get("version_string") + parameter_weight_class = components.get("parameter_weight_class") + + return model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 429a590b26a9e..6b50f7e45228c 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -34,7 +34,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" -def parameter_size_class(expert_count_int:int, model_params_count: int) -> str: +def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str: per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) if expert_count_int is not None and expert_count_int > 0: @@ -49,9 +49,9 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: - name = base_name.strip().title().replace(' ', '-') + name = base_name.strip().title().replace(' ', '-').replace('/', '-') elif model_name is not None: - name = model_name.strip().title().replace(' ', '-') + name = model_name.strip().title().replace(' ', '-').replace('/', '-') else: name = "ggml-model" From b0553f42da021520003beb01712d6fcb7231a802 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 3 Jun 2024 23:56:14 +1000 Subject: [PATCH 15/65] convert-*.py: adjust help message --- convert_hf_to_gguf.py | 2 +- examples/convert_legacy_llama.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 43936352b54e6..7b1697ff3de0a 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3589,7 +3589,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument( "--metadata", type=Path, - help="Specify the path for a metadata file" + help="Specify the path for an authorship metadata override file" ) parser.add_argument( "--get-outfile", action="store_true", diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index f3839861c708a..30cc66d597bf3 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1256,7 +1256,7 @@ def main(args_in: list[str] | None = None) -> None: parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides") parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing") parser.add_argument("--verbose", action="store_true", help="increase output verbosity") - parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file") + parser.add_argument("--metadata", type=Path, help="Specify the path for an authorship metadata override file") parser.add_argument("--get-outfile", action="store_true", help="get calculated default outfile name") parser.add_argument("--model-name", type=str, default=None, help="name of the model") From 4d5cd0670a364da0be9620e14a2660744772da30 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 4 Jun 2024 00:22:52 +1000 Subject: 
[PATCH 16/65] convert-*.py: use heuristics to parse _name_or_path --- gguf-py/gguf/metadata.py | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index aa8a2383077b6..3107e59cb8cd3 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -110,11 +110,27 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # load huggingface parameters if available hf_params = Metadata.load_huggingface_parameters(model_path) + hf_name_or_path = hf_params.get("_name_or_path") - if metadata.name is None and hf_name_or_path is not None: - metadata.name = Path(hf_name_or_path).name - if metadata.source_hf_repo is None and hf_name_or_path is not None: - metadata.source_hf_repo = Path(hf_name_or_path).name + if hf_name_or_path is not None and Metadata.is_model_id(hf_name_or_path): + # Use _name_or_path only if its actually a model name and not some computer path + # e.g. 'meta-llama/Llama-2-7b-hf' + model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(hf_name_or_path) + if metadata.name is None and model_name_normal is not None: + metadata.name = model_name_normal + if metadata.organization is None and organization_name is not None: + metadata.organization = organization_name + if metadata.basename is None and base_name is not None: + metadata.basename = base_name + if metadata.finetune is None and fine_tune is not None: + metadata.finetune = fine_tune + if metadata.version is None and version_string is not None: + metadata.version = version_string + if metadata.parameter_weight_class is None and parameter_weight_class is not None: + metadata.parameter_weight_class = parameter_weight_class + if metadata.source_hf_repo is None and not Metadata.is_model_name_only(hf_name_or_path): + # Can't just have the model name as the source hf repo as a link to the huggingface website needs the org name and the model name + metadata.source_hf_repo = "https://huggingface.co/{hf_name_or_path}" # Use Directory Folder Name As Fallback Name if metadata.name is None: @@ -184,6 +200,22 @@ def load_huggingface_parameters(model_path: Optional[Path] = None) -> dict[str, with open(config_path, "r", encoding="utf-8") as f: return json.load(f) + @staticmethod + def is_model_id(name_or_path: Optional[str] = None) -> bool: + # Return True if the string has 1 or 0 slashes, indicating a model id + # Created specifically because of _name_or_path in hugging face parameter + if name_or_path is None: + return False + return name_or_path.count('/') <= 1 + + @staticmethod + def is_model_name_only(name_or_path: Optional[str] = None) -> bool: + # Return True if the string has 0 slashes, indicating a model name only model id + # Created specifically because of _name_or_path in hugging face parameter + if name_or_path is None: + return False + return name_or_path.count('/') == 0 + @staticmethod def get_model_name_components(model_identifier: Optional[str] = None) -> dict[str, object]: # Huggingface often store model id From 32e80e094cd7fbb5441c216839448c41e726fb4a Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 4 Jun 2024 00:28:16 +1000 Subject: [PATCH 17/65] convert-*.py: base_model is actually in spec for model cards --- gguf-py/gguf/metadata.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 3107e59cb8cd3..61b6a510f0a1d 100644 --- 
a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -51,8 +51,6 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata.name = model_card.get("model_name") if "base_model" in model_card: - # Not part of huggingface model card standard but notice some model creator using it - # such as TheBloke who would encode 'mistralai/Mixtral-8x7B-Instruct-v0.1' into base_model model_id = model_card.get("base_model") model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) From 54918ad14e362314fe882bc8fa2c1f85124bc1c3 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 4 Jun 2024 01:14:50 +1000 Subject: [PATCH 18/65] convert-*.py: refactor parameter weight class --- convert_hf_to_gguf.py | 13 +++++++------ examples/convert_legacy_llama.py | 3 ++- gguf-py/gguf/utility.py | 9 ++------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7b1697ff3de0a..348e85535395b 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -128,16 +128,17 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, if self.metadata.name is None: self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch] + # Generate parameter weight class (useful for leader boards) if not yet determined + if self.metadata.parameter_weight_class is None: + expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None + weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) + self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate) + # Extracts and converts the encoding scheme from the given file type name. e.g. 
'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] - # Update authorship metadata class with parameter size class (useful for leader boards) - expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) - self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate) - # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, expert_count, weight_estimate, output_type) + self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.parameter_weight_class, output_type) # Filename Output if fname_out is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 30cc66d597bf3..0a26a5503ce36 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1206,6 +1206,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_ basename = metadata.basename if metadata.basename is not None else None finetune = metadata.finetune if metadata.finetune is not None else None version = metadata.version if metadata.version is not None else None + parameter_weight_class = metadata.parameter_weight_class if metadata.parameter_weight_class is not None else gguf.parameter_weight_class(expert_count, model_params_count) output_type = { GGMLFileType.AllF32: "F32", @@ -1213,7 +1214,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_ GGMLFileType.MostlyQ8_0: "Q8_0", }[file_type] - return gguf.naming_convention(name, basename, finetune, version, expert_count, model_params_count, output_type) + return gguf.naming_convention(name, basename, finetune, version, parameter_weight_class, output_type) def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 6b50f7e45228c..5ddfd8cb9745f 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -44,8 +44,7 @@ def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str return size_class - -def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, expert_count_int:int, model_params_count: int, output_type: str) -> str: +def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: @@ -55,11 +54,7 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers else: name = "ggml-model" - per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) - if expert_count_int is not None and expert_count_int > 0: - parameters = f"-{expert_count_int}x{per_model_rounded_weight_estimate}" - else: - parameters = f"-{per_model_rounded_weight_estimate}" + parameters = f"-{parameter_weight_class}" if parameter_weight_class is not None else "" finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is 
not None else "" From 39472a09da1003a90f1d81fbecbf10a15b120896 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 4 Jun 2024 02:18:53 +1000 Subject: [PATCH 19/65] convert-*.py: need to include self in per_model_weight_count_estimation() --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 348e85535395b..f6f59eb81d688 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -426,7 +426,7 @@ def write_vocab(self): self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() - def per_model_weight_count_estimation(tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int: + def per_model_weight_count_estimation(self, tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int: # TODO: Ensure parameter count is accurate throughout various model type # May currently overestimate parameter count in Mamba model because # output weights is tied with token embeddings. From 3625a42061aa5dfc0c21b6aa3c6f8bf0b7d975e3 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 4 Jun 2024 02:25:11 +1000 Subject: [PATCH 20/65] convert-*.py: add heuristic to directory name fallback Also add source_url for huggingface url --- convert_hf_to_gguf.py | 4 +- examples/convert_legacy_llama.py | 16 ++++---- gguf-py/gguf/gguf_writer.py | 6 +-- gguf-py/gguf/metadata.py | 63 +++++++++++++++++++++----------- gguf-py/gguf/utility.py | 1 + 5 files changed, 57 insertions(+), 33 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index f6f59eb81d688..41858dc0db5e4 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -146,7 +146,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type) else: # output in the same directory as the model by default - self.fname_out = dir_model.parent / self.fname_default + self.fname_out = dir_model / f"{self.fname_default}.gguf" # Configure GGUF Writer self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) @@ -447,7 +447,7 @@ def per_model_weight_count_estimation(self, tensors: Iterator[tuple[str, Tensor] sum_weight_estimate += sum_weights_in_tensor # Calculate weight estimate per model - per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate + per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate return per_model_weight_estimate diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 0a26a5503ce36..6e3d328488608 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -806,11 +806,11 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None if metadata.source_hf_repo is not None: self.gguf.add_source_hf_repo(metadata.source_hf_repo) if metadata.tags is not None: - self.gguf_writer.add_tags(metadata.tags) + self.gguf.add_tags(metadata.tags) if metadata.languages is not None: - self.gguf_writer.add_languages(metadata.languages) + self.gguf.add_languages(metadata.languages) if metadata.datasets is not None: - self.gguf_writer.add_datasets(metadata.datasets) + self.gguf.add_datasets(metadata.datasets) def add_meta_arch(self, params: Params) -> None: # Metadata About The Neural Architecture Itself @@ -961,6 
+961,8 @@ def write_all( of = OutputFile(fname_out, endianess=endianess) + print(metadata) + # meta data of.add_meta_model(params, metadata) of.add_meta_arch(params) @@ -1017,7 +1019,7 @@ def per_model_weight_count_estimation(tensors: dict[str, LazyTensor], expert_cou sum_weight_estimate += sum_weights_in_tensor # Calculate weight estimate per model - per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate + per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate return per_model_weight_estimate @@ -1302,9 +1304,6 @@ def main(args_in: list[str] | None = None) -> None: else: model_plus = ModelPlus(model = {}, paths = [dir_model / 'dummy'], format = 'none', vocab = None) - model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) - logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})") - if args.dump: do_dump_model(model_plus) return @@ -1370,6 +1369,9 @@ def main(args_in: list[str] | None = None) -> None: if metadata.name is None: metadata.name = params.path_model.name + model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) + logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})") + logger.info(f"Vocab info: {vocab}") logger.info(f"Special vocab info: {special_vocab}") model = model_plus.model diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index b17112beb2e37..b5950afab06ca 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -482,13 +482,13 @@ def add_parameter_weight_class(self, parameter_weight_class: str) -> None: self.add_string(Keys.General.PARAMETER_WEIGHT_CLASS, parameter_weight_class) def add_tags(self, tags: Sequence[str]) -> None: - self.add_array(Keys.Tokenizer.TAGS, tags) + self.add_array(Keys.General.TAGS, tags) def add_languages(self, languages: Sequence[str]) -> None: - self.add_array(Keys.Tokenizer.LANGUAGES, languages) + self.add_array(Keys.General.LANGUAGES, languages) def add_datasets(self, datasets: Sequence[str]) -> None: - self.add_array(Keys.Tokenizer.DATASETS, datasets) + self.add_array(Keys.General.DATASETS, datasets) def add_name(self, name: str) -> None: self.add_string(Keys.General.NAME, name) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 61b6a510f0a1d..08f49b7d0202f 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -51,21 +51,28 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata.name = model_card.get("model_name") if "base_model" in model_card: + # Example: stabilityai/stable-diffusion-xl-base-1.0. 
Can also be a list (for merges) model_id = model_card.get("base_model") - model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) - if metadata.name is None and model_name_normal is not None: - metadata.name = model_name_normal - if metadata.organization is None and organization_name is not None: - metadata.organization = organization_name - if metadata.basename is None and base_name is not None: - metadata.basename = base_name - if metadata.finetune is None and fine_tune is not None: - metadata.finetune = fine_tune - if metadata.version is None and version_string is not None: - metadata.version = version_string - if metadata.parameter_weight_class is None and parameter_weight_class is not None: - metadata.parameter_weight_class = parameter_weight_class + # Check if string. We cannot handle lists as that is too ambagious + if isinstance(model_id, str): + model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) + if metadata.name is None and model_name_normal is not None: + metadata.name = model_name_normal + if metadata.organization is None and organization_name is not None: + metadata.organization = organization_name + if metadata.basename is None and base_name is not None: + metadata.basename = base_name + if metadata.finetune is None and fine_tune is not None: + metadata.finetune = fine_tune + if metadata.version is None and version_string is not None: + metadata.version = version_string + if metadata.parameter_weight_class is None and parameter_weight_class is not None: + metadata.parameter_weight_class = parameter_weight_class + if metadata.source_url is None: + metadata.source_url = f"https://huggingface.co/{model_id}" + if metadata.source_hf_repo is None: + metadata.source_hf_repo = model_id if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]: # This is a model index which has model id that can be extracted into organization and model name @@ -100,11 +107,11 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # non huggingface model card standard but notice some model creator using it metadata.author = model_card.get("model_creator") if metadata.tags is None: - metadata.tags = model_card.get("tags", []) + metadata.tags = model_card.get("tags", None) if metadata.languages is None: - metadata.languages = model_card.get("language", model_card.get("languages", [])) + metadata.languages = model_card.get("language", model_card.get("languages", None)) if metadata.datasets is None: - metadata.datasets = model_card.get("datasets", model_card.get("dataset", [])) + metadata.datasets = model_card.get("datasets", model_card.get("dataset", None)) # load huggingface parameters if available hf_params = Metadata.load_huggingface_parameters(model_path) @@ -126,14 +133,28 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata.version = version_string if metadata.parameter_weight_class is None and parameter_weight_class is not None: metadata.parameter_weight_class = parameter_weight_class - if metadata.source_hf_repo is None and not Metadata.is_model_name_only(hf_name_or_path): + if not Metadata.is_model_name_only(hf_name_or_path): # Can't just have the model name as the source hf repo as a link to the huggingface website needs the org name and the model name - metadata.source_hf_repo = 
"https://huggingface.co/{hf_name_or_path}" + if metadata.source_url is None: + metadata.source_url = f"https://huggingface.co/{hf_name_or_path}" + if metadata.source_hf_repo is None: + metadata.source_hf_repo = hf_name_or_path # Use Directory Folder Name As Fallback Name - if metadata.name is None: - if model_path is not None and model_path.exists(): - metadata.name = model_path.name + if model_path is not None and model_path.exists(): + model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_path.name) + if metadata.name is None and model_name_normal is not None: + metadata.name = model_name_normal + if metadata.organization is None and organization_name is not None: + metadata.organization = organization_name + if metadata.basename is None and base_name is not None: + metadata.basename = base_name + if metadata.finetune is None and fine_tune is not None: + metadata.finetune = fine_tune + if metadata.version is None and version_string is not None: + metadata.version = version_string + if metadata.parameter_weight_class is None and parameter_weight_class is not None: + metadata.parameter_weight_class = parameter_weight_class # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 5ddfd8cb9745f..d3ccc99fe860c 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -44,6 +44,7 @@ def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str return size_class + def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention From 91e65d94853ee58eebda020a8fb1e447468a3a5c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Wed, 5 Jun 2024 03:51:38 +1000 Subject: [PATCH 21/65] convert-*.py: add unittest to metadata class --- convert_hf_to_gguf.py | 10 +- examples/convert_legacy_llama.py | 10 +- gguf-py/gguf/constants.py | 48 ++-- gguf-py/gguf/gguf_writer.py | 4 +- gguf-py/gguf/metadata.py | 391 +++++++++++++++++++------------ gguf-py/gguf/utility.py | 6 +- 6 files changed, 275 insertions(+), 194 deletions(-) mode change 100644 => 100755 gguf-py/gguf/metadata.py diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 41858dc0db5e4..fd07ff22cbee9 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -129,16 +129,16 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch] # Generate parameter weight class (useful for leader boards) if not yet determined - if self.metadata.parameter_weight_class is None: + if self.metadata.parameter_class_attribute is None: expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) - self.metadata.parameter_weight_class = gguf.parameter_weight_class(expert_count, weight_estimate) + self.metadata.parameter_class_attribute = gguf.parameter_class_attribute(expert_count, weight_estimate) # Extracts and converts the encoding scheme from the given file type name. e.g. 
'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.parameter_weight_class, output_type) + self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.parameter_class_attribute, output_type) # Filename Output if fname_out is not None: @@ -263,8 +263,8 @@ def set_gguf_meta_model(self): self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_hf_repo is not None: self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) - if self.metadata.parameter_weight_class is not None: - self.gguf_writer.add_parameter_weight_class(self.metadata.parameter_weight_class) + if self.metadata.parameter_class_attribute is not None: + self.gguf_writer.add_parameter_class_attribute(self.metadata.parameter_class_attribute) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) if self.metadata.languages is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 6e3d328488608..b903aecbd18c3 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -805,6 +805,8 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_source_url(metadata.source_url) if metadata.source_hf_repo is not None: self.gguf.add_source_hf_repo(metadata.source_hf_repo) + if metadata.parameter_class_attribute is not None: + self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) if metadata.tags is not None: self.gguf.add_tags(metadata.tags) if metadata.languages is not None: @@ -961,8 +963,6 @@ def write_all( of = OutputFile(fname_out, endianess=endianess) - print(metadata) - # meta data of.add_meta_model(params, metadata) of.add_meta_arch(params) @@ -1208,7 +1208,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_ basename = metadata.basename if metadata.basename is not None else None finetune = metadata.finetune if metadata.finetune is not None else None version = metadata.version if metadata.version is not None else None - parameter_weight_class = metadata.parameter_weight_class if metadata.parameter_weight_class is not None else gguf.parameter_weight_class(expert_count, model_params_count) + parameter_class_attribute = metadata.parameter_class_attribute if metadata.parameter_class_attribute is not None else gguf.parameter_class_attribute(expert_count, model_params_count) output_type = { GGMLFileType.AllF32: "F32", @@ -1216,7 +1216,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_ GGMLFileType.MostlyQ8_0: "Q8_0", }[file_type] - return gguf.naming_convention(name, basename, finetune, version, parameter_weight_class, output_type) + return gguf.naming_convention(name, basename, finetune, version, parameter_class_attribute, output_type) def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: @@ -1380,7 +1380,7 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata) - 
metadata.parameter_weight_class = gguf.parameter_weight_class(params.n_experts, model_params_count) + metadata.parameter_class_attribute = gguf.parameter_class_attribute(params.n_experts, model_params_count) params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 21d9e84b3d00c..47a1846a81ec1 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -19,30 +19,30 @@ class Keys: class General: - TYPE = "general.type" - ARCHITECTURE = "general.architecture" - QUANTIZATION_VERSION = "general.quantization_version" - ALIGNMENT = "general.alignment" - NAME = "general.name" - BASENAME = "general.basename" - FINETUNE = "general.finetune" - AUTHOR = "general.author" - QUANTIZED_BY = "general.quantized_by" - ORGANIZATION = "general.organization" - VERSION = "general.version" - BASE_VERSION = "general.base_version" - URL = "general.url" - DESCRIPTION = "general.description" - LICENSE = "general.license" - LICENSE_NAME = "general.license.name" - LICENSE_LINK = "general.license.link" - SOURCE_URL = "general.source.url" - SOURCE_HF_REPO = "general.source.huggingface.repository" - FILE_TYPE = "general.file_type" - PARAMETER_WEIGHT_CLASS = "general.parameter_weight_class" - TAGS = "general.tags" - LANGUAGES = "general.languages" - DATASETS = "general.datasets" + TYPE = "general.type" + ARCHITECTURE = "general.architecture" + QUANTIZATION_VERSION = "general.quantization_version" + ALIGNMENT = "general.alignment" + NAME = "general.name" + BASENAME = "general.basename" + FINETUNE = "general.finetune" + AUTHOR = "general.author" + QUANTIZED_BY = "general.quantized_by" + ORGANIZATION = "general.organization" + VERSION = "general.version" + BASE_VERSION = "general.base_version" + URL = "general.url" + DESCRIPTION = "general.description" + LICENSE = "general.license" + LICENSE_NAME = "general.license.name" + LICENSE_LINK = "general.license.link" + SOURCE_URL = "general.source.url" + SOURCE_HF_REPO = "general.source.huggingface.repository" + FILE_TYPE = "general.file_type" + PARAMETER_CLASS_ATTRIBUTE = "general.parameter_class_attribute" + TAGS = "general.tags" + LANGUAGES = "general.languages" + DATASETS = "general.datasets" class LLM: VOCAB_SIZE = "{arch}.vocab_size" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index b5950afab06ca..fe25fb2590f24 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -478,8 +478,8 @@ def add_source_hf_repo(self, repo: str) -> None: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) - def add_parameter_weight_class(self, parameter_weight_class: str) -> None: - self.add_string(Keys.General.PARAMETER_WEIGHT_CLASS, parameter_weight_class) + def add_parameter_class_attribute(self, parameter_class_attribute: str) -> None: + self.add_string(Keys.General.PARAMETER_CLASS_ATTRIBUTE, parameter_class_attribute) def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.General.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py old mode 100644 new mode 100755 index 08f49b7d0202f..1609a630647c2 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -1,12 +1,19 @@ +#!/usr/bin/env python3 + from __future__ import annotations import re import json +import unittest import frontmatter from pathlib import Path from typing import Optional from dataclasses import dataclass -from .constants import Keys + +if __name__ == '__main__': + from constants import Keys +else: 
+ from .constants import Keys @dataclass @@ -27,7 +34,7 @@ class Metadata: license_link: Optional[str] = None source_url: Optional[str] = None source_hf_repo: Optional[str] = None - parameter_weight_class: Optional[str] = None + parameter_class_attribute: Optional[str] = None tags: Optional[list[str]] = None languages: Optional[list[str]] = None datasets: Optional[list[str]] = None @@ -41,120 +48,11 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # Create a new Metadata instance metadata = Metadata() - # load huggingface model card if available - # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 model_card = Metadata.load_model_card(model_path) + hf_params = Metadata.load_hf_parameters(model_path) - if "model_name" in model_card: - # Not part of huggingface model card standard but notice some model creator using it - # such as TheBloke who would encode 'Mixtral 8X7B Instruct v0.1' into model_name - metadata.name = model_card.get("model_name") - - if "base_model" in model_card: - # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges) - model_id = model_card.get("base_model") - - # Check if string. We cannot handle lists as that is too ambagious - if isinstance(model_id, str): - model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) - if metadata.name is None and model_name_normal is not None: - metadata.name = model_name_normal - if metadata.organization is None and organization_name is not None: - metadata.organization = organization_name - if metadata.basename is None and base_name is not None: - metadata.basename = base_name - if metadata.finetune is None and fine_tune is not None: - metadata.finetune = fine_tune - if metadata.version is None and version_string is not None: - metadata.version = version_string - if metadata.parameter_weight_class is None and parameter_weight_class is not None: - metadata.parameter_weight_class = parameter_weight_class - if metadata.source_url is None: - metadata.source_url = f"https://huggingface.co/{model_id}" - if metadata.source_hf_repo is None: - metadata.source_hf_repo = model_id - - if "model-index" in model_card and len(model_card["model_name"]) == 1 and "name" in model_card["model_name"][0]: - # This is a model index which has model id that can be extracted into organization and model name - # if so then we can safely extract organization and name - # (This is a safe choice in case there is multiple models in one repo in the future) - model_id = model_card["model-index"][0].get("name") - model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_id) - - if metadata.name is None and model_name_normal is not None: - metadata.name = model_name_normal - if metadata.organization is None and organization_name is not None: - metadata.organization = organization_name - if metadata.basename is None and base_name is not None: - metadata.basename = base_name - if metadata.finetune is None and fine_tune is not None: - metadata.finetune = fine_tune - if metadata.version is None and version_string is not None: - metadata.version = version_string - if metadata.parameter_weight_class is None and parameter_weight_class is not None: - metadata.parameter_weight_class = parameter_weight_class - - if metadata.quantized_by is None: - # Not part of hugging face 
model card standard, but is used by TheBloke to credit them self for quantizing 3rd party models - metadata.quantized_by = model_card.get("quantized_by") - if metadata.license is None: - metadata.license = model_card.get("license") - if metadata.license_name is None: - metadata.license_name = model_card.get("license_name") - if metadata.license_link is None: - metadata.license_link = model_card.get("license_link") - if metadata.author is None: - # non huggingface model card standard but notice some model creator using it - metadata.author = model_card.get("model_creator") - if metadata.tags is None: - metadata.tags = model_card.get("tags", None) - if metadata.languages is None: - metadata.languages = model_card.get("language", model_card.get("languages", None)) - if metadata.datasets is None: - metadata.datasets = model_card.get("datasets", model_card.get("dataset", None)) - - # load huggingface parameters if available - hf_params = Metadata.load_huggingface_parameters(model_path) - - hf_name_or_path = hf_params.get("_name_or_path") - if hf_name_or_path is not None and Metadata.is_model_id(hf_name_or_path): - # Use _name_or_path only if its actually a model name and not some computer path - # e.g. 'meta-llama/Llama-2-7b-hf' - model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(hf_name_or_path) - if metadata.name is None and model_name_normal is not None: - metadata.name = model_name_normal - if metadata.organization is None and organization_name is not None: - metadata.organization = organization_name - if metadata.basename is None and base_name is not None: - metadata.basename = base_name - if metadata.finetune is None and fine_tune is not None: - metadata.finetune = fine_tune - if metadata.version is None and version_string is not None: - metadata.version = version_string - if metadata.parameter_weight_class is None and parameter_weight_class is not None: - metadata.parameter_weight_class = parameter_weight_class - if not Metadata.is_model_name_only(hf_name_or_path): - # Can't just have the model name as the source hf repo as a link to the huggingface website needs the org name and the model name - if metadata.source_url is None: - metadata.source_url = f"https://huggingface.co/{hf_name_or_path}" - if metadata.source_hf_repo is None: - metadata.source_hf_repo = hf_name_or_path - - # Use Directory Folder Name As Fallback Name - if model_path is not None and model_path.exists(): - model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class = Metadata.get_model_name_components(model_path.name) - if metadata.name is None and model_name_normal is not None: - metadata.name = model_name_normal - if metadata.organization is None and organization_name is not None: - metadata.organization = organization_name - if metadata.basename is None and base_name is not None: - metadata.basename = base_name - if metadata.finetune is None and fine_tune is not None: - metadata.finetune = fine_tune - if metadata.version is None and version_string is not None: - metadata.version = version_string - if metadata.parameter_weight_class is None and parameter_weight_class is not None: - metadata.parameter_weight_class = parameter_weight_class + # heuristics + metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path) # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp @@ -174,7 +72,7 @@ def load(metadata_override_path: 
Optional[Path] = None, model_path: Optional[Pat metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 - metadata.parameter_weight_class = metadata_override.get(Keys.General.PARAMETER_WEIGHT_CLASS, metadata.parameter_weight_class) # noqa: E202 + metadata.parameter_class_attribute = metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute) # noqa: E202 metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 @@ -207,7 +105,7 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, object]: return frontmatter.load(f) @staticmethod - def load_huggingface_parameters(model_path: Optional[Path] = None) -> dict[str, object]: + def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, object]: if model_path is None or not model_path.exists(): return {} @@ -220,64 +118,247 @@ def load_huggingface_parameters(model_path: Optional[Path] = None) -> dict[str, return json.load(f) @staticmethod - def is_model_id(name_or_path: Optional[str] = None) -> bool: - # Return True if the string has 1 or 0 slashes, indicating a model id - # Created specifically because of _name_or_path in hugging face parameter - if name_or_path is None: - return False - return name_or_path.count('/') <= 1 + def id_to_title(string): + # Convert capitalization into title form unless acronym or version number + string = string.strip().replace('-', ' ') + return ' '.join([w.title() if w.islower() and not re.match(r'^v\d+(?:\.\d+)*$', w) else w for w in string.split()]) @staticmethod - def is_model_name_only(name_or_path: Optional[str] = None) -> bool: - # Return True if the string has 0 slashes, indicating a model name only model id - # Created specifically because of _name_or_path in hugging face parameter - if name_or_path is None: - return False - return name_or_path.count('/') == 0 + def get_model_id_components(model_id: Optional[str] = None) -> dict[str, object]: + # Huggingface often store model id as '/' + # so let's parse it and apply some heuristics if possible for model name components - @staticmethod - def get_model_name_components(model_identifier: Optional[str] = None) -> dict[str, object]: - # Huggingface often store model id - - if model_identifier is None: + if model_id is None: # model ID missing return None, None, None, None, None, None - if ' ' in model_identifier: + if ' ' in model_id: # model ID is actually a normal human sentence # which means its most likely a normal model name only # not part of the hugging face naming standard, but whatever - return model_identifier, None, None, None, None, None + return model_id, None, None, None, None, None - if '/' in model_identifier: + if '/' in model_id: # model ID (huggingface style) - organization, model = model_identifier.split('/', 1) + org_component, model_full_name_component = model_id.split('/', 1) else: # model ID but missing org components - model = model_identifier - organization = None - - # Apply formatting to organization and model_name - # 'stable-diffusion-xl-base-1.0' --> 'Stable Diffusion 
Xl Base 1.0' + org_component, model_full_name_component = None, model_id - organization_name = organization.strip().replace('-', ' ').title() if organization is not None else None - model_name_normal = model.strip().replace('-', ' ').title() if model is not None else None + # Check if we erroneously matched against './' or '../' etc... + if org_component is not None and org_component[0] == '.': + org_component = None # Regular expression to extract model name components # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1' - - regex_match = re.compile(r'^(?P[A-Za-z0-9\s]*(?:(?:-[A-Za-z\s][A-Za-z0-9\s]*)*))' - r'(?:-(?P(?:\d+x)?\d+[A-Za-z]+))?' - r'(?:-(?P[A-Za-z0-9\s-]+))?' - r'(?:-(?Pv\d+(?:\.\d+)*))?$').match(model) + regex_match = re.compile(r'^(?P[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))' + r'(?:-(?P(?:\d+x)?\d+[A-Za-z]+)(?:-(?P[A-Za-z0-9\s-]+))?)?' + r'(?:-(?Pv\d+(?:\.\d+)*))?$').match(model_full_name_component) if not regex_match: - return model_name_normal, organization_name, None, None, None, None + return model_full_name_component, org_component, None, None, None, None components = regex_match.groupdict() - base_name = components.get("base_name") - fine_tune = components.get("fine_tune") - version_string = components.get("version_string") - parameter_weight_class = components.get("parameter_weight_class") + basename = components.get("basename") + finetune = components.get("finetune") + version = components.get("version") + parameter_class_attribute = components.get("parameter_class_attribute") + + return model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute + + @staticmethod + def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None) -> Metadata: + # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 + found_model_name = False + + # Model Card Heuristics + ######################## + if model_card is not None: + + if "model_name" in model_card: + # Not part of huggingface model card standard but notice some model creator using it + # such as TheBloke who would encode 'Mixtral 8X7B Instruct v0.1' into model_name + metadata.name = model_card.get("model_name") + + if "model-index" in model_card and len(model_card["model-index"]) == 1 and "name" in model_card["model-index"][0]: + # This is a model index which has model id that can be extracted into organization and model name + # if so then we can safely extract organization and name + # (This is a safe choice in case there is multiple models in one repo in the future) + model_id = model_card["model-index"][0]["name"] + model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + if metadata.name is None and model_full_name_component is not None: + metadata.name = Metadata.id_to_title(model_full_name_component) + if metadata.organization is None and org_component is not None: + metadata.organization = Metadata.id_to_title(org_component) + if metadata.basename is None and basename is not None: + metadata.basename = basename + if metadata.finetune is None and finetune is not None: + metadata.finetune = finetune + if metadata.version is None and version is not None: + metadata.version = version + if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: 
+ metadata.parameter_class_attribute = parameter_class_attribute + if metadata.source_url is None and org_component is not None and model_full_name_component is not None: + metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" + if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: + metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" + + found_model_name = True + + if "base_model" in model_card and isinstance(model_card["base_model"], str) and not found_model_name: + # Check if string. We cannot handle lists as that is too ambagious + # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges) + model_id = model_card.get("base_model") + model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + if metadata.name is None and model_full_name_component is not None: + metadata.name = Metadata.id_to_title(model_full_name_component) + if metadata.organization is None and org_component is not None: + metadata.organization = Metadata.id_to_title(org_component) + if metadata.basename is None and basename is not None: + metadata.basename = basename + if metadata.finetune is None and finetune is not None: + metadata.finetune = finetune + if metadata.version is None and version is not None: + metadata.version = version + if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: + metadata.parameter_class_attribute = parameter_class_attribute + if metadata.source_url is None and org_component is not None and model_full_name_component is not None: + metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" + if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: + metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" + + found_model_name = True + + if metadata.quantized_by is None: + # Not part of hugging face model card standard, but is used by TheBloke to credit them self for quantizing 3rd party models + metadata.quantized_by = model_card.get("quantized_by") + if metadata.license is None: + metadata.license = model_card.get("license") + if metadata.license_name is None: + metadata.license_name = model_card.get("license_name") + if metadata.license_link is None: + metadata.license_link = model_card.get("license_link") + if metadata.author is None: + # non huggingface model card standard but notice some model creator using it + metadata.author = model_card.get("model_creator") + if metadata.tags is None: + metadata.tags = model_card.get("tags", None) + if metadata.languages is None: + metadata.languages = model_card.get("language", model_card.get("languages", None)) + if metadata.datasets is None: + metadata.datasets = model_card.get("datasets", model_card.get("dataset", None)) + + # Hugging Face Parameter Heuristics + #################################### + + if hf_params is not None: + hf_name_or_path = hf_params.get("_name_or_path") + + if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1 and not found_model_name: + # Use _name_or_path only if its actually a model name and not some computer path + # e.g. 
'meta-llama/Llama-2-7b-hf' + model_id = hf_name_or_path + model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + if metadata.name is None and model_full_name_component is not None: + metadata.name = Metadata.id_to_title(model_full_name_component) + if metadata.organization is None and org_component is not None: + metadata.organization = Metadata.id_to_title(org_component) + if metadata.basename is None and basename is not None: + metadata.basename = basename + if metadata.finetune is None and finetune is not None: + metadata.finetune = finetune + if metadata.version is None and version is not None: + metadata.version = version + if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: + metadata.parameter_class_attribute = parameter_class_attribute + if metadata.source_url is None and org_component is not None and model_full_name_component is not None: + metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" + if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: + metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" + + # Directory Folder Name Fallback Heuristics + ############################################ + if model_path is not None: + model_id = model_path.name + model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + if metadata.name is None and model_full_name_component is not None: + metadata.name = Metadata.id_to_title(model_full_name_component) + if metadata.organization is None and org_component is not None: + metadata.organization = Metadata.id_to_title(org_component) + if metadata.basename is None and basename is not None: + metadata.basename = basename + if metadata.finetune is None and finetune is not None: + metadata.finetune = finetune + if metadata.version is None and version is not None: + metadata.version = version + if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: + metadata.parameter_class_attribute = parameter_class_attribute + if metadata.source_url is None and org_component is not None and model_full_name_component is not None: + metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" + if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: + metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" + + return metadata + + +class TestStringMethods(unittest.TestCase): + + def test_get_model_id_components(self): + self.assertEqual(Metadata.get_model_id_components("Mistral/Mixtral-8x7B-Instruct-v0.1"), + ('Mixtral-8x7B-Instruct-v0.1', "Mistral", 'Mixtral', 'Instruct', 'v0.1', '8x7B')) + self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B-Instruct-v0.1"), + ('Mixtral-8x7B-Instruct-v0.1', None, 'Mixtral', 'Instruct', 'v0.1', '8x7B')) + self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B-Instruct"), + ('Mixtral-8x7B-Instruct', None, 'Mixtral', 'Instruct', None, '8x7B')) + self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B-v0.1"), + ('Mixtral-8x7B-v0.1', None, 'Mixtral', None, 'v0.1', '8x7B')) + self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B"), + ('Mixtral-8x7B', None, 'Mixtral', None, None, '8x7B')) + self.assertEqual(Metadata.get_model_id_components("Mixtral"), 
+ ('Mixtral', None, 'Mixtral', None, None, None)) + self.assertEqual(Metadata.get_model_id_components("Mixtral-v0.1"), + ('Mixtral-v0.1', None, 'Mixtral', None, 'v0.1', None)) + self.assertEqual(Metadata.get_model_id_components("hermes-2-pro-llama-3-8b-DPO"), + ('hermes-2-pro-llama-3-8b-DPO', None, 'hermes-2-pro-llama-3', 'DPO', None, '8b')) + self.assertEqual(Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"), + ('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, "8B")) + + def test_apply_metadata_heuristic_from_model_card(self): + # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/README.md + model_card = { + 'base_model': 'NousResearch/Meta-Llama-3-8B', + 'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], + 'model-index': [{'name': 'Hermes-2-Pro-Llama-3-8B', 'results': []}], + 'language': ['en'], + 'datasets': ['teknium/OpenHermes-2.5'], + 'widget': [{'example_title': 'Hermes 2 Pro', 'messages': [{'role': 'system', 'content': 'You are a sentient, superintelligent artificial general intelligence, here to teach and assist me.'}, {'role': 'user', 'content': 'Write a short story about Goku discovering kirby has teamed up with Majin Buu to destroy the world.'}]}] + } + expected = Metadata(name='Hermes 2 Pro Llama 3 8B', basename='Hermes-2-Pro-Llama-3', finetune=None, author=None, quantized_by=None, organization=None, version=None, base_version=None, url=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_hf_repo=None, parameter_class_attribute='8B', tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) + + got = Metadata.apply_metadata_heuristic(Metadata(), model_card, None, None) + + self.assertEqual(got, expected) + + def test_apply_metadata_heuristic_from_hf_parameters(self): + # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/config.json + hf_params = {"_name_or_path": "./hermes-2-pro-llama-3-8b-DPO"} + + expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, base_version=None, url=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_hf_repo=None, parameter_class_attribute='8b', tags=None, languages=None, datasets=None) + + got = Metadata.apply_metadata_heuristic(Metadata(), None, hf_params, None) + + self.assertEqual(got, expected) + + def test_apply_metadata_heuristic_from_model_dir(self): + # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/config.json + model_dir_path = Path("./hermes-2-pro-llama-3-8b-DPO") + + expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, base_version=None, url=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_hf_repo=None, parameter_class_attribute='8b', tags=None, languages=None, datasets=None) + + got = Metadata.apply_metadata_heuristic(Metadata(), None, None, model_dir_path) + + self.assertEqual(got, expected) + - return model_name_normal, organization_name, base_name, fine_tune, version_string, parameter_weight_class 
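# --- Illustrative sketch only; not part of this patch series. ---
# Shows how the heuristics exercised by the unit tests above are meant to
# compose: get_model_id_components() splits a model id into its naming
# components, and id_to_title() turns those components into a human
# readable name, as the converters do before building the output metadata.
# Assumes gguf-py and its dependencies (e.g. python-frontmatter) are
# installed so that gguf.metadata is importable; the sample model id is
# taken from the unit tests above.
from gguf.metadata import Metadata

model_full_name, org, basename, finetune, version, size_label = \
    Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B")

example = Metadata(
    name=Metadata.id_to_title(model_full_name),  # 'Meta Llama 3 8B'
    organization=Metadata.id_to_title(org),      # 'NousResearch'
    basename=basename,                           # 'Meta-Llama-3'
    parameter_class_attribute=size_label,        # '8B'
)
print(example)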
+if __name__ == '__main__': + unittest.main() diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index d3ccc99fe860c..530bdaabef4fc 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -34,7 +34,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" -def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str: +def parameter_class_attribute(expert_count_int:int, model_params_count: int) -> str: per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) if expert_count_int is not None and expert_count_int > 0: @@ -45,7 +45,7 @@ def parameter_weight_class(expert_count_int:int, model_params_count: int) -> str return size_class -def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_weight_class: str, output_type: str) -> str: +def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_class_attribute: str, output_type: str) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: @@ -55,7 +55,7 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers else: name = "ggml-model" - parameters = f"-{parameter_weight_class}" if parameter_weight_class is not None else "" + parameters = f"-{parameter_class_attribute}" if parameter_class_attribute is not None else "" finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else "" From d060fcdbe277d614ddf9dec2900b19d6a8dd012b Mon Sep 17 00:00:00 2001 From: brian khuu Date: Fri, 7 Jun 2024 03:33:21 +1000 Subject: [PATCH 22/65] convert-*.py: adjusted authorship KV store --- convert_hf_to_gguf.py | 35 ++++++++++- examples/convert_legacy_llama.py | 31 +++++++++- gguf-py/gguf/constants.py | 46 +++++++++++--- gguf-py/gguf/gguf_writer.py | 39 +++++++++++- gguf-py/gguf/metadata.py | 100 +++++++++++++------------------ 5 files changed, 179 insertions(+), 72 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index fd07ff22cbee9..4c8da99d25c7b 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -251,20 +251,51 @@ def set_gguf_meta_model(self): self.gguf_writer.add_organization(self.metadata.organization) if self.metadata.version is not None: self.gguf_writer.add_version(self.metadata.version) - if self.metadata.base_version is not None: - self.gguf_writer.add_base_version(self.metadata.base_version) if self.metadata.url is not None: self.gguf_writer.add_url(self.metadata.url) + if self.metadata.doi is not None: + self.gguf_writer.add_doi(self.metadata.doi) + if self.metadata.uuid is not None: + self.gguf_writer.add_uuid(self.metadata.uuid) + if self.metadata.hf_repo is not None: + self.gguf_writer.add_hf_repo(self.metadata.hf_repo) if self.metadata.description is not None: self.gguf_writer.add_description(self.metadata.description) if self.metadata.license is not None: self.gguf_writer.add_license(self.metadata.license) + if self.metadata.license_name is not None: + self.gguf_writer.add_license_name(self.metadata.license_name) + if self.metadata.license_link is not None: + self.gguf_writer.add_license_link(self.metadata.license_link) if self.metadata.source_url is not None: self.gguf_writer.add_source_url(self.metadata.source_url) + if self.metadata.source_doi is not None: + 
self.gguf_writer.add_source_doi(self.metadata.source_doi) + if self.metadata.source_uuid is not None: + self.gguf_writer.add_source_uuid(self.metadata.source_uuid) if self.metadata.source_hf_repo is not None: self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) if self.metadata.parameter_class_attribute is not None: self.gguf_writer.add_parameter_class_attribute(self.metadata.parameter_class_attribute) + if self.metadata.parents is not None: + metadata.parent_count = len(self.metadata.parents) + for key, parent_entry in self.metadata.parents: + if "name" in parent_entry: + self.gguf_writer.add_parent_name(key, parent_entry.get("name")) + if "author" in parent_entry: + self.gguf_writer.add_parent_author(key, parent_entry.get("author")) + if "version" in parent_entry: + self.gguf_writer.add_parent_version(key, parent_entry.get("version")) + if "organization" in parent_entry: + self.gguf_writer.add_parent_organization(key, parent_entry.get("organization")) + if "url" in parent_entry: + self.gguf_writer.add_parent_url(key, parent_entry.get("url")) + if "doi" in parent_entry: + self.gguf_writer.add_parent_doi(key, parent_entry.get("doi")) + if "uuid" in parent_entry: + self.gguf_writer.add_parent_uuid(key, parent_entry.get("uuid")) + if "hf_repo" in parent_entry: + self.gguf_writer.add_parent_hf_repo(key, parent_entry.get("hf_repo")) if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) if self.metadata.languages is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index b903aecbd18c3..cc7acb9c88605 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -793,20 +793,47 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.add_organization(metadata.organization) if metadata.version is not None: self.gguf.add_version(metadata.version) - if metadata.base_version is not None: - self.gguf.add_base_version(metadata.base_version) if metadata.url is not None: self.gguf.add_url(metadata.url) + if metadata.doi is not None: + self.gguf.add_doi(metadata.doi) + if metadata.uuid is not None: + self.gguf.add_uuid(metadata.uuid) + if metadata.hf_repo is not None: + self.gguf.add_hf_repo(metadata.hf_repo) if metadata.description is not None: self.gguf.add_description(metadata.description) if metadata.license is not None: self.gguf.add_license(metadata.license) + if metadata.license_name is not None: + self.gguf.add_license_name(metadata.license_name) + if metadata.license_link is not None: + self.gguf.add_license_link(metadata.license_link) if metadata.source_url is not None: self.gguf.add_source_url(metadata.source_url) if metadata.source_hf_repo is not None: self.gguf.add_source_hf_repo(metadata.source_hf_repo) if metadata.parameter_class_attribute is not None: self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) + if metadata.parents is not None: + metadata.parent_count = len(metadata.parents) + for key, parent_entry in metadata.parents: + if "name" in parent_entry: + self.gguf.add_parent_name(key, parent_entry.get("name")) + if "author" in parent_entry: + self.gguf.add_parent_author(key, parent_entry.get("author")) + if "version" in parent_entry: + self.gguf.add_parent_version(key, parent_entry.get("version")) + if "organization" in parent_entry: + self.gguf.add_parent_organization(key, parent_entry.get("organization")) + if "url" in parent_entry: + self.gguf.add_parent_url(key, parent_entry.get("url")) + if "doi" in parent_entry: + 
self.gguf.add_parent_doi(key, parent_entry.get("doi")) + if "uuid" in parent_entry: + self.gguf.add_parent_uuid(key, parent_entry.get("uuid")) + if "hf_repo" in parent_entry: + self.gguf.add_parent_hf_repo(key, parent_entry.get("hf_repo")) if metadata.tags is not None: self.gguf.add_tags(metadata.tags) if metadata.languages is not None: diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 47a1846a81ec1..1484ad5168504 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -23,23 +23,53 @@ class General: ARCHITECTURE = "general.architecture" QUANTIZATION_VERSION = "general.quantization_version" ALIGNMENT = "general.alignment" + FILE_TYPE = "general.file_type" + + # Authorship Metadata NAME = "general.name" - BASENAME = "general.basename" - FINETUNE = "general.finetune" AUTHOR = "general.author" - QUANTIZED_BY = "general.quantized_by" - ORGANIZATION = "general.organization" VERSION = "general.version" - BASE_VERSION = "general.base_version" - URL = "general.url" + ORGANIZATION = "general.organization" + + BASENAME = "general.basename" + FINETUNE = "general.finetune" DESCRIPTION = "general.description" + QUANTIZED_BY = "general.quantized_by" + PARAMETER_CLASS_ATTRIBUTE = "general.parameter_class_attribute" + + # Licensing details LICENSE = "general.license" LICENSE_NAME = "general.license.name" LICENSE_LINK = "general.license.link" + + # Typically represents the converted GGUF repo (Unless native) + URL = "general.url" + DOI = "general.doi" + UUID = "general.uuid" + HF_URL = "general.huggingface.repository" + + # Typically represents the original source repository (e.g. safetensors) + # that this was SOURCE_URL = "general.source.url" + SOURCE_DOI = "general.source.doi" + SOURCE_UUID = "general.source.uuid" SOURCE_HF_REPO = "general.source.huggingface.repository" - FILE_TYPE = "general.file_type" - PARAMETER_CLASS_ATTRIBUTE = "general.parameter_class_attribute" + + # This represents the parent model that the converted/source model was + # derived from on allowing users to trace the linage of a model. + # E.g. 
A finetune model would have the base model as the parent + # (A model can have multiple parent, especially if it's a merged model) + PARENTS_COUNT = "general.parents.count" + PARENTS_NAME = "general.parents.{id}.name" + PARENTS_AUTHOR = "general.parents.{id}.author" + PARENTS_VERSION = "general.parents.{id}.version" + PARENTS_ORGANIZATION = "general.parents.{id}.organization" + PARENTS_URL = "general.parents.{id}.url" + PARENTS_DOI = "general.parents.{id}.doi" + PARENTS_UUID = "general.parents.{id}.uuid" + PARENTS_HF_REPO = "general.parents.{id}.huggingface.repository" + + # Array based KV stores TAGS = "general.tags" LANGUAGES = "general.languages" DATASETS = "general.datasets" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index fe25fb2590f24..b38f000301110 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -448,15 +448,21 @@ def add_organization(self, organization: str) -> None: def add_version(self, version: str) -> None: self.add_string(Keys.General.VERSION, version) - def add_base_version(self, version: str) -> None: - self.add_string(Keys.General.BASE_VERSION, version) - def add_tensor_data_layout(self, layout: str) -> None: self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout) def add_url(self, url: str) -> None: self.add_string(Keys.General.URL, url) + def add_doi(self, doi: str) -> None: + self.add_string(Keys.General.DOI, doi) + + def add_uuid(self, uuid: str) -> None: + self.add_string(Keys.General.UUID, uuid) + + def add_hf_repo(self, hf_repo: str) -> None: + self.add_string(Keys.General.HF_REPO, hf_repo) + def add_description(self, description: str) -> None: self.add_string(Keys.General.DESCRIPTION, description) @@ -481,6 +487,33 @@ def add_file_type(self, ftype: int) -> None: def add_parameter_class_attribute(self, parameter_class_attribute: str) -> None: self.add_string(Keys.General.PARAMETER_CLASS_ATTRIBUTE, parameter_class_attribute) + def add_parent_count(self, parent_count: int) -> None: + self.add_uint32(Keys.General.PARENTS_COUNT, parent_count) + + def add_parent_name(self, parent_id: int, name: str) -> None: + self.add_string(Keys.General.PARENTS_NAME.format(id=self.parent_id), name) + + def add_parent_author(self, parent_id: int, author: str) -> None: + self.add_string(Keys.General.PARENTS_AUTHOR.format(id=self.parent_id), author) + + def add_parent_version(self, parent_id: int, version: str) -> None: + self.add_string(Keys.General.PARENTS_VERSION.format(id=self.parent_id), version) + + def add_parent_organization(self, parent_id: int, organization: str) -> None: + self.add_string(Keys.General.PARENTS_ORGANIZATION.format(id=self.parent_id), organization) + + def add_parent_url(self, parent_id: int, url: str) -> None: + self.add_string(Keys.General.PARENTS_URL.format(id=self.parent_id), url) + + def add_parent_doi(self, parent_id: int, doi: str) -> None: + self.add_string(Keys.General.PARENTS_DOI.format(id=self.parent_id), doi) + + def add_parent_uuid(self, parent_id: int, uuid: str) -> None: + self.add_string(Keys.General.PARENTS_UUID.format(id=self.parent_id), uuid) + + def add_parent_hf_repo(self, parent_id: int, hf_repo: str) -> None: + self.add_string(Keys.General.PARENTS_HF_REPO.format(id=self.parent_id), hf_repo) + def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.General.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 1609a630647c2..e1bbfc78c56b7 100755 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -26,15 +26,20 @@ 
class Metadata: quantized_by: Optional[str] = None organization: Optional[str] = None version: Optional[str] = None - base_version: Optional[str] = None url: Optional[str] = None + doi: Optional[str] = None + uuid: Optional[str] = None + hf_repo: Optional[str] = None description: Optional[str] = None license: Optional[str] = None license_name: Optional[str] = None license_link: Optional[str] = None source_url: Optional[str] = None + source_doi: Optional[str] = None + source_uuid: Optional[str] = None source_hf_repo: Optional[str] = None parameter_class_attribute: Optional[str] = None + parents: Optional[list[dict]] = None tags: Optional[list[str]] = None languages: Optional[list[str]] = None datasets: Optional[list[str]] = None @@ -57,25 +62,37 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 - metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 - metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 - metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 - metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by ) # noqa: E202 - metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 - metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 - metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 - metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 - metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 - metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 - metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + + metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 + metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 + + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 + metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by ) # noqa: E202 metadata.parameter_class_attribute = metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute) # noqa: E202 - 
metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 - metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 - metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 + + metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + + metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 + metadata.doi = metadata_override.get(Keys.General.DOI , metadata.doi ) # noqa: E202 + metadata.uuid = metadata_override.get(Keys.General.UUID , metadata.uuid ) # noqa: E202 + metadata.hf_repo = metadata_override.get(Keys.General.HF_REPO , metadata.hf_repo ) # noqa: E202 + + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 + metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI , metadata.source_doi ) # noqa: E202 + metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID , metadata.source_uuid ) # noqa: E202 + metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + + metadata.parent_count = metadata_override.get("general.parents" , metadata.parent_count ) # noqa: E202 + + metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 + metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 + metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 # Direct Metadata Override (via direct cli argument) if model_name is not None: @@ -169,7 +186,7 @@ def get_model_id_components(model_id: Optional[str] = None) -> dict[str, object] @staticmethod def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None) -> Metadata: # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 - found_model_name = False + found_base_model = False # Model Card Heuristics ######################## @@ -180,32 +197,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No # such as TheBloke who would encode 'Mixtral 8X7B Instruct v0.1' into model_name metadata.name = model_card.get("model_name") - if "model-index" in model_card and len(model_card["model-index"]) == 1 and "name" in model_card["model-index"][0]: - # This is a model index which has model id that can be extracted into organization and model name - # if so then we can safely extract organization and name - # (This is a safe choice in case there is multiple models in one repo in the future) - model_id = model_card["model-index"][0]["name"] - model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) - if metadata.name is None and model_full_name_component is not None: - metadata.name = Metadata.id_to_title(model_full_name_component) - if metadata.organization is None and org_component is not None: - metadata.organization = Metadata.id_to_title(org_component) - if metadata.basename is None and basename is not None: - metadata.basename = basename - 
if metadata.finetune is None and finetune is not None: - metadata.finetune = finetune - if metadata.version is None and version is not None: - metadata.version = version - if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: - metadata.parameter_class_attribute = parameter_class_attribute - if metadata.source_url is None and org_component is not None and model_full_name_component is not None: - metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" - if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: - metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" - - found_model_name = True - - if "base_model" in model_card and isinstance(model_card["base_model"], str) and not found_model_name: + if "base_model" in model_card and isinstance(model_card["base_model"], str) and not found_base_model: # Check if string. We cannot handle lists as that is too ambagious # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges) model_id = model_card.get("base_model") @@ -227,7 +219,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" - found_model_name = True + found_base_model = True if metadata.quantized_by is None: # Not part of hugging face model card standard, but is used by TheBloke to credit them self for quantizing 3rd party models @@ -254,7 +246,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No if hf_params is not None: hf_name_or_path = hf_params.get("_name_or_path") - if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1 and not found_model_name: + if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1 and not found_base_model: # Use _name_or_path only if its actually a model name and not some computer path # e.g. 
'meta-llama/Llama-2-7b-hf' model_id = hf_name_or_path @@ -333,7 +325,7 @@ def test_apply_metadata_heuristic_from_model_card(self): 'datasets': ['teknium/OpenHermes-2.5'], 'widget': [{'example_title': 'Hermes 2 Pro', 'messages': [{'role': 'system', 'content': 'You are a sentient, superintelligent artificial general intelligence, here to teach and assist me.'}, {'role': 'user', 'content': 'Write a short story about Goku discovering kirby has teamed up with Majin Buu to destroy the world.'}]}] } - expected = Metadata(name='Hermes 2 Pro Llama 3 8B', basename='Hermes-2-Pro-Llama-3', finetune=None, author=None, quantized_by=None, organization=None, version=None, base_version=None, url=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_hf_repo=None, parameter_class_attribute='8B', tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) + expected = Metadata(name='Meta Llama 3 8B', basename='Meta-Llama-3', finetune=None, author=None, quantized_by=None, organization='NousResearch', version=None, url=None, doi=None, uuid=None, hf_repo=None, description=None, license=None, license_name=None, license_link=None, source_url='https://huggingface.co/NousResearch/Meta-Llama-3-8B', source_doi=None, source_uuid=None, source_hf_repo='NousResearch/Meta-Llama-3-8B', parameter_class_attribute='8B', parents=None, tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) got = Metadata.apply_metadata_heuristic(Metadata(), model_card, None, None) @@ -342,21 +334,15 @@ def test_apply_metadata_heuristic_from_model_card(self): def test_apply_metadata_heuristic_from_hf_parameters(self): # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/config.json hf_params = {"_name_or_path": "./hermes-2-pro-llama-3-8b-DPO"} - - expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, base_version=None, url=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_hf_repo=None, parameter_class_attribute='8b', tags=None, languages=None, datasets=None) - + expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, url=None, doi=None, uuid=None, hf_repo=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_doi=None, source_uuid=None, source_hf_repo=None, parameter_class_attribute='8b', parents=None, tags=None, languages=None, datasets=None) got = Metadata.apply_metadata_heuristic(Metadata(), None, hf_params, None) - self.assertEqual(got, expected) def test_apply_metadata_heuristic_from_model_dir(self): # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/config.json model_dir_path = Path("./hermes-2-pro-llama-3-8b-DPO") - - expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, base_version=None, url=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_hf_repo=None, 
parameter_class_attribute='8b', tags=None, languages=None, datasets=None) - + expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, url=None, doi=None, uuid=None, hf_repo=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_doi=None, source_uuid=None, source_hf_repo=None, parameter_class_attribute='8b', parents=None, tags=None, languages=None, datasets=None) got = Metadata.apply_metadata_heuristic(Metadata(), None, None, model_dir_path) - self.assertEqual(got, expected) From eaa47f554610371c81249a25255521c5aa4ee7a4 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sat, 8 Jun 2024 21:54:20 +1000 Subject: [PATCH 23/65] convert-*.py: separated unit test, hf_repo to repo_url --- convert_hf_to_gguf.py | 86 +++++---- examples/convert_legacy_llama.py | 88 +++++---- gguf-py/README.md | 8 + gguf-py/gguf/constants.py | 42 +++-- gguf-py/gguf/gguf_writer.py | 117 ++++++------ gguf-py/gguf/metadata.py | 294 +++++++++++++++++-------------- gguf-py/tests/__init__.py | 1 + gguf-py/tests/test_metadata.py | 64 +++++++ 8 files changed, 418 insertions(+), 282 deletions(-) mode change 100755 => 100644 gguf-py/gguf/metadata.py create mode 100644 gguf-py/tests/__init__.py create mode 100755 gguf-py/tests/test_metadata.py diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 4c8da99d25c7b..99a76cd70e120 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -239,63 +239,71 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", " def set_gguf_meta_model(self): self.gguf_writer.add_name(self.metadata.name) - if self.metadata.basename is not None: - self.gguf_writer.add_basename(self.metadata.basename) - if self.metadata.finetune is not None: - self.gguf_writer.add_finetune(self.metadata.finetune) if self.metadata.author is not None: self.gguf_writer.add_author(self.metadata.author) - if self.metadata.quantized_by is not None: - self.gguf_writer.add_quantized_by(self.metadata.quantized_by) - if self.metadata.organization is not None: - self.gguf_writer.add_organization(self.metadata.organization) if self.metadata.version is not None: self.gguf_writer.add_version(self.metadata.version) - if self.metadata.url is not None: - self.gguf_writer.add_url(self.metadata.url) - if self.metadata.doi is not None: - self.gguf_writer.add_doi(self.metadata.doi) - if self.metadata.uuid is not None: - self.gguf_writer.add_uuid(self.metadata.uuid) - if self.metadata.hf_repo is not None: - self.gguf_writer.add_hf_repo(self.metadata.hf_repo) + if self.metadata.organization is not None: + self.gguf_writer.add_organization(self.metadata.organization) + + if self.metadata.finetune is not None: + self.gguf_writer.add_finetune(self.metadata.finetune) + if self.metadata.basename is not None: + self.gguf_writer.add_basename(self.metadata.basename) + if self.metadata.description is not None: self.gguf_writer.add_description(self.metadata.description) + if self.metadata.quantized_by is not None: + self.gguf_writer.add_quantized_by(self.metadata.quantized_by) + + if self.metadata.parameter_class_attribute is not None: + self.gguf_writer.add_parameter_class_attribute(self.metadata.parameter_class_attribute) + if self.metadata.license is not None: self.gguf_writer.add_license(self.metadata.license) if self.metadata.license_name is not None: self.gguf_writer.add_license_name(self.metadata.license_name) if self.metadata.license_link is not 
None: self.gguf_writer.add_license_link(self.metadata.license_link) + + if self.metadata.url is not None: + self.gguf_writer.add_url(self.metadata.url) + if self.metadata.doi is not None: + self.gguf_writer.add_doi(self.metadata.doi) + if self.metadata.uuid is not None: + self.gguf_writer.add_uuid(self.metadata.uuid) + if self.metadata.repo_url is not None: + self.gguf_writer.add_repo_url(self.metadata.repo_url) + if self.metadata.source_url is not None: self.gguf_writer.add_source_url(self.metadata.source_url) if self.metadata.source_doi is not None: self.gguf_writer.add_source_doi(self.metadata.source_doi) if self.metadata.source_uuid is not None: self.gguf_writer.add_source_uuid(self.metadata.source_uuid) - if self.metadata.source_hf_repo is not None: - self.gguf_writer.add_source_hf_repo(self.metadata.source_hf_repo) - if self.metadata.parameter_class_attribute is not None: - self.gguf_writer.add_parameter_class_attribute(self.metadata.parameter_class_attribute) - if self.metadata.parents is not None: - metadata.parent_count = len(self.metadata.parents) - for key, parent_entry in self.metadata.parents: - if "name" in parent_entry: - self.gguf_writer.add_parent_name(key, parent_entry.get("name")) - if "author" in parent_entry: - self.gguf_writer.add_parent_author(key, parent_entry.get("author")) - if "version" in parent_entry: - self.gguf_writer.add_parent_version(key, parent_entry.get("version")) - if "organization" in parent_entry: - self.gguf_writer.add_parent_organization(key, parent_entry.get("organization")) - if "url" in parent_entry: - self.gguf_writer.add_parent_url(key, parent_entry.get("url")) - if "doi" in parent_entry: - self.gguf_writer.add_parent_doi(key, parent_entry.get("doi")) - if "uuid" in parent_entry: - self.gguf_writer.add_parent_uuid(key, parent_entry.get("uuid")) - if "hf_repo" in parent_entry: - self.gguf_writer.add_parent_hf_repo(key, parent_entry.get("hf_repo")) + if self.metadata.source_repo_url is not None: + self.gguf_writer.add_source_repo_url(self.metadata.source_repo_url) + + if self.metadata.base_models is not None: + self.gguf_writer.add_base_model_count(len(self.metadata.base_models)) + for key, base_model_entry in enumerate(self.metadata.base_models): + if "name" in base_model_entry: + self.gguf_writer.add_base_model_name(key, base_model_entry["name"]) + if "author" in base_model_entry: + self.gguf_writer.add_base_model_author(key, base_model_entry["author"]) + if "version" in base_model_entry: + self.gguf_writer.add_base_model_version(key, base_model_entry["version"]) + if "organization" in base_model_entry: + self.gguf_writer.add_base_model_organization(key, base_model_entry["organization"]) + if "url" in base_model_entry: + self.gguf_writer.add_base_model_url(key, base_model_entry["url"]) + if "doi" in base_model_entry: + self.gguf_writer.add_base_model_doi(key, base_model_entry["doi"]) + if "uuid" in base_model_entry: + self.gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"]) + if "repo_url" in base_model_entry: + self.gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"]) + if self.metadata.tags is not None: self.gguf_writer.add_tags(self.metadata.tags) if self.metadata.languages is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index cc7acb9c88605..bde27a9375ba3 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -783,57 +783,71 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_name(name) - 
if metadata.basename is not None: - self.gguf.add_basename(metadata.basename) - if metadata.finetune is not None: - self.gguf.add_finetune(metadata.finetune) if metadata.author is not None: self.gguf.add_author(metadata.author) - if metadata.organization is not None: - self.add_organization(metadata.organization) if metadata.version is not None: self.gguf.add_version(metadata.version) - if metadata.url is not None: - self.gguf.add_url(metadata.url) - if metadata.doi is not None: - self.gguf.add_doi(metadata.doi) - if metadata.uuid is not None: - self.gguf.add_uuid(metadata.uuid) - if metadata.hf_repo is not None: - self.gguf.add_hf_repo(metadata.hf_repo) + if metadata.organization is not None: + self.gguf.add_organization(metadata.organization) + + if metadata.finetune is not None: + self.gguf.add_finetune(metadata.finetune) + if metadata.basename is not None: + self.gguf.add_basename(metadata.basename) + if metadata.description is not None: self.gguf.add_description(metadata.description) + if metadata.quantized_by is not None: + self.gguf.add_quantized_by(metadata.quantized_by) + + if metadata.parameter_class_attribute is not None: + self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) + if metadata.license is not None: self.gguf.add_license(metadata.license) if metadata.license_name is not None: self.gguf.add_license_name(metadata.license_name) if metadata.license_link is not None: self.gguf.add_license_link(metadata.license_link) + + if metadata.url is not None: + self.gguf.add_url(metadata.url) + if metadata.doi is not None: + self.gguf.add_doi(metadata.doi) + if metadata.uuid is not None: + self.gguf.add_uuid(metadata.uuid) + if metadata.repo_url is not None: + self.gguf.add_repo_url(metadata.repo_url) + if metadata.source_url is not None: self.gguf.add_source_url(metadata.source_url) - if metadata.source_hf_repo is not None: - self.gguf.add_source_hf_repo(metadata.source_hf_repo) - if metadata.parameter_class_attribute is not None: - self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) - if metadata.parents is not None: - metadata.parent_count = len(metadata.parents) - for key, parent_entry in metadata.parents: - if "name" in parent_entry: - self.gguf.add_parent_name(key, parent_entry.get("name")) - if "author" in parent_entry: - self.gguf.add_parent_author(key, parent_entry.get("author")) - if "version" in parent_entry: - self.gguf.add_parent_version(key, parent_entry.get("version")) - if "organization" in parent_entry: - self.gguf.add_parent_organization(key, parent_entry.get("organization")) - if "url" in parent_entry: - self.gguf.add_parent_url(key, parent_entry.get("url")) - if "doi" in parent_entry: - self.gguf.add_parent_doi(key, parent_entry.get("doi")) - if "uuid" in parent_entry: - self.gguf.add_parent_uuid(key, parent_entry.get("uuid")) - if "hf_repo" in parent_entry: - self.gguf.add_parent_hf_repo(key, parent_entry.get("hf_repo")) + if metadata.source_doi is not None: + self.gguf.add_source_doi(metadata.source_doi) + if metadata.source_uuid is not None: + self.gguf.add_source_uuid(metadata.source_uuid) + if metadata.source_repo_url is not None: + self.gguf.add_source_repo_url(metadata.source_repo_url) + + if metadata.base_models is not None: + self.gguf.add_base_model_count(len(metadata.base_models)) + for key, base_model_entry in enumerate(metadata.base_models): + if "name" in base_model_entry: + self.gguf.add_base_model_name(key, base_model_entry["name"]) + if "author" in base_model_entry: + 
self.gguf.add_base_model_author(key, base_model_entry["author"]) + if "version" in base_model_entry: + self.gguf.add_base_model_version(key, base_model_entry["version"]) + if "organization" in base_model_entry: + self.gguf.add_base_model_organization(key, base_model_entry["organization"]) + if "url" in base_model_entry: + self.gguf.add_base_model_url(key, base_model_entry["url"]) + if "doi" in base_model_entry: + self.gguf.add_base_model_doi(key, base_model_entry["doi"]) + if "uuid" in base_model_entry: + self.gguf.add_base_model_uuid(key, base_model_entry["uuid"]) + if "repo_url" in base_model_entry: + self.gguf.add_base_model_repo_url(key, base_model_entry["repo_url"]) + if metadata.tags is not None: self.gguf.add_tags(metadata.tags) if metadata.languages is not None: diff --git a/gguf-py/README.md b/gguf-py/README.md index 9dd888f3180d1..24af96a17a5bb 100644 --- a/gguf-py/README.md +++ b/gguf-py/README.md @@ -78,5 +78,13 @@ python -m build python -m twine upload dist/* ``` +## Run Unit Tests + +From root of this repository you can run this command to run all the unit tests + +```bash +python -m unittest discover ./gguf-py -v +``` + ## TODO - [ ] Include conversion scripts as command line entry points in this package. diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 1484ad5168504..35395381b911d 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -31,10 +31,12 @@ class General: VERSION = "general.version" ORGANIZATION = "general.organization" - BASENAME = "general.basename" FINETUNE = "general.finetune" + BASENAME = "general.basename" + DESCRIPTION = "general.description" QUANTIZED_BY = "general.quantized_by" + PARAMETER_CLASS_ATTRIBUTE = "general.parameter_class_attribute" # Licensing details @@ -43,31 +45,29 @@ class General: LICENSE_LINK = "general.license.link" # Typically represents the converted GGUF repo (Unless native) - URL = "general.url" + URL = "general.url" # Model Website/Paper DOI = "general.doi" UUID = "general.uuid" - HF_URL = "general.huggingface.repository" + REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...) - # Typically represents the original source repository (e.g. safetensors) - # that this was - SOURCE_URL = "general.source.url" + # Model Source during conversion + SOURCE_URL = "general.source.url" # Model Website/Paper SOURCE_DOI = "general.source.doi" SOURCE_UUID = "general.source.uuid" - SOURCE_HF_REPO = "general.source.huggingface.repository" + SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...) - # This represents the parent model that the converted/source model was - # derived from on allowing users to trace the linage of a model. - # E.g. A finetune model would have the base model as the parent - # (A model can have multiple parent, especially if it's a merged model) - PARENTS_COUNT = "general.parents.count" - PARENTS_NAME = "general.parents.{id}.name" - PARENTS_AUTHOR = "general.parents.{id}.author" - PARENTS_VERSION = "general.parents.{id}.version" - PARENTS_ORGANIZATION = "general.parents.{id}.organization" - PARENTS_URL = "general.parents.{id}.url" - PARENTS_DOI = "general.parents.{id}.doi" - PARENTS_UUID = "general.parents.{id}.uuid" - PARENTS_HF_REPO = "general.parents.{id}.huggingface.repository" + # Base Model Source. There can be more than one source if it's a merged + # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in + # tracing linage of models as it is finetuned or merged over time. 
+ BASE_MODEL_COUNT = "general.base_model.count" + BASE_MODEL_NAME = "general.base_model.{id}.name" + BASE_MODEL_AUTHOR = "general.base_model.{id}.author" + BASE_MODEL_VERSION = "general.base_model.{id}.version" + BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization" + BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper + BASE_MODEL_DOI = "general.base_model.{id}.doi" + BASE_MODEL_UUID = "general.base_model.{id}.uuid" + BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...) # Array based KV stores TAGS = "general.tags" @@ -1273,8 +1273,6 @@ def get_type(val: Any) -> GGUFValueType: KEY_GENERAL_URL = Keys.General.URL KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION KEY_GENERAL_LICENSE = Keys.General.LICENSE -KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL -KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE # LLM diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index b38f000301110..0f94f2dde6dcb 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -430,42 +430,43 @@ def add_type(self, type_name: str) -> None: def add_architecture(self) -> None: self.add_string(Keys.General.ARCHITECTURE, self.arch) - def add_basename(self, basename: str) -> None: - self.add_string(Keys.General.BASENAME, basename) + def add_quantization_version(self, quantization_version: int) -> None: + self.add_uint32(Keys.General.QUANTIZATION_VERSION, quantization_version) - def add_finetune(self, finetune: str) -> None: - self.add_string(Keys.General.FINETUNE, finetune) + def add_custom_alignment(self, alignment: int) -> None: + self.data_alignment = alignment + self.add_uint32(Keys.General.ALIGNMENT, alignment) - def add_author(self, author: str) -> None: - self.add_string(Keys.General.AUTHOR, author) + def add_file_type(self, ftype: int) -> None: + self.add_uint32(Keys.General.FILE_TYPE, ftype) - def add_quantized_by(self, quantized: str) -> None: - self.add_string(Keys.General.QUANTIZED_BY, quantized) + def add_name(self, name: str) -> None: + self.add_string(Keys.General.NAME, name) - def add_organization(self, organization: str) -> None: - self.add_string(Keys.General.ORGANIZATION, organization) + def add_author(self, author: str) -> None: + self.add_string(Keys.General.AUTHOR, author) def add_version(self, version: str) -> None: self.add_string(Keys.General.VERSION, version) - def add_tensor_data_layout(self, layout: str) -> None: - self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout) - - def add_url(self, url: str) -> None: - self.add_string(Keys.General.URL, url) - - def add_doi(self, doi: str) -> None: - self.add_string(Keys.General.DOI, doi) + def add_organization(self, organization: str) -> None: + self.add_string(Keys.General.ORGANIZATION, organization) - def add_uuid(self, uuid: str) -> None: - self.add_string(Keys.General.UUID, uuid) + def add_finetune(self, finetune: str) -> None: + self.add_string(Keys.General.FINETUNE, finetune) - def add_hf_repo(self, hf_repo: str) -> None: - self.add_string(Keys.General.HF_REPO, hf_repo) + def add_basename(self, basename: str) -> None: + self.add_string(Keys.General.BASENAME, basename) def add_description(self, description: str) -> None: self.add_string(Keys.General.DESCRIPTION, description) + def add_quantized_by(self, quantized: str) -> None: + self.add_string(Keys.General.QUANTIZED_BY, quantized) + + def add_parameter_class_attribute(self, parameter_class_attribute: str) -> 
None: + self.add_string(Keys.General.PARAMETER_CLASS_ATTRIBUTE, parameter_class_attribute) + def add_license(self, license: str) -> None: self.add_string(Keys.General.LICENSE, license) @@ -475,44 +476,56 @@ def add_license_name(self, license: str) -> None: def add_license_link(self, license: str) -> None: self.add_string(Keys.General.LICENSE_LINK, license) + def add_url(self, url: str) -> None: + self.add_string(Keys.General.URL, url) + + def add_doi(self, doi: str) -> None: + self.add_string(Keys.General.DOI, doi) + + def add_uuid(self, uuid: str) -> None: + self.add_string(Keys.General.UUID, uuid) + + def add_repo_url(self, repo_url: str) -> None: + self.add_string(Keys.General.REPO_URL, repo_url) + def add_source_url(self, url: str) -> None: self.add_string(Keys.General.SOURCE_URL, url) - def add_source_hf_repo(self, repo: str) -> None: - self.add_string(Keys.General.SOURCE_HF_REPO, repo) + def add_source_doi(self, doi: str) -> None: + self.add_string(Keys.General.SOURCE_DOI, doi) - def add_file_type(self, ftype: int) -> None: - self.add_uint32(Keys.General.FILE_TYPE, ftype) + def add_source_uuid(self, uuid: str) -> None: + self.add_string(Keys.General.SOURCE_UUID, uuid) - def add_parameter_class_attribute(self, parameter_class_attribute: str) -> None: - self.add_string(Keys.General.PARAMETER_CLASS_ATTRIBUTE, parameter_class_attribute) + def add_source_repo_url(self, repo_url: str) -> None: + self.add_string(Keys.General.SOURCE_REPO_URL, repo_url) - def add_parent_count(self, parent_count: int) -> None: - self.add_uint32(Keys.General.PARENTS_COUNT, parent_count) + def add_base_model_count(self, source_count: int) -> None: + self.add_uint32(Keys.General.BASE_MODEL_COUNT, source_count) - def add_parent_name(self, parent_id: int, name: str) -> None: - self.add_string(Keys.General.PARENTS_NAME.format(id=self.parent_id), name) + def add_base_model_name(self, source_id: int, name: str) -> None: + self.add_string(Keys.General.BASE_MODEL_NAME.format(id=self.source_id), name) - def add_parent_author(self, parent_id: int, author: str) -> None: - self.add_string(Keys.General.PARENTS_AUTHOR.format(id=self.parent_id), author) + def add_base_model_author(self, source_id: int, author: str) -> None: + self.add_string(Keys.General.BASE_MODEL_AUTHOR.format(id=self.source_id), author) - def add_parent_version(self, parent_id: int, version: str) -> None: - self.add_string(Keys.General.PARENTS_VERSION.format(id=self.parent_id), version) + def add_base_model_version(self, source_id: int, version: str) -> None: + self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=self.source_id), version) - def add_parent_organization(self, parent_id: int, organization: str) -> None: - self.add_string(Keys.General.PARENTS_ORGANIZATION.format(id=self.parent_id), organization) + def add_base_model_organization(self, source_id: int, organization: str) -> None: + self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=self.source_id), organization) - def add_parent_url(self, parent_id: int, url: str) -> None: - self.add_string(Keys.General.PARENTS_URL.format(id=self.parent_id), url) + def add_base_model_url(self, source_id: int, url: str) -> None: + self.add_string(Keys.General.BASE_MODEL_URL.format(id=self.source_id), url) - def add_parent_doi(self, parent_id: int, doi: str) -> None: - self.add_string(Keys.General.PARENTS_DOI.format(id=self.parent_id), doi) + def add_base_model_doi(self, source_id: int, doi: str) -> None: + self.add_string(Keys.General.BASE_MODEL_DOI.format(id=self.source_id), doi) - def 
add_parent_uuid(self, parent_id: int, uuid: str) -> None: - self.add_string(Keys.General.PARENTS_UUID.format(id=self.parent_id), uuid) + def add_base_model_uuid(self, source_id: int, uuid: str) -> None: + self.add_string(Keys.General.BASE_MODEL_UUID.format(id=self.source_id), uuid) - def add_parent_hf_repo(self, parent_id: int, hf_repo: str) -> None: - self.add_string(Keys.General.PARENTS_HF_REPO.format(id=self.parent_id), hf_repo) + def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None: + self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=self.source_id), repo_url) def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.General.TAGS, tags) @@ -523,16 +536,8 @@ def add_languages(self, languages: Sequence[str]) -> None: def add_datasets(self, datasets: Sequence[str]) -> None: self.add_array(Keys.General.DATASETS, datasets) - def add_name(self, name: str) -> None: - self.add_string(Keys.General.NAME, name) - - def add_quantization_version(self, quantization_version: int) -> None: - self.add_uint32( - Keys.General.QUANTIZATION_VERSION, quantization_version) - - def add_custom_alignment(self, alignment: int) -> None: - self.data_alignment = alignment - self.add_uint32(Keys.General.ALIGNMENT, alignment) + def add_tensor_data_layout(self, layout: str) -> None: + self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout) def add_vocab_size(self, size: int) -> None: self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py old mode 100755 new mode 100644 index e1bbfc78c56b7..6ae4e044c0dd6 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -1,45 +1,40 @@ -#!/usr/bin/env python3 - from __future__ import annotations import re import json -import unittest +import uuid import frontmatter from pathlib import Path from typing import Optional from dataclasses import dataclass -if __name__ == '__main__': - from constants import Keys -else: - from .constants import Keys +from .constants import Keys @dataclass class Metadata: # Authorship Metadata to be written to GGUF KV Store name: Optional[str] = None - basename: Optional[str] = None - finetune: Optional[str] = None author: Optional[str] = None - quantized_by: Optional[str] = None - organization: Optional[str] = None version: Optional[str] = None + organization: Optional[str] = None + finetune: Optional[str] = None + basename: Optional[str] = None + description: Optional[str] = None + quantized_by: Optional[str] = None + parameter_class_attribute: Optional[str] = None url: Optional[str] = None doi: Optional[str] = None uuid: Optional[str] = None - hf_repo: Optional[str] = None - description: Optional[str] = None - license: Optional[str] = None - license_name: Optional[str] = None - license_link: Optional[str] = None + repo_url: Optional[str] = None source_url: Optional[str] = None source_doi: Optional[str] = None source_uuid: Optional[str] = None - source_hf_repo: Optional[str] = None - parameter_class_attribute: Optional[str] = None - parents: Optional[list[dict]] = None + source_repo_url: Optional[str] = None + license: Optional[str] = None + license_name: Optional[str] = None + license_link: Optional[str] = None + base_models: Optional[list[dict]] = None tags: Optional[list[str]] = None languages: Optional[list[str]] = None datasets: Optional[list[str]] = None @@ -68,10 +63,12 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata.version = 
metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 - metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 + metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by ) # noqa: E202 + metadata.parameter_class_attribute = metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute) # noqa: E202 metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 @@ -81,14 +78,14 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 metadata.doi = metadata_override.get(Keys.General.DOI , metadata.doi ) # noqa: E202 metadata.uuid = metadata_override.get(Keys.General.UUID , metadata.uuid ) # noqa: E202 - metadata.hf_repo = metadata_override.get(Keys.General.HF_REPO , metadata.hf_repo ) # noqa: E202 + metadata.repo_url = metadata_override.get(Keys.General.REPO_URL , metadata.repo_url ) # noqa: E202 metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI , metadata.source_doi ) # noqa: E202 metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID , metadata.source_uuid ) # noqa: E202 - metadata.source_hf_repo = metadata_override.get(Keys.General.SOURCE_HF_REPO , metadata.source_hf_repo ) # noqa: E202 + metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL , metadata.source_repo_url ) # noqa: E202 - metadata.parent_count = metadata_override.get("general.parents" , metadata.parent_count ) # noqa: E202 + metadata.base_models = metadata_override.get("general.base_models" , metadata.base_models ) # noqa: E202 metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 @@ -98,6 +95,9 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat if model_name is not None: metadata.name = model_name + # If any UUID is still missing at this point, then we should fill it in + metadata = Metadata.generate_any_missing_uuid(metadata) + return metadata @staticmethod @@ -137,8 +137,7 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, object]: @staticmethod def id_to_title(string): # Convert capitalization into title form unless acronym or version number - string = string.strip().replace('-', ' ') - return ' '.join([w.title() if w.islower() and not re.match(r'^v\d+(?:\.\d+)*$', w) else w for w in string.split()]) + return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()]) @staticmethod def get_model_id_components(model_id: Optional[str] = None) -> dict[str, object]: @@ -186,67 +185,119 @@ def get_model_id_components(model_id: Optional[str] = None) -> dict[str, object] @staticmethod def 
apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None) -> Metadata: # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 - found_base_model = False # Model Card Heuristics ######################## if model_card is not None: - if "model_name" in model_card: + if "model_name" in model_card and metadata.name is None: # Not part of huggingface model card standard but notice some model creator using it - # such as TheBloke who would encode 'Mixtral 8X7B Instruct v0.1' into model_name + # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' metadata.name = model_card.get("model_name") - if "base_model" in model_card and isinstance(model_card["base_model"], str) and not found_base_model: - # Check if string. We cannot handle lists as that is too ambagious - # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges) - model_id = model_card.get("base_model") - model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) - if metadata.name is None and model_full_name_component is not None: - metadata.name = Metadata.id_to_title(model_full_name_component) - if metadata.organization is None and org_component is not None: - metadata.organization = Metadata.id_to_title(org_component) - if metadata.basename is None and basename is not None: - metadata.basename = basename - if metadata.finetune is None and finetune is not None: - metadata.finetune = finetune - if metadata.version is None and version is not None: - metadata.version = version - if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: - metadata.parameter_class_attribute = parameter_class_attribute - if metadata.source_url is None and org_component is not None and model_full_name_component is not None: - metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" - if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: - metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" + if "model_creator" in model_card and metadata.author is None: + # Not part of huggingface model card standard but notice some model creator using it + # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' + metadata.author = model_card.get("model_creator") - found_base_model = True + if "model_type" in model_card and metadata.basename is None: + # Not part of huggingface model card standard but notice some model creator using it + # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF' + metadata.basename = model_card.get("model_type") - if metadata.quantized_by is None: + if "base_model" in model_card: + # This represents the parent models that this is based on + # Example: stabilityai/stable-diffusion-xl-base-1.0. 
Can also be a list (for merges) + # Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md + metadata_base_models = [] + base_model_value = model_card.get("base_model", None) + + if base_model_value is not None: + if isinstance(base_model_value, str): + metadata_base_models.append(base_model_value) + elif isinstance(base_model_value, list): + metadata_base_models.extend(base_model_value) + + if metadata.base_models is None: + metadata.base_models = [] + + for model_id in metadata_base_models: + model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + base_model = {} + if model_full_name_component is not None: + base_model["name"] = Metadata.id_to_title(model_full_name_component) + if org_component is not None: + base_model["organization"] = Metadata.id_to_title(org_component) + if version is not None: + base_model["version"] = version + if org_component is not None and model_full_name_component is not None: + base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}" + metadata.base_models.append(base_model) + + if "quantized_by" in model_card and metadata.quantized_by is None: # Not part of hugging face model card standard, but is used by TheBloke to credit them self for quantizing 3rd party models metadata.quantized_by = model_card.get("quantized_by") - if metadata.license is None: + + if "license" in model_card and metadata.license is None: metadata.license = model_card.get("license") - if metadata.license_name is None: + + if "license_name" in model_card and metadata.license_name is None: metadata.license_name = model_card.get("license_name") - if metadata.license_link is None: + + if "license_link" in model_card and metadata.license_link is None: metadata.license_link = model_card.get("license_link") - if metadata.author is None: - # non huggingface model card standard but notice some model creator using it - metadata.author = model_card.get("model_creator") - if metadata.tags is None: - metadata.tags = model_card.get("tags", None) - if metadata.languages is None: - metadata.languages = model_card.get("language", model_card.get("languages", None)) - if metadata.datasets is None: - metadata.datasets = model_card.get("datasets", model_card.get("dataset", None)) + + tags_value = model_card.get("tags", None) + if tags_value is not None: + + if metadata.tags is None: + metadata.tags = [] + + if isinstance(tags_value, str): + metadata.tags.append(tags_value) + elif isinstance(tags_value, list): + metadata.tags.extend(tags_value) + + pipeline_tags_value = model_card.get("pipeline_tag", None) + if pipeline_tags_value is not None: + + if metadata.tags is None: + metadata.tags = [] + + if isinstance(pipeline_tags_value, str): + metadata.tags.append(pipeline_tags_value) + elif isinstance(pipeline_tags_value, list): + metadata.tags.extend(pipeline_tags_value) + + language_value = model_card.get("languages", model_card.get("language", None)) + if language_value is not None: + + if metadata.languages is None: + metadata.languages = [] + + if isinstance(language_value, str): + metadata.languages.append(language_value) + elif isinstance(language_value, list): + metadata.languages.extend(language_value) + + dataset_value = model_card.get("datasets", model_card.get("dataset", None)) + if dataset_value is not None: + + if metadata.datasets is None: + metadata.datasets = [] + + if isinstance(dataset_value, str): + 
metadata.datasets.append(dataset_value) + elif isinstance(dataset_value, list): + metadata.datasets.extend(dataset_value) # Hugging Face Parameter Heuristics #################################### if hf_params is not None: - hf_name_or_path = hf_params.get("_name_or_path") - if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1 and not found_base_model: + hf_name_or_path = hf_params.get("_name_or_path") + if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1: # Use _name_or_path only if its actually a model name and not some computer path # e.g. 'meta-llama/Llama-2-7b-hf' model_id = hf_name_or_path @@ -263,10 +314,6 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.version = version if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: metadata.parameter_class_attribute = parameter_class_attribute - if metadata.source_url is None and org_component is not None and model_full_name_component is not None: - metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" - if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: - metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" # Directory Folder Name Fallback Heuristics ############################################ @@ -285,66 +332,57 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.version = version if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: metadata.parameter_class_attribute = parameter_class_attribute - if metadata.source_url is None and org_component is not None and model_full_name_component is not None: - metadata.source_url = f"https://huggingface.co/{org_component}/{model_full_name_component}" - if metadata.source_hf_repo is None and org_component is not None and model_full_name_component is not None: - metadata.source_hf_repo = f"{org_component}/{model_full_name_component}" return metadata + @staticmethod + def generate_any_missing_uuid(metadata: Metadata) -> Metadata: + + # UUID Generation if not already provided + if metadata.uuid is None: + # Generate UUID based on provided links/id. 
UUIDv4 used as fallback + new_uuid = None + + if metadata.doi is not None: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, f"https://doi.org/{metadata.doi}") + elif metadata.repo_url is not None: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.repo_url) + elif metadata.url is not None: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.url) + else: + new_uuid = uuid.uuid4() # every model must have at least a random UUIDv4 + + if new_uuid is not None: + metadata.uuid = str(new_uuid) + + if metadata.source_uuid is None: + # Generate a UUID based on provided links/id only if source provided + new_uuid = None + + if metadata.source_doi is not None: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, f"https://doi.org/{metadata.source_doi}") + elif metadata.source_repo_url is not None: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.source_repo_url) + elif metadata.source_url is not None: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.source_url) + + if new_uuid is not None: + metadata.source_uuid = str(new_uuid) + + if metadata.base_models is not None: + for model_entry in metadata.base_models: + if "uuid" not in model_entry: + # Generate a UUID based on provided links/id only if source provided + new_uuid = None + + if "repo_url" in model_entry: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, model_entry["repo_url"]) + elif "url" in model_entry: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, model_entry["url"]) + elif "doi" in model_entry: + new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, model_entry["doi"]) + + if new_uuid is not None: + model_entry["uuid"] = str(new_uuid) -class TestStringMethods(unittest.TestCase): - - def test_get_model_id_components(self): - self.assertEqual(Metadata.get_model_id_components("Mistral/Mixtral-8x7B-Instruct-v0.1"), - ('Mixtral-8x7B-Instruct-v0.1', "Mistral", 'Mixtral', 'Instruct', 'v0.1', '8x7B')) - self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B-Instruct-v0.1"), - ('Mixtral-8x7B-Instruct-v0.1', None, 'Mixtral', 'Instruct', 'v0.1', '8x7B')) - self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B-Instruct"), - ('Mixtral-8x7B-Instruct', None, 'Mixtral', 'Instruct', None, '8x7B')) - self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B-v0.1"), - ('Mixtral-8x7B-v0.1', None, 'Mixtral', None, 'v0.1', '8x7B')) - self.assertEqual(Metadata.get_model_id_components("Mixtral-8x7B"), - ('Mixtral-8x7B', None, 'Mixtral', None, None, '8x7B')) - self.assertEqual(Metadata.get_model_id_components("Mixtral"), - ('Mixtral', None, 'Mixtral', None, None, None)) - self.assertEqual(Metadata.get_model_id_components("Mixtral-v0.1"), - ('Mixtral-v0.1', None, 'Mixtral', None, 'v0.1', None)) - self.assertEqual(Metadata.get_model_id_components("hermes-2-pro-llama-3-8b-DPO"), - ('hermes-2-pro-llama-3-8b-DPO', None, 'hermes-2-pro-llama-3', 'DPO', None, '8b')) - self.assertEqual(Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"), - ('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, "8B")) - - def test_apply_metadata_heuristic_from_model_card(self): - # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/README.md - model_card = { - 'base_model': 'NousResearch/Meta-Llama-3-8B', - 'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], - 'model-index': [{'name': 'Hermes-2-Pro-Llama-3-8B', 'results': []}], - 'language': ['en'], - 'datasets': ['teknium/OpenHermes-2.5'], - 'widget': [{'example_title': 'Hermes 2 Pro', 
'messages': [{'role': 'system', 'content': 'You are a sentient, superintelligent artificial general intelligence, here to teach and assist me.'}, {'role': 'user', 'content': 'Write a short story about Goku discovering kirby has teamed up with Majin Buu to destroy the world.'}]}] - } - expected = Metadata(name='Meta Llama 3 8B', basename='Meta-Llama-3', finetune=None, author=None, quantized_by=None, organization='NousResearch', version=None, url=None, doi=None, uuid=None, hf_repo=None, description=None, license=None, license_name=None, license_link=None, source_url='https://huggingface.co/NousResearch/Meta-Llama-3-8B', source_doi=None, source_uuid=None, source_hf_repo='NousResearch/Meta-Llama-3-8B', parameter_class_attribute='8B', parents=None, tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) - - got = Metadata.apply_metadata_heuristic(Metadata(), model_card, None, None) - - self.assertEqual(got, expected) - - def test_apply_metadata_heuristic_from_hf_parameters(self): - # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/config.json - hf_params = {"_name_or_path": "./hermes-2-pro-llama-3-8b-DPO"} - expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, url=None, doi=None, uuid=None, hf_repo=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_doi=None, source_uuid=None, source_hf_repo=None, parameter_class_attribute='8b', parents=None, tags=None, languages=None, datasets=None) - got = Metadata.apply_metadata_heuristic(Metadata(), None, hf_params, None) - self.assertEqual(got, expected) - - def test_apply_metadata_heuristic_from_model_dir(self): - # Source: https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B/blob/main/config.json - model_dir_path = Path("./hermes-2-pro-llama-3-8b-DPO") - expected = Metadata(name='Hermes 2 Pro Llama 3 8B DPO', basename='hermes-2-pro-llama-3', finetune='DPO', author=None, quantized_by=None, organization=None, version=None, url=None, doi=None, uuid=None, hf_repo=None, description=None, license=None, license_name=None, license_link=None, source_url=None, source_doi=None, source_uuid=None, source_hf_repo=None, parameter_class_attribute='8b', parents=None, tags=None, languages=None, datasets=None) - got = Metadata.apply_metadata_heuristic(Metadata(), None, None, model_dir_path) - self.assertEqual(got, expected) - - -if __name__ == '__main__': - unittest.main() + return metadata diff --git a/gguf-py/tests/__init__.py b/gguf-py/tests/__init__.py new file mode 100644 index 0000000000000..d23ff9cb7380c --- /dev/null +++ b/gguf-py/tests/__init__.py @@ -0,0 +1 @@ +from .test_metadata import * diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py new file mode 100755 index 0000000000000..b6d493b36c6ae --- /dev/null +++ b/gguf-py/tests/test_metadata.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +import unittest +import gguf # noqa: F401 +from pathlib import Path + + +class TestMetadataMethod(unittest.TestCase): + + def test_id_to_title(self): + self.assertEqual(gguf.Metadata.id_to_title("Mixtral-8x7B-Instruct-v0.1"), "Mixtral 8x7B Instruct v0.1") + self.assertEqual(gguf.Metadata.id_to_title("Meta-Llama-3-8B"), "Meta Llama 3 8B") + 
self.assertEqual(gguf.Metadata.id_to_title("hermes-2-pro-llama-3-8b-DPO"), "Hermes 2 Pro Llama 3 8b DPO") + + def test_get_model_id_components(self): + self.assertEqual(gguf.Metadata.get_model_id_components("Mistral/Mixtral-8x7B-Instruct-v0.1"), + ('Mixtral-8x7B-Instruct-v0.1', "Mistral", 'Mixtral', 'Instruct', 'v0.1', '8x7B')) + self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B-Instruct-v0.1"), + ('Mixtral-8x7B-Instruct-v0.1', None, 'Mixtral', 'Instruct', 'v0.1', '8x7B')) + self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B-Instruct"), + ('Mixtral-8x7B-Instruct', None, 'Mixtral', 'Instruct', None, '8x7B')) + self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B-v0.1"), + ('Mixtral-8x7B-v0.1', None, 'Mixtral', None, 'v0.1', '8x7B')) + self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B"), + ('Mixtral-8x7B', None, 'Mixtral', None, None, '8x7B')) + self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral"), + ('Mixtral', None, 'Mixtral', None, None, None)) + self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-v0.1"), + ('Mixtral-v0.1', None, 'Mixtral', None, 'v0.1', None)) + self.assertEqual(gguf.Metadata.get_model_id_components("hermes-2-pro-llama-3-8b-DPO"), + ('hermes-2-pro-llama-3-8b-DPO', None, 'hermes-2-pro-llama-3', 'DPO', None, '8b')) + self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"), + ('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, "8B")) + + def test_apply_metadata_heuristic_from_model_card(self): + model_card = { + 'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], + 'model-index': [{'name': 'Hermes-2-Pro-Llama-3-8B', 'results': []}], + 'language': ['en'], + 'datasets': ['teknium/OpenHermes-2.5'], + 'widget': [{'example_title': 'Hermes 2 Pro', 'messages': [{'role': 'system', 'content': 'You are a sentient, superintelligent artificial general intelligence, here to teach and assist me.'}, {'role': 'user', 'content': 'Write a short story about Goku discovering kirby has teamed up with Majin Buu to destroy the world.'}]}], + 'base_model': ["EmbeddedLLM/Mistral-7B-Merge-14-v0", "janai-hq/trinity-v1"] + } + got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) + expect = gguf.Metadata(name=None, author=None, version=None, organization=None, finetune=None, basename=None, description=None, quantized_by=None, parameter_class_attribute=None, url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}], tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) + self.assertEqual(got, expect) + + def test_apply_metadata_heuristic_from_hf_parameters(self): + hf_params = {"_name_or_path": "./hermes-2-pro-llama-3-8b-DPO"} + got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, hf_params, None) + expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, 
quantized_by=None, parameter_class_attribute='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) + self.assertEqual(got, expect) + + def test_apply_metadata_heuristic_from_model_dir(self): + model_dir_path = Path("./hermes-2-pro-llama-3-8b-DPO") + got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, None, model_dir_path) + expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, parameter_class_attribute='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) + self.assertEqual(got, expect) + + def test_generate_any_missing_uuid(self): + metadata = gguf.Metadata(repo_url="example.com", source_url="example.com", base_models=[{"doi":"10.57967/hf/2410"},{"doi":"10.47366/sabia.v5n1a3"}]) + got = gguf.Metadata.generate_any_missing_uuid(metadata) + expect = gguf.Metadata(name=None, author=None, version=None, organization=None, finetune=None, basename=None, description=None, quantized_by=None, parameter_class_attribute=None, url=None, doi=None, uuid='a5cf6e8e-4cfa-5f31-a804-6de6d1245e26', repo_url='example.com', source_url='example.com', source_doi=None, source_uuid='a5cf6e8e-4cfa-5f31-a804-6de6d1245e26', source_repo_url=None, license=None, license_name=None, license_link=None, base_models=[{'doi': '10.57967/hf/2410', 'uuid': '26ce8128-2d34-5ea2-bc50-b5b90e21ed71'}, {'doi': '10.47366/sabia.v5n1a3', 'uuid': 'a15b24d6-5657-5d52-aaed-20dad7f4c500'}], tags=None, languages=None, datasets=None) + self.assertEqual(got, expect) From e9734434bd45ac5f2071265786db96f486080559 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 9 Jun 2024 16:57:39 +1000 Subject: [PATCH 24/65] convert-*.py: Remove self.model_name that was left in since last rebase --- convert_hf_to_gguf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 99a76cd70e120..dbd0336788cf3 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -81,7 +81,6 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE self.use_temp_file = use_temp_file self.lazy = not eager - self.model_name = model_name self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors") self.is_safetensors = len(self.part_names) > 0 if not self.is_safetensors: From 5011eefeaf119ecd61dcf5d03e3f70c66fab17d3 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 7 Jul 2024 18:06:14 +1000 Subject: [PATCH 25/65] convert_hf_to_gguf.py: optional, dataclass removed from type as it was unused --- convert_hf_to_gguf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index dbd0336788cf3..a15f8a515fb04 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -13,8 +13,7 @@ from enum import IntEnum from pathlib import Path from hashlib import sha256 -from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast, Optional -from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast import math import numpy as np From 
2f23927d37eec05647f0e340e163f8030c60ae46 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 7 Jul 2024 18:52:52 +1000 Subject: [PATCH 26/65] convert_hf_to_gguf.py: rebase error correction --- convert_hf_to_gguf.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index a15f8a515fb04..87e53cf19c98a 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -67,7 +67,7 @@ class Model: model_arch: gguf.MODEL_ARCH def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: gguf.Metadata, - model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): + split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") @@ -107,21 +107,6 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) - # Update any missing authorship metadata with HuggingFace parameters or model card frontmatter - if self.metadata is not None: - - # Source Hugging Face Repository - if self.metadata.source_hf_repo is None: - if self.hparams is not None and "_name_or_path" in self.hparams: - self.metadata.source_hf_repo = self.hparams["_name_or_path"] - - # Model License - if self.metadata.license is None: - if self.model_card is not None and "license" in self.model_card: - self.metadata.source_hf_repo = self.model_card["license"] - - self.model_name = Model.get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch) - # Fallback to model architecture name if metadata name is still missing if self.metadata.name is None: self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch] @@ -3708,8 +3693,8 @@ def main() -> None: logger.error(f"Model {hparams['architectures'][0]} is not supported") sys.exit(1) - model_instance = model_class(dir_model, output_type, fname_out, args.bigendian, args.use_temp_file, args.no_lazy, - metadata, args.model_name, split_max_tensors=args.split_max_tensors, + model_instance = model_class(dir_model, output_type, fname_out, args.bigendian, args.use_temp_file, + args.no_lazy, metadata, split_max_tensors=args.split_max_tensors, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, small_first_shard=args.no_tensor_first_split) From 4dc8ddd35af2b238a258eee7797399d73b918f28 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 7 Jul 2024 20:00:26 +1000 Subject: [PATCH 27/65] convert_hf_to_gguf.py: Remove code that is already in fill_templated_filename() and GGUFWriter() --- convert_hf_to_gguf.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 87e53cf19c98a..36430b40a3f4e 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -90,6 +90,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.tensor_names = None self.metadata = metadata + # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type if self.ftype == 
gguf.LlamaFileType.GUESSED: # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie. _, first_tensor = next(self.get_tensors()) @@ -100,13 +101,6 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_BF16 - ftype_up: str = self.ftype.name.partition("_")[2].upper() - ftype_lw: str = ftype_up.lower() - # allow templating the file name with the output ftype, useful with the "auto" ftype - self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up) - self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, - split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) - # Fallback to model architecture name if metadata name is still missing if self.metadata.name is None: self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch] @@ -126,13 +120,15 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, # Filename Output if fname_out is not None: # custom defined filename and path was provided + # allow templating the file name with the output ftype, useful with the "auto" ftype self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type) else: # output in the same directory as the model by default self.fname_out = dir_model / f"{self.fname_default}.gguf" # Configure GGUF Writer - self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) + self.gguf_writer = gguf.GGUFWriter(path=self.fname_out, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, + split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) @classmethod def __init_subclass__(cls): From 007708e32dbed2448b32c4ec638703e405b6e1fd Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 9 Jul 2024 06:52:44 +1000 Subject: [PATCH 28/65] gguf_writer.py: generate tensor uuid if missing --- convert_hf_to_gguf.py | 5 ++++ gguf-py/gguf/gguf_writer.py | 16 +++++++++++ gguf-py/gguf/metadata.py | 55 ------------------------------------- 3 files changed, 21 insertions(+), 55 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 36430b40a3f4e..ff297dd6944d1 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -432,6 +432,11 @@ def write_tensors(self): def write(self): self.write_tensors() + + if self.metadata.uuid is None: + self.metadata.uuid = self.gguf_writer.generate_tensors_uuid() + logger.info("generating general.uuid (based on tensor content) {0}".format(self.metadata.uuid)) + self.gguf_writer.write_header_to_file(self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.write_tensors_to_file(progress=True) diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 0f94f2dde6dcb..037b6762fbc12 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -2,6 +2,8 @@ import logging import os +import uuid +import hashlib import shutil import struct import tempfile @@ -115,6 +117,7 @@ def open_output_file(self, path: Path | None = None) -> None: if self.state is WriterState.EMPTY and self.fout 
is not None and (path is None or path == self.path): # allow calling this multiple times as long as the path is the same return + if self.state is not WriterState.NO_FILE: raise ValueError(f'Expected output file to be not yet opened, got {self.state}') @@ -366,6 +369,19 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None: self.state = WriterState.WEIGHTS + def generate_tensors_uuid(self) -> None: + uuidv5_sha1 = hashlib.sha1() + uuidv5_sha1.update(uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5').bytes) + + for tensors in self.tensors: + # relying on the fact that Python dicts preserve insertion order (since 3.7) + for name, ti in tensors.items(): + assert ti.tensor is not None + assert ti.tensor.nbytes == ti.nbytes + uuidv5_sha1.update(ti.tensor.tobytes('C')) + + return uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5) + def write_tensors_to_file(self, *, progress: bool = False) -> None: self.write_ti_data_to_file() diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 6ae4e044c0dd6..d0c26fd6aa53a 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -95,9 +95,6 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat if model_name is not None: metadata.name = model_name - # If any UUID is still missing at this point, then we should fill it in - metadata = Metadata.generate_any_missing_uuid(metadata) - return metadata @staticmethod @@ -334,55 +331,3 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.parameter_class_attribute = parameter_class_attribute return metadata - - @staticmethod - def generate_any_missing_uuid(metadata: Metadata) -> Metadata: - - # UUID Generation if not already provided - if metadata.uuid is None: - # Generate UUID based on provided links/id. 
UUIDv4 used as fallback - new_uuid = None - - if metadata.doi is not None: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, f"https://doi.org/{metadata.doi}") - elif metadata.repo_url is not None: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.repo_url) - elif metadata.url is not None: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.url) - else: - new_uuid = uuid.uuid4() # every model must have at least a random UUIDv4 - - if new_uuid is not None: - metadata.uuid = str(new_uuid) - - if metadata.source_uuid is None: - # Generate a UUID based on provided links/id only if source provided - new_uuid = None - - if metadata.source_doi is not None: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, f"https://doi.org/{metadata.source_doi}") - elif metadata.source_repo_url is not None: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.source_repo_url) - elif metadata.source_url is not None: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, metadata.source_url) - - if new_uuid is not None: - metadata.source_uuid = str(new_uuid) - - if metadata.base_models is not None: - for model_entry in metadata.base_models: - if "uuid" not in model_entry: - # Generate a UUID based on provided links/id only if source provided - new_uuid = None - - if "repo_url" in model_entry: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, model_entry["repo_url"]) - elif "url" in model_entry: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, model_entry["url"]) - elif "doi" in model_entry: - new_uuid = uuid.uuid5(uuid.NAMESPACE_URL, model_entry["doi"]) - - if new_uuid is not None: - model_entry["uuid"] = str(new_uuid) - - return metadata From 7ecb8f00a0ab79bc2ded1db186e84941765484bb Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 9 Jul 2024 23:24:19 +1000 Subject: [PATCH 29/65] test: remove test_gguf.py and remove test_generate_any_missing_uuid() --- gguf-py/tests/test_gguf.py | 7 ------- gguf-py/tests/test_metadata.py | 6 ------ 2 files changed, 13 deletions(-) delete mode 100644 gguf-py/tests/test_gguf.py diff --git a/gguf-py/tests/test_gguf.py b/gguf-py/tests/test_gguf.py deleted file mode 100644 index 76b52181e0391..0000000000000 --- a/gguf-py/tests/test_gguf.py +++ /dev/null @@ -1,7 +0,0 @@ -import gguf # noqa: F401 # pyright: ignore[reportUnusedImport] - -# TODO: add tests - - -def test_write_gguf() -> None: - pass diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index b6d493b36c6ae..1924f3b3a8fe7 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -56,9 +56,3 @@ def test_apply_metadata_heuristic_from_model_dir(self): got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, None, model_dir_path) expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, parameter_class_attribute='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) self.assertEqual(got, expect) - - def test_generate_any_missing_uuid(self): - metadata = gguf.Metadata(repo_url="example.com", source_url="example.com", base_models=[{"doi":"10.57967/hf/2410"},{"doi":"10.47366/sabia.v5n1a3"}]) - got = gguf.Metadata.generate_any_missing_uuid(metadata) - expect = gguf.Metadata(name=None, author=None, version=None, organization=None, finetune=None, basename=None, description=None, quantized_by=None, parameter_class_attribute=None, url=None, doi=None, 
uuid='a5cf6e8e-4cfa-5f31-a804-6de6d1245e26', repo_url='example.com', source_url='example.com', source_doi=None, source_uuid='a5cf6e8e-4cfa-5f31-a804-6de6d1245e26', source_repo_url=None, license=None, license_name=None, license_link=None, base_models=[{'doi': '10.57967/hf/2410', 'uuid': '26ce8128-2d34-5ea2-bc50-b5b90e21ed71'}, {'doi': '10.47366/sabia.v5n1a3', 'uuid': 'a15b24d6-5657-5d52-aaed-20dad7f4c500'}], tags=None, languages=None, datasets=None) - self.assertEqual(got, expect) From fdc5a3fc8080e4125419568581b8c2f9e0c0a190 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 9 Jul 2024 23:30:28 +1000 Subject: [PATCH 30/65] convert-*.py: autogenerate general.uuid if missing --- convert_hf_to_gguf.py | 116 ++++++++---------------------------- gguf-py/gguf/gguf_writer.py | 2 +- gguf-py/gguf/metadata.py | 78 +++++++++++++++++++++++- 3 files changed, 104 insertions(+), 92 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index ff297dd6944d1..1d8a87d2fe70f 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -215,81 +215,6 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", " raise ValueError(f"Can not map tensor {name!r}") return new_name - def set_gguf_meta_model(self): - self.gguf_writer.add_name(self.metadata.name) - - if self.metadata.author is not None: - self.gguf_writer.add_author(self.metadata.author) - if self.metadata.version is not None: - self.gguf_writer.add_version(self.metadata.version) - if self.metadata.organization is not None: - self.gguf_writer.add_organization(self.metadata.organization) - - if self.metadata.finetune is not None: - self.gguf_writer.add_finetune(self.metadata.finetune) - if self.metadata.basename is not None: - self.gguf_writer.add_basename(self.metadata.basename) - - if self.metadata.description is not None: - self.gguf_writer.add_description(self.metadata.description) - if self.metadata.quantized_by is not None: - self.gguf_writer.add_quantized_by(self.metadata.quantized_by) - - if self.metadata.parameter_class_attribute is not None: - self.gguf_writer.add_parameter_class_attribute(self.metadata.parameter_class_attribute) - - if self.metadata.license is not None: - self.gguf_writer.add_license(self.metadata.license) - if self.metadata.license_name is not None: - self.gguf_writer.add_license_name(self.metadata.license_name) - if self.metadata.license_link is not None: - self.gguf_writer.add_license_link(self.metadata.license_link) - - if self.metadata.url is not None: - self.gguf_writer.add_url(self.metadata.url) - if self.metadata.doi is not None: - self.gguf_writer.add_doi(self.metadata.doi) - if self.metadata.uuid is not None: - self.gguf_writer.add_uuid(self.metadata.uuid) - if self.metadata.repo_url is not None: - self.gguf_writer.add_repo_url(self.metadata.repo_url) - - if self.metadata.source_url is not None: - self.gguf_writer.add_source_url(self.metadata.source_url) - if self.metadata.source_doi is not None: - self.gguf_writer.add_source_doi(self.metadata.source_doi) - if self.metadata.source_uuid is not None: - self.gguf_writer.add_source_uuid(self.metadata.source_uuid) - if self.metadata.source_repo_url is not None: - self.gguf_writer.add_source_repo_url(self.metadata.source_repo_url) - - if self.metadata.base_models is not None: - self.gguf_writer.add_base_model_count(len(self.metadata.base_models)) - for key, base_model_entry in enumerate(self.metadata.base_models): - if "name" in base_model_entry: - self.gguf_writer.add_base_model_name(key, base_model_entry["name"]) - if 
"author" in base_model_entry: - self.gguf_writer.add_base_model_author(key, base_model_entry["author"]) - if "version" in base_model_entry: - self.gguf_writer.add_base_model_version(key, base_model_entry["version"]) - if "organization" in base_model_entry: - self.gguf_writer.add_base_model_organization(key, base_model_entry["organization"]) - if "url" in base_model_entry: - self.gguf_writer.add_base_model_url(key, base_model_entry["url"]) - if "doi" in base_model_entry: - self.gguf_writer.add_base_model_doi(key, base_model_entry["doi"]) - if "uuid" in base_model_entry: - self.gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"]) - if "repo_url" in base_model_entry: - self.gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"]) - - if self.metadata.tags is not None: - self.gguf_writer.add_tags(self.metadata.tags) - if self.metadata.languages is not None: - self.gguf_writer.add_languages(self.metadata.languages) - if self.metadata.datasets is not None: - self.gguf_writer.add_datasets(self.metadata.datasets) - def set_gguf_parameters(self): self.gguf_writer.add_block_count(self.block_count) @@ -430,13 +355,30 @@ def write_tensors(self): self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype) - def write(self): - self.write_tensors() + def prepare_key_value_store(self): + # Upon missing model uuid, generate uuid based on tensor content if self.metadata.uuid is None: self.metadata.uuid = self.gguf_writer.generate_tensors_uuid() - logger.info("generating general.uuid (based on tensor content) {0}".format(self.metadata.uuid)) + max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,") + logger.info(f"{f'%-{max_name_len}s' % f'generating general.uuid'} {self.metadata.uuid}") + logger.info("Set meta model") + self.metadata.set_gguf_meta_model(self.gguf_writer) + + logger.info("Set model parameters") + self.gguf_writer.add_type(gguf.GGUFType.MODEL) + self.set_gguf_parameters() + + logger.info("Set model tokenizer") + self.set_vocab() + + logger.info("Set model quantization version") + self.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION) + + def write(self): + self.write_tensors() + self.prepare_key_value_store() self.gguf_writer.write_header_to_file(self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.write_tensors_to_file(progress=True) @@ -445,6 +387,12 @@ def write(self): def write_vocab(self): if len(self.gguf_writer.tensors) != 1: raise ValueError('Splitting the vocabulary is not supported') + + if self.metadata.uuid is None: + # Required tensor data least for uuid generation if in vocab_only mode + self.write_tensors() + + self.prepare_key_value_store() self.gguf_writer.write_header_to_file(self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() @@ -3703,18 +3651,6 @@ def main() -> None: print(f"{model_instance.fname_default}") # noqa: NP100 return - logger.info("Set meta model") - model_instance.set_gguf_meta_model() - - logger.info("Set model parameters") - model_instance.gguf_writer.add_type(gguf.GGUFType.MODEL) - model_instance.set_gguf_parameters() - - logger.info("Set model tokenizer") - model_instance.set_vocab() - - model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION) - if args.vocab_only: logger.info("Exporting model vocab...") model_instance.write_vocab() diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 037b6762fbc12..f078110da849b 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ 
-380,7 +380,7 @@ def generate_tensors_uuid(self) -> None: assert ti.tensor.nbytes == ti.nbytes uuidv5_sha1.update(ti.tensor.tobytes('C')) - return uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5) + return str(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5)) def write_tensors_to_file(self, *, progress: bool = False) -> None: self.write_ti_data_to_file() diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index d0c26fd6aa53a..d81807ae3fd5f 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -2,7 +2,6 @@ import re import json -import uuid import frontmatter from pathlib import Path from typing import Optional @@ -10,6 +9,8 @@ from .constants import Keys +import gguf + @dataclass class Metadata: @@ -331,3 +332,78 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.parameter_class_attribute = parameter_class_attribute return metadata + + def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter): + gguf_writer.add_name(self.name) + + if self.author is not None: + gguf_writer.add_author(self.author) + if self.version is not None: + gguf_writer.add_version(self.version) + if self.organization is not None: + gguf_writer.add_organization(self.organization) + + if self.finetune is not None: + gguf_writer.add_finetune(self.finetune) + if self.basename is not None: + gguf_writer.add_basename(self.basename) + + if self.description is not None: + gguf_writer.add_description(self.description) + if self.quantized_by is not None: + gguf_writer.add_quantized_by(self.quantized_by) + + if self.parameter_class_attribute is not None: + gguf_writer.add_parameter_class_attribute(self.parameter_class_attribute) + + if self.license is not None: + gguf_writer.add_license(self.license) + if self.license_name is not None: + gguf_writer.add_license_name(self.license_name) + if self.license_link is not None: + gguf_writer.add_license_link(self.license_link) + + if self.url is not None: + gguf_writer.add_url(self.url) + if self.doi is not None: + gguf_writer.add_doi(self.doi) + if self.uuid is not None: + gguf_writer.add_uuid(self.uuid) + if self.repo_url is not None: + gguf_writer.add_repo_url(self.repo_url) + + if self.source_url is not None: + gguf_writer.add_source_url(self.source_url) + if self.source_doi is not None: + gguf_writer.add_source_doi(self.source_doi) + if self.source_uuid is not None: + gguf_writer.add_source_uuid(self.source_uuid) + if self.source_repo_url is not None: + gguf_writer.add_source_repo_url(self.source_repo_url) + + if self.base_models is not None: + gguf_writer.add_base_model_count(len(self.base_models)) + for key, base_model_entry in enumerate(self.base_models): + if "name" in base_model_entry: + gguf_writer.add_base_model_name(key, base_model_entry["name"]) + if "author" in base_model_entry: + gguf_writer.add_base_model_author(key, base_model_entry["author"]) + if "version" in base_model_entry: + gguf_writer.add_base_model_version(key, base_model_entry["version"]) + if "organization" in base_model_entry: + gguf_writer.add_base_model_organization(key, base_model_entry["organization"]) + if "url" in base_model_entry: + gguf_writer.add_base_model_url(key, base_model_entry["url"]) + if "doi" in base_model_entry: + gguf_writer.add_base_model_doi(key, base_model_entry["doi"]) + if "uuid" in base_model_entry: + gguf_writer.add_base_model_uuid(key, base_model_entry["uuid"]) + if "repo_url" in base_model_entry: + gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"]) + + if self.tags is not None: 
+ gguf_writer.add_tags(self.tags) + if self.languages is not None: + gguf_writer.add_languages(self.languages) + if self.datasets is not None: + gguf_writer.add_datasets(self.datasets) From 2a976e1211d924e33d82e070099a5f9b173728f3 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Wed, 10 Jul 2024 20:18:40 +1000 Subject: [PATCH 31/65] convert-*.py: write_tensors() --> prepare_tensors_for_writing() --- convert_hf_to_gguf.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 1d8a87d2fe70f..51d9fa860f8c1 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -272,7 +272,7 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i return False - def write_tensors(self): + def prepare_tensors_for_writing(self): max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,") for name, data_torch in self.get_tensors(): @@ -377,7 +377,7 @@ def prepare_key_value_store(self): self.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION) def write(self): - self.write_tensors() + self.prepare_tensors_for_writing() self.prepare_key_value_store() self.gguf_writer.write_header_to_file(self.fname_out) self.gguf_writer.write_kv_data_to_file() @@ -390,7 +390,7 @@ def write_vocab(self): if self.metadata.uuid is None: # Required tensor data least for uuid generation if in vocab_only mode - self.write_tensors() + self.prepare_tensors_for_writing() self.prepare_key_value_store() self.gguf_writer.write_header_to_file(self.fname_out) @@ -1445,8 +1445,8 @@ def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_ return [(new_name, data_torch)] - def write_tensors(self): - super().write_tensors() + def prepare_tensors_for_writing(self): + super().prepare_tensors_for_writing() if self._q_norms is not None or self._k_norms is not None: # flatten two `list[dict[str, Tensor]]` into a single `list[str]` @@ -1562,8 +1562,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def write_tensors(self): - super().write_tensors() + def prepare_tensors_for_writing(self): + super().prepare_tensors_for_writing() if self._experts is not None: # flatten `list[dict[str, Tensor]]` into `list[str]` @@ -1886,8 +1886,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def write_tensors(self): - super().write_tensors() + def prepare_tensors_for_writing(self): + super().prepare_tensors_for_writing() if self._experts is not None: # flatten `list[dict[str, Tensor]]` into `list[str]` @@ -2956,8 +2956,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def write_tensors(self): - super().write_tensors() + def prepare_tensors_for_writing(self): + super().prepare_tensors_for_writing() if self._experts is not None: # flatten `list[dict[str, Tensor]]` into `list[str]` @@ -3035,8 +3035,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def write_tensors(self): - super().write_tensors() + def prepare_tensors_for_writing(self): + super().prepare_tensors_for_writing() if self._experts is not None: # flatten `list[dict[str, Tensor]]` into `list[str]` @@ -3274,8 +3274,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: 
int | None) -> Iter return tensors - def write_tensors(self): - super().write_tensors() + def prepare_tensors_for_writing(self): + super().prepare_tensors_for_writing() self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias) From 59a01df784ee988927f223e7d66701cb22201b10 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Wed, 10 Jul 2024 20:20:54 +1000 Subject: [PATCH 32/65] convert-*.py: refactor per model weight count estimation --- convert_hf_to_gguf.py | 50 ++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 51d9fa860f8c1..d2db213fa5520 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -108,8 +108,12 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, # Generate parameter weight class (useful for leader boards) if not yet determined if self.metadata.parameter_class_attribute is None: expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - weight_estimate = self.per_model_weight_count_estimation(self.get_tensors(), expert_count) - self.metadata.parameter_class_attribute = gguf.parameter_class_attribute(expert_count, weight_estimate) + sum_weight_estimate = self.calculate_total_weight_count() + + # Calculate weight estimate per model + per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate + + self.metadata.parameter_class_attribute = gguf.parameter_class_attribute(expert_count, per_model_weight_estimate) # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] @@ -187,6 +191,23 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]: if len(sym_diff := tensor_names_from_parts.symmetric_difference(self.tensor_names)) > 0: raise ValueError(f"Mismatch between weight map and model parts for tensor names: {sym_diff}") + def calculate_total_weight_count(self) -> int: + sum_weight_estimate = 0 + for name, data_torch in self.get_tensors(): + # we don't need these + if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): + continue + + # Calculate Tensor Volume + sum_weights_in_tensor = 1 + for dim in data_torch.shape: + sum_weights_in_tensor *= dim + + # Add Tensor Volume To Running Count + sum_weight_estimate += sum_weights_in_tensor + + return sum_weight_estimate + def format_tensor_name(self, key: gguf.MODEL_TENSOR, bid: int | None = None, suffix: str = ".weight") -> str: if key not in gguf.MODEL_TENSORS[self.model_arch]: raise ValueError(f"Missing {key!r} for MODEL_TENSORS of {self.model_arch!r}") @@ -397,31 +418,6 @@ def write_vocab(self): self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() - def per_model_weight_count_estimation(self, tensors: Iterator[tuple[str, Tensor]], expert_count: int) -> int: - # TODO: Ensure parameter count is accurate throughout various model type - # May currently overestimate parameter count in Mamba model because - # output weights is tied with token embeddings. 
- sum_weight_estimate = 0 - for name, data_torch in tensors: - # Got A Tensor - - # We don't need these - if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): - continue - - # Calculate Tensor Volume - sum_weights_in_tensor = 1 - for dim in data_torch.shape: - sum_weights_in_tensor *= dim - - # Add Tensor Volume To Running Count - sum_weight_estimate += sum_weights_in_tensor - - # Calculate weight estimate per model - per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate - - return per_model_weight_estimate - @staticmethod def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]: part_names: list[str] = [] From dd14b8fdb1dd26a936d2ce889ed166b7fb648882 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Wed, 10 Jul 2024 23:39:09 +1000 Subject: [PATCH 33/65] convert-*.py: pyright type fixes --- convert_hf_to_gguf.py | 24 +++++------ examples/convert_legacy_llama.py | 73 +++++++++++++++++--------------- gguf-py/gguf/gguf_writer.py | 18 ++++---- gguf-py/gguf/metadata.py | 55 ++++++++++++------------ gguf-py/gguf/utility.py | 6 +-- 5 files changed, 90 insertions(+), 86 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index d2db213fa5520..c6b50574f4f07 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -59,14 +59,14 @@ class Model: tensor_map: gguf.TensorNameMap tensor_names: set[str] | None fname_out: Path - fname_default: Path + fname_default: str gguf_writer: gguf.GGUFWriter metadata: gguf.Metadata # subclasses should define this! model_arch: gguf.MODEL_ARCH - def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: gguf.Metadata, + def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: gguf.Metadata, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") @@ -107,11 +107,11 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, # Generate parameter weight class (useful for leader boards) if not yet determined if self.metadata.parameter_class_attribute is None: - expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None + expert_count = self.hparams.get("num_local_experts", 0) sum_weight_estimate = self.calculate_total_weight_count() # Calculate weight estimate per model - per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate + per_model_weight_estimate: int = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate self.metadata.parameter_class_attribute = gguf.parameter_class_attribute(expert_count, per_model_weight_estimate) @@ -400,7 +400,7 @@ def prepare_key_value_store(self): def write(self): self.prepare_tensors_for_writing() self.prepare_key_value_store() - self.gguf_writer.write_header_to_file(self.fname_out) + self.gguf_writer.write_header_to_file() self.gguf_writer.write_kv_data_to_file() self.gguf_writer.write_tensors_to_file(progress=True) self.gguf_writer.close() @@ -414,7 +414,7 @@ def write_vocab(self): self.prepare_tensors_for_writing() self.prepare_key_value_store() - 
self.gguf_writer.write_header_to_file(self.fname_out) + self.gguf_writer.write_header_to_file() self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() @@ -2525,7 +2525,6 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -2778,7 +2777,6 @@ def set_gguf_parameters(self): assert self.block_count == len(self._num_query_heads) assert self.block_count == len(self._ffn_dims) - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_block_count(self.block_count) self.gguf_writer.add_context_length(self.hparams["max_context_length"]) self.gguf_writer.add_embedding_length(n_embd) @@ -3618,11 +3616,10 @@ def main() -> None: logger.error("Error: Cannot use temp file when splitting") sys.exit(1) + fname_out = None + if args.outfile is not None: fname_out = args.outfile - else: - # output in the same directory as the model by default - fname_out = dir_model / 'ggml-model-{ftype}.gguf' logger.info(f"Loading model: {dir_model.name}") @@ -3638,8 +3635,9 @@ def main() -> None: logger.error(f"Model {hparams['architectures'][0]} is not supported") sys.exit(1) - model_instance = model_class(dir_model, output_type, fname_out, args.bigendian, args.use_temp_file, - args.no_lazy, metadata, split_max_tensors=args.split_max_tensors, + model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out, + is_big_endian=args.bigendian, use_temp_file=args.use_temp_file, + eager=args.no_lazy, metadata=metadata, split_max_tensors=args.split_max_tensors, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, small_first_shard=args.no_tensor_first_split) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index bde27a9375ba3..3eeb8ad3dd188 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -25,6 +25,7 @@ from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, IO, Iterable, Literal, TypeVar +from _collections_abc import dict_items import numpy as np @@ -773,7 +774,7 @@ def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian. 
def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None: # Metadata About The Model And Its Provenence name = "LLaMA" - if metadata.name is not None: + if metadata is not None and metadata.name is not None: name = metadata.name elif params.path_model is not None: name = params.path_model.name @@ -783,52 +784,52 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_name(name) - if metadata.author is not None: + if metadata is not None and metadata.author is not None: self.gguf.add_author(metadata.author) - if metadata.version is not None: + if metadata is not None and metadata.version is not None: self.gguf.add_version(metadata.version) - if metadata.organization is not None: + if metadata is not None and metadata.organization is not None: self.gguf.add_organization(metadata.organization) - if metadata.finetune is not None: + if metadata is not None and metadata.finetune is not None: self.gguf.add_finetune(metadata.finetune) - if metadata.basename is not None: + if metadata is not None and metadata.basename is not None: self.gguf.add_basename(metadata.basename) - if metadata.description is not None: + if metadata is not None and metadata.description is not None: self.gguf.add_description(metadata.description) - if metadata.quantized_by is not None: + if metadata is not None and metadata.quantized_by is not None: self.gguf.add_quantized_by(metadata.quantized_by) - if metadata.parameter_class_attribute is not None: + if metadata is not None and metadata.parameter_class_attribute is not None: self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) - if metadata.license is not None: + if metadata is not None and metadata.license is not None: self.gguf.add_license(metadata.license) - if metadata.license_name is not None: + if metadata is not None and metadata.license_name is not None: self.gguf.add_license_name(metadata.license_name) - if metadata.license_link is not None: + if metadata is not None and metadata.license_link is not None: self.gguf.add_license_link(metadata.license_link) - if metadata.url is not None: + if metadata is not None and metadata.url is not None: self.gguf.add_url(metadata.url) - if metadata.doi is not None: + if metadata is not None and metadata.doi is not None: self.gguf.add_doi(metadata.doi) - if metadata.uuid is not None: + if metadata is not None and metadata.uuid is not None: self.gguf.add_uuid(metadata.uuid) - if metadata.repo_url is not None: + if metadata is not None and metadata.repo_url is not None: self.gguf.add_repo_url(metadata.repo_url) - if metadata.source_url is not None: + if metadata is not None and metadata.source_url is not None: self.gguf.add_source_url(metadata.source_url) - if metadata.source_doi is not None: + if metadata is not None and metadata.source_doi is not None: self.gguf.add_source_doi(metadata.source_doi) - if metadata.source_uuid is not None: + if metadata is not None and metadata.source_uuid is not None: self.gguf.add_source_uuid(metadata.source_uuid) - if metadata.source_repo_url is not None: + if metadata is not None and metadata.source_repo_url is not None: self.gguf.add_source_repo_url(metadata.source_repo_url) - if metadata.base_models is not None: + if metadata is not None and metadata.base_models is not None: self.gguf.add_base_model_count(len(metadata.base_models)) for key, base_model_entry in enumerate(metadata.base_models): if "name" in base_model_entry: @@ -848,11 +849,11 @@ def add_meta_model(self, params: Params, metadata: 
gguf.Metadata | None) -> None if "repo_url" in base_model_entry: self.gguf.add_base_model_repo_url(key, base_model_entry["repo_url"]) - if metadata.tags is not None: + if metadata is not None and metadata.tags is not None: self.gguf.add_tags(metadata.tags) - if metadata.languages is not None: + if metadata is not None and metadata.languages is not None: self.gguf.add_languages(metadata.languages) - if metadata.datasets is not None: + if metadata is not None and metadata.datasets is not None: self.gguf.add_datasets(metadata.datasets) def add_meta_arch(self, params: Params) -> None: @@ -1041,16 +1042,16 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT raise ValueError(f"Unexpected combination of types: {name_to_type}") -def per_model_weight_count_estimation(tensors: dict[str, LazyTensor], expert_count:int) -> int: +def per_model_weight_count_estimation(tensors: dict_items[str, LazyTensor], expert_count:int | None) -> int: # TODO: Ensure parameter count is accurate throughout various model type - sum_weight_estimate = 0 + sum_weight_estimate: int = 0 for name, lazy_tensor in tensors: # We don't need these if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): continue # Got A Tensor - sum_weights_in_tensor = 1 + sum_weights_in_tensor: int = 1 # Tensor Volume for dim in lazy_tensor.shape: @@ -1059,10 +1060,14 @@ def per_model_weight_count_estimation(tensors: dict[str, LazyTensor], expert_cou # Add Tensor Volume To Running Count sum_weight_estimate += sum_weights_in_tensor - # Calculate weight estimate per model - per_model_weight_estimate = (sum_weight_estimate / expert_count) if expert_count is not None and (expert_count > 0) else sum_weight_estimate + if expert_count is None: + return sum_weight_estimate + + if expert_count is not None and expert_count == 0: + return sum_weight_estimate - return per_model_weight_estimate + # Calculate weight estimate per model + return int(sum_weight_estimate / expert_count) def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel: @@ -1244,7 +1249,7 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> return vocab, special_vocab -def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> str: +def default_convention_outfile(file_type: GGMLFileType, expert_count:int | None, model_params_count: int, metadata: gguf.Metadata) -> str: name = metadata.name if metadata.name is not None else None basename = metadata.basename if metadata.basename is not None else None finetune = metadata.finetune if metadata.finetune is not None else None @@ -1260,7 +1265,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int, model_ return gguf.naming_convention(name, basename, finetune, version, parameter_class_attribute, output_type) -def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int, model_params_count: int, metadata: gguf.Metadata) -> Path: +def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int | None, model_params_count: int, metadata: gguf.Metadata) -> Path: default_filename = default_convention_outfile(file_type, expert_count, model_params_count, metadata) ret = model_paths[0].parent / f"{default_filename}.gguf" if ret in model_paths: @@ -1326,7 +1331,7 @@ def main(args_in: list[str] | None = None) -> None: model_params_count = per_model_weight_count_estimation(model_plus.model.items(), 
params.n_experts) ftype = pick_output_type(model, args.outtype) - if metadata.name is None: + if (metadata is None or metadata.name is None) and params.path_model is not None: metadata.name = params.path_model.name print(f"{default_convention_outfile(ftype, params.n_experts, model_params_count, metadata)}") # noqa: NP100 @@ -1407,7 +1412,7 @@ def main(args_in: list[str] | None = None) -> None: assert params is not None - if metadata.name is None: + if metadata.name is None and params.path_model is not None: metadata.name = params.path_model.name model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index f078110da849b..4322e7b9f8140 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -369,7 +369,7 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None: self.state = WriterState.WEIGHTS - def generate_tensors_uuid(self) -> None: + def generate_tensors_uuid(self) -> str: uuidv5_sha1 = hashlib.sha1() uuidv5_sha1.update(uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5').bytes) @@ -520,28 +520,28 @@ def add_base_model_count(self, source_count: int) -> None: self.add_uint32(Keys.General.BASE_MODEL_COUNT, source_count) def add_base_model_name(self, source_id: int, name: str) -> None: - self.add_string(Keys.General.BASE_MODEL_NAME.format(id=self.source_id), name) + self.add_string(Keys.General.BASE_MODEL_NAME.format(id=source_id), name) def add_base_model_author(self, source_id: int, author: str) -> None: - self.add_string(Keys.General.BASE_MODEL_AUTHOR.format(id=self.source_id), author) + self.add_string(Keys.General.BASE_MODEL_AUTHOR.format(id=source_id), author) def add_base_model_version(self, source_id: int, version: str) -> None: - self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=self.source_id), version) + self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version) def add_base_model_organization(self, source_id: int, organization: str) -> None: - self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=self.source_id), organization) + self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization) def add_base_model_url(self, source_id: int, url: str) -> None: - self.add_string(Keys.General.BASE_MODEL_URL.format(id=self.source_id), url) + self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url) def add_base_model_doi(self, source_id: int, doi: str) -> None: - self.add_string(Keys.General.BASE_MODEL_DOI.format(id=self.source_id), doi) + self.add_string(Keys.General.BASE_MODEL_DOI.format(id=source_id), doi) def add_base_model_uuid(self, source_id: int, uuid: str) -> None: - self.add_string(Keys.General.BASE_MODEL_UUID.format(id=self.source_id), uuid) + self.add_string(Keys.General.BASE_MODEL_UUID.format(id=source_id), uuid) def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None: - self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=self.source_id), repo_url) + self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url) def add_tags(self, tags: Sequence[str]) -> None: self.add_array(Keys.General.TAGS, tags) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index d81807ae3fd5f..4425970eb1f97 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -4,7 +4,7 @@ import json import frontmatter from pathlib import Path -from typing import Optional +from typing import Optional, cast from dataclasses 
import dataclass from .constants import Keys @@ -59,38 +59,38 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.name = metadata_override.get(Keys.General.NAME , metadata.name ) # noqa: E202 - metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 - metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 - metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 + metadata.author = cast(Optional[str], metadata_override.get(Keys.General.AUTHOR , metadata.author )) # noqa: E202 + metadata.version = cast(Optional[str], metadata_override.get(Keys.General.VERSION , metadata.version )) # noqa: E202 + metadata.organization = cast(Optional[str], metadata_override.get(Keys.General.ORGANIZATION , metadata.organization )) # noqa: E202 - metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 - metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 + metadata.finetune = cast(Optional[str], metadata_override.get(Keys.General.FINETUNE , metadata.finetune )) # noqa: E202 + metadata.basename = cast(Optional[str], metadata_override.get(Keys.General.BASENAME , metadata.basename )) # noqa: E202 - metadata.description = metadata_override.get(Keys.General.DESCRIPTION , metadata.description ) # noqa: E202 - metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by ) # noqa: E202 + metadata.description = cast(Optional[str], metadata_override.get(Keys.General.DESCRIPTION , metadata.description )) # noqa: E202 + metadata.quantized_by = cast(Optional[str], metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by )) # noqa: E202 - metadata.parameter_class_attribute = metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute) # noqa: E202 + metadata.parameter_class_attribute = cast(Optional[str], metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute)) # noqa: E202 - metadata.license = metadata_override.get(Keys.General.LICENSE , metadata.license ) # noqa: E202 - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name ) # noqa: E202 - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link ) # noqa: E202 + metadata.license = cast(Optional[str], metadata_override.get(Keys.General.LICENSE , metadata.license )) # noqa: E202 + metadata.license_name = cast(Optional[str], metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name )) # noqa: E202 + metadata.license_link = cast(Optional[str], metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link )) # noqa: E202 - metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 - metadata.doi = metadata_override.get(Keys.General.DOI , metadata.doi ) # noqa: E202 - metadata.uuid = metadata_override.get(Keys.General.UUID , metadata.uuid ) # noqa: E202 - metadata.repo_url = metadata_override.get(Keys.General.REPO_URL , metadata.repo_url ) # noqa: E202 + metadata.url = cast(Optional[str], metadata_override.get(Keys.General.URL , metadata.url )) # noqa: E202 + metadata.doi = cast(Optional[str], metadata_override.get(Keys.General.DOI , 
metadata.doi )) # noqa: E202 + metadata.uuid = cast(Optional[str], metadata_override.get(Keys.General.UUID , metadata.uuid )) # noqa: E202 + metadata.repo_url = cast(Optional[str], metadata_override.get(Keys.General.REPO_URL , metadata.repo_url )) # noqa: E202 - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url ) # noqa: E202 - metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI , metadata.source_doi ) # noqa: E202 - metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID , metadata.source_uuid ) # noqa: E202 - metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL , metadata.source_repo_url ) # noqa: E202 + metadata.source_url = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url )) # noqa: E202 + metadata.source_doi = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_DOI , metadata.source_doi )) # noqa: E202 + metadata.source_uuid = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_UUID , metadata.source_uuid )) # noqa: E202 + metadata.source_repo_url = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_REPO_URL , metadata.source_repo_url )) # noqa: E202 - metadata.base_models = metadata_override.get("general.base_models" , metadata.base_models ) # noqa: E202 + # Base Models is received here as an array of models + metadata.base_models = cast("Optional[list[dict]]", metadata_override.get("general.base_models" , metadata.base_models )) # noqa: E202 - metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 - metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 - metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 + metadata.tags = cast("Optional[list[str]]", metadata_override.get(Keys.General.TAGS , metadata.tags )) # noqa: E202 + metadata.languages = cast("Optional[list[str]]", metadata_override.get(Keys.General.LANGUAGES , metadata.languages )) # noqa: E202 + metadata.datasets = cast("Optional[list[str]]", metadata_override.get(Keys.General.DATASETS , metadata.datasets )) # noqa: E202 # Direct Metadata Override (via direct cli argument) if model_name is not None: @@ -117,7 +117,7 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, object]: return {} with open(model_card_path, "r", encoding="utf-8") as f: - return frontmatter.load(f) + return cast("dict[str, object]", frontmatter.load(f)) @staticmethod def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, object]: @@ -138,7 +138,7 @@ def id_to_title(string): return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()]) @staticmethod - def get_model_id_components(model_id: Optional[str] = None) -> dict[str, object]: + def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]: # Huggingface often store model id as '/' # so let's parse it and apply some heuristics if possible for model name components @@ -334,6 +334,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No return metadata def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter): + assert self.name is not None gguf_writer.add_name(self.name) if self.author is not None: diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 
530bdaabef4fc..3743d52f0348c 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -34,7 +34,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" -def parameter_class_attribute(expert_count_int:int, model_params_count: int) -> str: +def parameter_class_attribute(expert_count_int:int | None, model_params_count: int) -> str: per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) if expert_count_int is not None and expert_count_int > 0: @@ -45,7 +45,7 @@ def parameter_class_attribute(expert_count_int:int, model_params_count: int) -> return size_class -def naming_convention(model_name: str, base_name: str, finetune_string:str, version_string:str, parameter_class_attribute: str, output_type: str) -> str: +def naming_convention(model_name: str | None, base_name: str | None, finetune_string:str | None, version_string:str | None, parameter_class_attribute: str | None, output_type: str | None) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: @@ -61,6 +61,6 @@ def naming_convention(model_name: str, base_name: str, finetune_string:str, vers version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" - precision = f"-{output_type.strip().replace(' ', '-').upper()}" + precision = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else "" return f"{name}{parameters}{finetune}{version}{precision}" From 74383ba6d24ff6b2b5062ff93066b87cf0bf4c6f Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 11 Jul 2024 21:10:51 +1000 Subject: [PATCH 34/65] Apply suggestions from code review Co-authored-by: compilade --- convert_hf_to_gguf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index c6b50574f4f07..5629888d3187f 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -72,7 +72,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") if metadata is None: - raise TypeError("authorship metadata must be provided") + metadata = gguf.Metadata() self.dir_model = dir_model self.ftype = ftype @@ -101,9 +101,9 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_BF16 - # Fallback to model architecture name if metadata name is still missing + # Fallback to model directory name if metadata name is still missing if self.metadata.name is None: - self.metadata.name = gguf.MODEL_ARCH_NAMES[self.model_arch] + self.metadata.name = dir_model.name # Generate parameter weight class (useful for leader boards) if not yet determined if self.metadata.parameter_class_attribute is None: From 4c91d077d2b219bcda75e3a9db6d84ee231a98c5 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Thu, 11 Jul 2024 20:39:10 +1000 Subject: [PATCH 35/65] convert-*.py: cast not required if Metadata.load_metadata_override returned a dict[str, Any] instead of a dict[str, object] Co-authored-by: compilade --- gguf-py/gguf/metadata.py | 50 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 4425970eb1f97..4872247d7f4a2 100644 --- 
a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -4,7 +4,7 @@ import json import frontmatter from pathlib import Path -from typing import Optional, cast +from typing import Any, Optional from dataclasses import dataclass from .constants import Keys @@ -59,38 +59,36 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.author = cast(Optional[str], metadata_override.get(Keys.General.AUTHOR , metadata.author )) # noqa: E202 - metadata.version = cast(Optional[str], metadata_override.get(Keys.General.VERSION , metadata.version )) # noqa: E202 - metadata.organization = cast(Optional[str], metadata_override.get(Keys.General.ORGANIZATION , metadata.organization )) # noqa: E202 + metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author) + metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version) + metadata.organization = metadata_override.get(Keys.General.ORGANIZATION, metadata.organization) - metadata.finetune = cast(Optional[str], metadata_override.get(Keys.General.FINETUNE , metadata.finetune )) # noqa: E202 - metadata.basename = cast(Optional[str], metadata_override.get(Keys.General.BASENAME , metadata.basename )) # noqa: E202 + metadata.finetune = metadata_override.get(Keys.General.FINETUNE, metadata.finetune) + metadata.basename = metadata_override.get(Keys.General.BASENAME, metadata.basename) - metadata.description = cast(Optional[str], metadata_override.get(Keys.General.DESCRIPTION , metadata.description )) # noqa: E202 - metadata.quantized_by = cast(Optional[str], metadata_override.get(Keys.General.QUANTIZED_BY , metadata.quantized_by )) # noqa: E202 + metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description) + metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by) - metadata.parameter_class_attribute = cast(Optional[str], metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute)) # noqa: E202 + metadata.parameter_class_attribute = metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute) + metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name) + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link) - metadata.license = cast(Optional[str], metadata_override.get(Keys.General.LICENSE , metadata.license )) # noqa: E202 - metadata.license_name = cast(Optional[str], metadata_override.get(Keys.General.LICENSE_NAME , metadata.license_name )) # noqa: E202 - metadata.license_link = cast(Optional[str], metadata_override.get(Keys.General.LICENSE_LINK , metadata.license_link )) # noqa: E202 + metadata.url = metadata_override.get(Keys.General.URL, metadata.url) + metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi) + metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid) + metadata.repo_url = metadata_override.get(Keys.General.REPO_URL, metadata.repo_url) - metadata.url = cast(Optional[str], metadata_override.get(Keys.General.URL , metadata.url )) # noqa: E202 - metadata.doi = cast(Optional[str], metadata_override.get(Keys.General.DOI , metadata.doi )) # noqa: E202 - metadata.uuid = cast(Optional[str], metadata_override.get(Keys.General.UUID , metadata.uuid )) # noqa: E202 - metadata.repo_url = cast(Optional[str], 
metadata_override.get(Keys.General.REPO_URL , metadata.repo_url )) # noqa: E202 - - metadata.source_url = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_URL , metadata.source_url )) # noqa: E202 - metadata.source_doi = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_DOI , metadata.source_doi )) # noqa: E202 - metadata.source_uuid = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_UUID , metadata.source_uuid )) # noqa: E202 - metadata.source_repo_url = cast(Optional[str], metadata_override.get(Keys.General.SOURCE_REPO_URL , metadata.source_repo_url )) # noqa: E202 + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL, metadata.source_url) + metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI, metadata.source_doi) + metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID, metadata.source_uuid) + metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL, metadata.source_repo_url) # Base Models is received here as an array of models - metadata.base_models = cast("Optional[list[dict]]", metadata_override.get("general.base_models" , metadata.base_models )) # noqa: E202 + metadata.base_models = metadata_override.get("general.base_models", metadata.base_models) - metadata.tags = cast("Optional[list[str]]", metadata_override.get(Keys.General.TAGS , metadata.tags )) # noqa: E202 - metadata.languages = cast("Optional[list[str]]", metadata_override.get(Keys.General.LANGUAGES , metadata.languages )) # noqa: E202 - metadata.datasets = cast("Optional[list[str]]", metadata_override.get(Keys.General.DATASETS , metadata.datasets )) # noqa: E202 + metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags) + metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages) + metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets) # Direct Metadata Override (via direct cli argument) if model_name is not None: @@ -99,7 +97,7 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat return metadata @staticmethod - def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, object]: + def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]: if metadata_override_path is None or not metadata_override_path.exists(): return {} From 6eb08ac868cb5146ec749b0cf1587deff7a370ae Mon Sep 17 00:00:00 2001 From: brian khuu Date: Thu, 11 Jul 2024 20:42:11 +1000 Subject: [PATCH 36/65] convert-*.py: Removing the redundant metadata is not None from all conditions, and indenting them. 
Co-authored-by: compilade --- examples/convert_legacy_llama.py | 143 ++++++++++++++++--------------- 1 file changed, 72 insertions(+), 71 deletions(-) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 3eeb8ad3dd188..21e63f6b7dd3f 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -784,77 +784,78 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None self.gguf.add_name(name) - if metadata is not None and metadata.author is not None: - self.gguf.add_author(metadata.author) - if metadata is not None and metadata.version is not None: - self.gguf.add_version(metadata.version) - if metadata is not None and metadata.organization is not None: - self.gguf.add_organization(metadata.organization) - - if metadata is not None and metadata.finetune is not None: - self.gguf.add_finetune(metadata.finetune) - if metadata is not None and metadata.basename is not None: - self.gguf.add_basename(metadata.basename) - - if metadata is not None and metadata.description is not None: - self.gguf.add_description(metadata.description) - if metadata is not None and metadata.quantized_by is not None: - self.gguf.add_quantized_by(metadata.quantized_by) - - if metadata is not None and metadata.parameter_class_attribute is not None: - self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) - - if metadata is not None and metadata.license is not None: - self.gguf.add_license(metadata.license) - if metadata is not None and metadata.license_name is not None: - self.gguf.add_license_name(metadata.license_name) - if metadata is not None and metadata.license_link is not None: - self.gguf.add_license_link(metadata.license_link) - - if metadata is not None and metadata.url is not None: - self.gguf.add_url(metadata.url) - if metadata is not None and metadata.doi is not None: - self.gguf.add_doi(metadata.doi) - if metadata is not None and metadata.uuid is not None: - self.gguf.add_uuid(metadata.uuid) - if metadata is not None and metadata.repo_url is not None: - self.gguf.add_repo_url(metadata.repo_url) - - if metadata is not None and metadata.source_url is not None: - self.gguf.add_source_url(metadata.source_url) - if metadata is not None and metadata.source_doi is not None: - self.gguf.add_source_doi(metadata.source_doi) - if metadata is not None and metadata.source_uuid is not None: - self.gguf.add_source_uuid(metadata.source_uuid) - if metadata is not None and metadata.source_repo_url is not None: - self.gguf.add_source_repo_url(metadata.source_repo_url) - - if metadata is not None and metadata.base_models is not None: - self.gguf.add_base_model_count(len(metadata.base_models)) - for key, base_model_entry in enumerate(metadata.base_models): - if "name" in base_model_entry: - self.gguf.add_base_model_name(key, base_model_entry["name"]) - if "author" in base_model_entry: - self.gguf.add_base_model_author(key, base_model_entry["author"]) - if "version" in base_model_entry: - self.gguf.add_base_model_version(key, base_model_entry["version"]) - if "organization" in base_model_entry: - self.gguf.add_base_model_organization(key, base_model_entry["organization"]) - if "url" in base_model_entry: - self.gguf.add_base_model_url(key, base_model_entry["url"]) - if "doi" in base_model_entry: - self.gguf.add_base_model_doi(key, base_model_entry["doi"]) - if "uuid" in base_model_entry: - self.gguf.add_base_model_uuid(key, base_model_entry["uuid"]) - if "repo_url" in base_model_entry: - 
self.gguf.add_base_model_repo_url(key, base_model_entry["repo_url"]) - - if metadata is not None and metadata.tags is not None: - self.gguf.add_tags(metadata.tags) - if metadata is not None and metadata.languages is not None: - self.gguf.add_languages(metadata.languages) - if metadata is not None and metadata.datasets is not None: - self.gguf.add_datasets(metadata.datasets) + if metadata is not None: + if metadata.author is not None: + self.gguf.add_author(metadata.author) + if metadata.version is not None: + self.gguf.add_version(metadata.version) + if metadata.organization is not None: + self.gguf.add_organization(metadata.organization) + + if metadata.finetune is not None: + self.gguf.add_finetune(metadata.finetune) + if metadata.basename is not None: + self.gguf.add_basename(metadata.basename) + + if metadata.description is not None: + self.gguf.add_description(metadata.description) + if metadata.quantized_by is not None: + self.gguf.add_quantized_by(metadata.quantized_by) + + if metadata.parameter_class_attribute is not None: + self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) + + if metadata.license is not None: + self.gguf.add_license(metadata.license) + if metadata.license_name is not None: + self.gguf.add_license_name(metadata.license_name) + if metadata.license_link is not None: + self.gguf.add_license_link(metadata.license_link) + + if metadata.url is not None: + self.gguf.add_url(metadata.url) + if metadata.doi is not None: + self.gguf.add_doi(metadata.doi) + if metadata.uuid is not None: + self.gguf.add_uuid(metadata.uuid) + if metadata.repo_url is not None: + self.gguf.add_repo_url(metadata.repo_url) + + if metadata.source_url is not None: + self.gguf.add_source_url(metadata.source_url) + if metadata.source_doi is not None: + self.gguf.add_source_doi(metadata.source_doi) + if metadata.source_uuid is not None: + self.gguf.add_source_uuid(metadata.source_uuid) + if metadata.source_repo_url is not None: + self.gguf.add_source_repo_url(metadata.source_repo_url) + + if metadata.base_models is not None: + self.gguf.add_base_model_count(len(metadata.base_models)) + for key, base_model_entry in enumerate(metadata.base_models): + if "name" in base_model_entry: + self.gguf.add_base_model_name(key, base_model_entry["name"]) + if "author" in base_model_entry: + self.gguf.add_base_model_author(key, base_model_entry["author"]) + if "version" in base_model_entry: + self.gguf.add_base_model_version(key, base_model_entry["version"]) + if "organization" in base_model_entry: + self.gguf.add_base_model_organization(key, base_model_entry["organization"]) + if "url" in base_model_entry: + self.gguf.add_base_model_url(key, base_model_entry["url"]) + if "doi" in base_model_entry: + self.gguf.add_base_model_doi(key, base_model_entry["doi"]) + if "uuid" in base_model_entry: + self.gguf.add_base_model_uuid(key, base_model_entry["uuid"]) + if "repo_url" in base_model_entry: + self.gguf.add_base_model_repo_url(key, base_model_entry["repo_url"]) + + if metadata.tags is not None: + self.gguf.add_tags(metadata.tags) + if metadata.languages is not None: + self.gguf.add_languages(metadata.languages) + if metadata.datasets is not None: + self.gguf.add_datasets(metadata.datasets) def add_meta_arch(self, params: Params) -> None: # Metadata About The Neural Architecture Itself From f8b5931180f0b47500cb28bb673fb3a3e8f5bc9c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Thu, 11 Jul 2024 21:01:52 +1000 Subject: [PATCH 37/65] convert-*.py: parameter_class_attribute --> size_label --- 
convert_hf_to_gguf.py | 6 +++--- examples/convert_legacy_llama.py | 10 +++++----- gguf-py/gguf/constants.py | 2 +- gguf-py/gguf/gguf_writer.py | 4 ++-- gguf-py/gguf/metadata.py | 28 ++++++++++++++-------------- gguf-py/gguf/utility.py | 6 +++--- gguf-py/tests/test_metadata.py | 6 +++--- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 5629888d3187f..a7dc8eedc733a 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -106,20 +106,20 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | self.metadata.name = dir_model.name # Generate parameter weight class (useful for leader boards) if not yet determined - if self.metadata.parameter_class_attribute is None: + if self.metadata.size_label is None: expert_count = self.hparams.get("num_local_experts", 0) sum_weight_estimate = self.calculate_total_weight_count() # Calculate weight estimate per model per_model_weight_estimate: int = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate - self.metadata.parameter_class_attribute = gguf.parameter_class_attribute(expert_count, per_model_weight_estimate) + self.metadata.size_label = gguf.size_label(expert_count, per_model_weight_estimate) # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' output_type = self.ftype.name.partition("_")[2] # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.parameter_class_attribute, output_type) + self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type) # Filename Output if fname_out is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 21e63f6b7dd3f..4203a689af631 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -802,8 +802,8 @@ def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None if metadata.quantized_by is not None: self.gguf.add_quantized_by(metadata.quantized_by) - if metadata.parameter_class_attribute is not None: - self.gguf.add_parameter_class_attribute(metadata.parameter_class_attribute) + if metadata.size_label is not None: + self.gguf.add_size_label(metadata.size_label) if metadata.license is not None: self.gguf.add_license(metadata.license) @@ -1255,7 +1255,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int | None, basename = metadata.basename if metadata.basename is not None else None finetune = metadata.finetune if metadata.finetune is not None else None version = metadata.version if metadata.version is not None else None - parameter_class_attribute = metadata.parameter_class_attribute if metadata.parameter_class_attribute is not None else gguf.parameter_class_attribute(expert_count, model_params_count) + size_label = metadata.size_label if metadata.size_label is not None else gguf.size_label(expert_count, model_params_count) output_type = { GGMLFileType.AllF32: "F32", @@ -1263,7 +1263,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int | None, GGMLFileType.MostlyQ8_0: "Q8_0", }[file_type] - return gguf.naming_convention(name, basename, finetune, version, parameter_class_attribute, output_type) + return 
gguf.naming_convention(name, basename, finetune, version, size_label, output_type) def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int | None, model_params_count: int, metadata: gguf.Metadata) -> Path: @@ -1427,7 +1427,7 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata) - metadata.parameter_class_attribute = gguf.parameter_class_attribute(params.n_experts, model_params_count) + metadata.size_label = gguf.size_label(params.n_experts, model_params_count) params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 35395381b911d..0bed540e1bae7 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -37,7 +37,7 @@ class General: DESCRIPTION = "general.description" QUANTIZED_BY = "general.quantized_by" - PARAMETER_CLASS_ATTRIBUTE = "general.parameter_class_attribute" + SIZE_LABEL = "general.size_label" # Licensing details LICENSE = "general.license" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 4322e7b9f8140..8b1c019d2835a 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -480,8 +480,8 @@ def add_description(self, description: str) -> None: def add_quantized_by(self, quantized: str) -> None: self.add_string(Keys.General.QUANTIZED_BY, quantized) - def add_parameter_class_attribute(self, parameter_class_attribute: str) -> None: - self.add_string(Keys.General.PARAMETER_CLASS_ATTRIBUTE, parameter_class_attribute) + def add_size_label(self, size_label: str) -> None: + self.add_string(Keys.General.SIZE_LABEL, size_label) def add_license(self, license: str) -> None: self.add_string(Keys.General.LICENSE, license) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 4872247d7f4a2..e65a72edd5f57 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -23,7 +23,7 @@ class Metadata: basename: Optional[str] = None description: Optional[str] = None quantized_by: Optional[str] = None - parameter_class_attribute: Optional[str] = None + size_label: Optional[str] = None url: Optional[str] = None doi: Optional[str] = None uuid: Optional[str] = None @@ -69,7 +69,7 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description) metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by) - metadata.parameter_class_attribute = metadata_override.get(Keys.General.PARAMETER_CLASS_ATTRIBUTE, metadata.parameter_class_attribute) + metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label) metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name) metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link) @@ -164,7 +164,7 @@ def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, # Regular expression to extract model name components # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1' regex_match = re.compile(r'^(?P[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))' - r'(?:-(?P(?:\d+x)?\d+[A-Za-z]+)(?:-(?P[A-Za-z0-9\s-]+))?)?' + r'(?:-(?P(?:\d+x)?\d+[A-Za-z]+)(?:-(?P[A-Za-z0-9\s-]+))?)?' 
r'(?:-(?Pv\d+(?:\.\d+)*))?$').match(model_full_name_component) if not regex_match: @@ -174,9 +174,9 @@ def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, basename = components.get("basename") finetune = components.get("finetune") version = components.get("version") - parameter_class_attribute = components.get("parameter_class_attribute") + size_label = components.get("size_label") - return model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute + return model_full_name_component, org_component, basename, finetune, version, size_label @staticmethod def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None) -> Metadata: @@ -218,7 +218,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.base_models = [] for model_id in metadata_base_models: - model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id) base_model = {} if model_full_name_component is not None: base_model["name"] = Metadata.id_to_title(model_full_name_component) @@ -297,7 +297,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No # Use _name_or_path only if its actually a model name and not some computer path # e.g. 'meta-llama/Llama-2-7b-hf' model_id = hf_name_or_path - model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id) if metadata.name is None and model_full_name_component is not None: metadata.name = Metadata.id_to_title(model_full_name_component) if metadata.organization is None and org_component is not None: @@ -308,14 +308,14 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.finetune = finetune if metadata.version is None and version is not None: metadata.version = version - if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: - metadata.parameter_class_attribute = parameter_class_attribute + if metadata.size_label is None and size_label is not None: + metadata.size_label = size_label # Directory Folder Name Fallback Heuristics ############################################ if model_path is not None: model_id = model_path.name - model_full_name_component, org_component, basename, finetune, version, parameter_class_attribute = Metadata.get_model_id_components(model_id) + model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id) if metadata.name is None and model_full_name_component is not None: metadata.name = Metadata.id_to_title(model_full_name_component) if metadata.organization is None and org_component is not None: @@ -326,8 +326,8 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.finetune = finetune if metadata.version is None and version is not None: metadata.version = version - if metadata.parameter_class_attribute is None and parameter_class_attribute is not None: - metadata.parameter_class_attribute = parameter_class_attribute + if metadata.size_label is None and 
size_label is not None: + metadata.size_label = size_label return metadata @@ -352,8 +352,8 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter): if self.quantized_by is not None: gguf_writer.add_quantized_by(self.quantized_by) - if self.parameter_class_attribute is not None: - gguf_writer.add_parameter_class_attribute(self.parameter_class_attribute) + if self.size_label is not None: + gguf_writer.add_size_label(self.size_label) if self.license is not None: gguf_writer.add_license(self.license) diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 3743d52f0348c..88502e1808eec 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -34,7 +34,7 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: return f"{round(scaled_model_params)}{scale_suffix}" -def parameter_class_attribute(expert_count_int:int | None, model_params_count: int) -> str: +def size_label(expert_count_int:int | None, model_params_count: int) -> str: per_model_rounded_weight_estimate = model_weight_count_rounded_notation(model_params_count) if expert_count_int is not None and expert_count_int > 0: @@ -45,7 +45,7 @@ def parameter_class_attribute(expert_count_int:int | None, model_params_count: i return size_class -def naming_convention(model_name: str | None, base_name: str | None, finetune_string:str | None, version_string:str | None, parameter_class_attribute: str | None, output_type: str | None) -> str: +def naming_convention(model_name: str | None, base_name: str | None, finetune_string:str | None, version_string:str | None, size_label: str | None, output_type: str | None) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: @@ -55,7 +55,7 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st else: name = "ggml-model" - parameters = f"-{parameter_class_attribute}" if parameter_class_attribute is not None else "" + parameters = f"-{size_label}" if size_label is not None else "" finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else "" diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 1924f3b3a8fe7..6f93adc0d96e4 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -42,17 +42,17 @@ def test_apply_metadata_heuristic_from_model_card(self): 'base_model': ["EmbeddedLLM/Mistral-7B-Merge-14-v0", "janai-hq/trinity-v1"] } got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) - expect = gguf.Metadata(name=None, author=None, version=None, organization=None, finetune=None, basename=None, description=None, quantized_by=None, parameter_class_attribute=None, url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}], tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) + expect = gguf.Metadata(name=None, author=None, version=None, organization=None, finetune=None, basename=None, description=None, quantized_by=None, size_label=None, url=None, doi=None, uuid=None, 
repo_url=None, license=None, license_name=None, license_link=None, base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}], tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) self.assertEqual(got, expect) def test_apply_metadata_heuristic_from_hf_parameters(self): hf_params = {"_name_or_path": "./hermes-2-pro-llama-3-8b-DPO"} got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, hf_params, None) - expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, parameter_class_attribute='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) + expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, size_label='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) self.assertEqual(got, expect) def test_apply_metadata_heuristic_from_model_dir(self): model_dir_path = Path("./hermes-2-pro-llama-3-8b-DPO") got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, None, model_dir_path) - expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, parameter_class_attribute='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) + expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, size_label='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) self.assertEqual(got, expect) From 64707b625cd06a5240be8318b40ba4a9a986ea8c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Thu, 11 Jul 2024 21:11:16 +1000 Subject: [PATCH 38/65] convert-*.py: remove redundant gguf_writer.add_name() calls --- convert_hf_to_gguf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index a7dc8eedc733a..834391a665586 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3148,7 +3148,6 @@ def set_vocab(self): self.gguf_writer.add_add_eos_token(True) def set_gguf_parameters(self): - self.gguf_writer.add_name("T5") if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None: logger.warning("Couldn't find context length in config.json, assuming default value of 512") n_ctx = 512 @@ -3222,7 +3221,6 @@ def set_vocab(self): self._set_vocab_gpt2() def set_gguf_parameters(self): - self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_block_count(self.hparams["n_layer"]) 
self.gguf_writer.add_context_length(self.hparams["n_positions"]) self.gguf_writer.add_embedding_length(self.hparams["n_embd"]) From 04c4fffdcc167ea25ffe2cd674881284187df65a Mon Sep 17 00:00:00 2001 From: brian khuu Date: Thu, 11 Jul 2024 21:14:04 +1000 Subject: [PATCH 39/65] convert-*.py: prepare_tensors_for_writing() --> prepare_tensors() > Especially since it can be used for other purposes than "for writing", like preparing the tensors to then count and sum all their sizes. Co-authored-by: compilade --- convert_hf_to_gguf.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 834391a665586..f25fc287defc3 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -293,7 +293,7 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i return False - def prepare_tensors_for_writing(self): + def prepare_tensors(self): max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,") for name, data_torch in self.get_tensors(): @@ -398,7 +398,7 @@ def prepare_key_value_store(self): self.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION) def write(self): - self.prepare_tensors_for_writing() + self.prepare_tensors() self.prepare_key_value_store() self.gguf_writer.write_header_to_file() self.gguf_writer.write_kv_data_to_file() @@ -411,7 +411,7 @@ def write_vocab(self): if self.metadata.uuid is None: # Required tensor data least for uuid generation if in vocab_only mode - self.prepare_tensors_for_writing() + self.prepare_tensors() self.prepare_key_value_store() self.gguf_writer.write_header_to_file() @@ -1441,8 +1441,8 @@ def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_ return [(new_name, data_torch)] - def prepare_tensors_for_writing(self): - super().prepare_tensors_for_writing() + def prepare_tensors(self): + super().prepare_tensors() if self._q_norms is not None or self._k_norms is not None: # flatten two `list[dict[str, Tensor]]` into a single `list[str]` @@ -1558,8 +1558,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def prepare_tensors_for_writing(self): - super().prepare_tensors_for_writing() + def prepare_tensors(self): + super().prepare_tensors() if self._experts is not None: # flatten `list[dict[str, Tensor]]` into `list[str]` @@ -1882,8 +1882,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def prepare_tensors_for_writing(self): - super().prepare_tensors_for_writing() + def prepare_tensors(self): + super().prepare_tensors() if self._experts is not None: # flatten `list[dict[str, Tensor]]` into `list[str]` @@ -2950,8 +2950,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def prepare_tensors_for_writing(self): - super().prepare_tensors_for_writing() + def prepare_tensors(self): + super().prepare_tensors() if self._experts is not None: # flatten `list[dict[str, Tensor]]` into `list[str]` @@ -3029,8 +3029,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - def prepare_tensors_for_writing(self): - super().prepare_tensors_for_writing() + def prepare_tensors(self): + super().prepare_tensors() if self._experts is not None: # flatten `list[dict[str, 
Tensor]]` into `list[str]` @@ -3266,8 +3266,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return tensors - def prepare_tensors_for_writing(self): - super().prepare_tensors_for_writing() + def prepare_tensors(self): + super().prepare_tensors() self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias) From f2b425c59c15ac4621c670aff0578df844d5619a Mon Sep 17 00:00:00 2001 From: brian khuu Date: Thu, 11 Jul 2024 21:52:53 +1000 Subject: [PATCH 40/65] convert-*.py: import cast from typing and other refactor --- gguf-py/gguf/metadata.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index e65a72edd5f57..d18ab400ed19b 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -4,7 +4,7 @@ import json import frontmatter from pathlib import Path -from typing import Any, Optional +from typing import Any, Optional, cast from dataclasses import dataclass from .constants import Keys @@ -69,7 +69,7 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description) metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by) - metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label) + metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label) metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name) metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link) @@ -105,7 +105,7 @@ def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dic return json.load(f) @staticmethod - def load_model_card(model_path: Optional[Path] = None) -> dict[str, object]: + def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: if model_path is None or not model_path.exists(): return {} @@ -115,10 +115,10 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, object]: return {} with open(model_card_path, "r", encoding="utf-8") as f: - return cast("dict[str, object]", frontmatter.load(f)) + return cast("dict[str, Any]", frontmatter.load(f)) @staticmethod - def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, object]: + def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: if model_path is None or not model_path.exists(): return {} From ad217d7249e80c12258efda29146729ad10003c1 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sat, 13 Jul 2024 19:18:11 +1000 Subject: [PATCH 41/65] convert-*.py: remove autogenerated uuid --- convert_hf_to_gguf.py | 6 ------ gguf-py/gguf/gguf_writer.py | 15 --------------- 2 files changed, 21 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index f25fc287defc3..aad94e413cfe4 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -378,12 +378,6 @@ def prepare_tensors(self): def prepare_key_value_store(self): - # Upon missing model uuid, generate uuid based on tensor content - if self.metadata.uuid is None: - self.metadata.uuid = self.gguf_writer.generate_tensors_uuid() - max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,") - logger.info(f"{f'%-{max_name_len}s' % f'generating general.uuid'} {self.metadata.uuid}") - logger.info("Set meta model") self.metadata.set_gguf_meta_model(self.gguf_writer) diff --git 
a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 8b1c019d2835a..9cb9415d00f37 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -2,8 +2,6 @@ import logging import os -import uuid -import hashlib import shutil import struct import tempfile @@ -369,19 +367,6 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None: self.state = WriterState.WEIGHTS - def generate_tensors_uuid(self) -> str: - uuidv5_sha1 = hashlib.sha1() - uuidv5_sha1.update(uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5').bytes) - - for tensors in self.tensors: - # relying on the fact that Python dicts preserve insertion order (since 3.7) - for name, ti in tensors.items(): - assert ti.tensor is not None - assert ti.tensor.nbytes == ti.nbytes - uuidv5_sha1.update(ti.tensor.tobytes('C')) - - return str(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5)) - def write_tensors_to_file(self, *, progress: bool = False) -> None: self.write_ti_data_to_file() From 60278e4f4d679f5e7cfb664779d52838bc050aa0 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 13 Jul 2024 20:42:55 +1000 Subject: [PATCH 42/65] Update convert_hf_to_gguf.py Co-authored-by: Xuan Son Nguyen --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index aad94e413cfe4..7e44edd29260c 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3624,7 +3624,7 @@ def main() -> None: try: model_class = Model.from_model_architecture(model_architecture) except NotImplementedError: - logger.error(f"Model {hparams['architectures'][0]} is not supported") + logger.error(f"Model {model_architecture} is not supported") sys.exit(1) model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out, From aa4e5892a0ce8bfc3f3f6e945683dba1e3cd2d15 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 13 Jul 2024 20:43:17 +1000 Subject: [PATCH 43/65] Update convert_hf_to_gguf.py Co-authored-by: Xuan Son Nguyen --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7e44edd29260c..9a58caf67f89c 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3551,7 +3551,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument( "--get-outfile", action="store_true", - help="get calculated default outfile name" + help="print calculated output file name then exit" ) return parser.parse_args() From 2c060303a65f7dfc0194f820757e35baddaf8400 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 13 Jul 2024 22:02:09 +1000 Subject: [PATCH 44/65] Update constants.py : spacing correction --- gguf-py/gguf/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 0bed540e1bae7..468861e870cda 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -37,7 +37,7 @@ class General: DESCRIPTION = "general.description" QUANTIZED_BY = "general.quantized_by" - SIZE_LABEL = "general.size_label" + SIZE_LABEL = "general.size_label" # Licensing details LICENSE = "general.license" From 8156835d4a208a76e8bf03ec93d640b4cf2bdaea Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 13 Jul 2024 22:26:32 +1000 Subject: [PATCH 45/65] constants.py : Revert removal of backward compatibility KEY_GENERAL_SOURCE_URL --- gguf-py/gguf/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 468861e870cda..e343c2ef1659a 
100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -1273,6 +1273,7 @@ def get_type(val: Any) -> GGUFValueType: KEY_GENERAL_URL = Keys.General.URL KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION KEY_GENERAL_LICENSE = Keys.General.LICENSE +KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE # LLM From ccff6c7fb212e5d9540cc6d841e92f390c79f118 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sat, 13 Jul 2024 23:21:38 +1000 Subject: [PATCH 46/65] convert-*.py: remove reference to uuid generation --- convert_hf_to_gguf.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 9a58caf67f89c..37b4462d38ed0 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -403,10 +403,6 @@ def write_vocab(self): if len(self.gguf_writer.tensors) != 1: raise ValueError('Splitting the vocabulary is not supported') - if self.metadata.uuid is None: - # Required tensor data least for uuid generation if in vocab_only mode - self.prepare_tensors() - self.prepare_key_value_store() self.gguf_writer.write_header_to_file() self.gguf_writer.write_kv_data_to_file() From 455c0e53aca3a27da8714ea093111f1fec8b5736 Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 14 Jul 2024 10:29:03 +1000 Subject: [PATCH 47/65] Apply suggestions from code review Co-authored-by: compilade --- gguf-py/gguf/metadata.py | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index d18ab400ed19b..30d0719d1eb58 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -59,36 +59,36 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author) - metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version) - metadata.organization = metadata_override.get(Keys.General.ORGANIZATION, metadata.organization) + metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author) + metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version) + metadata.organization = metadata_override.get(Keys.General.ORGANIZATION, metadata.organization) - metadata.finetune = metadata_override.get(Keys.General.FINETUNE, metadata.finetune) - metadata.basename = metadata_override.get(Keys.General.BASENAME, metadata.basename) + metadata.finetune = metadata_override.get(Keys.General.FINETUNE, metadata.finetune) + metadata.basename = metadata_override.get(Keys.General.BASENAME, metadata.basename) - metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description) - metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by) + metadata.description = metadata_override.get(Keys.General.DESCRIPTION, metadata.description) + metadata.quantized_by = metadata_override.get(Keys.General.QUANTIZED_BY, metadata.quantized_by) - metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label) - metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name) - metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link) + metadata.size_label = metadata_override.get(Keys.General.SIZE_LABEL, metadata.size_label) + 
metadata.license_name = metadata_override.get(Keys.General.LICENSE_NAME, metadata.license_name) + metadata.license_link = metadata_override.get(Keys.General.LICENSE_LINK, metadata.license_link) - metadata.url = metadata_override.get(Keys.General.URL, metadata.url) - metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi) - metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid) - metadata.repo_url = metadata_override.get(Keys.General.REPO_URL, metadata.repo_url) + metadata.url = metadata_override.get(Keys.General.URL, metadata.url) + metadata.doi = metadata_override.get(Keys.General.DOI, metadata.doi) + metadata.uuid = metadata_override.get(Keys.General.UUID, metadata.uuid) + metadata.repo_url = metadata_override.get(Keys.General.REPO_URL, metadata.repo_url) - metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL, metadata.source_url) - metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI, metadata.source_doi) - metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID, metadata.source_uuid) - metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL, metadata.source_repo_url) + metadata.source_url = metadata_override.get(Keys.General.SOURCE_URL, metadata.source_url) + metadata.source_doi = metadata_override.get(Keys.General.SOURCE_DOI, metadata.source_doi) + metadata.source_uuid = metadata_override.get(Keys.General.SOURCE_UUID, metadata.source_uuid) + metadata.source_repo_url = metadata_override.get(Keys.General.SOURCE_REPO_URL, metadata.source_repo_url) # Base Models is received here as an array of models - metadata.base_models = metadata_override.get("general.base_models", metadata.base_models) + metadata.base_models = metadata_override.get("general.base_models", metadata.base_models) - metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags) - metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages) - metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets) + metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags) + metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages) + metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets) # Direct Metadata Override (via direct cli argument) if model_name is not None: @@ -115,7 +115,7 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: return {} with open(model_card_path, "r", encoding="utf-8") as f: - return cast("dict[str, Any]", frontmatter.load(f)) + return frontmatter.load(f).to_dict() @staticmethod def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: From 5ab1a84085b42a127e270e8a8b7efbf75350e55d Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 11:24:25 +1000 Subject: [PATCH 48/65] convert-*.py: dict_item --> Iterable --- examples/convert_legacy_llama.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 4203a689af631..3b2727aed73fa 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -25,7 +25,6 @@ from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, IO, Iterable, Literal, TypeVar -from _collections_abc import dict_items import numpy as np @@ -1043,7 +1042,7 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT raise 
ValueError(f"Unexpected combination of types: {name_to_type}") -def per_model_weight_count_estimation(tensors: dict_items[str, LazyTensor], expert_count:int | None) -> int: +def per_model_weight_count_estimation(tensors: Iterable[str, LazyTensor], expert_count:int | None) -> int: # TODO: Ensure parameter count is accurate throughout various model type sum_weight_estimate: int = 0 for name, lazy_tensor in tensors: From 5cdb03b2fc93852d7445b661beb56e6dbe16d232 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 11:24:53 +1000 Subject: [PATCH 49/65] convert-*.py: update nix package to add python frontmatter --- .devops/nix/package.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 911c42ecb16cf..03f57684290d8 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -90,6 +90,7 @@ let ps.tiktoken ps.torchWithoutCuda ps.transformers + ps.python-frontmatter # server bench ps.matplotlib From 9954b64862b7884a0b8be0893f9728335ea9c951 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 12:00:03 +1000 Subject: [PATCH 50/65] convert-*.py: add logger and refactor load_model_card() --- gguf-py/gguf/metadata.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 30d0719d1eb58..e1cb1340a72b0 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -2,15 +2,16 @@ import re import json -import frontmatter +import logging from pathlib import Path -from typing import Any, Optional, cast +from typing import Any, Optional from dataclasses import dataclass from .constants import Keys import gguf +logger = logging.getLogger("metadata") @dataclass class Metadata: @@ -106,16 +107,21 @@ def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dic @staticmethod def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: - if model_path is None or not model_path.exists(): + if model_path is None or not model_path.is_dir(): return {} model_card_path = model_path / "README.md" - if not model_card_path.exists(): + if not model_card_path.is_file(): return {} - with open(model_card_path, "r", encoding="utf-8") as f: - return frontmatter.load(f).to_dict() + try: + import frontmatter + with open(model_card_path, "r", encoding="utf-8") as f: + return frontmatter.load(f).to_dict() + except ModuleNotFoundError: + logger.warning("module 'frontmatter' not available. 
Metadata from README.md will NOT be read.") + return {} @staticmethod def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: From abc351c270656d171d1a61d9a4223beb6bd97758 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 12:00:59 +1000 Subject: [PATCH 51/65] convert-*.py: quantized_by in model card is not relevant for converted gguf --- gguf-py/gguf/metadata.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index e1cb1340a72b0..103e489e2ba10 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -236,10 +236,6 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}" metadata.base_models.append(base_model) - if "quantized_by" in model_card and metadata.quantized_by is None: - # Not part of hugging face model card standard, but is used by TheBloke to credit them self for quantizing 3rd party models - metadata.quantized_by = model_card.get("quantized_by") - if "license" in model_card and metadata.license is None: metadata.license = model_card.get("license") From 144a7ec3a4c7bd98696bfd18609ff5891205bea0 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 12:12:23 +1000 Subject: [PATCH 52/65] convert-*.py: pathlib.Path exist() --> is_file() or is_dir() --- gguf-py/gguf/metadata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 103e489e2ba10..fc7c1cb9e2ba0 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -99,7 +99,7 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat @staticmethod def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]: - if metadata_override_path is None or not metadata_override_path.exists(): + if metadata_override_path is None or not metadata_override_path.is_file(): return {} with open(metadata_override_path, "r", encoding="utf-8") as f: @@ -125,12 +125,12 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: @staticmethod def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: - if model_path is None or not model_path.exists(): + if model_path is None or not model_path.is_dir(): return {} config_path = model_path / "config.json" - if not config_path.exists(): + if not config_path.is_file(): return {} with open(config_path, "r", encoding="utf-8") as f: From 8629b7bdc2d47375462ab0d954421fefc9303a99 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 12:19:23 +1000 Subject: [PATCH 53/65] covert-*.py: per_model_weight_count_estimation() tensor arg type is Iterable[tuple[str, LazyTensor]] --- examples/convert_legacy_llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 3b2727aed73fa..fc8a08cdf5f22 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1042,7 +1042,7 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT raise ValueError(f"Unexpected combination of types: {name_to_type}") -def per_model_weight_count_estimation(tensors: Iterable[str, LazyTensor], expert_count:int | None) -> int: +def per_model_weight_count_estimation(tensors: Iterable[tuple[str, LazyTensor]], expert_count:int | None) -> int: # TODO: Ensure parameter count is accurate 
throughout various model type sum_weight_estimate: int = 0 for name, lazy_tensor in tensors: From 4e3761109d1e10080461858962f3d068a0703eda Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 12:28:55 +1000 Subject: [PATCH 54/65] covert-*.py: flake8 newline missing --- gguf-py/gguf/metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index fc7c1cb9e2ba0..db9e93218a0fd 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -13,6 +13,7 @@ logger = logging.getLogger("metadata") + @dataclass class Metadata: # Authorship Metadata to be written to GGUF KV Store From f98f1098f9a830757717d08c0bc7aacbb4b10ca6 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 16:28:52 +1000 Subject: [PATCH 55/65] convert-*.py: more rigorous regexp for get_model_id_components() --- gguf-py/gguf/metadata.py | 18 +++++++--- gguf-py/tests/test_metadata.py | 61 ++++++++++++++++++++++++++++++---- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index db9e93218a0fd..0e38bf0769d67 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -170,18 +170,28 @@ def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, # Regular expression to extract model name components # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1' - regex_match = re.compile(r'^(?P[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))' - r'(?:-(?P(?:\d+x)?\d+[A-Za-z]+)(?:-(?P[A-Za-z0-9\s-]+))?)?' - r'(?:-(?Pv\d+(?:\.\d+)*))?$').match(model_full_name_component) + regex_match = re.compile(r'^' + r'(?P[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))' + r'(?:-(?P(?:\d+x)?\d+[A-Za-z](?:-[A-Za-z]+(?:\d+x)?\d+[A-Za-z]+)?)(?:-(?P[A-Za-z0-9\s-]+))?)?' + r'(?:-(?Pv\d+(?:\.\d+)*))?' 
+ r'$').match(model_full_name_component) if not regex_match: return model_full_name_component, org_component, None, None, None, None components = regex_match.groupdict() basename = components.get("basename") + size_label = components.get("size_label") finetune = components.get("finetune") version = components.get("version") - size_label = components.get("size_label") + + # Base name required at a minimum + if basename is None: + return model_full_name_component, None, None, None, None, None + + # Need to capture at least one component that is not basename + if size_label is None and version is None and finetune is None: + return model_full_name_component, None, None, None, None, None return model_full_name_component, org_component, basename, finetune, version, size_label diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 6f93adc0d96e4..ba52d68be19e2 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -13,36 +13,83 @@ def test_id_to_title(self): self.assertEqual(gguf.Metadata.id_to_title("hermes-2-pro-llama-3-8b-DPO"), "Hermes 2 Pro Llama 3 8b DPO") def test_get_model_id_components(self): + # This is the basic standard form with organization marker self.assertEqual(gguf.Metadata.get_model_id_components("Mistral/Mixtral-8x7B-Instruct-v0.1"), ('Mixtral-8x7B-Instruct-v0.1', "Mistral", 'Mixtral', 'Instruct', 'v0.1', '8x7B')) + + # Similar to basic standard form but without organization marker self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B-Instruct-v0.1"), ('Mixtral-8x7B-Instruct-v0.1', None, 'Mixtral', 'Instruct', 'v0.1', '8x7B')) + + # Missing version self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B-Instruct"), ('Mixtral-8x7B-Instruct', None, 'Mixtral', 'Instruct', None, '8x7B')) + + # Missing finetune self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B-v0.1"), ('Mixtral-8x7B-v0.1', None, 'Mixtral', None, 'v0.1', '8x7B')) + + # Base name and size label only self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-8x7B"), ('Mixtral-8x7B', None, 'Mixtral', None, None, '8x7B')) - self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral"), - ('Mixtral', None, 'Mixtral', None, None, None)) + + # Base name and version only self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral-v0.1"), ('Mixtral-v0.1', None, 'Mixtral', None, 'v0.1', None)) - self.assertEqual(gguf.Metadata.get_model_id_components("hermes-2-pro-llama-3-8b-DPO"), - ('hermes-2-pro-llama-3-8b-DPO', None, 'hermes-2-pro-llama-3', 'DPO', None, '8b')) + + ## Edge Cases ## + + # This is too ambiguous... best to err on caution and output nothing + self.assertEqual(gguf.Metadata.get_model_id_components("Mixtral"), + ('Mixtral', None, None, None, None, None)) + + # Basename has numbers mixed in and also size label provided. Must avoid capturing number in basename self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"), - ('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, "8B")) + ('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, '8B')) + + # Can't detect all non standard form in a heuristically safe way... best to err in caution and output nothing... + self.assertEqual(gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat"), + ('Qwen1.5-MoE-A2.7B-Chat', None, None, None, None, None)) + + # Capture 'sub size labels' e.g. 
A14B in '57B-A14B' usually refers to activated params/weight count + self.assertEqual(gguf.Metadata.get_model_id_components("Qwen2-57B-A14B-Instruct"), + ('Qwen2-57B-A14B-Instruct', None, 'Qwen2', 'Instruct', None, '57B-A14B')) + + # Check that it can handle a real model id with no version code + # Note that 4k in this string is non standard and microsoft were referring to context length rather than weight count + self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct"), + ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3-mini', 'instruct', None, '4k')) + + # There is some legitimate models with only thousands of parameters + self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k"), + ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50k')) + + # None standard and not easy to disambiguate, best to err in caution and output nothing + self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"), + ('DeepSeek-Coder-V2-Lite-Instruct', None, None, None, None, None)) + + # This is a real model_id where they append 2DPO to refer to Direct Preference Optimization + # Not able to easily reject '2dpo' while keeping to simple regexp, so best to reject + self.assertEqual(gguf.Metadata.get_model_id_components("crestf411/daybreak-kunoichi-2dpo-7b"), + ('daybreak-kunoichi-2dpo-7b', 'crestf411', None, None, None, None)) + def test_apply_metadata_heuristic_from_model_card(self): model_card = { 'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], - 'model-index': [{'name': 'Hermes-2-Pro-Llama-3-8B', 'results': []}], + 'model-index': [{'name': 'Mixtral-8x7B-Instruct-v0.1', 'results': []}], 'language': ['en'], 'datasets': ['teknium/OpenHermes-2.5'], 'widget': [{'example_title': 'Hermes 2 Pro', 'messages': [{'role': 'system', 'content': 'You are a sentient, superintelligent artificial general intelligence, here to teach and assist me.'}, {'role': 'user', 'content': 'Write a short story about Goku discovering kirby has teamed up with Majin Buu to destroy the world.'}]}], 'base_model': ["EmbeddedLLM/Mistral-7B-Merge-14-v0", "janai-hq/trinity-v1"] } got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) - expect = gguf.Metadata(name=None, author=None, version=None, organization=None, finetune=None, basename=None, description=None, quantized_by=None, size_label=None, url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}], tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], languages=['en'], datasets=['teknium/OpenHermes-2.5']) + expect = gguf.Metadata() + expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1'}] + expect.tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'] + expect.languages=['en'] + expect.datasets=['teknium/OpenHermes-2.5'] + 
self.assertEqual(got, expect) def test_apply_metadata_heuristic_from_hf_parameters(self): From 3b1766a9920f412c1a830d88fddac2877329c656 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Sun, 14 Jul 2024 16:33:19 +1000 Subject: [PATCH 56/65] convert-*.py: flake8 remove blank line --- gguf-py/tests/test_metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index ba52d68be19e2..340b1b493910a 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -73,7 +73,6 @@ def test_get_model_id_components(self): self.assertEqual(gguf.Metadata.get_model_id_components("crestf411/daybreak-kunoichi-2dpo-7b"), ('daybreak-kunoichi-2dpo-7b', 'crestf411', None, None, None, None)) - def test_apply_metadata_heuristic_from_model_card(self): model_card = { 'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], From 78a42fbee5a1e1a771b04cd13eb7df06fe9abf5c Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Sun, 14 Jul 2024 15:36:50 -0400 Subject: [PATCH 57/65] gguf-py : use pyyaml instead of python-frontmatter HF transformers already depends on pyyaml for model cards, so it should already be in the environment of the users of the convert scripts, unlike python-frontmatter. This should be completely equivalent since the model cards seem to use only YAML and never TOML. --- .devops/nix/package.nix | 1 - gguf-py/gguf/metadata.py | 21 ++++++++++++------- gguf-py/pyproject.toml | 1 + .../requirements-convert_legacy_llama.txt | 1 - 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 03f57684290d8..911c42ecb16cf 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -90,7 +90,6 @@ let ps.tiktoken ps.torchWithoutCuda ps.transformers - ps.python-frontmatter # server bench ps.matplotlib diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 0e38bf0769d67..0cfaf80aa75ce 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -2,6 +2,7 @@ import re import json +import yaml import logging from pathlib import Path from typing import Any, Optional @@ -116,13 +117,19 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]: if not model_card_path.is_file(): return {} - try: - import frontmatter - with open(model_card_path, "r", encoding="utf-8") as f: - return frontmatter.load(f).to_dict() - except ModuleNotFoundError: - logger.warning("module 'frontmatter' not available. 
Metadata from README.md will NOT be read.") - return {} + # The model card metadata is assumed to always be in YAML + # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473 + with open(model_card_path, "r", encoding="utf-8") as f: + if f.readline() == "---\n": + raw = f.read().partition("---\n")[0] + data = yaml.safe_load(raw) + if isinstance(data, dict): + return data + else: + logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict") + return {} + else: + return {} @staticmethod def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml index 62129126bdddc..19f6761e2f912 100644 --- a/gguf-py/pyproject.toml +++ b/gguf-py/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ python = ">=3.8" numpy = ">=1.17" tqdm = ">=4.27" +pyyaml = ">=5.1" [tool.poetry.dev-dependencies] pytest = "^5.2" diff --git a/requirements/requirements-convert_legacy_llama.txt b/requirements/requirements-convert_legacy_llama.txt index 493dbe18c5a17..1d07b09522f61 100644 --- a/requirements/requirements-convert_legacy_llama.txt +++ b/requirements/requirements-convert_legacy_llama.txt @@ -3,4 +3,3 @@ sentencepiece~=0.2.0 transformers>=4.40.1,<5.0.0 gguf>=0.1.0 protobuf>=4.21.0,<5.0.0 -python-frontmatter~=1.0.1 From 417d7a7c6205f0db672dfe3d0dc8d11ebf718fbc Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Sun, 14 Jul 2024 20:38:26 -0400 Subject: [PATCH 58/65] convert_hf : use GGUFWriter to count model parameters --- convert_hf_to_gguf.py | 104 ++++++++++++------------------- examples/convert_legacy_llama.py | 38 +++++------ gguf-py/gguf/gguf_writer.py | 31 +++++++++ gguf-py/gguf/utility.py | 25 ++++---- 4 files changed, 101 insertions(+), 97 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 37b4462d38ed0..5b96cbbb3250c 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -48,6 +48,7 @@ class Model: dir_model: Path ftype: gguf.LlamaFileType + fname_out: Path | None is_big_endian: bool endianess: gguf.GGUFEndian use_temp_file: bool @@ -58,8 +59,6 @@ class Model: block_count: int tensor_map: gguf.TensorNameMap tensor_names: set[str] | None - fname_out: Path - fname_default: str gguf_writer: gguf.GGUFWriter metadata: gguf.Metadata @@ -76,6 +75,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | self.dir_model = dir_model self.ftype = ftype + self.fname_out = fname_out self.is_big_endian = is_big_endian self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE self.use_temp_file = use_temp_file @@ -101,37 +101,8 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})") self.ftype = gguf.LlamaFileType.MOSTLY_BF16 - # Fallback to model directory name if metadata name is still missing - if self.metadata.name is None: - self.metadata.name = dir_model.name - - # Generate parameter weight class (useful for leader boards) if not yet determined - if self.metadata.size_label is None: - expert_count = self.hparams.get("num_local_experts", 0) - sum_weight_estimate = self.calculate_total_weight_count() - - # Calculate weight estimate per model - per_model_weight_estimate: int = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate - - self.metadata.size_label = 
gguf.size_label(expert_count, per_model_weight_estimate) - - # Extracts and converts the encoding scheme from the given file type name. e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32' - output_type = self.ftype.name.partition("_")[2] - - # Generate default filename based on model specification and available metadata - self.fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type) - - # Filename Output - if fname_out is not None: - # custom defined filename and path was provided - # allow templating the file name with the output ftype, useful with the "auto" ftype - self.fname_out = fname_out.parent / gguf.fill_templated_filename(fname_out.name, output_type) - else: - # output in the same directory as the model by default - self.fname_out = dir_model / f"{self.fname_default}.gguf" - # Configure GGUF Writer - self.gguf_writer = gguf.GGUFWriter(path=self.fname_out, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, + self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file, split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard) @classmethod @@ -191,23 +162,6 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]: if len(sym_diff := tensor_names_from_parts.symmetric_difference(self.tensor_names)) > 0: raise ValueError(f"Mismatch between weight map and model parts for tensor names: {sym_diff}") - def calculate_total_weight_count(self) -> int: - sum_weight_estimate = 0 - for name, data_torch in self.get_tensors(): - # we don't need these - if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): - continue - - # Calculate Tensor Volume - sum_weights_in_tensor = 1 - for dim in data_torch.shape: - sum_weights_in_tensor *= dim - - # Add Tensor Volume To Running Count - sum_weight_estimate += sum_weights_in_tensor - - return sum_weight_estimate - def format_tensor_name(self, key: gguf.MODEL_TENSOR, bid: int | None = None, suffix: str = ".weight") -> str: if key not in gguf.MODEL_TENSORS[self.model_arch]: raise ValueError(f"Missing {key!r} for MODEL_TENSORS of {self.model_arch!r}") @@ -376,7 +330,37 @@ def prepare_tensors(self): self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype) - def prepare_key_value_store(self): + def prepare_metadata(self): + + # Fallback to model directory name if metadata name is still missing + if self.metadata.name is None: + self.metadata.name = self.dir_model.name + + # Generate parameter weight class (useful for leader boards) if not yet determined + if self.metadata.size_label is None: + total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count() + + if (total_params > 0): + self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count) + + # Extract the encoding scheme from the file type name. e.g. 
'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0' + output_type = self.ftype.name.partition("_")[2] + + # Generate default filename based on model specification and available metadata + fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type) + + # Filename Output + if self.fname_out is not None: + if not self.fname_out.is_dir(): + # custom defined filename and path was provided + # allow templating the file name with the output ftype, useful with the "auto" ftype + self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type) + else: + # the target file is a directory + self.fname_out = self.fname_out / f"{fname_default}.gguf" + else: + # output in the same directory as the model by default + self.fname_out = self.dir_model / f"{fname_default}.gguf" logger.info("Set meta model") self.metadata.set_gguf_meta_model(self.gguf_writer) @@ -393,8 +377,8 @@ def prepare_key_value_store(self): def write(self): self.prepare_tensors() - self.prepare_key_value_store() - self.gguf_writer.write_header_to_file() + self.prepare_metadata() + self.gguf_writer.write_header_to_file(path=self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.write_tensors_to_file(progress=True) self.gguf_writer.close() @@ -403,8 +387,8 @@ def write_vocab(self): if len(self.gguf_writer.tensors) != 1: raise ValueError('Splitting the vocabulary is not supported') - self.prepare_key_value_store() - self.gguf_writer.write_header_to_file() + self.prepare_metadata() + self.gguf_writer.write_header_to_file(path=self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() @@ -3545,10 +3529,6 @@ def parse_args() -> argparse.Namespace: "--metadata", type=Path, help="Specify the path for an authorship metadata override file" ) - parser.add_argument( - "--get-outfile", action="store_true", - help="print calculated output file name then exit" - ) return parser.parse_args() @@ -3576,9 +3556,6 @@ def main() -> None: if args.verbose: logging.basicConfig(level=logging.DEBUG) - elif args.get_outfile: - # Avoid printing anything besides the dump output - logging.basicConfig(level=logging.WARNING) else: logging.basicConfig(level=logging.INFO) @@ -3629,10 +3606,6 @@ def main() -> None: split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, small_first_shard=args.no_tensor_first_split) - if args.get_outfile: - print(f"{model_instance.fname_default}") # noqa: NP100 - return - if args.vocab_only: logger.info("Exporting model vocab...") model_instance.write_vocab() @@ -3640,6 +3613,7 @@ def main() -> None: else: logger.info("Exporting model...") model_instance.write() + assert model_instance.fname_out is not None out_path = f"{model_instance.fname_out.parent}{os.sep}" if is_split else model_instance.fname_out logger.info(f"Model successfully exported to {out_path}") diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index fc8a08cdf5f22..9ab9ab06edf8f 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1042,9 +1042,11 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT raise ValueError(f"Unexpected combination of types: {name_to_type}") -def per_model_weight_count_estimation(tensors: Iterable[tuple[str, LazyTensor]], expert_count:int | None) -> int: - # TODO: Ensure parameter count is accurate throughout various model type - sum_weight_estimate: 
int = 0 +def per_model_weight_count_estimation(tensors: Iterable[tuple[str, LazyTensor]]) -> tuple[int, int, int]: + total_params = 0 + shared_params = 0 + expert_params = 0 + for name, lazy_tensor in tensors: # We don't need these if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): @@ -1057,17 +1059,15 @@ def per_model_weight_count_estimation(tensors: Iterable[tuple[str, LazyTensor]], for dim in lazy_tensor.shape: sum_weights_in_tensor *= dim - # Add Tensor Volume To Running Count - sum_weight_estimate += sum_weights_in_tensor - - if expert_count is None: - return sum_weight_estimate + if ".experts." in name: + if ".experts.0." in name: + expert_params += sum_weights_in_tensor + else: + shared_params += sum_weights_in_tensor - if expert_count is not None and expert_count == 0: - return sum_weight_estimate + total_params += sum_weights_in_tensor - # Calculate weight estimate per model - return int(sum_weight_estimate / expert_count) + return total_params, shared_params, expert_params def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel: @@ -1249,12 +1249,12 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> return vocab, special_vocab -def default_convention_outfile(file_type: GGMLFileType, expert_count:int | None, model_params_count: int, metadata: gguf.Metadata) -> str: +def default_convention_outfile(file_type: GGMLFileType, expert_count: int | None, model_params_count: tuple[int, int, int], metadata: gguf.Metadata) -> str: name = metadata.name if metadata.name is not None else None basename = metadata.basename if metadata.basename is not None else None finetune = metadata.finetune if metadata.finetune is not None else None version = metadata.version if metadata.version is not None else None - size_label = metadata.size_label if metadata.size_label is not None else gguf.size_label(expert_count, model_params_count) + size_label = metadata.size_label if metadata.size_label is not None else gguf.size_label(*model_params_count, expert_count=expert_count or 0) output_type = { GGMLFileType.AllF32: "F32", @@ -1265,7 +1265,7 @@ def default_convention_outfile(file_type: GGMLFileType, expert_count:int | None, return gguf.naming_convention(name, basename, finetune, version, size_label, output_type) -def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count:int | None, model_params_count: int, metadata: gguf.Metadata) -> Path: +def default_outfile(model_paths: list[Path], file_type: GGMLFileType, expert_count: int | None, model_params_count: tuple[int, int, int], metadata: gguf.Metadata) -> Path: default_filename = default_convention_outfile(file_type, expert_count, model_params_count, metadata) ret = model_paths[0].parent / f"{default_filename}.gguf" if ret in model_paths: @@ -1328,7 +1328,7 @@ def main(args_in: list[str] | None = None) -> None: model_plus = load_some_model(dir_model) params = Params.load(model_plus) model = convert_model_names(model_plus.model, params, args.skip_unknown) - model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) + model_params_count = per_model_weight_count_estimation(model_plus.model.items()) ftype = pick_output_type(model, args.outtype) if (metadata is None or metadata.name is None) and params.path_model is not None: @@ -1415,8 +1415,8 @@ def main(args_in: list[str] | None = None) -> None: if metadata.name is None and params.path_model is not None: metadata.name = params.path_model.name - 
model_params_count = per_model_weight_count_estimation(model_plus.model.items(), params.n_experts) - logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})") + model_params_count = per_model_weight_count_estimation(model_plus.model.items()) + logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count[0])})") logger.info(f"Vocab info: {vocab}") logger.info(f"Special vocab info: {special_vocab}") @@ -1426,7 +1426,7 @@ def main(args_in: list[str] | None = None) -> None: model = convert_to_output_type(model, ftype) outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.n_experts, model_params_count, metadata=metadata) - metadata.size_label = gguf.size_label(params.n_experts, model_params_count) + metadata.size_label = gguf.size_label(*model_params_count, expert_count=params.n_experts or 0) params.ftype = ftype logger.info(f"Writing {outfile}, format {ftype}") diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 9cb9415d00f37..f0f029a18b3f3 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -17,6 +17,7 @@ from .constants import ( GGUF_DEFAULT_ALIGNMENT, GGUF_MAGIC, + GGML_QUANT_SIZES, GGUF_VERSION, GGMLQuantizationType, GGUFEndian, @@ -106,6 +107,36 @@ def __init__( self.add_architecture() + def get_total_parameter_count(self) -> tuple[int, int, int, int]: + total_params = 0 + shared_params = 0 + expert_params = 0 + + expert_sum = 0 + n_expert_tensors = 0 + + for tensors in self.tensors: + for name, info in tensors.items(): + + block_size, type_size = GGML_QUANT_SIZES[info.dtype] + + size = (info.nbytes // type_size) * block_size + + if "_exps." in name: + expert_params += (size // info.shape[-3]) + expert_sum += info.shape[-3] + n_expert_tensors += 1 + else: + shared_params += size + + total_params += size + + # Hopefully this should work even for variable-expert-count models + expert_count = (expert_sum // n_expert_tensors) if n_expert_tensors > 0 else 0 + + # NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py + return total_params, shared_params, expert_params, expert_count + def format_shard_names(self, path: Path) -> list[Path]: if len(self.tensors) == 1: return [path] diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 88502e1808eec..873ca406c43ba 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -10,12 +10,8 @@ def fill_templated_filename(filename: str, output_type: str): OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) -def model_weight_count_rounded_notation(model_params_count: int) -> str: - if model_params_count > 1e15 : - # Quadrillion Of Parameters - scaled_model_params = model_params_count * 1e-15 - scale_suffix = "Q" - elif model_params_count > 1e12 : +def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str: + if model_params_count > 1e12 : # Trillions Of Parameters scaled_model_params = model_params_count * 1e-12 scale_suffix = "T" @@ -31,21 +27,24 @@ def model_weight_count_rounded_notation(model_params_count: int) -> str: # Thousands Of Parameters scaled_model_params = model_params_count * 1e-3 scale_suffix = "K" - return f"{round(scaled_model_params)}{scale_suffix}" + fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0) -def size_label(expert_count_int:int | None, model_params_count: int) -> str: - per_model_rounded_weight_estimate = 
model_weight_count_rounded_notation(model_params_count) + return f"{scaled_model_params:.{fix}f}{scale_suffix}" - if expert_count_int is not None and expert_count_int > 0: - size_class = f"{expert_count_int}x{per_model_rounded_weight_estimate}" + +def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str: + + if expert_count > 0: + pretty_size = model_weight_count_rounded_notation(shared_params + expert_params, min_digits=2) + size_class = f"{expert_count}x{pretty_size}" else: - size_class = f"{per_model_rounded_weight_estimate}" + size_class = model_weight_count_rounded_notation(total_params, min_digits=2) return size_class -def naming_convention(model_name: str | None, base_name: str | None, finetune_string:str | None, version_string:str | None, size_label: str | None, output_type: str | None) -> str: +def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: From 9a925b56a07a3f5ff5afeb6cbae2c59262d1e77b Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 15 Jul 2024 19:16:38 +1000 Subject: [PATCH 59/65] metadata.py: account for decimal point in size label within model id components --- gguf-py/gguf/metadata.py | 2 +- gguf-py/tests/test_metadata.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 0cfaf80aa75ce..7d6478189d7ff 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -179,7 +179,7 @@ def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1' regex_match = re.compile(r'^' r'(?P[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))' - r'(?:-(?P(?:\d+x)?\d+[A-Za-z](?:-[A-Za-z]+(?:\d+x)?\d+[A-Za-z]+)?)(?:-(?P[A-Za-z0-9\s-]+))?)?' + r'(?:-(?P(?:\d+x)?(\d+\.)?\d+[A-Za-z](?:-[A-Za-z]+(\d+\.)?\d+[A-Za-z]+)?)(?:-(?P[A-Za-z0-9\s-]+))?)?' r'(?:-(?Pv\d+(?:\.\d+)*))?' 
r'$').match(model_full_name_component) diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 340b1b493910a..2c25e85d4a3fd 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -73,6 +73,10 @@ def test_get_model_id_components(self): self.assertEqual(gguf.Metadata.get_model_id_components("crestf411/daybreak-kunoichi-2dpo-7b"), ('daybreak-kunoichi-2dpo-7b', 'crestf411', None, None, None, None)) + # This is a real model id where the weight size has a decimal point + self.assertEqual(gguf.Metadata.get_model_id_components("Qwen2-0.5B-Instruct"), + ('Qwen2-0.5B-Instruct', None, 'Qwen2', 'Instruct', None, '0.5B')) + def test_apply_metadata_heuristic_from_model_card(self): model_card = { 'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], From c7b3616449fe2be2f981445c2b9c56df4a52f4e2 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 16 Jul 2024 07:02:07 +1000 Subject: [PATCH 60/65] Update convert_hf_to_gguf.py It might help with the convert_lora_to_gguf.py script if default values were added here Co-authored-by: compilade --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 5b96cbbb3250c..476382a7c3d86 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -65,7 +65,7 @@ class Model: # subclasses should define this! model_arch: gguf.MODEL_ARCH - def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool, use_temp_file: bool, eager: bool, metadata: gguf.Metadata, + def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool = False, use_temp_file: bool = False, eager: bool = False, metadata: gguf.Metadata | None = None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") From eb0bf6b92f3ca4d288d1f30b017cae371f6deb7f Mon Sep 17 00:00:00 2001 From: brian khuu Date: Wed, 17 Jul 2024 01:04:14 +1000 Subject: [PATCH 61/65] convert-*.py: Add naming_convention_vocab_only() --- convert_hf_to_gguf.py | 48 ++++++++++++++++++++++++++--------------- gguf-py/gguf/utility.py | 23 +++++++++++++++++--- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index ae1901a6cab92..6af3f243a4d79 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -337,7 +337,7 @@ def prepare_tensors(self): self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype) - def prepare_metadata(self): + def prepare_metadata(self, vocab_only: bool): # Fallback to model directory name if metadata name is still missing if self.metadata.name is None: @@ -350,24 +350,38 @@ def prepare_metadata(self): if (total_params > 0): self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count) - # Extract the encoding scheme from the file type name. e.g. 
'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0' - output_type = self.ftype.name.partition("_")[2] + # Filename Output + if self.fname_out is not None and not self.fname_out.is_dir(): + # Output path is a custom defined templated filename + # Note: `not is_dir()` is used because `.is_file()` will not detect + # file template strings as it doesn't actually exist as a file - # Generate default filename based on model specification and available metadata - fname_default = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type) + # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0' + output_type: str = self.ftype.name.partition("_")[2] - # Filename Output - if self.fname_out is not None: - if not self.fname_out.is_dir(): - # custom defined filename and path was provided - # allow templating the file name with the output ftype, useful with the "auto" ftype - self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type) + # Process templated file name with the output ftype, useful with the "auto" ftype + self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type) + else: + + # Generate default filename based on model specification and available metadata + if vocab_only: + # Vocab based default filename + fname_default: str = gguf.naming_convention_vocab_only(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version) else: - # the target file is a directory + + # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0' + output_type: str = self.ftype.name.partition("_")[2] + + # Standard default filename + fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type) + + # Check if preferred output directory path was provided + if self.fname_out is not None and self.fname_out.is_dir(): + # output path is a directory self.fname_out = self.fname_out / f"{fname_default}.gguf" - else: - # output in the same directory as the model by default - self.fname_out = self.dir_model / f"{fname_default}.gguf" + else: + # output in the same directory as the model by default + self.fname_out = self.dir_model / f"{fname_default}.gguf" logger.info("Set meta model") self.metadata.set_gguf_meta_model(self.gguf_writer) @@ -384,7 +398,7 @@ def prepare_metadata(self): def write(self): self.prepare_tensors() - self.prepare_metadata() + self.prepare_metadata(vocab_only=False) self.gguf_writer.write_header_to_file(path=self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.write_tensors_to_file(progress=True) @@ -394,7 +408,7 @@ def write_vocab(self): if len(self.gguf_writer.tensors) != 1: raise ValueError('Splitting the vocabulary is not supported') - self.prepare_metadata() + self.prepare_metadata(vocab_only=True) self.gguf_writer.write_header_to_file(path=self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index 873ca406c43ba..c21c5490e8634 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -1,10 +1,10 @@ from __future__ import annotations -def fill_templated_filename(filename: str, output_type: str): +def fill_templated_filename(filename: str, output_type: str | None) -> str: # 
Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' - ftype_uppercase: str = output_type.upper() - ftype_lowercase: str = output_type.lower() + ftype_lowercase: str = output_type.lower() if output_type is not None else "" + ftype_uppercase: str = output_type.upper() if output_type is not None else "" return filename.format(ftype_lowercase, outtype=ftype_lowercase, ftype=ftype_lowercase, OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase) @@ -63,3 +63,20 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st precision = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else "" return f"{name}{parameters}{finetune}{version}{precision}" + + +def naming_convention_vocab_only(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None) -> str: + # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention + + if base_name is not None: + name = base_name.strip().title().replace(' ', '-').replace('/', '-') + elif model_name is not None: + name = model_name.strip().title().replace(' ', '-').replace('/', '-') + else: + name = "ggml-model" + + finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else "" + + version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" + + return f"{name}{finetune}{version}-vocab" From 7e9271cabfffca6d3635281ad1f6eb68faa6e73b Mon Sep 17 00:00:00 2001 From: brian khuu Date: Wed, 17 Jul 2024 01:11:27 +1000 Subject: [PATCH 62/65] convert_lora_to_gguf.py: remove model_name parameter. Doesn't exist in LoraModel() --- convert_lora_to_gguf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py index 4bb939d45d6bd..f5e9fec289ffc 100755 --- a/convert_lora_to_gguf.py +++ b/convert_lora_to_gguf.py @@ -356,8 +356,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter fname_out, is_big_endian=args.bigendian, use_temp_file=False, - eager=args.no_lazy, - model_name=None, + eager=args.no_lazy ) with open(lora_config, "r") as f: From 2c18a9a4d42bc57e8f182c4b97bdbdb0e39778c2 Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Wed, 17 Jul 2024 23:17:39 -0400 Subject: [PATCH 63/65] gguf-py : extract metadata from model name more resiliently Using more than one regex to annotate the parts of the name, this way, the order doesn't have to be fixed and this should work correctly for more edge cases. Also, the total parameter count of the model is used to figure out if a size label is not actually a size label, but a context size. * convert_lora : fix duplicate model type key --- convert_hf_to_gguf.py | 37 ++++++----- convert_lora_to_gguf.py | 30 ++++++--- gguf-py/gguf/gguf_writer.py | 23 +++++-- gguf-py/gguf/metadata.py | 108 ++++++++++++++++++++++++--------- gguf-py/tests/test_metadata.py | 40 +++++++----- 5 files changed, 163 insertions(+), 75 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 6af3f243a4d79..30693018003de 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -60,19 +60,19 @@ class Model: tensor_map: gguf.TensorNameMap tensor_names: set[str] | None gguf_writer: gguf.GGUFWriter - metadata: gguf.Metadata + model_name: str | None + metadata_override: Path | None # subclasses should define this! 
model_arch: gguf.MODEL_ARCH - def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool = False, use_temp_file: bool = False, eager: bool = False, metadata: gguf.Metadata | None = None, + def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool = False, + use_temp_file: bool = False, eager: bool = False, + metadata_override: Path | None = None, model_name: str | None = None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") - if metadata is None: - metadata = gguf.Metadata() - self.dir_model = dir_model self.ftype = ftype self.fname_out = fname_out @@ -88,7 +88,8 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) self.tensor_names = None - self.metadata = metadata + self.metadata_override = metadata_override + self.model_name = model_name # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type if self.ftype == gguf.LlamaFileType.GUESSED: @@ -337,18 +338,22 @@ def prepare_tensors(self): self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype) + def set_type(self): + self.gguf_writer.add_type(gguf.GGUFType.MODEL) + def prepare_metadata(self, vocab_only: bool): + total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count() + + self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, total_params) + # Fallback to model directory name if metadata name is still missing if self.metadata.name is None: self.metadata.name = self.dir_model.name # Generate parameter weight class (useful for leader boards) if not yet determined - if self.metadata.size_label is None: - total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count() - - if (total_params > 0): - self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count) + if self.metadata.size_label is None and total_params > 0: + self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count) # Filename Output if self.fname_out is not None and not self.fname_out.is_dir(): @@ -383,11 +388,12 @@ def prepare_metadata(self, vocab_only: bool): # output in the same directory as the model by default self.fname_out = self.dir_model / f"{fname_default}.gguf" + self.set_type() + logger.info("Set meta model") self.metadata.set_gguf_meta_model(self.gguf_writer) logger.info("Set model parameters") - self.gguf_writer.add_type(gguf.GGUFType.MODEL) self.set_gguf_parameters() logger.info("Set model tokenizer") @@ -3607,11 +3613,8 @@ def main() -> None: else: logging.basicConfig(level=logging.INFO) - model_name = args.model_name dir_model = args.model - metadata = gguf.Metadata.load(args.metadata, dir_model, model_name) - if not dir_model.is_dir(): logger.error(f'Error: {args.model} is not a directory') sys.exit(1) @@ -3650,7 +3653,9 @@ def main() -> None: model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out, is_big_endian=args.bigendian, use_temp_file=args.use_temp_file, - eager=args.no_lazy, metadata=metadata, 
split_max_tensors=args.split_max_tensors, + eager=args.no_lazy, + metadata_override=args.metadata, model_name=args.model_name, + split_max_tensors=args.split_max_tensors, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, small_first_shard=args.no_tensor_first_split) diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py index f5e9fec289ffc..66e8da37cba7c 100755 --- a/convert_lora_to_gguf.py +++ b/convert_lora_to_gguf.py @@ -251,6 +251,10 @@ def parse_args() -> argparse.Namespace: "--verbose", action="store_true", help="increase output verbosity", ) + parser.add_argument( + "--dry-run", action="store_true", + help="only print out what will be done, without writing any new files", + ) parser.add_argument( "--base", type=Path, required=True, help="directory containing base model file", @@ -300,6 +304,12 @@ def parse_args() -> argparse.Namespace: # load base model logger.info(f"Loading base model: {dir_base_model.name}") hparams = Model.load_hparams(dir_base_model) + + with open(lora_config, "r") as f: + lparams: dict[str, Any] = json.load(f) + + alpha: float = lparams["lora_alpha"] + with torch.inference_mode(): try: model_class = Model.from_model_architecture(hparams["architectures"][0]) @@ -310,6 +320,14 @@ def parse_args() -> argparse.Namespace: class LoraModel(model_class): model_arch = model_class.model_arch + def set_type(self): + self.gguf_writer.add_type(gguf.GGUFType.ADAPTER) + self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora") + + def set_gguf_parameters(self): + self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha)) + super().set_gguf_parameters() + def get_tensors(self) -> Iterator[tuple[str, Tensor]]: tensor_map: dict[str, PartialLoraTensor] = {} @@ -356,18 +374,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter fname_out, is_big_endian=args.bigendian, use_temp_file=False, - eager=args.no_lazy + eager=args.no_lazy, + dry_run=args.dry_run, ) - with open(lora_config, "r") as f: - lparams: dict[str, Any] = json.load(f) - - alpha = lparams["lora_alpha"] - - model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER) - model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora") - model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha)) - model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION) logger.info("Exporting model...") model_instance.write() logger.info(f"Model successfully exported to {model_instance.fname_out}") diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index f0f029a18b3f3..8542b3adb39eb 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -7,6 +7,7 @@ import tempfile from dataclasses import dataclass from enum import Enum, auto +from math import prod from pathlib import Path from io import BufferedWriter from typing import IO, Any, Sequence, Mapping @@ -17,7 +18,6 @@ from .constants import ( GGUF_DEFAULT_ALIGNMENT, GGUF_MAGIC, - GGML_QUANT_SIZES, GGUF_VERSION, GGMLQuantizationType, GGUFEndian, @@ -115,16 +115,29 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]: expert_sum = 0 n_expert_tensors = 0 + last_lora_a: tuple[str, TensorInfo] | None = None + for tensors in self.tensors: for name, info in tensors.items(): - block_size, type_size = GGML_QUANT_SIZES[info.dtype] + shape = info.shape + + if name.endswith(".lora_a"): + last_lora_a = (name, info) + continue + elif name.endswith(".lora_b"): + if last_lora_a is None or 
last_lora_a[0] != name[:-1] + "a": + # Bail when the LoRA pair can't be found trivially + logger.warning("can't measure LoRA size correctly, tensor order is unusual") + return 0, 0, 0, 0 + else: + shape = (*shape[:-1], last_lora_a[1].shape[-1]) - size = (info.nbytes // type_size) * block_size + size = prod(shape) if "_exps." in name: - expert_params += (size // info.shape[-3]) - expert_sum += info.shape[-3] + expert_params += (size // shape[-3]) + expert_sum += shape[-3] n_expert_tensors += 1 else: shared_params += size diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 7d6478189d7ff..8b599160de5de 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -5,7 +5,7 @@ import yaml import logging from pathlib import Path -from typing import Any, Optional +from typing import Any, Literal, Optional from dataclasses import dataclass from .constants import Keys @@ -44,7 +44,7 @@ class Metadata: datasets: Optional[list[str]] = None @staticmethod - def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None) -> Metadata: + def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata: # This grabs as many contextual authorship metadata as possible from the model repository # making any conversion as required to match the gguf kv store metadata format # as well as giving users the ability to override any authorship metadata that may be incorrect @@ -56,7 +56,7 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat hf_params = Metadata.load_hf_parameters(model_path) # heuristics - metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path) + metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params) # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp @@ -150,7 +150,7 @@ def id_to_title(string): return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()]) @staticmethod - def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]: + def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]: # Huggingface often store model id as '/' # so let's parse it and apply some heuristics if possible for model name components @@ -175,35 +175,82 @@ def get_model_id_components(model_id: Optional[str] = None) -> tuple[str | None, if org_component is not None and org_component[0] == '.': org_component = None - # Regular expression to extract model name components - # Heuristic to match against cases such as 'Mixtral-8x7B-Instruct-v0.1' or 'Codestral-22B-v0.1' - regex_match = re.compile(r'^' - r'(?P[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))' - r'(?:-(?P(?:\d+x)?(\d+\.)?\d+[A-Za-z](?:-[A-Za-z]+(\d+\.)?\d+[A-Za-z]+)?)(?:-(?P[A-Za-z0-9\s-]+))?)?' - r'(?:-(?Pv\d+(?:\.\d+)*))?' 
- r'$').match(model_full_name_component) - - if not regex_match: - return model_full_name_component, org_component, None, None, None, None - - components = regex_match.groupdict() - basename = components.get("basename") - size_label = components.get("size_label") - finetune = components.get("finetune") - version = components.get("version") + name_parts: list[str] = model_full_name_component.split('-') + name_types: list[ + set[Literal["basename", "size_label", "finetune", "version", "type"]] + ] = [set() for _ in name_parts] + + # Annotate the name + for i, part in enumerate(name_parts): + # Version + if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE): + name_types[i].add("version") + # Quant type (should not be there for base models, but still annotated) + elif re.fullmatch(r'[iI]?[qQ]\d(_\w)*', part): + name_types[i].add("type") + name_parts[i] = part.upper() + # Model size + elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[kMBT]|small|mini|medium|large|xl)', part, re.IGNORECASE): + part = part.replace("_", ".") + if len(part) > 1 and part[-2].isdecimal(): + if part[-1] in "mbt": + part = part[:-1] + part[-1].upper() + elif part[-1] in "k": + part = part[:-1] + part[-1].lower() + if total_params > 0: + try: + label_params = float(part[:-1]) * pow(1000, " kMBT".find(part[-1])) + # Only use it as a size label if it's close or bigger than the model size + # Note that LoRA adapters don't necessarily include all layers, + # so this is why bigger label sizes are accepted. + # Do not use the size label when it's smaller than 3/4 of the model size + if total_params - label_params > total_params // 4: + # Likely a context length + name_types[i].add("finetune") + except ValueError: + # Failed to convert the size label to float, use it anyway + pass + if len(name_types[i]) == 0: + name_types[i].add("size_label") + name_parts[i] = part + # Some easy to recognize finetune names + elif i > 0 and re.fullmatch(r'chat|instruct|vision', part, re.IGNORECASE): + name_types[i].add("finetune") + + at_start = True + # Find the basename through the annotated name + for part, t in zip(name_parts, name_types): + if at_start and ((len(t) == 0 and part[0].isalpha()) or "version" in t): + t.add("basename") + else: + if at_start: + at_start = False + if len(t) == 0: + t.add("finetune") + + # Remove the basename annotation from trailing version + for part, t in zip(reversed(name_parts), reversed(name_types)): + if "basename" in t: + if len(t) > 1: + t.remove("basename") + else: + break - # Base name required at a minimum - if basename is None: - return model_full_name_component, None, None, None, None, None + basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None + size_label = "-".join(s for s, t in zip(name_parts, name_types) if "size_label" in t) or None + finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None + # TODO: should the basename version always be excluded? + # TODO: should multiple versions be joined together? 
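        # ---- editor's illustrative sketch (not part of this patch) ----
        # The annotation pass above tags each '-'-separated part of the model id
        # (basename / size_label / finetune / version / type) before the parts are
        # joined back into components. Assuming the behaviour pinned down by the
        # updated unit tests in gguf-py/tests/test_metadata.py, a standalone check
        # would look like this:
        import gguf  # hypothetical standalone usage, outside this function

        assert gguf.Metadata.get_model_id_components("Qwen2-0.5B-Instruct") == \
            ('Qwen2-0.5B-Instruct', None, 'Qwen2', 'Instruct', None, '0.5B')
        assert gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat") == \
            ('Qwen1.5-MoE-A2.7B-Chat', None, 'Qwen1.5-MoE', 'Chat', None, 'A2.7B')
        # ---- end of editor's sketch ----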
+ version = ([v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t] or [None])[-1] - # Need to capture at least one component that is not basename - if size_label is None and version is None and finetune is None: - return model_full_name_component, None, None, None, None, None + if size_label is None and finetune is None and version is None: + # Too ambiguous, output nothing + basename = None return model_full_name_component, org_component, basename, finetune, version, size_label @staticmethod - def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None) -> Metadata: + def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata: # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 # Model Card Heuristics @@ -242,7 +289,8 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No metadata.base_models = [] for model_id in metadata_base_models: - model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id) + # NOTE: model size of base model is assumed to be similar to the size of the current model + model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) base_model = {} if model_full_name_component is not None: base_model["name"] = Metadata.id_to_title(model_full_name_component) @@ -317,7 +365,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No # Use _name_or_path only if its actually a model name and not some computer path # e.g. 
'meta-llama/Llama-2-7b-hf' model_id = hf_name_or_path - model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id) + model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) if metadata.name is None and model_full_name_component is not None: metadata.name = Metadata.id_to_title(model_full_name_component) if metadata.organization is None and org_component is not None: @@ -335,7 +383,7 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No ############################################ if model_path is not None: model_id = model_path.name - model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id) + model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) if metadata.name is None and model_full_name_component is not None: metadata.name = Metadata.id_to_title(model_full_name_component) if metadata.organization is None and org_component is not None: diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 2c25e85d4a3fd..9d048603b126b 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -1,9 +1,15 @@ #!/usr/bin/env python3 import unittest -import gguf # noqa: F401 from pathlib import Path +import os +import sys +# Necessary to load the local gguf package +if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): + sys.path.insert(0, str(Path(__file__).parent.parent)) + +import gguf class TestMetadataMethod(unittest.TestCase): @@ -49,7 +55,7 @@ def test_get_model_id_components(self): # Can't detect all non standard form in a heuristically safe way... best to err in caution and output nothing... self.assertEqual(gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat"), - ('Qwen1.5-MoE-A2.7B-Chat', None, None, None, None, None)) + ('Qwen1.5-MoE-A2.7B-Chat', None, 'Qwen1.5-MoE', 'Chat', None, 'A2.7B')) # Capture 'sub size labels' e.g. 
A14B in '57B-A14B' usually refers to activated params/weight count self.assertEqual(gguf.Metadata.get_model_id_components("Qwen2-57B-A14B-Instruct"), @@ -57,26 +63,29 @@ def test_get_model_id_components(self): # Check that it can handle a real model id with no version code # Note that 4k in this string is non standard and microsoft were referring to context length rather than weight count - self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct"), - ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3-mini', 'instruct', None, '4k')) + self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct", 4*10**9), + ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3', '4k-instruct', None, 'mini')) # There is some legitimate models with only thousands of parameters - self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k"), + self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50*10**3), ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50k')) - # None standard and not easy to disambiguate, best to err in caution and output nothing + # None standard and not easy to disambiguate self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"), - ('DeepSeek-Coder-V2-Lite-Instruct', None, None, None, None, None)) + ('DeepSeek-Coder-V2-Lite-Instruct', None, 'DeepSeek-Coder-V2-Lite', 'Instruct', None, None)) # This is a real model_id where they append 2DPO to refer to Direct Preference Optimization - # Not able to easily reject '2dpo' while keeping to simple regexp, so best to reject self.assertEqual(gguf.Metadata.get_model_id_components("crestf411/daybreak-kunoichi-2dpo-7b"), - ('daybreak-kunoichi-2dpo-7b', 'crestf411', None, None, None, None)) + ('daybreak-kunoichi-2dpo-7b', 'crestf411', 'daybreak-kunoichi', '2dpo', None, '7B')) # This is a real model id where the weight size has a decimal point self.assertEqual(gguf.Metadata.get_model_id_components("Qwen2-0.5B-Instruct"), ('Qwen2-0.5B-Instruct', None, 'Qwen2', 'Instruct', None, '0.5B')) + # Uses an underscore in the size label + self.assertEqual(gguf.Metadata.get_model_id_components("smallcloudai/Refact-1_6B-fim"), + ('Refact-1_6B-fim', 'smallcloudai', 'Refact', 'fim', None, '1.6B')) + def test_apply_metadata_heuristic_from_model_card(self): model_card = { 'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'], @@ -88,7 +97,7 @@ def test_apply_metadata_heuristic_from_model_card(self): } got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) expect = gguf.Metadata() - expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1'}] + expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': 'v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}] expect.tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'] expect.languages=['en'] expect.datasets=['teknium/OpenHermes-2.5'] @@ -97,12 +106,15 @@ def test_apply_metadata_heuristic_from_model_card(self): def 
test_apply_metadata_heuristic_from_hf_parameters(self): hf_params = {"_name_or_path": "./hermes-2-pro-llama-3-8b-DPO"} - got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, hf_params, None) - expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, size_label='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) + got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card=None, hf_params=hf_params, model_path=None) + expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B') self.assertEqual(got, expect) def test_apply_metadata_heuristic_from_model_dir(self): model_dir_path = Path("./hermes-2-pro-llama-3-8b-DPO") - got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), None, None, model_dir_path) - expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', author=None, version=None, organization=None, finetune='DPO', basename='hermes-2-pro-llama-3', description=None, quantized_by=None, size_label='8b', url=None, doi=None, uuid=None, repo_url=None, license=None, license_name=None, license_link=None, base_models=None, tags=None, languages=None, datasets=None) + got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card=None, hf_params=None, model_path=model_dir_path) + expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B') self.assertEqual(got, expect) + +if __name__ == "__main__": + unittest.main() From 4c9932c1e184a6ffe5ce9d75c9d52b66638a837c Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Wed, 17 Jul 2024 23:26:45 -0400 Subject: [PATCH 64/65] gguf-py : fix flake8 lint --- gguf-py/tests/test_metadata.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 9d048603b126b..b71b338089103 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -11,6 +11,7 @@ import gguf + class TestMetadataMethod(unittest.TestCase): def test_id_to_title(self): @@ -63,11 +64,11 @@ def test_get_model_id_components(self): # Check that it can handle a real model id with no version code # Note that 4k in this string is non standard and microsoft were referring to context length rather than weight count - self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct", 4*10**9), + self.assertEqual(gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct", 4 * 10**9), ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3', '4k-instruct', None, 'mini')) # There is some legitimate models with only thousands of parameters - self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50*10**3), + self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50 * 10**3), ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50k')) # None standard and not easy to disambiguate @@ -116,5 +117,6 @@ def test_apply_metadata_heuristic_from_model_dir(self): expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B') self.assertEqual(got, expect) + if __name__ == "__main__": unittest.main() From 
73899f74cfddf1aff5db435c3da257018f9979bf Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Thu, 18 Jul 2024 02:28:57 -0400 Subject: [PATCH 65/65] gguf-py : handle more name metadata extraction edge cases * gguf-py : output the split plan on stdout when using dry_run * convert_hf : unify vocab naming convention with the standard one This also adds a way to name LoRA models. --- convert_hf_to_gguf.py | 23 +++++++------------- gguf-py/gguf/gguf_writer.py | 6 ++++++ gguf-py/gguf/metadata.py | 29 +++++++++++++++++--------- gguf-py/gguf/utility.py | 29 +++++++------------------- gguf-py/tests/test_metadata.py | 38 +++++++++++++++++++++++++++++++++- 5 files changed, 78 insertions(+), 47 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 30693018003de..769d49a8b6f0a 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -355,30 +355,23 @@ def prepare_metadata(self, vocab_only: bool): if self.metadata.size_label is None and total_params > 0: self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count) + # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0' + output_type: str = self.ftype.name.partition("_")[2] + # Filename Output + # Note: `not is_dir()` is used because `.is_file()` will not detect + # file template strings as it doesn't actually exist as a file if self.fname_out is not None and not self.fname_out.is_dir(): # Output path is a custom defined templated filename - # Note: `not is_dir()` is used because `.is_file()` will not detect - # file template strings as it doesn't actually exist as a file - - # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0' - output_type: str = self.ftype.name.partition("_")[2] # Process templated file name with the output ftype, useful with the "auto" ftype self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type) else: - # Generate default filename based on model specification and available metadata - if vocab_only: - # Vocab based default filename - fname_default: str = gguf.naming_convention_vocab_only(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version) + if not vocab_only: + fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type, model_type="LoRA" if total_params < 0 else None) else: - - # Extract the encoding scheme from the file type name. e.g. 
'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0' - output_type: str = self.ftype.name.partition("_")[2] - - # Standard default filename - fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type) + fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, size_label=None, output_type=None, model_type="vocab") # Check if preferred output directory path was provided if self.fname_out is not None and self.fname_out.is_dir(): diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 8542b3adb39eb..ba6f53cda25a1 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -147,6 +147,10 @@ def get_total_parameter_count(self) -> tuple[int, int, int, int]: # Hopefully this should work even for variable-expert-count models expert_count = (expert_sum // n_expert_tensors) if n_expert_tensors > 0 else 0 + # Negate the total to signal it's likely not exact + if last_lora_a is not None: + total_params = -total_params + # NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py return total_params, shared_params, expert_params, expert_count @@ -181,6 +185,8 @@ def print_plan(self) -> list[Path]: if self.dry_run: logger.info("Dry run, not writing files") + for name in filenames: + print(name) # noqa: NP100 exit() return filenames diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 8b599160de5de..be297f2426f78 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -186,27 +186,34 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int = if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE): name_types[i].add("version") # Quant type (should not be there for base models, but still annotated) - elif re.fullmatch(r'[iI]?[qQ]\d(_\w)*', part): + elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE): name_types[i].add("type") name_parts[i] = part.upper() # Model size - elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[kMBT]|small|mini|medium|large|xl)', part, re.IGNORECASE): + elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE): part = part.replace("_", ".") + # Handle weird bloom-7b1 notation + if part[-1].isdecimal(): + part = part[:-2] + "." + part[-1] + part[-2] + # Normalize the size suffixes if len(part) > 1 and part[-2].isdecimal(): - if part[-1] in "mbt": + if part[-1] in "kmbt": part = part[:-1] + part[-1].upper() - elif part[-1] in "k": - part = part[:-1] + part[-1].lower() - if total_params > 0: + if total_params != 0: try: - label_params = float(part[:-1]) * pow(1000, " kMBT".find(part[-1])) + label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1])) # Only use it as a size label if it's close or bigger than the model size # Note that LoRA adapters don't necessarily include all layers, # so this is why bigger label sizes are accepted. 
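        # ---- editor's illustrative sketch (not part of this patch) ----
        # The comparison against total_params above is what lets a "size-looking"
        # part be reclassified as part of the finetune (e.g. a context length or a
        # training-token count) instead of a size label. Assuming the behaviour
        # pinned down by the updated unit tests in gguf-py/tests/test_metadata.py:
        import gguf  # hypothetical standalone usage, outside this function

        # '4k' is a context length here, far too small next to ~4B parameters
        assert gguf.Metadata.get_model_id_components("microsoft/Phi-3-mini-4k-instruct", 4 * 10**9) == \
            ('Phi-3-mini-4k-instruct', 'microsoft', 'Phi-3', '4k-instruct', None, 'mini')
        # '100B' is a training-token count here, far too large next to ~161M parameters
        assert gguf.Metadata.get_model_id_components("abacaj/llama-161M-100B", 161 * 10**6) == \
            ('llama-161M-100B', 'abacaj', 'llama', '100b', None, '161M')
        # ---- end of editor's sketch ----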
- # Do not use the size label when it's smaller than 3/4 of the model size - if total_params - label_params > total_params // 4: + # Do not use the size label when it's smaller than 1/8 of the model size + if (total_params < 0 and label_params < abs(total_params) // 8) or ( + # Check both directions when the current model isn't a LoRA adapter + total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8 + ): # Likely a context length name_types[i].add("finetune") + # Lowercase the size when it's a context length + part = part[:-1] + part[-1].lower() except ValueError: # Failed to convert the size label to float, use it anyway pass @@ -214,8 +221,10 @@ def get_model_id_components(model_id: Optional[str] = None, total_params: int = name_types[i].add("size_label") name_parts[i] = part # Some easy to recognize finetune names - elif i > 0 and re.fullmatch(r'chat|instruct|vision', part, re.IGNORECASE): + elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE): name_types[i].add("finetune") + if part.lower() == "lora": + name_parts[i] = "LoRA" at_start = True # Find the basename through the annotated name diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py index c21c5490e8634..ef76831b521ee 100644 --- a/gguf-py/gguf/utility.py +++ b/gguf-py/gguf/utility.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Literal + def fill_templated_filename(filename: str, output_type: str | None) -> str: # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf' @@ -36,15 +38,15 @@ def model_weight_count_rounded_notation(model_params_count: int, min_digits: int def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str: if expert_count > 0: - pretty_size = model_weight_count_rounded_notation(shared_params + expert_params, min_digits=2) + pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2) size_class = f"{expert_count}x{pretty_size}" else: - size_class = model_weight_count_rounded_notation(total_params, min_digits=2) + size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2) return size_class -def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None) -> str: +def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str: # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention if base_name is not None: @@ -60,23 +62,8 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" - precision = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else "" - - return f"{name}{parameters}{finetune}{version}{precision}" - - -def naming_convention_vocab_only(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None) -> str: - # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention + encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else "" - if base_name is not None: - name = 
base_name.strip().title().replace(' ', '-').replace('/', '-') - elif model_name is not None: - name = model_name.strip().title().replace(' ', '-').replace('/', '-') - else: - name = "ggml-model" - - finetune = f"-{finetune_string.strip().title().replace(' ', '-')}" if finetune_string is not None else "" - - version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else "" + kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else "" - return f"{name}{finetune}{version}-vocab" + return f"{name}{parameters}{finetune}{version}{encoding}{kind}" diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index b71b338089103..3fac8218883f1 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -69,7 +69,7 @@ def test_get_model_id_components(self): # There is some legitimate models with only thousands of parameters self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50 * 10**3), - ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50k')) + ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50K')) # None standard and not easy to disambiguate self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"), @@ -87,6 +87,42 @@ def test_get_model_id_components(self): self.assertEqual(gguf.Metadata.get_model_id_components("smallcloudai/Refact-1_6B-fim"), ('Refact-1_6B-fim', 'smallcloudai', 'Refact', 'fim', None, '1.6B')) + # Uses Iter3 for the version + self.assertEqual(gguf.Metadata.get_model_id_components("UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3"), + ('Gemma-2-9B-It-SPPO-Iter3', 'UCLA-AGI', 'Gemma-2', 'It-SPPO', 'Iter3', '9B')) + + # Has two potential versions in the basename + self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Hermes-2-Theta-Llama-3-8B"), + ('Hermes-2-Theta-Llama-3-8B', 'NousResearch', 'Hermes-2-Theta-Llama-3', None, None, '8B')) + + # Potential version in the basename + self.assertEqual(gguf.Metadata.get_model_id_components("SeaLLMs/SeaLLMs-v3-7B-Chat"), + ('SeaLLMs-v3-7B-Chat', 'SeaLLMs', 'SeaLLMs-v3', 'Chat', None, '7B')) + + # Underscore in the basename, and 1m for the context size + self.assertEqual(gguf.Metadata.get_model_id_components("internlm/internlm2_5-7b-chat-1m", 7 * 10**9), + ('internlm2_5-7b-chat-1m', 'internlm', 'internlm2_5', 'chat-1m', None, '7B')) + + # Version before the finetune name + self.assertEqual(gguf.Metadata.get_model_id_components("pszemraj/jamba-900M-v0.13-KIx2"), + ('jamba-900M-v0.13-KIx2', 'pszemraj', 'jamba', 'KIx2', 'v0.13', '900M')) + + # TODO: hf suffix which could be ignored but isn't + self.assertEqual(gguf.Metadata.get_model_id_components("state-spaces/mamba-2.8b-hf"), + ('mamba-2.8b-hf', 'state-spaces', 'mamba', 'hf', None, '2.8B')) + + # Two sizes, don't merge them, the other is the number of tokens on which it was trained + self.assertEqual(gguf.Metadata.get_model_id_components("abacaj/llama-161M-100B", 161 * 10**6), + ('llama-161M-100B', 'abacaj', 'llama', '100b', None, '161M')) + + # It's a trap, there is no size label + self.assertEqual(gguf.Metadata.get_model_id_components("SparseLLM/relu-100B", 1340 * 10**6), + ('relu-100B', 'SparseLLM', 'relu', '100b', None, None)) + + # Weird size notation + self.assertEqual(gguf.Metadata.get_model_id_components("bigscience/bloom-7b1-petals"), + ('bloom-7b1-petals', 'bigscience', 'bloom', 'petals', None, '7.1B')) + def test_apply_metadata_heuristic_from_model_card(self): model_card = { 
'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],