Commit 53096ef
convert-hf-to-gguf.py: add --get-outfile command and refactor
1 parent: 9b82476

File tree (4 files changed: +114, -54 lines changed):
convert-hf-to-gguf.py
convert.py
gguf-py/gguf/__init__.py
gguf-py/gguf/utility.py

convert-hf-to-gguf.py (59 additions, 12 deletions)
@@ -60,6 +60,7 @@ class Model:
     tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     fname_out: Path
+    fname_default: Path
     gguf_writer: gguf.GGUFWriter
 
     # subclasses should define this!
@@ -91,10 +92,27 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
             else:
                 logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_BF16
-        ftype_up: str = self.ftype.name.partition("_")[2].upper()
-        ftype_lw: str = ftype_up.lower()
+
+        # Generate default filename based on model specification and available metadata
+        version_string = None  # TODO: Add metadata support
+        expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
+        encodingScheme = {
+            gguf.LlamaFileType.ALL_F32     : "F32",
+            gguf.LlamaFileType.MOSTLY_F16  : "F16",
+            gguf.LlamaFileType.MOSTLY_BF16 : "BF16",
+            gguf.LlamaFileType.MOSTLY_Q8_0 : "Q8_0",
+        }[self.ftype]
+        self.fname_default = f"{gguf.naming_convention(dir_model.name, version_string, expert_count, self.parameter_count(), encodingScheme)}"
+
+        # Filename Output
+        if fname_out is not None:
+            # custom defined filename and path was provided
+            self.fname_out = fname_out
+        else:
+            # output in the same directory as the model by default
+            self.fname_out = dir_model.parent / self.fname_default
+
         # allow templating the file name with the output ftype, useful with the "auto" ftype
-        self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
         self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
 
     @classmethod
@@ -240,6 +258,25 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i
 
         return False
 
+    def parameter_count(self):
+        total_model_parameters = 0
+        for name, data_torch in self.get_tensors():
+            # Got A Tensor
+
+            # We don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+                continue
+
+            # Calculate Tensor Volume
+            sum_weights_in_tensor = 1
+            for dim in data_torch.shape:
+                sum_weights_in_tensor *= dim
+
+            # Add Tensor Volume To Running Count
+            total_model_parameters += sum_weights_in_tensor
+
+        return total_model_parameters
+
     def write_tensors(self):
         max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
 
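The new Model.parameter_count() walks every tensor from get_tensors() and accumulates the product of each tensor's dimensions, skipping the attention-bias and rotary-frequency helpers that are not model weights. A minimal equivalent sketch, assuming a hypothetical iterable of (name, shape) pairs in place of the class's tensor iterator:

import math

def parameter_count(named_shapes) -> int:
    # Helper tensors excluded from the count, mirroring the endswith() filter above
    skipped = (".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")
    # Each tensor contributes the product of its dimensions; a 4096x4096
    # projection, for example, adds 4096 * 4096 = 16,777,216 parameters
    return sum(math.prod(shape) for name, shape in named_shapes
               if not name.endswith(skipped))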
@@ -2551,14 +2588,24 @@ def parse_args() -> argparse.Namespace:
         "--verbose", action="store_true",
         help="increase output verbosity",
     )
+    parser.add_argument(
+        "--get-outfile", action="store_true",
+        help="get calculated default outfile name"
+    )
 
     return parser.parse_args()
 
 
 def main() -> None:
     args = parse_args()
 
-    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    elif args.get_outfile:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)
 
     dir_model = args.model
 
@@ -2587,19 +2634,19 @@ def main() -> None:
         "auto": gguf.LlamaFileType.GUESSED,
     }
 
-    if args.outfile is not None:
-        fname_out = args.outfile
-    else:
-        # output in the same directory as the model by default
-        fname_out = dir_model / 'ggml-model-{ftype}.gguf'
-
     logger.info(f"Loading model: {dir_model.name}")
 
     hparams = Model.load_hparams(dir_model)
 
     with torch.inference_mode():
-        model_class = Model.from_model_architecture(hparams["architectures"][0])
-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)
+        encodingScheme = ftype_map[args.outtype]
+        model_architecture = hparams["architectures"][0]
+        model_class = Model.from_model_architecture(model_architecture)
+        model_instance = model_class(dir_model, encodingScheme, args.outfile, args.bigendian, args.use_temp_file, args.no_lazy)
+
+        if args.get_outfile:
+            print(f"{model_instance.fname_default}") # noqa: NP100
+            return
 
         logger.info("Set model parameters")
         model_instance.set_gguf_parameters()
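Usage note: with --get-outfile the script prints only the computed default name and returns before writing any output, since logging was raised to WARNING earlier in main(). For a hypothetical model directory, the invocation would look like:

python convert-hf-to-gguf.py --get-outfile ./models/Mistral-7B-v0.1

The printed value is fname_default, i.e. the bare name from gguf.naming_convention() without a .gguf extension.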

convert.py (19 additions, 42 deletions)
@@ -1320,35 +1320,17 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
 
 def model_parameter_count(model: LazyModel) -> int:
     total_model_parameters = 0
-    for i, (name, lazy_tensor) in enumerate(model.items()):
+    for name, lazy_tensor in model.items():
+        # Got A Tensor
         sum_weights_in_tensor = 1
+        # Tensor Volume
         for dim in lazy_tensor.shape:
             sum_weights_in_tensor *= dim
+        # Add Tensor Volume To Running Count
         total_model_parameters += sum_weights_in_tensor
     return total_model_parameters
 
 
-def model_parameter_count_rounded_notation(model_params_count: int) -> str:
-    if model_params_count > 1e12 :
-        # Trillions Of Parameters
-        scaled_model_params = model_params_count * 1e-12
-        scale_suffix = "T"
-    elif model_params_count > 1e9 :
-        # Billions Of Parameters
-        scaled_model_params = model_params_count * 1e-9
-        scale_suffix = "B"
-    elif model_params_count > 1e6 :
-        # Millions Of Parameters
-        scaled_model_params = model_params_count * 1e-6
-        scale_suffix = "M"
-    else:
-        # Thousands Of Parameters
-        scaled_model_params = model_params_count * 1e-3
-        scale_suffix = "K"
-
-    return f"{round(scaled_model_params)}{scale_suffix}"
-
-
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
             for (name, tensor) in model.items()}
@@ -1529,29 +1511,24 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
 
 
 def default_convention_outfile(file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> str:
-    quantization = {
-        GGMLFileType.AllF32: "F32",
-        GGMLFileType.MostlyF16: "F16",
-        GGMLFileType.MostlyQ8_0: "Q8_0",
-    }[file_type]
-
-    parameters = model_parameter_count_rounded_notation(model_params_count)
 
-    expert_count = ""
-    if params.n_experts is not None:
-        expert_count = f"{params.n_experts}x"
-
-    version = ""
-    if metadata is not None and metadata.version is not None:
-        version = f"-{metadata.version}"
-
-    name = "ggml-model"
+    name = None
     if metadata is not None and metadata.name is not None:
         name = metadata.name
     elif params.path_model is not None:
         name = params.path_model.name
 
-    return f"{name}{version}-{expert_count}{parameters}-{quantization}"
+    version = metadata.version if metadata is not None and metadata.version is not None else None
+
+    expert_count = params.n_experts if params.n_experts is not None else None
+
+    encodingScheme = {
+        GGMLFileType.AllF32: "F32",
+        GGMLFileType.MostlyF16: "F16",
+        GGMLFileType.MostlyQ8_0: "Q8_0",
+    }[file_type]
+
+    return gguf.naming_convention(name, version, expert_count, model_params_count, encodingScheme)
 
 
 def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
@@ -1612,9 +1589,9 @@ def main(args_in: list[str] | None = None) -> None:
     if args.get_outfile:
         model_plus = load_some_model(args.model)
         params = Params.load(model_plus)
-        model = convert_model_names(model_plus.model, params, args.skip_unknown)
+        model = convert_model_names(model_plus.model, params, args.skip_unknown)
         model_params_count = model_parameter_count(model_plus.model)
-        ftype = pick_output_type(model, args.outtype)
+        ftype = pick_output_type(model, args.outtype)
         print(f"{default_convention_outfile(ftype, params, model_params_count, metadata)}") # noqa: NP100
         return
 
@@ -1632,7 +1609,7 @@ def main(args_in: list[str] | None = None) -> None:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
 
     model_params_count = model_parameter_count(model_plus.model)
-    logger.info(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+    logger.info(f"model parameters count : {model_params_count} ({gguf.model_parameter_count_rounded_notation(model_params_count)})")
 
     if args.dump:
         do_dump_model(model_plus)
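convert.py now defers the human-readable parameter count to the shared gguf helper. A quick illustration of the rounding behavior (input values hypothetical):

import gguf

print(gguf.model_parameter_count_rounded_notation(7_241_732_096))   # 7B
print(gguf.model_parameter_count_rounded_notation(46_700_000_000))  # 47B
print(gguf.model_parameter_count_rounded_notation(355_000_000))     # 355M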

gguf-py/gguf/__init__.py (1 addition, 0 deletions)
@@ -5,3 +5,4 @@
 from .quants import *
 from .tensor_mapping import *
 from .vocab import *
+from .utility import *

gguf-py/gguf/utility.py (35 additions, 0 deletions)
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+
+def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e15 :
+        # Quadrillion Of Parameters
+        scaled_model_params = model_params_count * 1e-15
+        scale_suffix = "Q"
+    elif model_params_count > 1e12 :
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9 :
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6 :
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
+def naming_convention(model_name: str, version_string:str, expert_count_int:int, model_params_count: int, encodingScheme: str) -> str:
+    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+    name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model"
+    version = f"-{version_string}" if version_string is not None else ""
+    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None else ""
+    parameters = model_parameter_count_rounded_notation(model_params_count)
+    encodingScheme = encodingScheme.upper()
+    return f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}"
