Skip to content

Commit cad22e1

Browse files
committed
gguf-py : add GGMLFileType
* convert-hf : use GGMLFileType
1 parent bffdaf4 commit cad22e1

File tree

2 files changed

+54
-14
lines changed

2 files changed

+54
-14
lines changed

convert-hf-to-gguf.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,7 @@ def write_tensors(self):
239239
data: np.ndarray = data # type hint
240240
n_dims = len(data.shape)
241241
data_dtype = data.dtype
242-
243-
# if f32 desired, convert any float16 to float32
244-
if self.ftype == 0 and data_dtype == np.float16:
245-
data = data.astype(np.float32)
242+
data_qtype: gguf.GGMLQuantizationType | None = None
246243

247244
# when both are True, f32 should win
248245
extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
@@ -254,20 +251,27 @@ def write_tensors(self):
254251
# if f16 desired, convert any float32 2-dim weight tensors to float16
255252
extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)
256253

257-
# when both extra_f32 and extra_f16 are False, convert to float32 by default
258-
if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
259-
data = data.astype(np.float32)
254+
if self.ftype != gguf.GGMLFileType.ALL_F32 and extra_f16 and not extra_f32:
255+
if self.ftype == gguf.GGMLFileType.MOSTLY_F16:
256+
if data_dtype != np.float16:
257+
data = data.astype(np.float16)
258+
data_qtype = gguf.GGMLQuantizationType.F16
259+
# TODO: add more types (like BF16) here
260+
261+
else: # by default, convert to float32
262+
if data_dtype != np.float32:
263+
data = data.astype(np.float32)
264+
data_qtype = gguf.GGMLQuantizationType.F32
260265

261-
if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
262-
data = data.astype(np.float16)
266+
assert data_qtype is not None
263267

264268
# reverse shape to make it similar to the internal ggml dimension order
265269
shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
266270

267271
# n_dims is implicit in the shape
268-
logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data.dtype}, shape = {shape_str}")
272+
logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
269273

270-
self.gguf_writer.add_tensor(new_name, data)
274+
self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
271275

272276
def write(self):
273277
self.write_tensors()
@@ -2472,9 +2476,9 @@ def main() -> None:
24722476
logger.error(f'Error: {args.model} is not a directory')
24732477
sys.exit(1)
24742478

2475-
ftype_map = {
2476-
"f32": gguf.GGMLQuantizationType.F32,
2477-
"f16": gguf.GGMLQuantizationType.F16,
2479+
ftype_map: dict[str, gguf.GGMLFileType] = {
2480+
"f32": gguf.GGMLFileType.ALL_F32,
2481+
"f16": gguf.GGMLFileType.MOSTLY_F16,
24782482
}
24792483

24802484
if args.outfile is not None:

gguf-py/gguf/constants.py

+36
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,42 @@ class GGMLQuantizationType(IntEnum):
820820
BF16 = 30
821821

822822

823+
class GGMLFileType(IntEnum):
    """Overall quantization scheme of a GGUF model file.

    The numeric values mirror the ``llama_ftype`` enum in llama.cpp, so
    gaps are intentional: 5 (Q4_2) and 6 (Q4_3) were retired upstream and
    their values are never reused. Unless a variant says otherwise, the
    "MOSTLY_*" schemes quantize multi-dimensional tensors and leave 1d
    tensors in higher precision.
    """

    ALL_F32              = 0
    MOSTLY_F16           = 1   # except 1d tensors
    MOSTLY_Q4_0          = 2   # except 1d tensors
    MOSTLY_Q4_1          = 3   # except 1d tensors
    MOSTLY_Q4_1_SOME_F16 = 4   # tok_embeddings.weight and output.weight are F16
    # 5 == MOSTLY_Q4_2: support has been removed
    # 6 == MOSTLY_Q4_3: support has been removed
    MOSTLY_Q8_0          = 7   # except 1d tensors
    MOSTLY_Q5_0          = 8   # except 1d tensors
    MOSTLY_Q5_1          = 9   # except 1d tensors
    MOSTLY_Q2_K          = 10  # except 1d tensors
    MOSTLY_Q3_K_S        = 11  # except 1d tensors
    MOSTLY_Q3_K_M        = 12  # except 1d tensors
    MOSTLY_Q3_K_L        = 13  # except 1d tensors
    MOSTLY_Q4_K_S        = 14  # except 1d tensors
    MOSTLY_Q4_K_M        = 15  # except 1d tensors
    MOSTLY_Q5_K_S        = 16  # except 1d tensors
    MOSTLY_Q5_K_M        = 17  # except 1d tensors
    MOSTLY_Q6_K          = 18  # except 1d tensors
    MOSTLY_IQ2_XXS       = 19  # except 1d tensors
    MOSTLY_IQ2_XS        = 20  # except 1d tensors
    MOSTLY_Q2_K_S        = 21  # except 1d tensors
    MOSTLY_IQ3_XS        = 22  # except 1d tensors
    MOSTLY_IQ3_XXS       = 23  # except 1d tensors
    MOSTLY_IQ1_S         = 24  # except 1d tensors
    MOSTLY_IQ4_NL        = 25  # except 1d tensors
    MOSTLY_IQ3_S         = 26  # except 1d tensors
    MOSTLY_IQ3_M         = 27  # except 1d tensors
    MOSTLY_IQ2_S         = 28  # except 1d tensors
    MOSTLY_IQ2_M         = 29  # except 1d tensors
    MOSTLY_IQ4_XS        = 30  # except 1d tensors
    MOSTLY_IQ1_M         = 31  # except 1d tensors
    MOSTLY_BF16          = 32  # except 1d tensors
857+
858+
823859
class GGUFEndian(IntEnum):
    """Byte order used when serializing a GGUF file."""

    LITTLE = 0
    BIG = 1

0 commit comments

Comments
 (0)