Skip to content

Commit f73a17f

Browse files
committed
Add GGUF support for MiniMax-M2 model
1 parent fefc3fa commit f73a17f

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

src/transformers/integrations/ggml.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,23 @@
287287
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
288288
"vocab_size": "vocab_size",
289289
},
290+
"minimax-m2": {
291+
"context_length": "max_position_embeddings",
292+
"block_count": "num_hidden_layers",
293+
"feed_forward_length": "intermediate_size",
294+
"embedding_length": "hidden_size",
295+
"rope.dimension_count": "rotary_dim",
296+
"rope.freq_base": "rope_theta",
297+
"attention.head_count": "num_attention_heads",
298+
"attention.head_count_kv": "num_key_value_heads",
299+
"attention.key_length": "head_dim",
300+
"attention.value_length": None,
301+
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
302+
"expert_count": "num_local_experts",
303+
"expert_used_count": "num_experts_per_tok",
304+
"expert_feed_forward_length": None,
305+
"vocab_size": "vocab_size",
306+
},
290307
}
291308

292309
GGUF_TOKENIZER_MAPPING = {
@@ -766,6 +783,7 @@ def converted(self) -> Tokenizer:
766783
"umt5": GGUFT5Converter,
767784
"deci": GGUFLlamaConverter,
768785
"decilm": GGUFLlamaConverter,
786+
"minimax_m2": GGUFQwen2Converter,
769787
}
770788

771789

src/transformers/modeling_gguf_pytorch_utils.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,50 @@ def process(self, weights, name, **kwargs):
299299
return GGUFTensor(weights, name, {})
300300

301301

302+
class MiniMaxM2TensorProcessor(TensorProcessor):
    """Name resolution and expert-tensor splitting for MiniMax-M2 GGUF checkpoints.

    MiniMax-M2 stores all experts of a layer in one fused GGUF tensor and names
    its HF expert projections w1/w2/w3 instead of gate_proj/down_proj/up_proj,
    so the generic gguf-py name map cannot resolve them on its own.
    """

    # Collapses "block_sparse_moe.experts.<idx>." to an index-free prefix so
    # every expert of a layer resolves to the same fused GGUF tensor name.
    HF_EXPERT_RENAME_PATTERN = re.compile(r"block_sparse_moe\.experts\.\d+\.")
    HF_MOE_PATTERN = re.compile(r"(?:model\.)?layers\.(?P<bid>\d+)\.block_sparse_moe\.experts\.(?P<w>w[123])\.weight")
    GGUF_MOE_WEIGHTS_PATTERN = re.compile(r"(?P<name>.*\.ffn_(?P<w>gate|down|up)_exps)\.weight$")

    HF_BIAS_PATTERN = re.compile(r"(?:model\.)?layers\.(?P<bid>\d+)\.block_sparse_moe\.e_score_correction_bias")

    def __init__(self, config=None):
        super().__init__(config=config)

    def preprocess_name(self, hf_name: str) -> str:
        """Strip the per-expert index from an HF tensor name before mapping."""
        return self.HF_EXPERT_RENAME_PATTERN.sub("block_sparse_moe.experts.", hf_name)

    def perform_fallback_tensor_mapping(
        self, gguf_to_hf_name_map: dict[str, str], suffix: str, qual_name: str, hf_name: str
    ):
        """Register GGUF->HF name mappings that gguf-py cannot derive itself.

        Handles two MiniMax-M2 naming quirks:
        - expert projections named w1 (gate), w2 (down), w3 (up) rather than
          gate_proj/down_proj/up_proj;
        - "e_score_correction_bias", where gguf-py only knows "e_score_correction".
        """
        expert_match = self.HF_MOE_PATTERN.fullmatch(hf_name)
        if expert_match is not None:
            projection = {"w1": "gate", "w2": "down", "w3": "up"}[expert_match["w"]]
            gguf_key = f"blk.{expert_match['bid']}.ffn_{projection}_exps{suffix}"
            gguf_to_hf_name_map[gguf_key] = qual_name + hf_name
            return
        bias_match = self.HF_BIAS_PATTERN.fullmatch(hf_name)
        if bias_match is not None:
            gguf_to_hf_name_map[f"blk.{bias_match['bid']}.exp_probs_b.bias"] = qual_name + hf_name

    def process(self, weights, name: str, **kwargs):
        """Split a fused GGUF expert tensor into per-expert HF tensors.

        Non-expert tensors pass through untouched. For a fused expert tensor
        that has a mapping, the per-expert slices are written into
        ``parsed_parameters["tensors"]`` and a GGUFTensor with name ``None`` is
        returned — presumably signalling the caller that the tensor was already
        consumed here (NOTE(review): confirm against the loader's handling).
        """
        if self.GGUF_MOE_WEIGHTS_PATTERN.fullmatch(name) is None:
            return GGUFTensor(weights, name, {})
        tensor_key_mapping = kwargs.get("tensor_key_mapping")
        parsed_parameters = kwargs.get("parsed_parameters")
        if tensor_key_mapping and name in tensor_key_mapping:
            self._split_moe_expert_tensor(weights, parsed_parameters, tensor_key_mapping[name])
        return GGUFTensor(weights, None, {})

    def _split_moe_expert_tensor(self, weights: np.ndarray, parsed_parameters: dict[str, dict], hf_name: str):
        """Store one tensor per expert, re-inserting the expert index in the name."""
        # assumes weights is indexed [expert, ...] along its first axis — TODO confirm
        expert_total = self.config.get("num_local_experts", 256)
        for expert_idx in range(expert_total):
            indexed_name = hf_name.replace("block_sparse_moe.experts.", f"block_sparse_moe.experts.{expert_idx}.")
            parsed_parameters["tensors"][indexed_name] = torch.from_numpy(np.copy(weights[expert_idx]))
344+
345+
302346
TENSOR_PROCESSORS = {
303347
"llama": LlamaTensorProcessor,
304348
"qwen2moe": Qwen2MoeTensorProcessor,
@@ -312,6 +356,7 @@ def process(self, weights, name, **kwargs):
312356
"gemma2": Gemma2TensorProcessor,
313357
"gemma3": Gemma2TensorProcessor,
314358
"lfm2": Lfm2TensorProcessor,
359+
"minimax-m2": MiniMaxM2TensorProcessor,
315360
}
316361

317362

@@ -360,6 +405,8 @@ def get_gguf_hf_weights_map(
360405
model_type = "gemma3"
361406
elif model_type == "umt5":
362407
model_type = "t5"
408+
elif model_type == "minimax_m2":
409+
model_type = "minimax-m2"
363410
arch = None
364411
for key, value in MODEL_ARCH_NAMES.items():
365412
if value == model_type:
@@ -462,6 +509,8 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
462509
updated_architecture = "qwen2_moe"
463510
elif "qwen3moe" in architecture:
464511
updated_architecture = "qwen3_moe"
512+
elif "minimax-m2" in architecture:
513+
updated_architecture = "minimax_m2"
465514

466515
# For stablelm architecture, we need to set qkv_bias and use_parallel_residual from tensors
467516
# If `qkv_bias=True`, qkv_proj with bias will be present in the tensors

0 commit comments

Comments
 (0)