@@ -299,6 +299,50 @@ def process(self, weights, name, **kwargs):
299299 return GGUFTensor (weights , name , {})
300300
301301
class MiniMaxM2TensorProcessor(TensorProcessor):
    """Tensor processor for MiniMax-M2 GGUF checkpoints.

    Handles the two naming gaps between the GGUF file and the HF model:
    the stacked per-expert MoE weights (``ffn_{gate,down,up}_exps``) and the
    router-bias tensor (``e_score_correction_bias``), neither of which
    gguf-py's standard name map can resolve on its own.
    """

    # Per-expert prefix in HF names; the index is stripped so that all
    # experts collapse onto a single stacked tensor name.
    HF_EXPERT_RENAME_PATTERN = re.compile(r"block_sparse_moe\.experts\.\d+\.")
    # HF expert projection weights: w1/w2/w3 instead of gate/down/up_proj.
    HF_MOE_PATTERN = re.compile(r"(?:model\.)?layers\.(?P<bid>\d+)\.block_sparse_moe\.experts\.(?P<w>w[123])\.weight")
    # GGUF stacked expert tensors that need splitting back into HF names.
    GGUF_MOE_WEIGHTS_PATTERN = re.compile(r"(?P<name>.*\.ffn_(?P<w>gate|down|up)_exps)\.weight$")

    # HF router correction bias; gguf-py only knows "e_score_correction".
    HF_BIAS_PATTERN = re.compile(r"(?:model\.)?layers\.(?P<bid>\d+)\.block_sparse_moe\.e_score_correction_bias")

    def __init__(self, config=None):
        super().__init__(config=config)

    def preprocess_name(self, hf_name: str) -> str:
        """Drop the expert index so every expert maps to one shared name."""
        return self.HF_EXPERT_RENAME_PATTERN.sub("block_sparse_moe.experts.", hf_name)

    def perform_fallback_tensor_mapping(
        self, gguf_to_hf_name_map: dict[str, str], suffix: str, qual_name: str, hf_name: str
    ):
        """Register GGUF→HF name mappings that gguf-py cannot derive itself.

        Mutates ``gguf_to_hf_name_map`` in place; returns nothing.
        """
        # Map w1/w2/w3 expert names to GGUF ffn_gate/down/up_exps tensor names.
        # MiniMax-M2 uses w1 (gate), w2 (down), w3 (up) naming instead of
        # gate_proj/down_proj/up_proj, so gguf-py's name_map cannot resolve them.
        if match := self.HF_MOE_PATTERN.fullmatch(hf_name):
            projection = {"w1": "gate", "w2": "down", "w3": "up"}[match["w"]]
            gguf_key = f"blk.{match['bid']}.ffn_{projection}_exps{suffix}"
            gguf_to_hf_name_map[gguf_key] = qual_name + hf_name
            return
        # Map e_score_correction_bias to GGUF exp_probs_b.bias.
        # gguf-py knows "e_score_correction" but HF uses "e_score_correction_bias".
        if match := self.HF_BIAS_PATTERN.fullmatch(hf_name):
            gguf_to_hf_name_map[f"blk.{match['bid']}.exp_probs_b.bias"] = qual_name + hf_name

    def process(self, weights, name: str, **kwargs):
        """Split stacked MoE expert tensors; pass everything else through."""
        if not self.GGUF_MOE_WEIGHTS_PATTERN.fullmatch(name):
            # Not a stacked expert weight — leave untouched.
            return GGUFTensor(weights, name, {})
        tensor_key_mapping = kwargs.get("tensor_key_mapping")
        parsed_parameters = kwargs.get("parsed_parameters")
        if tensor_key_mapping and name in tensor_key_mapping:
            self._split_moe_expert_tensor(weights, parsed_parameters, tensor_key_mapping[name])
        # NOTE(review): name=None appears to tell the caller the tensor was
        # fully consumed here — confirm against the processing loop.
        return GGUFTensor(weights, None, {})

    def _split_moe_expert_tensor(self, weights: np.ndarray, parsed_parameters: dict[str, dict], hf_name: str):
        """Unstack ``weights`` (leading axis = expert index) into per-expert HF tensors.

        Writes each slice into ``parsed_parameters["tensors"]`` under the
        expert-indexed HF name; mutates in place and returns nothing.
        """
        num_experts = self.config.get("num_local_experts", 256)
        for expert_idx in range(num_experts):
            expert_name = hf_name.replace("block_sparse_moe.experts.", f"block_sparse_moe.experts.{expert_idx}.")
            parsed_parameters["tensors"][expert_name] = torch.from_numpy(np.copy(weights[expert_idx]))
345+
302346TENSOR_PROCESSORS = {
303347 "llama" : LlamaTensorProcessor ,
304348 "qwen2moe" : Qwen2MoeTensorProcessor ,
@@ -312,6 +356,7 @@ def process(self, weights, name, **kwargs):
312356 "gemma2" : Gemma2TensorProcessor ,
313357 "gemma3" : Gemma2TensorProcessor ,
314358 "lfm2" : Lfm2TensorProcessor ,
359+ "minimax-m2" : MiniMaxM2TensorProcessor ,
315360}
316361
317362
@@ -360,6 +405,8 @@ def get_gguf_hf_weights_map(
360405 model_type = "gemma3"
361406 elif model_type == "umt5" :
362407 model_type = "t5"
408+ elif model_type == "minimax_m2" :
409+ model_type = "minimax-m2"
363410 arch = None
364411 for key , value in MODEL_ARCH_NAMES .items ():
365412 if value == model_type :
@@ -462,6 +509,8 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
462509 updated_architecture = "qwen2_moe"
463510 elif "qwen3moe" in architecture :
464511 updated_architecture = "qwen3_moe"
512+ elif "minimax-m2" in architecture :
513+ updated_architecture = "minimax_m2"
465514
466515 # For stablelm architecture, we need to set qkv_bias and use_parallel_residual from tensors
467516 # If `qkv_bias=True`, qkv_proj with bias will be present in the tensors
0 commit comments