@@ -3255,13 +3255,13 @@ Input absmax is stored in same type as original type of B(float32, float16) with
32553255 .SetDomain (kMSDomain )
32563256 .SinceVersion (1 )
32573257 .SetDoc (MatMulBnb4_ver1_doc)
3258- .Attr (" K" , " Size of each input feature. " , AttributeProto::INT)
3259- .Attr (" N" , " Size of each output feature. " , AttributeProto::INT)
3260- .Attr (" block_size" , " Number of groupsize used for weight quantization." , AttributeProto::INT)
3261- .Attr (" quant_type" , " Type of quantization used . 0 for FP4, 1 for NF4." , AttributeProto::INT)
3262- .Input (0 , " A" , " The input tensor, not quantized. " , " T1" )
3263- .Input (1 , " B" , " Quantized data for weight. " , " T2" )
3264- .Input (2 , " absmax" , " Quantization constants for each block. " , " T1" )
3258+ .Attr (" K" , " size of each input feature" , AttributeProto::INT)
3259+ .Attr (" N" , " size of each output feature" , AttributeProto::INT)
3260+ .Attr (" block_size" , " number of groupsize used for weight quantization. It needs to be a power of 2 and not smaller than 16 ." , AttributeProto::INT)
3261+ .Attr (" quant_type" , " quantization data type . 0 for FP4, 1 for NF4." , AttributeProto::INT)
3262+ .Input (0 , " A" , " The input tensor, not quantized" , " T1" )
3263+ .Input (1 , " B" , " 1-dimensional quantized data for weight" , " T2" )
3264+ .Input (2 , " absmax" , " quantization constants" , " T1" )
32653265 .Output (0 , " Y" , " tensor. The output tensor has the same rank as the input. " , " T1" )
32663266 .TypeConstraint (" T1" , {" tensor(float)" , " tensor(float16)" }, " Constrain input and output types to float/half_float tensors." )
32673267 .TypeConstraint (" T2" , {" tensor(uint8)" }, " Constrain quantized weight types to uint8." )
0 commit comments