You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+7 -7. Lines changed: 7 additions & 7 deletions
Original file line number
Diff line number
Diff line change
@@ -3255,13 +3255,13 @@ Input absmax is stored in same type as original type of B(float32, float16) with
3255
3255
.SetDomain(kMSDomain)
3256
3256
.SinceVersion(1)
3257
3257
.SetDoc(MatMulBnb4_ver1_doc)
3258
-
.Attr("K", "Size of each input feature.", AttributeProto::INT)
3259
-
.Attr("N", "Size of each output feature.", AttributeProto::INT)
3260
-
.Attr("block_size", "Number of groupsize used for weight quantization.", AttributeProto::INT)
3261
-
.Attr("quant_type", "Type of quantization used. 0 for FP4, 1 for NF4.", AttributeProto::INT)
3262
-
.Input(0, "A", "The input tensor, not quantized.", "T1")
3263
-
.Input(1, "B", "Quantized data for weight.", "T2")
3264
-
.Input(2, "absmax", "Quantization constants for each block.", "T1")
3258
+
.Attr("K", "size of each input feature", AttributeProto::INT)
3259
+
.Attr("N", "size of each output feature", AttributeProto::INT)
3260
+
.Attr("block_size", "number of groupsize used for weight quantization,(default 128). It needs to be a power of 2 and not smaller than 16.", AttributeProto::INT)
3261
+
.Attr("quant_type", "quantization data type. 0 for FP4, 1 for NF4.", AttributeProto::INT)
3262
+
.Input(0, "A", "The input tensor, not quantized", "T1")
3263
+
.Input(1, "B", "1-dimensional quantized data for weight", "T2")
0 commit comments