
Commit c2969f1

cccclai authored and facebook-github-bot committed
add 16a8w matmul custom annotation (#5864)
Summary: Pull Request resolved: #5864

As titled, this splits the 16a8w matmul custom annotation out of the larger PR.

Reviewed By: kirklandsign

Differential Revision: D63876215

fbshipit-source-id: 484fd93c31cda7b755e5e343fdc2513a29749304
1 parent 20a157f · commit c2969f1

File tree

1 file changed: +26 −0 lines changed

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 26 additions & 0 deletions
@@ -118,3 +118,29 @@ def annotate_matmul_input1(node: Node, quantization_config: QuantizationConfig):
         if "SDPA" in full_qualified_name:
             annotate_matmul(node, quantization_config_16a8w)
             annotate_matmul_input1(node.args[1], quantization_config_8a8w)
+
+
+def custom_annotate_matmul_16a8w(gm: torch.fx.GraphModule):
+    """
+    Annotate matmul op with 16a8w quantization config
+    """
+
+    def annotate_matmul(node: Node, quantization_config: QuantizationConfig):
+        input_qspec_map = {}
+        input_act = node.args[0]
+        input_spec = quantization_config.input_activation
+        input_qspec_map[input_act] = input_spec
+        input_act1 = node.args[1]
+        input_spec1 = quantization_config.weight
+        input_qspec_map[input_act1] = input_spec1
+        node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
+            input_qspec_map=input_qspec_map,
+            output_qspec=quantization_config.output_activation,
+            _annotated=True,
+        )
+
+    # Annotate 16a8w for matmul op to get better performance
+    quantization_config_16a8w = get_16a8w_qnn_ptq_config()
+    for node in gm.graph.nodes:
+        if node.op == "call_function" and node.target == torch.ops.aten.matmul.default:
+            annotate_matmul(node, quantization_config_16a8w)
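
Editor's note: "16a8w" here means 16-bit activations with 8-bit weights; the annotation maps the first matmul operand to the 16-bit activation spec and the second to the 8-bit weight spec. Below is a minimal usage sketch of how such a custom annotation could be attached before PT2E quantization. It assumes QnnQuantizer exposes an add_custom_quant_annotations() hook for registering annotation callables (an assumption based on how custom annotations are wired elsewhere in the Qualcomm backend) and that `model` is an already-exported torch.fx.GraphModule; it is not part of this commit.

# Usage sketch (not part of this commit). Assumes QnnQuantizer exposes
# add_custom_quant_annotations() for registering annotation callables,
# and that `model` is an already-exported torch.fx.GraphModule.
from executorch.backends.qualcomm.quantizer.custom_annotation import (
    custom_annotate_matmul_16a8w,
)
from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

quantizer = QnnQuantizer()
# Register the custom annotation so matmul nodes get 16a8w while the
# rest of the graph keeps the quantizer's default config.
quantizer.add_custom_quant_annotations((custom_annotate_matmul_16a8w,))

prepared = prepare_pt2e(model, quantizer)
# ... feed calibration batches through `prepared` here ...
quantized = convert_pt2e(prepared)

Running the custom annotation before the default annotation pass means matmul nodes are already marked `_annotated=True`, so the default pass skips them rather than overwriting the 16a8w config.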
