Skip to content

Qualcomm AI Engine Direct - Fix UT example script hang when exception happened #4355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions backends/qualcomm/passes/annotate_quant_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,11 @@ def _dequant_fold_params(self, n, quant_attrs, param):
def _annotate_quant_attrs(
self, graph_module: torch.fx.GraphModule
) -> torch.fx.GraphModule:
# Keep track of const params that has been dequant, so it does not get
# dequant multiple times if the const param has more than 1 user
visited_const_param = set()
for n in graph_module.graph.nodes:
self._annotate_requant(n)

# With fold_quant enabled, check if the input of dq op is quantized param.
param = None
if n.target in dq_ops:
Expand All @@ -106,7 +108,8 @@ def _annotate_quant_attrs(
quant_attrs = get_quant_attrs(self.edge_program, n)
self._annotate_source_nodes(n, quant_attrs)

if param is not None:
if param is not None and n.args[0] not in visited_const_param:
visited_const_param.add(n.args[0])
self._dequant_fold_params(n, quant_attrs, param)

return graph_module
Expand Down
8 changes: 7 additions & 1 deletion backends/qualcomm/passes/recompose_pixel_unshuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@ def call(self, graph_module: torch.fx.GraphModule):
for node in graph.nodes:
if node.op == "call_function" and node.target == self.reshape_target:
with graph.inserting_after(node):
premute_node = node.args[0]

# Clone op still exists between permute and reshape_target during quantization,
# so we need to check for args[0].args[0] to get permute node
if self.quantization_capture:
premute_node = node.args[0].args[0]
else:
premute_node = node.args[0]
if any(
[
len(node.args[1]) != 4,
Expand Down
2 changes: 0 additions & 2 deletions backends/qualcomm/quantizer/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
RecomposePixelUnshuffle,
)
from executorch.backends.qualcomm.passes.reduce_dynamic_range import ReduceDynamicRange
from executorch.backends.qualcomm.passes.remove_redundancy import RemoveRedundancy
from executorch.backends.qualcomm.passes.replace_inf_buffer import ReplaceInfBuffer
from executorch.backends.transforms.decompose_sdpa import (
DecomposeScaledDotProductAttention,
Expand Down Expand Up @@ -182,7 +181,6 @@ def set_per_channel_linear_quant(self, enable: bool) -> None:
self._update_per_channel_weight_quant_ops(linear_ops, enable)

def transform_for_annotation(self, model: GraphModule) -> GraphModule:
model = RemoveRedundancy()(model).graph_module
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have a follow-up to add this back? It feels like we may have a perf regression without it.

Copy link
Collaborator Author

@winskuo-quic winskuo-quic Jul 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for reviewing. Although the RemoveRedundancy() pass is removed from the quantizer, it will still be invoked during capture_program(), so the final performance should be the same.

RemoveRedundancy()(graph_module)

model = ReduceDynamicRange()(model).graph_module
model = RecomposePixelUnshuffle(quantization_capture=True)(model).graph_module
model = DecomposeScaledDotProductAttention()(model).graph_module
Expand Down
116 changes: 82 additions & 34 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1581,8 +1581,11 @@ def test_fbnet(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 90)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 90)

def test_gMLP(self):
if not self.required_envs([self.image_dataset]):
Expand Down Expand Up @@ -1614,8 +1617,11 @@ def test_gMLP(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 90)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 90)

def test_ssd300_vgg16(self):
if not self.required_envs([self.pretrained_weight, self.oss_repo]):
Expand Down Expand Up @@ -1649,7 +1655,10 @@ def test_ssd300_vgg16(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["mAP"], 0.70)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["mAP"], 0.70)

def test_dino_v2(self):
if not self.required_envs([self.image_dataset]):
Expand Down Expand Up @@ -1680,8 +1689,11 @@ def test_dino_v2(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 70)
self.assertGreaterEqual(msg["top_5"], 85)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 70)
self.assertGreaterEqual(msg["top_5"], 85)

def test_esrgan(self):
if not self.required_envs():
Expand Down Expand Up @@ -1714,8 +1726,11 @@ def test_esrgan(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["PSNR"], 24)
self.assertGreaterEqual(msg["SSIM"], 0.8)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["PSNR"], 24)
self.assertGreaterEqual(msg["SSIM"], 0.8)

def test_squeezenet(self):
if not self.required_envs([self.image_dataset]):
Expand Down Expand Up @@ -1747,8 +1762,11 @@ def test_squeezenet(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 40)
self.assertGreaterEqual(msg["top_5"], 70)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 40)
self.assertGreaterEqual(msg["top_5"], 70)


class TestExampleScript(TestQNN):
Expand Down Expand Up @@ -1794,8 +1812,11 @@ def test_mobilenet_v2(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_mobilenet_v3(self):
if not self.required_envs([self.image_dataset]):
Expand Down Expand Up @@ -1829,8 +1850,11 @@ def test_mobilenet_v3(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_inception_v3(self):
if not self.required_envs([self.image_dataset]):
Expand Down Expand Up @@ -1864,8 +1888,11 @@ def test_inception_v3(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_inception_v4(self):
if not self.required_envs([self.image_dataset]):
Expand Down Expand Up @@ -1899,8 +1926,11 @@ def test_inception_v4(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 80)

def test_vit(self):
if not self.required_envs([self.image_dataset]):
Expand Down Expand Up @@ -1934,8 +1964,11 @@ def test_vit(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["top_1"], 70)
self.assertGreaterEqual(msg["top_5"], 90)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["top_1"], 70)
self.assertGreaterEqual(msg["top_5"], 90)

def test_edsr(self):
if not self.required_envs():
Expand Down Expand Up @@ -1968,8 +2001,11 @@ def test_edsr(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["PSNR"], 25)
self.assertGreaterEqual(msg["SSIM"], 0.8)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["PSNR"], 25)
self.assertGreaterEqual(msg["SSIM"], 0.8)

def test_deeplab_v3(self):
if not self.required_envs():
Expand Down Expand Up @@ -2002,9 +2038,12 @@ def test_deeplab_v3(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
self.assertGreaterEqual(msg["PA"], 0.85)
self.assertGreaterEqual(msg["MPA"], 0.70)
self.assertGreaterEqual(msg["MIoU"], 0.55)
if "Error" in msg:
self.fail(msg["Error"])
else:
self.assertGreaterEqual(msg["PA"], 0.85)
self.assertGreaterEqual(msg["MPA"], 0.70)
self.assertGreaterEqual(msg["MIoU"], 0.55)

def test_stories_single_llama(self):
if not self.required_envs():
Expand Down Expand Up @@ -2049,8 +2088,11 @@ def test_stories_single_llama(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
model_out = msg["result"][0]
self.assertTrue(model_out.startswith(golden_start_with))
if "Error" in msg:
self.fail(msg["Error"])
else:
model_out = msg["result"][0]
self.assertTrue(model_out.startswith(golden_start_with))

def test_mobilebert(self):
if not self.required_envs([self.pretrained_weight]):
Expand Down Expand Up @@ -2085,9 +2127,12 @@ def test_mobilebert(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
cpu, htp = msg["CPU"], msg["HTP"]
for k, v in cpu.items():
self.assertLessEqual(abs(v[0] - htp[k][0]), 2)
if "Error" in msg:
self.fail(msg["Error"])
else:
cpu, htp = msg["CPU"], msg["HTP"]
for k, v in cpu.items():
self.assertLessEqual(abs(v[0] - htp[k][0]), 2)

@unittest.skip("will be enabled after TODOs got resolved")
def test_ptq_mobilebert(self):
Expand Down Expand Up @@ -2127,9 +2172,12 @@ def test_ptq_mobilebert(self):
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
cpu, htp = msg["CPU"], msg["HTP"]
for k, v in cpu.items():
self.assertLessEqual(abs(v[0] - htp[k][0]), 5)
if "Error" in msg:
self.fail(msg["Error"])
else:
cpu, htp = msg["CPU"], msg["HTP"]
for k, v in cpu.items():
self.assertLessEqual(abs(v[0] - htp[k][0]), 5)

def test_export_example(self):
if not self.required_envs([self.model_name]):
Expand Down
9 changes: 8 additions & 1 deletion examples/qualcomm/llama2/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,4 +586,11 @@ def post_process():
if args.compile_only:
exit(f"Finish compile_only and save to {args.artifact}")

inference(args)
# Run on-device inference; on failure, report the error back to the
# host-side UT harness (if one is listening) instead of letting its
# blocking accept()/recv() hang forever.
try:
    inference(args)
except Exception as e:
    if args.ip and args.port != -1:
        # A test listener is waiting on (ip, port): forward the error
        # message so the harness can fail the test cleanly.
        with Client((args.ip, args.port)) as conn:
            conn.send(json.dumps({"Error": str(e)}))
    else:
        # No harness attached: re-raise the original exception unchanged.
        # (Re-wrapping as `raise Exception(e)` would discard the original
        # exception type and traceback.)
        raise
60 changes: 35 additions & 25 deletions examples/qualcomm/oss_scripts/dino_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,31 +67,7 @@ def get_instance():
return model.eval()


if __name__ == "__main__":
parser = setup_common_args_and_variables()

parser.add_argument(
"-a",
"--artifact",
help="Path for storing generated artifacts by this example. Default ./dino_v2",
default="./dino_v2",
type=str,
)

parser.add_argument(
"-d",
"--dataset",
help=(
"path to the validation folder of ImageNet dataset. "
"e.g. --dataset imagenet-mini/val "
"for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)"
),
type=str,
required=True,
)

args = parser.parse_args()

def main(args):
skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args)

# ensure the working directory exist.
Expand Down Expand Up @@ -170,3 +146,37 @@ def get_instance():
else:
for i, k in enumerate(k_val):
print(f"top_{k}->{topk[i]}%")


if __name__ == "__main__":
    parser = setup_common_args_and_variables()

    parser.add_argument(
        "-a",
        "--artifact",
        help="Path for storing generated artifacts by this example. Default ./dino_v2",
        default="./dino_v2",
        type=str,
    )

    parser.add_argument(
        "-d",
        "--dataset",
        help=(
            "path to the validation folder of ImageNet dataset. "
            "e.g. --dataset imagenet-mini/val "
            "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)"
        ),
        type=str,
        required=True,
    )

    args = parser.parse_args()
    # Run the example; on failure, report the error back to the host-side
    # UT harness (if one is listening) instead of letting its blocking
    # accept()/recv() hang forever.
    try:
        main(args)
    except Exception as e:
        if args.ip and args.port != -1:
            # A test listener is waiting on (ip, port): forward the error
            # message so the harness can fail the test cleanly.
            with Client((args.ip, args.port)) as conn:
                conn.send(json.dumps({"Error": str(e)}))
        else:
            # No harness attached: re-raise the original exception
            # unchanged. (Re-wrapping as `raise Exception(e)` would
            # discard the original exception type and traceback.)
            raise
Loading
Loading