Skip to content

Commit a7bc5c8

Browse files
agrabezh authored and igcbot committed
Set extra cost for some math intrinsics
Report the `TCC_Expensive` cost for the `sqrt`, `sin`, and `cos` math intrinsics so that passes which consult the cost model are less likely to execute them speculatively.
1 parent 7710a72 commit a7bc5c8

File tree

2 files changed

+102
-0
lines changed

2 files changed

+102
-0
lines changed

IGC/Compiler/GenTTI.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,20 @@ namespace llvm {
589589
, const User* U
590590
#endif
591591
) {
592+
// The extra cost of speculative execution for math intrinsics
593+
if (auto *II = dyn_cast_or_null<IntrinsicInst>(U)) {
594+
if (Intrinsic::ID IID = II->getIntrinsicID()) {
595+
switch (IID) {
596+
case Intrinsic::cos:
597+
case Intrinsic::sin:
598+
case Intrinsic::sqrt:
599+
return TTI::TCC_Expensive;
600+
default:
601+
break;
602+
}
603+
}
604+
}
605+
592606
IGC::CodeGenContext* CGC = this->ctx;
593607
if (!CGC->enableFunctionCall() && !GenISAIntrinsic::isIntrinsic(F) &&
594608
!F->isIntrinsic()) {
@@ -621,6 +635,20 @@ namespace llvm {
621635
llvm::InstructionCost GenIntrinsicsTTIImpl::getUserCost(const User* U, ArrayRef<const Value*> Operands, TTI::TargetCostKind CostKind)
622636
#endif
623637
{
638+
// The extra cost of speculative execution for math intrinsics
639+
if (auto *II = dyn_cast_or_null<IntrinsicInst>(U)) {
640+
if (Intrinsic::ID IID = II->getIntrinsicID()) {
641+
switch (IID) {
642+
case Intrinsic::cos:
643+
case Intrinsic::sin:
644+
case Intrinsic::sqrt:
645+
return TTI::TCC_Expensive;
646+
default:
647+
break;
648+
}
649+
}
650+
}
651+
624652
const Function* F = dyn_cast<Function>(U);
625653
if (F != nullptr)
626654
{
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-14-plus
10+
; RUN: igc_opt %s -S -o - -simplifycfg -gen-tti | FileCheck -check-prefix=CHECK %s
11+
12+
; This test checks that SimplifyCFG does not merge nodes with expensive math intrinsics
13+
; such as sqrt, sin, cos
14+
15+
define internal spir_func i8 @intersectWorldBoolean(i1 %cmp.i, float %sub7.i) {
16+
; CHECK-LABEL: @intersectWorldBoolean(
17+
; CHECK: if.end.i:
18+
; CHECK-NEXT: [[CALL_I_I_I2:%.*]] = call float @llvm.sqrt.f32
19+
entry:
20+
br label %for.cond
21+
22+
for.cond: ; preds = %for.inc, %entry
23+
br label %for.body
24+
25+
for.body: ; preds = %for.cond
26+
br i1 undef, label %if.then9.i.i, label %if.else11.i.i
27+
28+
if.then9.i.i: ; preds = %for.body
29+
ret i8 0
30+
31+
if.else11.i.i: ; preds = %for.body
32+
br label %_Z15__spirv_ocl_cosf.exit
33+
34+
_Z15__spirv_ocl_cosf.exit: ; preds = %if.else11.i.i
35+
br i1 %cmp.i, label %if.then.i, label %if.end.i
36+
37+
if.then.i: ; preds = %_Z15__spirv_ocl_cosf.exit
38+
br label %intersectRaySphere.exit
39+
40+
if.end.i: ; preds = %_Z15__spirv_ocl_cosf.exit
41+
%call.i.i.i2 = call float @llvm.sqrt.f32(float %sub7.i)
42+
%i57 = fadd fast float 0.000000e+00, %call.i.i.i2
43+
%div.i = fmul fast float %i57, 0.000000e+00
44+
br label %intersectRaySphere.exit
45+
46+
intersectRaySphere.exit: ; preds = %if.end.i, %if.then.i
47+
%tRay.1 = phi float [ 0.000000e+00, %if.then.i ], [ %div.i, %if.end.i ]
48+
br i1 undef, label %land.lhs.true, label %if.end
49+
50+
land.lhs.true: ; preds = %intersectRaySphere.exit
51+
%cmp10 = fcmp olt float 0.000000e+00, 0.000000e+00
52+
br i1 %cmp10, label %if.then, label %if.end
53+
54+
if.then: ; preds = %land.lhs.true
55+
br i1 undef, label %if.end.i3, label %_Z21__spirv_ocl_normalizeDv4_f.exit
56+
57+
if.end.i3: ; preds = %if.then
58+
ret i8 0
59+
60+
_Z21__spirv_ocl_normalizeDv4_f.exit: ; preds = %if.then
61+
%i98 = insertelement <4 x float> zeroinitializer, float 0.000000e+00, i32 0
62+
br label %if.end
63+
64+
if.end: ; preds = %_Z21__spirv_ocl_normalizeDv4_f.exit, %land.lhs.true, %intersectRaySphere.exit
65+
br label %for.inc
66+
67+
for.inc: ; preds = %if.end
68+
br label %for.cond
69+
}
70+
71+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
72+
declare float @llvm.sqrt.f32(float) #0
73+
74+
attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }

0 commit comments

Comments
 (0)