Skip to content

Commit 43b455b

Browse files
perlfutru
authored and committed
[AMDGPU] Disable inline constants for pseudo scalar transcendentals (#104395)
Prevent operand folding from inlining constants into pseudo scalar transcendental f16 instructions. However, still allow literal constants. (cherry picked from commit fc6300a)
1 parent 38f3dbe commit 43b455b

File tree

4 files changed

+138
-0
lines changed

4 files changed

+138
-0
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.h

+6
Original file line numberDiff line numberDiff line change
@@ -1289,6 +1289,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
12891289
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
12901290
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
12911291

1292+
/// \returns true if inline constants are not supported for F16 pseudo
1293+
/// scalar transcendentals.
/// NOTE(review): this is an exact generation match (== GFX12), so any other
/// generation reports false -- confirm that is intended for later targets.
1294+
bool hasNoF16PseudoScalarTransInlineConstants() const {
1295+
return getGeneration() == GFX12;
1296+
}
1297+
12921298
/// \returns The maximum number of instructions that can be enclosed in an
12931299
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
12941300
/// instruction.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -5768,6 +5768,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
57685768
return false;
57695769
}
57705770
}
5771+
} else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
5772+
isF16PseudoScalarTrans(MI.getOpcode()) &&
5773+
isInlineConstant(*MO, OpInfo)) {
5774+
return false;
57715775
}
57725776

57735777
if (MO->isReg()) {

llvm/lib/Target/AMDGPU/SIInstrInfo.h

+8
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
946946
Opcode == AMDGPU::DS_GWS_BARRIER;
947947
}
948948

949+
/// \returns true if \p Opcode is one of the e64 F16 pseudo scalar
/// transcendental opcodes enumerated below (V_S_EXP, V_S_LOG, V_S_RCP,
/// V_S_RSQ, V_S_SQRT); false for every other opcode.
static bool isF16PseudoScalarTrans(unsigned Opcode) {
950+
return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
951+
Opcode == AMDGPU::V_S_LOG_F16_e64 ||
952+
Opcode == AMDGPU::V_S_RCP_F16_e64 ||
953+
Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
954+
Opcode == AMDGPU::V_S_SQRT_F16_e64;
955+
}
956+
949957
static bool doesNotReadTiedSource(const MachineInstr &MI) {
950958
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
951959
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
3+
4+
# Do not use inline constants for f16 pseudo scalar transcendentals.
5+
# But allow literal constants.
6+
7+
---
8+
name: exp_f16_imm
9+
tracksRegLiveness: true
10+
body: |
11+
bb.0:
12+
; GCN-LABEL: name: exp_f16_imm
13+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
14+
; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
15+
%0:sgpr_32 = S_MOV_B32 15360
16+
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
17+
...
18+
19+
---
20+
name: exp_f16_literal
21+
tracksRegLiveness: true
22+
body: |
23+
bb.0:
24+
; GCN-LABEL: name: exp_f16_literal
25+
; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
26+
%0:sgpr_32 = S_MOV_B32 16960
27+
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
28+
...
29+
30+
---
31+
name: log_f16_imm
32+
tracksRegLiveness: true
33+
body: |
34+
bb.0:
35+
; GCN-LABEL: name: log_f16_imm
36+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
37+
; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
38+
%0:sgpr_32 = S_MOV_B32 15360
39+
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
40+
...
41+
42+
---
43+
name: log_f16_literal
44+
tracksRegLiveness: true
45+
body: |
46+
bb.0:
47+
; GCN-LABEL: name: log_f16_literal
48+
; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
49+
%0:sgpr_32 = S_MOV_B32 16960
50+
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
51+
...
52+
53+
---
54+
name: rcp_f16_imm
55+
tracksRegLiveness: true
56+
body: |
57+
bb.0:
58+
; GCN-LABEL: name: rcp_f16_imm
59+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
60+
; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
61+
%0:sgpr_32 = S_MOV_B32 15360
62+
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
63+
...
64+
65+
---
66+
name: rcp_f16_literal
67+
tracksRegLiveness: true
68+
body: |
69+
bb.0:
70+
; GCN-LABEL: name: rcp_f16_literal
71+
; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
72+
%0:sgpr_32 = S_MOV_B32 16960
73+
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
74+
...
75+
76+
---
77+
name: rsq_f16_imm
78+
tracksRegLiveness: true
79+
body: |
80+
bb.0:
81+
; GCN-LABEL: name: rsq_f16_imm
82+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
83+
; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
84+
%0:sgpr_32 = S_MOV_B32 15360
85+
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
86+
...
87+
88+
---
89+
name: rsq_f16_literal
90+
tracksRegLiveness: true
91+
body: |
92+
bb.0:
93+
; GCN-LABEL: name: rsq_f16_literal
94+
; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
95+
%0:sgpr_32 = S_MOV_B32 16960
96+
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
97+
...
98+
99+
---
100+
name: sqrt_f16_imm
101+
tracksRegLiveness: true
102+
body: |
103+
bb.0:
104+
; GCN-LABEL: name: sqrt_f16_imm
105+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
106+
; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
107+
%0:sgpr_32 = S_MOV_B32 15360
108+
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
109+
...
110+
111+
---
112+
name: sqrt_f16_literal
113+
tracksRegLiveness: true
114+
body: |
115+
bb.0:
116+
; GCN-LABEL: name: sqrt_f16_literal
117+
; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
118+
%0:sgpr_32 = S_MOV_B32 16960
119+
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
120+
...

0 commit comments

Comments
 (0)