Skip to content

Commit 7591a7b

Browse files
committed
[AMDGPU][MC] Clean up DPP bound_ctrl handling
At the moment, we set the BC bit in DPP for both bound_ctrl:0 and bound_ctrl:1, for compatibility with sp3 (see PR35397). However, this hack is only needed for GFX8. For newer GFX generations, sp3 behaves as expected, i.e. it sets the bit for bound_ctrl:1 and clears it for bound_ctrl:0. This patch updates LLVM to do the same for GFX11 or newer. We preserve the current behaviour for GFX9 and GFX10 so we don't break any existing code. Differential Revision: https://reviews.llvm.org/D149254
1 parent dfa42a6 commit 7591a7b

13 files changed

+662
-658
lines changed

llvm/docs/AMDGPUModifierSyntax.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1406,9 +1406,11 @@ invalid lanes is disabled.
14061406

14071407
Accessing data from an invalid lane will
14081408
return zero.
1409+
1410+
bound_ctrl:0 (GFX11+) Disables data sharing with invalid lanes.
14091411
======================================== ================================================
14101412

1411-
.. WARNING:: For historical reasons, *bound_ctrl:0* has the same meaning as *bound_ctrl:1*.
1413+
.. WARNING:: For historical reasons, *bound_ctrl:0* has the same meaning as *bound_ctrl:1* on architectures older than GFX11.
14121414

14131415
.. _amdgpu_synid_fi16:
14141416

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,7 +1552,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15521552
OperandMatchResultTy
15531553
parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
15541554
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1555-
bool (*ConvertResult)(int64_t &) = nullptr);
1555+
std::function<bool(int64_t &)> ConvertResult = nullptr);
15561556

15571557
OperandMatchResultTy
15581558
parseOperandArrayWithPrefix(const char *Prefix,
@@ -1785,6 +1785,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
17851785

17861786
bool parseDimId(unsigned &Encoding);
17871787
OperandMatchResultTy parseDim(OperandVector &Operands);
1788+
bool convertDppBoundCtrl(int64_t &BoundCtrl);
17881789
OperandMatchResultTy parseDPP8(OperandVector &Operands);
17891790
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
17901791
bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
@@ -5929,10 +5930,9 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
59295930
return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
59305931
}
59315932

5932-
OperandMatchResultTy
5933-
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5934-
AMDGPUOperand::ImmTy ImmTy,
5935-
bool (*ConvertResult)(int64_t&)) {
5933+
OperandMatchResultTy AMDGPUAsmParser::parseIntWithPrefix(
5934+
const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
5935+
std::function<bool(int64_t &)> ConvertResult) {
59365936
SMLoc S = getLoc();
59375937
int64_t Value = 0;
59385938

@@ -8011,12 +8011,13 @@ static bool ConvertOmodDiv(int64_t &Div) {
80118011
return false;
80128012
}
80138013

8014-
// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8014+
// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
80158015
// This is intentional and ensures compatibility with sp3.
80168016
// See bug 35397 for details.
8017-
static bool ConvertDppBoundCtrl(int64_t &BoundCtrl) {
8017+
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
80188018
if (BoundCtrl == 0 || BoundCtrl == 1) {
8019-
BoundCtrl = 1;
8019+
if (!isGFX11Plus())
8020+
BoundCtrl = 1;
80208021
return true;
80218022
}
80228023
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1273,7 +1273,8 @@ def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
12731273

12741274
def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
12751275
def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
1276-
def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl", "ConvertDppBoundCtrl">;
1276+
def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl",
1277+
"[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
12771278
def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
12781279

12791280
def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s

Lines changed: 67 additions & 67 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s

Lines changed: 53 additions & 53 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s

Lines changed: 98 additions & 98 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s

Lines changed: 67 additions & 67 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s

Lines changed: 41 additions & 41 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s

Lines changed: 64 additions & 64 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s

Lines changed: 64 additions & 64 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/gfx11_asm_vop3p_dpp16.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
33

44
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[0,0,0] quad_perm:[2,2,3,1] bound_ctrl:0 fi:1
5-
// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x00,0x00,0x13,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x7a,0x0c,0xff]
5+
// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x13,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x7a,0x04,0xff]
66

77
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] bank_mask:0xe
88
// GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x05,0x13,0xcc,0xfa,0x04,0x0e,0x64,0x01,0x1b,0x00,0xfe]
99

1010
v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] row_ror:7 bank_mask:0x1 bound_ctrl:0
11-
// GFX11: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 row_ror:7 row_mask:0xf bank_mask:0x1 bound_ctrl:1 ; encoding: [0x00,0x00,0x20,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x27,0x09,0xf1]
11+
// GFX11: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 row_ror:7 row_mask:0xf bank_mask:0x1 ; encoding: [0x00,0x00,0x20,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x27,0x01,0xf1]
1212

1313
v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0
1414
// GFX11: v_fma_mixhi_f16_e64_dpp v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x00,0xc0,0x22,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x78,0x00,0x0f]

0 commit comments

Comments
 (0)