-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic #114349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Regarding the warnings:
|
6b7e6b4
to
0b4cde1
Compare
llvm/lib/Target/DirectX/DXIL.td
Outdated
int value = value_; | ||
} | ||
|
||
defset list<DXILConstant> BarrierModes = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Following from here: #111884 (comment).
We could also do:
defvar BarrierMode_DeviceMemoryBarrier = 2;
...
defvar BarrierMode_AllMemoryBarrierWithGroupSync = 11;
without needing to define the DXILConstant
class and the list. I suppose you no longer have the concrete grouping and scope of the definitions, but I think the potential simplicity it will give to the Arg
class and hence DXILEmitter
, would be worth it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did not know about defvar
. New change got rid of it. Thank you.
@llvm/pr-subscribers-backend-directx Author: Adam Yang (adam-yang) Changes: fixes #112974. This change was reverted so some issues could be fixed. Changes
Related PRs
Full diff: https://github.com/llvm/llvm-project/pull/114349.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index e30d37f69f781e..dada426368995d 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -92,4 +92,6 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
}
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index af12b74351058e..bb07c9492a1a4b 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -294,6 +294,37 @@ class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
list<DXILAttribute> op_attrs = attrs;
}
+defvar BarrierMode_DeviceMemoryBarrier = 2;
+defvar BarrierMode_DeviceMemoryBarrierWithGroupSync = 3;
+defvar BarrierMode_GroupMemoryBarrier = 8;
+defvar BarrierMode_GroupMemoryBarrierWithGroupSync = 9;
+defvar BarrierMode_AllMemoryBarrier = 10;
+defvar BarrierMode_AllMemoryBarrierWithGroupSync = 11;
+
+// Intrinsic arg selection
+class Arg {
+ int index = -1;
+ int value = 0;
+ bit is_i8 = 0;
+ bit is_i32 = 0;
+}
+class ArgSelect<int index_> : Arg {
+ let index = index_;
+}
+class ArgI32<int value_> : Arg {
+ let value = value_;
+ let is_i32 = 1;
+}
+class ArgI8<int value_> : Arg {
+ let value = value_;
+ let is_i8 = 1;
+}
+
+class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_> {
+ Intrinsic intrinsic = intrinsic_;
+ list<Arg> args = args_;
+}
+
// Abstraction DXIL Operation
class DXILOp<int opcode, DXILOpClass opclass> {
// A short description of the operation
@@ -308,6 +339,9 @@ class DXILOp<int opcode, DXILOpClass opclass> {
// LLVM Intrinsic DXIL Operation maps to
Intrinsic LLVMIntrinsic = ?;
+ // Non-trivial LLVM Intrinsics DXIL Operation maps to
+ list<IntrinsicSelect> intrinsic_selects = [];
+
// Result type of the op
DXILOpParamType result;
@@ -829,3 +863,17 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+
+def Barrier : DXILOp<80, barrier> {
+ let Doc = "inserts a memory barrier in the shader";
+ let intrinsic_selects = [
+ IntrinsicSelect<
+ int_dx_group_memory_barrier_with_group_sync,
+ [ ArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
+ ];
+
+ let arguments = [Int32Ty];
+ let result = VoidTy;
+ let stages = [Stages<DXIL1_0, [compute, library]>];
+ let attributes = [Attributes<DXIL1_0, []>];
+}
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 8acc9c1efa08c0..5f278b609690e2 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -106,17 +106,41 @@ class OpLowerer {
return false;
}
- [[nodiscard]]
- bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) {
+ struct ArgSelect {
+ enum class Type {
+ Index,
+ I8,
+ I32,
+ };
+ Type Type = Type::Index;
+ int Value = -1;
+ };
+
+ [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
+ ArrayRef<ArgSelect> ArgSelects) {
bool IsVectorArgExpansion = isVectorArgExpansion(F);
return replaceFunction(F, [&](CallInst *CI) -> Error {
- SmallVector<Value *> Args;
OpBuilder.getIRB().SetInsertPoint(CI);
- if (IsVectorArgExpansion) {
- SmallVector<Value *> NewArgs = argVectorFlatten(CI, OpBuilder.getIRB());
- Args.append(NewArgs.begin(), NewArgs.end());
- } else
+ SmallVector<Value *> Args;
+ if (ArgSelects.size()) {
+ for (const ArgSelect &A : ArgSelects) {
+ switch (A.Type) {
+ case ArgSelect::Type::Index:
+ Args.push_back(CI->getArgOperand(A.Value));
+ break;
+ case ArgSelect::Type::I8:
+ Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
+ break;
+ case ArgSelect::Type::I32:
+ Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
+ break;
+ }
+ }
+ } else if (IsVectorArgExpansion) {
+ Args = argVectorFlatten(CI, OpBuilder.getIRB());
+ } else {
Args.append(CI->arg_begin(), CI->arg_end());
+ }
Expected<CallInst *> OpCall =
OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
@@ -583,9 +607,10 @@ class OpLowerer {
switch (ID) {
default:
continue;
-#define DXIL_OP_INTRINSIC(OpCode, Intrin) \
+#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...) \
case Intrin: \
- HasErrors |= replaceFunctionWithOp(F, OpCode); \
+ HasErrors |= \
+ replaceFunctionWithOp(F, OpCode, ArrayRef<ArgSelect>{__VA_ARGS__}); \
break;
#include "DXILOperation.inc"
case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
new file mode 100644
index 00000000000000..baf93d4e177f0f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
+
+define void @test_group_memory_barrier_with_group_sync() {
+entry:
+ ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+ call void @llvm.dx.group.memory.barrier.with.group.sync()
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index e74fc00015b404..b75696db410525 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -32,6 +32,20 @@ using namespace llvm::dxil;
namespace {
+struct DXILArgSelect {
+ enum class Type {
+ Index,
+ I32,
+ I8,
+ };
+ Type Type = Type::Index;
+ int Value = -1;
+};
+struct DXILIntrinsicSelect {
+ StringRef Intrinsic;
+ SmallVector<DXILArgSelect, 4> Args;
+};
+
struct DXILOperationDesc {
std::string OpName; // name of DXIL operation
int OpCode; // ID of DXIL operation
@@ -42,8 +56,7 @@ struct DXILOperationDesc {
SmallVector<const Record *> OverloadRecs;
SmallVector<const Record *> StageRecs;
SmallVector<const Record *> AttrRecs;
- StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which
- // means no map exists
+ SmallVector<DXILIntrinsicSelect> IntrinsicSelects;
SmallVector<StringRef, 4>
ShaderStages; // shader stages to which this applies, empty for all.
int OverloadParamIndex; // Index of parameter with overload type.
@@ -71,6 +84,21 @@ static void ascendingSortByVersion(std::vector<const Record *> &Recs) {
});
}
+/// Take a `int_{intrinsic_name}` and return just the intrinsic_name part if
+/// available. Otherwise return the empty string.
+static StringRef GetIntrinsicName(const RecordVal *RV) {
+ if (RV && RV->getValue()) {
+ if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
+ auto *IntrinsicDef = DI->getDef();
+ auto DefName = IntrinsicDef->getName();
+ assert(DefName.starts_with("int_") && "invalid intrinsic name");
+ // Remove the int_ from intrinsic name.
+ return DefName.substr(4);
+ }
+ }
+ return "";
+}
+
/// Construct an object using the DXIL Operation records specified
/// in DXIL.td. This serves as the single source of reference of
/// the information extracted from the specified Record R, for
@@ -157,14 +185,53 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
OpName);
}
- const RecordVal *RV = R->getValue("LLVMIntrinsic");
- if (RV && RV->getValue()) {
- if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
- auto *IntrinsicDef = DI->getDef();
- auto DefName = IntrinsicDef->getName();
- assert(DefName.starts_with("int_") && "invalid intrinsic name");
- // Remove the int_ from intrinsic name.
- Intrinsic = DefName.substr(4);
+ {
+ DXILIntrinsicSelect IntrSelect;
+ IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("LLVMIntrinsic"));
+ if (IntrSelect.Intrinsic.size())
+ IntrinsicSelects.emplace_back(std::move(IntrSelect));
+ }
+
+ auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsic_selects");
+ if (IntrinsicSelectRecords.size()) {
+ if (IntrinsicSelects.size()) {
+ PrintFatalError(
+ R, Twine("LLVMIntrinsic and intrinsic_selects cannot be both "
+ "defined for DXIL operation - ") +
+ OpName);
+ } else {
+ for (const Record *R : IntrinsicSelectRecords) {
+ DXILIntrinsicSelect IntrSelect;
+ IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic"));
+ auto Args = R->getValueAsListOfDefs("args");
+ for (const Record *Arg : Args) {
+ bool IsI8 = Arg->getValueAsBit("is_i8");
+ bool IsI32 = Arg->getValueAsBit("is_i32");
+ int Index = Arg->getValueAsInt("index");
+ int Value = Arg->getValueAsInt("value");
+
+ DXILArgSelect ArgSelect;
+ if (IsI8) {
+ ArgSelect.Type = DXILArgSelect::Type::I8;
+ ArgSelect.Value = Value;
+ } else if (IsI32) {
+ ArgSelect.Type = DXILArgSelect::Type::I32;
+ ArgSelect.Value = Value;
+ } else {
+ if (Index < 0) {
+ PrintFatalError(
+ R, Twine("Index in ArgSelect<index> must be equal to or "
+ "greater than 0 for DXIL operation - ") +
+ OpName);
+ }
+ ArgSelect.Type = DXILArgSelect::Type::Index;
+ ArgSelect.Value = Index;
+ }
+
+ IntrSelect.Args.emplace_back(std::move(ArgSelect));
+ }
+ IntrinsicSelects.emplace_back(std::move(IntrSelect));
+ }
}
}
}
@@ -377,10 +444,29 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
OS << "#ifdef DXIL_OP_INTRINSIC\n";
OS << "\n";
for (const auto &Op : Ops) {
- if (Op.Intrinsic.empty())
+ if (Op.IntrinsicSelects.empty()) {
continue;
- OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
- << ", Intrinsic::" << Op.Intrinsic << ")\n";
+ }
+ for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) {
+ OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
+ << ", Intrinsic::" << MappedIntr.Intrinsic << ", ";
+ for (const DXILArgSelect &ArgSelect : MappedIntr.Args) {
+ OS << "(ArgSelect { ";
+ switch (ArgSelect.Type) {
+ case DXILArgSelect::Type::Index:
+ OS << "ArgSelect::Type::Index, ";
+ break;
+ case DXILArgSelect::Type::I8:
+ OS << "ArgSelect::Type::I8, ";
+ break;
+ case DXILArgSelect::Type::I32:
+ OS << "ArgSelect::Type::I32, ";
+ break;
+ }
+ OS << ArgSelect.Value << "}), ";
+ }
+ OS << ")\n";
+ }
}
OS << "\n";
OS << "#undef DXIL_OP_INTRINSIC\n";
|
llvm/lib/Target/DirectX/DXIL.td
Outdated
// Intrinsic arg selection | ||
class Arg { | ||
int index = -1; | ||
int value = 0; | ||
bit is_i8 = 0; | ||
bit is_i32 = 0; | ||
} | ||
class ArgSelect<int index_> : Arg { | ||
let index = index_; | ||
} | ||
class ArgI32<int value_> : Arg { | ||
let value = value_; | ||
let is_i32 = 1; | ||
} | ||
class ArgI8<int value_> : Arg { | ||
let value = value_; | ||
let is_i8 = 1; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The structure of this is duplicated across three different places (here, in DXILEmitter, and again in DXILOpLowering). It feels like we're breaking some abstraction if we need to repeat these definitions in three different places. Can we drive the set of types by generating a table from the DXIL.td definitions and use it elsewhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is no longer duplicated in three places; instead, a struct and an enum type are generated into DXILOperation.inc, which DXILOpLowering uses.
llvm/lib/Target/DirectX/DXIL.td
Outdated
class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_> { | ||
Intrinsic intrinsic = intrinsic_; | ||
list<Arg> args = args_; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a comment by this definition explaining how to use it? It isn't clear to me how we define where the arguments end up or what the difference between "ArgSelect" and "ArgI32" is from the definitions alone.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
llvm/lib/Target/DirectX/DXIL.td
Outdated
// LLVM Intrinsic DXIL Operation maps to | ||
Intrinsic LLVMIntrinsic = ?; | ||
|
||
// Non-trivial LLVM Intrinsics DXIL Operation maps to | ||
list<IntrinsicSelect> intrinsic_selects = []; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't love having mutually exclusive ways to specify intrinsics here. Could we combine this into a design that handles both models well?
Names are hard, but I'm thinking we could have versions of IntrinsicSelect
with and without arguments, so that there can be just a single list<IntrinsicSelect> IntrinsicMapping
in DXILOp. Then we have a couple of cases:
// Basic mapping to `llvm.dx.wave.readlane`
let IntrinsicMapping = [Intrin<int_dx_wave_readlane>];
// barrier:
let IntrinsicMapping = [
IntrinWithArgs<
int_dx_group_memory_barrier_with_group_sync,
[ ArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
];
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got rid of the old LLVMIntrinsic, everything goes through the same system now.
llvm/lib/Target/DirectX/DXIL.td
Outdated
// >, | ||
// ] | ||
//========================================================================================= | ||
// to the dxil op. This can be used in conjunction with IntrinArgIndex: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like this is missing some text
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put them back in. Also formatted the paragraphs slightly.
llvm/lib/Target/DirectX/DXIL.td
Outdated
// LLVM Intrinsics DXIL Operation maps from | ||
list<IntrinSelect> intrinsic_selects = []; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think "_selects" is adding much to the name now that we don't need to differentiate it from the other way of specifying intrinsics. Just "intrinsic" is probably fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed it to use intrinsics
instead, since there could be multiple.
llvm/utils/TableGen/DXILEmitter.cpp
Outdated
if (IntrinsicSelects.size()) { | ||
PrintFatalError( | ||
R, Twine("LLVMIntrinsic and intrinsic_selects cannot be both " | ||
"defined for DXIL operation - ") + | ||
OpName); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is either no longer possible or no longer the correct error message.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, it's gone now.
llvm/utils/TableGen/DXILEmitter.cpp
Outdated
OS << "struct IntrinArgSelect {\n"; | ||
OS << " enum class Type {\n"; | ||
for (const Record *Records : | ||
Records.getAllDerivedDefinitions("IntrinArgSelectType")) { | ||
StringRef StrippedName = StripIntrinArgSelectTypePrefix(Records->getName()); | ||
OS << " " << StrippedName << ",\n"; | ||
} | ||
OS << " };\n"; | ||
OS << " Type Type;\n"; | ||
OS << " int Value;\n"; | ||
OS << "};\n"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's better to just define the values in tablegen, rather than having the generated code inject names into the scope of the #include
directive. See DXIL_OPCODE/DXIL_OPCLASS elsewhere in this file and in DXILConstants.h for examples of that style.
In this case, we'd define the struct IntrinArgSelect
in DXILOpLowering
and just use tablegen for the enum values themselves.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed it, except the enums aren't given explicit values, since it doesn't matter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
This reverts commit c7d83cf.
…y to the barrier dxil op
ddb5081
to
1aaa5f7
Compare
fixes #112974
partially fixes #70103
This change was reverted so some issues could be fixed.
Changes
Related PRs