Skip to content

[SPIR-V] Fix BB ordering & register lifetime #111026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
assert(false && "Unhandled terminator type.");
}

// Emits an alloca named "reg" holding a value of type |Type|, inserted at
// |Position|. The alloca lives in the address space that the data layout of
// |F| designates for allocas.
AllocaInst *CreateVariable(Function &F, Type *Type,
                           BasicBlock::iterator Position) {
  const unsigned AllocaAddrSpace = F.getDataLayout().getAllocaAddrSpace();
  return new AllocaInst(Type, AllocaAddrSpace, /*ArraySize=*/nullptr, "reg",
                        Position);
}

// Run the pass on the given convergence region, ignoring the sub-regions.
// Returns true if the CFG changed, false otherwise.
bool runOnConvergenceRegionNoRecurse(LoopInfo &LI,
Expand All @@ -152,6 +159,9 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
auto NewExitTarget = BasicBlock::Create(F->getContext(), "new.exit", F);
IRBuilder<> Builder(NewExitTarget);

AllocaInst *Variable = CreateVariable(*F, Builder.getInt32Ty(),
F->begin()->getFirstInsertionPt());

// CodeGen output needs to be stable. Using the set as-is would order
// the targets differently depending on the allocation pattern.
// Sorting per basic-block ordering in the function.
Expand All @@ -176,18 +186,16 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
std::vector<std::pair<BasicBlock *, Value *>> ExitToVariable;
for (auto Exit : SortedExits) {
llvm::Value *Value = createExitVariable(Exit, TargetToValue);
IRBuilder<> B2(Exit);
B2.SetInsertPoint(Exit->getFirstInsertionPt());
B2.CreateStore(Value, Variable);
ExitToVariable.emplace_back(std::make_pair(Exit, Value));
}

// Gather the correct value depending on the exit we came from.
llvm::PHINode *node =
Builder.CreatePHI(Builder.getInt32Ty(), ExitToVariable.size());
for (auto [BB, Value] : ExitToVariable) {
node->addIncoming(Value, BB);
}
llvm::Value *Load = Builder.CreateLoad(Builder.getInt32Ty(), Variable);

// Creating the switch to jump to the correct exit target.
llvm::SwitchInst *Sw = Builder.CreateSwitch(node, SortedExitTargets[0],
llvm::SwitchInst *Sw = Builder.CreateSwitch(Load, SortedExitTargets[0],
SortedExitTargets.size() - 1);
for (size_t i = 1; i < SortedExitTargets.size(); i++) {
BasicBlock *BB = SortedExitTargets[i];
Expand Down
131 changes: 56 additions & 75 deletions llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ BasicBlock *getExitFor(const ConvergenceRegion *CR) {
// Returns the merge block designated by I if I is a merge instruction, nullptr
// otherwise.
BasicBlock *getDesignatedMergeBlock(Instruction *I) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
IntrinsicInst *II = dyn_cast_or_null<IntrinsicInst>(I);
if (II == nullptr)
return nullptr;

Expand All @@ -102,7 +102,7 @@ BasicBlock *getDesignatedMergeBlock(Instruction *I) {
// Returns the continue block designated by I if I is an OpLoopMerge, nullptr
// otherwise.
BasicBlock *getDesignatedContinueBlock(Instruction *I) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
IntrinsicInst *II = dyn_cast_or_null<IntrinsicInst>(I);
if (II == nullptr)
return nullptr;

Expand Down Expand Up @@ -284,18 +284,6 @@ void replaceBranchTargets(BasicBlock *BB, BasicBlock *OldTarget,
assert(false && "Unhandled terminator type.");
}

// Walks the PHI nodes of |BB| and, for each one that has |OldSrc| as an
// incoming block, rewrites the incoming block found by getBasicBlockIndex to
// |NewSrc|. The PHI nodes are not simplified after the rewrite.
void replacePhiTargets(BasicBlock *BB, BasicBlock *OldSrc, BasicBlock *NewSrc) {
  for (PHINode &Node : BB->phis()) {
    const int IncomingIdx = Node.getBasicBlockIndex(OldSrc);
    // -1 means |OldSrc| is not an incoming block of this PHI node.
    if (IncomingIdx != -1)
      Node.setIncomingBlock(IncomingIdx, NewSrc);
  }
}

} // anonymous namespace

// Given a reducible CFG, produces a structurized CFG in the SPIR-V sense,
Expand Down Expand Up @@ -423,7 +411,7 @@ class SPIRVStructurizer : public FunctionPass {
}

// Splits the given edges by recreating proxy nodes so that the destination
// OpPhi instruction can still be viable.
// has unique incoming edges from this region.
//
// clang-format off
//
Expand All @@ -436,66 +424,58 @@ class SPIRVStructurizer : public FunctionPass {
// A -> D -> C
// B -> D -> C
//
// But if C had a phi node, adding such a proxy block breaks it. In that case, we must add 1 new block per
// exit, and patch up the phi node:
// This is fine (assuming C has no PHI nodes), but requires handling the merge instruction here.
// By adding a proxy node, we create a regular divergent shape which can easily be regularized later on.
// A -> D -> D1 -> C
// B -> D -> D2 -> C
//
// A, B, D belongs to the construct. D is the exit. D1 and D2 are empty, just used as
// source operands for C's phi node.
// A, B, D belongs to the construct. D is the exit. D1 and D2 are empty.
//
// clang-format on
std::vector<Edge>
createAliasBlocksForComplexEdges(std::vector<Edge> Edges) {
std::unordered_map<BasicBlock *, BasicBlock *> Seen;
std::unordered_set<BasicBlock *> Seen;
std::vector<Edge> Output;
Output.reserve(Edges.size());

for (auto &[Src, Dst] : Edges) {
auto [iterator, inserted] = Seen.insert({Src, Dst});
if (inserted) {
Output.emplace_back(Src, Dst);
continue;
auto [Iterator, Inserted] = Seen.insert(Src);
if (!Inserted) {
// Src is already a source node, and there cannot be 2 edges from A to B.
// Create an alias source block.
BasicBlock *NewSrc = BasicBlock::Create(
F.getContext(), Src->getName() + ".new.src", &F);
replaceBranchTargets(Src, Dst, NewSrc);
IRBuilder<> Builder(NewSrc);
Builder.CreateBr(Dst);
Src = NewSrc;
}

// The exact same edge was already seen. Ignoring.
if (iterator->second == Dst)
continue;

// The same Src block branches to 2 distinct blocks. This will be an
// issue for the generated OpPhi. Creating alias block.
BasicBlock *NewSrc =
BasicBlock::Create(F.getContext(), "new.exit.src", &F);
replaceBranchTargets(Src, Dst, NewSrc);
replacePhiTargets(Dst, Src, NewSrc);

IRBuilder<> Builder(NewSrc);
Builder.CreateBr(Dst);

Seen.emplace(NewSrc, Dst);
Output.emplace_back(NewSrc, Dst);
Output.emplace_back(Src, Dst);
}

return Output;
}

// Creates a new alloca instruction of type |Type| called "reg" and places it
// at |Position|. The address space is the alloca address space reported by
// the data layout of |F|.
AllocaInst *CreateVariable(Function &F, Type *Type,
                           BasicBlock::iterator Position) {
  const unsigned AddrSpace = F.getDataLayout().getAllocaAddrSpace();
  AllocaInst *Slot =
      new AllocaInst(Type, AddrSpace, /*ArraySize=*/nullptr, "reg", Position);
  return Slot;
}

// Given a construct defined by |Header|, and a list of exiting edges
// |Edges|, creates a new single exit node, fixing up those edges.
BasicBlock *createSingleExitNode(BasicBlock *Header,
std::vector<Edge> &Edges) {
auto NewExit = BasicBlock::Create(F.getContext(), "new.exit", &F);
IRBuilder<> ExitBuilder(NewExit);

std::vector<BasicBlock *> Dsts;
std::unordered_map<BasicBlock *, ConstantInt *> DstToIndex;

// Given 2 edges: Src1 -> Dst, Src2 -> Dst:
// If Dst has a PHI node, and Src1 and Src2 are both operands, both Src1
// and Src2 cannot be hidden by NewExit. Create 2 new nodes: Alias1,
// Alias2 to which NewExit will branch before going to Dst. Then, patch up
// Dst's PHI node to look for Alias1 and Alias2.
std::vector<Edge> FixedEdges = createAliasBlocksForComplexEdges(Edges);

std::vector<BasicBlock *> Dsts;
std::unordered_map<BasicBlock *, ConstantInt *> DstToIndex;
auto NewExit = BasicBlock::Create(F.getContext(),
Header->getName() + ".new.exit", &F);
IRBuilder<> ExitBuilder(NewExit);
for (auto &[Src, Dst] : FixedEdges) {
if (DstToIndex.count(Dst) != 0)
continue;
Expand All @@ -506,33 +486,34 @@ class SPIRVStructurizer : public FunctionPass {
if (Dsts.size() == 1) {
for (auto &[Src, Dst] : FixedEdges) {
replaceBranchTargets(Src, Dst, NewExit);
replacePhiTargets(Dst, Src, NewExit);
}
ExitBuilder.CreateBr(Dsts[0]);
return NewExit;
}

PHINode *PhiNode =
ExitBuilder.CreatePHI(ExitBuilder.getInt32Ty(), FixedEdges.size());

AllocaInst *Variable = CreateVariable(F, ExitBuilder.getInt32Ty(),
F.begin()->getFirstInsertionPt());
for (auto &[Src, Dst] : FixedEdges) {
PhiNode->addIncoming(DstToIndex[Dst], Src);
IRBuilder<> B2(Src);
B2.SetInsertPoint(Src->getFirstInsertionPt());
B2.CreateStore(DstToIndex[Dst], Variable);
replaceBranchTargets(Src, Dst, NewExit);
replacePhiTargets(Dst, Src, NewExit);
}

llvm::Value *Load =
ExitBuilder.CreateLoad(ExitBuilder.getInt32Ty(), Variable);

// If we can avoid an OpSwitch, generate an OpBranch. The reason is that some
// OpBranch are allowed to exist without a new OpSelectionMerge if one of
// the branches is the parent's merge node, while OpSwitches are not.
if (Dsts.size() == 2) {
Value *Condition = ExitBuilder.CreateCmp(CmpInst::ICMP_EQ,
DstToIndex[Dsts[0]], PhiNode);
Value *Condition =
ExitBuilder.CreateCmp(CmpInst::ICMP_EQ, DstToIndex[Dsts[0]], Load);
ExitBuilder.CreateCondBr(Condition, Dsts[0], Dsts[1]);
return NewExit;
}

SwitchInst *Sw =
ExitBuilder.CreateSwitch(PhiNode, Dsts[0], Dsts.size() - 1);
SwitchInst *Sw = ExitBuilder.CreateSwitch(Load, Dsts[0], Dsts.size() - 1);
for (auto It = Dsts.begin() + 1; It != Dsts.end(); ++It) {
Sw->addCase(DstToIndex[*It], *It);
}
Expand Down Expand Up @@ -576,7 +557,7 @@ class SPIRVStructurizer : public FunctionPass {

// Creates a new basic block in F with a single OpUnreachable instruction.
BasicBlock *CreateUnreachable(Function &F) {
BasicBlock *BB = BasicBlock::Create(F.getContext(), "new.exit", &F);
BasicBlock *BB = BasicBlock::Create(F.getContext(), "unreachable", &F);
IRBuilder<> Builder(BB);
Builder.CreateUnreachable();
return BB;
Expand Down Expand Up @@ -1027,17 +1008,8 @@ class SPIRVStructurizer : public FunctionPass {
return Modified;
}

// Returns true when at least one PHI node in a successor of |BB| has |BB| as
// an incoming block, false otherwise.
bool IsRequiredForPhiNode(BasicBlock *BB) {
  for (BasicBlock *Succ : successors(BB)) {
    for (PHINode &Node : Succ->phis()) {
      // -1 means |BB| is not an incoming block of this PHI node.
      if (Node.getBasicBlockIndex(BB) == -1)
        continue;
      return true;
    }
  }
  return false;
}

// Removes blocks not contributing to any structured CFG. This assumes there
// are no PHI nodes.
bool removeUselessBlocks(Function &F) {
std::vector<BasicBlock *> ToRemove;

Expand All @@ -1054,9 +1026,6 @@ class SPIRVStructurizer : public FunctionPass {
if (MergeBlocks.count(&BB) != 0 || ContinueBlocks.count(&BB) != 0)
continue;

if (IsRequiredForPhiNode(&BB))
continue;

if (BB.getUniqueSuccessor() == nullptr)
continue;

Expand Down Expand Up @@ -1127,6 +1096,18 @@ class SPIRVStructurizer : public FunctionPass {
continue;

Modified = true;

if (Merge == nullptr) {
Merge = *successors(Header).begin();
IRBuilder<> Builder(Header);
Builder.SetInsertPoint(Header->getTerminator());

auto MergeAddress = BlockAddress::get(Merge->getParent(), Merge);
SmallVector<Value *, 1> Args = {MergeAddress};
Builder.CreateIntrinsic(Intrinsic::spv_selection_merge, {}, {Args});
continue;
}

Instruction *SplitInstruction = Merge->getTerminator();
if (isMergeInstruction(SplitInstruction->getPrevNode()))
SplitInstruction = SplitInstruction->getPrevNode();
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/Transforms/Utils.h"
#include <optional>

Expand Down Expand Up @@ -169,13 +170,21 @@ void SPIRVPassConfig::addIRPasses() {
// - loops have a single back-edge.
addPass(createLoopSimplifyPass());

// 2. Merge the convergence region exit nodes into one. After this step,
// 2. Removes registers whose lifetime spans across basic blocks. Also
// removes phi nodes. This will greatly simplify the next steps.
addPass(createRegToMemWrapperPass());

// 3. Merge the convergence region exit nodes into one. After this step,
// regions are single-entry, single-exit. This will help determine the
// correct merge block.
addPass(createSPIRVMergeRegionExitTargetsPass());

// 3. Structurize.
// 4. Structurize.
addPass(createSPIRVStructurizerPass());

// 5. Reduce the number of variables required by pushing some operations
// back to virtual registers.
addPass(createPromoteMemoryToRegisterPass());
}

addPass(createSPIRVRegularizerPass());
Expand Down
Loading
Loading