diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 42392e22643b2..1444115b98d65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -270,6 +270,12 @@ struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
   ScanOptions ScanImpl;
 };
 
+struct AMDGPUInsertDelayAluPass
+    : public PassInfoMixin<AMDGPUInsertDelayAluPass> {
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
 Pass *createAMDGPUStructurizeCFGPass();
 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
@@ -416,7 +422,7 @@ extern char &SIMemoryLegalizerID;
 void initializeSIModeRegisterPass(PassRegistry&);
 extern char &SIModeRegisterID;
 
-void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
+void initializeAMDGPUInsertDelayAluLegacyPass(PassRegistry &);
 extern char &AMDGPUInsertDelayAluID;
 
 void initializeSIInsertHardClausesPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
index 3f2bb5df8836b..b3e371cdff8fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
@@ -23,22 +23,13 @@ using namespace llvm;
 
 namespace {
 
-class AMDGPUInsertDelayAlu : public MachineFunctionPass {
+class AMDGPUInsertDelayAlu {
 public:
-  static char ID;
-
   const SIInstrInfo *SII;
   const TargetRegisterInfo *TRI;
   const TargetSchedModel *SchedModel;
 
-  AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
   // Return true if MI waits for all outstanding VALU instructions to complete.
   static bool instructionWaitsForVALU(const MachineInstr &MI) {
     // These instruction types wait for VA_VDST==0 before issuing.
@@ -416,10 +407,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
     return Changed;
   }
 
-  bool runOnMachineFunction(MachineFunction &MF) override {
-    if (skipFunction(MF.getFunction()))
-      return false;
-
+  bool run(MachineFunction &MF) {
     LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
                       << "\n");
 
@@ -454,11 +442,39 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
   }
 };
 
+class AMDGPUInsertDelayAluLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUInsertDelayAluLegacy() : MachineFunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+    AMDGPUInsertDelayAlu Impl;
+    return Impl.run(MF);
+  }
+};
 } // namespace
 
-char AMDGPUInsertDelayAlu::ID = 0;
+PreservedAnalyses
+AMDGPUInsertDelayAluPass::run(MachineFunction &MF,
+                              MachineFunctionAnalysisManager &MFAM) {
+  if (!AMDGPUInsertDelayAlu().run(MF))
+    return PreservedAnalyses::all();
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+char AMDGPUInsertDelayAluLegacy::ID = 0;
 
-char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAlu::ID;
+char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAluLegacy::ID;
 
-INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU",
-                false, false)
+INITIALIZE_PASS(AMDGPUInsertDelayAluLegacy, DEBUG_TYPE,
+                "AMDGPU Insert Delay ALU", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index fd1341e8c91b2..468fbff31233c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -96,6 +96,7 @@ FUNCTION_PASS_WITH_PARAMS(
 #ifndef MACHINE_FUNCTION_PASS
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
+MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
 MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
@@ -120,7 +121,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
 
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7c9377e61230b..f8431cbf05489 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -531,7 +531,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
   initializeAMDGPUUnifyMetadataPass(*PR);
   initializeSIAnnotateControlFlowLegacyPass(*PR);
-  initializeAMDGPUInsertDelayAluPass(*PR);
+  initializeAMDGPUInsertDelayAluLegacyPass(*PR);
   initializeSIInsertHardClausesPass(*PR);
   initializeSIInsertWaitcntsPass(*PR);
   initializeSIModeRegisterPass(*PR);
@@ -2145,6 +2145,46 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
   Base::addPostRegAlloc(addPass);
 }
 
+void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
+  if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
+    // TODO: addPass(GCNCreateVOPDPass());
+  }
+  // TODO: addPass(SIMemoryLegalizerPass());
+  // TODO: addPass(SIInsertWaitcntsPass());
+
+  // TODO: addPass(SIModeRegisterPass());
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None) {
+    // TODO: addPass(SIInsertHardClausesPass());
+  }
+
+  // TODO: addPass(SILateBranchLoweringPass());
+  if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
+    // TODO: addPass(AMDGPUSetWavePriorityPass());
+  }
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None) {
+    // TODO: addPass(SIPreEmitPeepholePass());
+  }
+
+  // The hazard recognizer that runs as part of the post-ra scheduler does not
+  // guarantee to be able to handle all hazards correctly. This is because if
+  // there are multiple scheduling regions in a basic block, the regions are
+  // scheduled bottom up, so when we begin to schedule a region we don't know
+  // what instructions were emitted directly before it.
+  //
+  // Here we add a stand-alone hazard recognizer pass which can handle all
+  // cases.
+  // TODO: addPass(PostRAHazardRecognizerPass());
+  addPass(AMDGPUWaitSGPRHazardsPass());
+
+  if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
+    addPass(AMDGPUInsertDelayAluPass());
+  }
+
+  // TODO: addPass(BranchRelaxationPass());
+}
+
 bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
                                              CodeGenOptLevel Level) const {
   if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index eb5a9ca1f86d6..3df4115324ac2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -180,6 +180,7 @@ class AMDGPUCodeGenPassBuilder
   void addPreRewrite(AddMachinePass &) const;
   void addMachineSSAOptimization(AddMachinePass &) const;
   void addPostRegAlloc(AddMachinePass &) const;
+  void addPreEmitPass(AddMachinePass &) const;
 
   /// Check if a pass is enabled given \p Opt option. The option always
   /// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
index 266da50f6e543..ea8c7c956f776 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -passes=amdgpu-insert-delay-alu %s -o - | FileCheck %s
 
 ---
 name: valu_dep_1