From f6795cb61e48ec4dde0aab8601159cfb9c274535 Mon Sep 17 00:00:00 2001 From: Artem Chikin Date: Wed, 20 Dec 2023 10:56:42 -0800 Subject: [PATCH 01/14] Parameterize Initialization of 'clang::CodeGenerator' on a TargetInfo instance which may differ from the one in the ASTContext As per https://github.com/apple/swift/pull/65930, Swift compiler's built-in Clang instance may require to perform type-checking against one OS version and compilation/code-generation against an earlier version. This change allows Swift to configure it's built-in Clang code-generator with a custom 'TargetInfo'. Part of rdar://113712186 (cherry picked from commit 9894e7ae111b126abad711e123c5f0b6168aaeab) --- clang/include/clang/AST/ASTConsumer.h | 9 +++++++++ clang/lib/CodeGen/CodeGenModule.cpp | 20 ++++++++++--------- clang/lib/CodeGen/CodeGenModule.h | 4 +++- clang/lib/CodeGen/ModuleBuilder.cpp | 17 ++++++++++------ .../CodeGen/ObjectFilePCHContainerWriter.cpp | 7 ++++++- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/AST/ASTConsumer.h b/clang/include/clang/AST/ASTConsumer.h index 447f2592d2359..6cf4504dcfa60 100644 --- a/clang/include/clang/AST/ASTConsumer.h +++ b/clang/include/clang/AST/ASTConsumer.h @@ -27,6 +27,7 @@ namespace clang { class VarDecl; class FunctionDecl; class ImportDecl; + class TargetInfo; /// ASTConsumer - This is an abstract interface that should be implemented by /// clients that read ASTs. This abstraction layer allows the client to be @@ -47,6 +48,14 @@ class ASTConsumer { /// ASTContext. virtual void Initialize(ASTContext &Context) {} + /// Initialize - This is called to initialize the consumer, providing the + /// ASTContext. 'CodeGenTargetInfo' specifies the code-generation configuration + /// for this compilation instance, which may differ from the one carried + /// by the Context itself only in the OS Version number - + /// for example when type-checking must be performed against an epoch OS version + /// while code-generation must run according to the user-specified OS version. + virtual void Initialize(ASTContext &Context, const TargetInfo &CodeGenTargetInfo) {} + /// HandleTopLevelDecl - Handle the specified top-level declaration. This is /// called by the parser to process every top-level Decl*. 
/// diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 14b2f4857d8fd..51c75455f21a6 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -335,12 +335,14 @@ CodeGenModule::CodeGenModule(ASTContext &C, IntrusiveRefCntPtr FS, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, - const CodeGenOptions &CGO, llvm::Module &M, + const CodeGenOptions &CGO, + const TargetInfo &CGTI, + llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), - Target(C.getTargetInfo()), ABI(createCXXABI(*this)), + Target(CGTI), ABI(createCXXABI(*this)), VMContext(M.getContext()), VTables(*this), StackHandler(diags), SanitizerMD(new SanitizerMetadata(*this)), AtomicOpts(Target.getAtomicOpts()) { @@ -357,19 +359,19 @@ CodeGenModule::CodeGenModule(ASTContext &C, BFloatTy = llvm::Type::getBFloatTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); - PointerWidthInBits = C.getTargetInfo().getPointerWidth(LangAS::Default); + PointerWidthInBits = Target.getPointerWidth(LangAS::Default); PointerAlignInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(LangAS::Default)) + C.toCharUnitsFromBits(Target.getPointerAlign(LangAS::Default)) .getQuantity(); SizeSizeInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); + C.toCharUnitsFromBits(Target.getMaxPointerWidth()).getQuantity(); IntAlignInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity(); + C.toCharUnitsFromBits(Target.getIntAlign()).getQuantity(); CharTy = - llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getCharWidth()); - IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); + llvm::IntegerType::get(LLVMContext, Target.getCharWidth()); + IntTy = llvm::IntegerType::get(LLVMContext, Target.getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, - C.getTargetInfo().getMaxPointerWidth()); + Target.getMaxPointerWidth()); Int8PtrTy = llvm::PointerType::get(LLVMContext, C.getTargetAddressSpace(LangAS::Default)); const llvm::DataLayout &DL = M.getDataLayout(); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 87d62da87cc47..1c6dbaad1f0d3 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -690,7 +690,9 @@ class CodeGenModule : public CodeGenTypeCache { CodeGenModule(ASTContext &C, IntrusiveRefCntPtr FS, const HeaderSearchOptions &headersearchopts, const PreprocessorOptions &ppopts, - const CodeGenOptions &CodeGenOpts, llvm::Module &M, + const CodeGenOptions &CodeGenOpts, + const TargetInfo &CodeGenTargetInfo, + llvm::Module &M, DiagnosticsEngine &Diags, CoverageSourceInfo *CoverageInfo = nullptr); diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp index 09a7d79ae4afb..e283777d1c092 100644 --- a/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/clang/lib/CodeGen/ModuleBuilder.cpp @@ -149,21 +149,26 @@ namespace { } void Initialize(ASTContext &Context) override { + Initialize(Context, Context.getTargetInfo()); + } + + void Initialize(ASTContext &Context, const TargetInfo &CodeGenTargetInfo) override { Ctx = &Context; - M->setTargetTriple(Ctx->getTargetInfo().getTriple()); - M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); - const auto &SDKVersion 
= Ctx->getTargetInfo().getSDKVersion(); + M->setTargetTriple(CodeGenTargetInfo.getTriple()); + M->setDataLayout(CodeGenTargetInfo.getDataLayoutString()); + const auto &SDKVersion = CodeGenTargetInfo.getSDKVersion(); if (!SDKVersion.empty()) M->setSDKVersion(SDKVersion); - if (const auto *TVT = Ctx->getTargetInfo().getDarwinTargetVariantTriple()) + if (const auto *TVT = CodeGenTargetInfo.getDarwinTargetVariantTriple()) M->setDarwinTargetVariantTriple(TVT->getTriple()); if (auto TVSDKVersion = - Ctx->getTargetInfo().getDarwinTargetVariantSDKVersion()) + CodeGenTargetInfo.getDarwinTargetVariantSDKVersion()) M->setDarwinTargetVariantSDKVersion(*TVSDKVersion); Builder.reset(new CodeGen::CodeGenModule(Context, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, - *M, Diags, CoverageInfo)); + CodeGenTargetInfo, *M, + Diags, CoverageInfo)); for (auto &&Lib : CodeGenOpts.DependentLibraries) Builder->AddDependentLib(Lib); diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp index 384685e3f1eec..8b54552623872 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp @@ -174,6 +174,10 @@ class PCHContainerGenerator : public ASTConsumer { ~PCHContainerGenerator() override = default; void Initialize(ASTContext &Context) override { + Initialize(Context, Context.getTargetInfo()); + } + + void Initialize(ASTContext &Context, const TargetInfo &CodeGenTargetInfo) override { assert(!Ctx && "initialized multiple times"); Ctx = &Context; @@ -181,7 +185,8 @@ class PCHContainerGenerator : public ASTConsumer { M.reset(new llvm::Module(MainFileName, *VMContext)); M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); Builder.reset(new CodeGen::CodeGenModule( - *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags)); + *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, + CodeGenTargetInfo, *M, Diags)); // Prepare CGDebugInfo to emit debug info for a clang module. auto *DI = Builder->getModuleDebugInfo(); From 71609c07f06985dccc88d1dfce847d3f05e06ae3 Mon Sep 17 00:00:00 2001 From: Nate Chandler Date: Tue, 25 Feb 2025 16:35:25 -0800 Subject: [PATCH 02/14] [Coro] Add variant of retcon.once ABI. Like async coroutines, its fixed-per-function-size frame is caller-allocated--the size is stored in a global "coro function pointer". Like retcon coroutines, dynamic allocations are performed via intrinsic-provided allocation and deallocation functions. Unlike both, it takes an allocator struct as an argument, which is forwarded to the allocation/deallocation functions.
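As a rough sketch of the intended use (illustrative only; the operand order follows the CoroInstr.h accessors added below, and the symbol names are placeholders borrowed from the coro-retcon-once-dynamic.ll test added later in this series):

  %id = call token @llvm.coro.id.retcon.once.dynamic(
      i32 -1,           ; storage size
      i32 16,           ; storage alignment
      ptr @func_cfp,    ; global "coro function pointer": { relative fn offset, frame size }
      ptr %allocator,   ; allocator argument, forwarded to @allocate/@deallocate
      ptr %buffer,      ; caller-provided storage
      ptr @prototype,   ; prototype declaration for the continuation function(s)
      ptr @allocate,    ; allocation function
      ptr @deallocate)  ; deallocation function
  %hdl = call ptr @llvm.coro.begin(token %id, ptr null)

CoroSplit then fills in the frame-size field of @func_cfp once the frame layout is known.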
(cherry picked from commit b0e1dc9a189cef3ec452500c57151cb27197eb7a) --- llvm/include/llvm/IR/Intrinsics.td | 4 + .../llvm/Transforms/Coroutines/CoroInstr.h | 67 +++++++++++ .../llvm/Transforms/Coroutines/CoroShape.h | 23 +++- .../lib/Transforms/Coroutines/CoroCleanup.cpp | 1 + llvm/lib/Transforms/Coroutines/CoroCloner.h | 2 + llvm/lib/Transforms/Coroutines/CoroEarly.cpp | 1 + llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 22 +++- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 62 +++++++++-- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 105 ++++++++++++++---- llvm/lib/Transforms/Coroutines/SpillUtils.cpp | 3 +- 10 files changed, 251 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index fe83c9df5731e..2bc85c08f32c0 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1742,6 +1742,10 @@ def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty], []>; +def int_coro_id_retcon_once_dynamic : Intrinsic<[llvm_token_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, + llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + []>; def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>; def int_coro_id_async : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h index 1ad5f7fefc8cb..24f5da8d6993b 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h @@ -135,6 +135,7 @@ class AnyCoroIdInst : public IntrinsicInst { auto ID = I->getIntrinsicID(); return ID == Intrinsic::coro_id || ID == Intrinsic::coro_id_retcon || ID == Intrinsic::coro_id_retcon_once || + ID == Intrinsic::coro_id_retcon_once_dynamic || ID == Intrinsic::coro_id_async; } @@ -314,6 +315,72 @@ class CoroIdRetconOnceInst : public AnyCoroIdRetconInst { } }; +/// This represents the llvm.coro.id.retcon.once.dynamic instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconOnceDynamicInst + : public AnyCoroIdInst { + enum { + SizeArg, + AlignArg, + CoroFuncPtrArg, + AllocatorArg, + StorageArg, + PrototypeArg, + AllocArg, + DeallocArg + }; + +public: + void checkWellFormed() const; + + uint64_t getStorageSize() const { + return cast(getArgOperand(SizeArg))->getZExtValue(); + } + + Align getStorageAlignment() const { + return cast(getArgOperand(AlignArg))->getAlignValue(); + } + + Value *getStorage() const { return getArgOperand(StorageArg); } + + /// Return the coro function pointer address. This should be the address of + /// a coro function pointer struct for the current coro function. + /// struct coro_function_pointer { + /// uint32_t frame size; + /// uint32_t relative_pointer(coro_function); + /// }; + GlobalVariable *getCoroFunctionPointer() const { + return cast( + getArgOperand(CoroFuncPtrArg)->stripPointerCasts()); + } + + /// Return the prototype for the continuation function. The type, + /// attributes, and calling convention of the continuation function(s) + /// are taken from this declaration. + Function *getPrototype() const { + return cast(getArgOperand(PrototypeArg)->stripPointerCasts()); + } + + /// Return the function to use for allocating memory. 
+ Function *getAllocFunction() const { + return cast(getArgOperand(AllocArg)->stripPointerCasts()); + } + + /// Return the function to use for deallocating memory. + Function *getDeallocFunction() const { + return cast(getArgOperand(DeallocArg)->stripPointerCasts()); + } + + Value *getAllocator() const { return getArgOperand(AllocatorArg); } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon_once_dynamic; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.id.async instruction. class CoroIdAsyncInst : public AnyCoroIdInst { enum { SizeArg, AlignArg, StorageArg, AsyncFuncPtrArg }; diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h index 2e98b089358bc..a60f813785a59 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h @@ -45,6 +45,10 @@ enum class ABI { /// single continuation function. The continuation function is available as an /// intrinsic. Async, + + /// The variant of RetconOnce which features a dynamically-sized caller + /// allocation. + RetconOnceDynamic, }; // Holds structural Coroutine Intrinsics for a particular function and other @@ -127,9 +131,18 @@ struct Shape { Function *ResumePrototype; Function *Alloc; Function *Dealloc; + Value *Allocator; BasicBlock *ReturnBlock; bool IsFrameInlineInStorage; ConstantInt* TypeId; + GlobalVariable *CoroFuncPointer; + Value *Storage; + uint64_t StorageSize; + Align StorageAlignment; + // computed during splitting: + uint64_t ContextSize; + + Align getStorageAlignment() const { return Align(StorageAlignment); } }; struct AsyncLoweringStorage { @@ -194,6 +207,7 @@ struct Shape { /*IsVarArg=*/false); case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: return RetconLowering.ResumePrototype->getFunctionType(); case coro::ABI::Async: // Not used. The function type depends on the active suspend. @@ -204,7 +218,8 @@ struct Shape { } ArrayRef getRetconResultTypes() const { - assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce); + assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce || + ABI == coro::ABI::RetconOnceDynamic); auto FTy = CoroBegin->getFunction()->getFunctionType(); // The safety of all this is checked by checkWFRetconPrototype. @@ -216,7 +231,8 @@ struct Shape { } ArrayRef getRetconResumeTypes() const { - assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce); + assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce || + ABI == coro::ABI::RetconOnceDynamic); // The safety of all this is checked by checkWFRetconPrototype. 
auto FTy = RetconLowering.ResumePrototype->getFunctionType(); @@ -230,6 +246,7 @@ struct Shape { case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: return RetconLowering.ResumePrototype->getCallingConv(); case coro::ABI::Async: return AsyncLowering.AsyncCC; @@ -262,7 +279,7 @@ struct Shape { /// \param CG - if non-null, will be updated for the new call void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; - Shape() = default; + Shape() = delete; explicit Shape(Function &F) { SmallVector CoroFrames; SmallVector UnusedCoroSaves; diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index a0a26827aa09d..25eadd04e52ef 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -69,6 +69,7 @@ bool Lowerer::lower(Function &F) { case Intrinsic::coro_id: case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: + case Intrinsic::coro_id_retcon_once_dynamic: case Intrinsic::coro_id_async: II->replaceAllUsesWith(ConstantTokenNone::get(Context)); break; diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index d1887980fb3bc..e2db4fdbe38f6 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -67,6 +67,7 @@ class BaseCloner { Builder(OrigF.getContext()), TTI(TTI), NewF(NewF), ActiveSuspend(ActiveSuspend) { assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic || Shape.ABI == ABI::Async); assert(NewF && "need existing function for continuation"); assert(ActiveSuspend && "need active suspend point for continuation"); @@ -86,6 +87,7 @@ class BaseCloner { AnyCoroSuspendInst *ActiveSuspend, TargetTransformInfo &TTI) { assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic || Shape.ABI == ABI::Async); TimeTraceScope FunctionScope("BaseCloner"); diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 5375448d2d2e2..3093323da67b8 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -209,6 +209,7 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) { break; case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: + case Intrinsic::coro_id_retcon_once_dynamic: case Intrinsic::coro_id_async: F.setPresplitCoroutine(); break; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index d08170438bd2a..4259e37892df9 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -962,6 +962,19 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, B.getStructAlign() <= Id->getStorageAlignment()); break; } + case coro::ABI::RetconOnceDynamic: { + // In the dynamic retcon.once ABI, the frame is always inline in the + // storage. 
+ Shape.RetconLowering.IsFrameInlineInStorage = true; + Shape.RetconLowering.ContextSize = + alignTo(Shape.FrameSize, Shape.RetconLowering.StorageAlignment); + if (Shape.RetconLowering.StorageAlignment < Shape.FrameAlign) { + report_fatal_error( + "The alignment requirment of frame variables cannot be higher than " + "the alignment of the coro function context"); + } + break; + } case coro::ABI::Async: { Shape.AsyncLowering.FrameOffset = alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign); @@ -1188,7 +1201,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { // retcon and retcon.once lowering assumes all uses have been sunk. if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || - Shape.ABI == coro::ABI::Async) { + Shape.ABI == coro::ABI::Async || + Shape.ABI == coro::ABI::RetconOnceDynamic) { // If we found any allocas, replace all of their remaining uses with Geps. Builder.SetInsertPoint(SpillBlock, SpillBlock->begin()); for (const auto &P : FrameData.Allocas) { @@ -2078,7 +2092,8 @@ void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) { const DominatorTree DT(F); if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && - Shape.ABI != coro::ABI::RetconOnce) + Shape.ABI != coro::ABI::RetconOnce && + Shape.ABI != coro::ABI::RetconOnceDynamic) sinkLifetimeStartMarkers(F, Shape, Checker, DT); // All values (that are not allocas) that needs to be spilled to the frame. @@ -2098,7 +2113,8 @@ void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) { LLVM_DEBUG(dumpSpills("Spills", Spills)); if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || - Shape.ABI == coro::ABI::Async) + Shape.ABI == coro::ABI::Async || + Shape.ABI == coro::ABI::RetconOnceDynamic) sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas); // Build frame diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index f9a6c70fedc2d..cd7f5f174d31c 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -157,7 +157,8 @@ static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) { static void maybeFreeRetconStorage(IRBuilder<> &Builder, const coro::Shape &Shape, Value *FramePtr, CallGraph *CG) { - assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); + assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic); if (Shape.RetconLowering.IsFrameInlineInStorage) return; @@ -239,7 +240,8 @@ static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, // In unique continuation lowering, the continuations always return void. // But we may have implicitly allocated storage. - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); auto *CoroEnd = cast(End); auto *RetTy = Shape.getResumeFunctionType()->getReturnType(); @@ -368,6 +370,7 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, // In continuation-lowering, this frees the continuation storage. case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); break; } @@ -469,6 +472,7 @@ static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, /// This assumes that the builder has a meaningful insertion point. 
void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() { assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic || Shape.ABI == coro::ABI::Async); auto NewS = VMap[ActiveSuspend]; @@ -538,6 +542,7 @@ void coro::BaseCloner::replaceCoroSuspends() { // spilled. case coro::ABI::RetconOnce: case coro::ABI::Retcon: + case coro::ABI::RetconOnceDynamic: return; } @@ -707,14 +712,16 @@ void coro::BaseCloner::replaceEntryBlock() { } case coro::ABI::Async: case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { // In continuation ABIs, we want to branch to immediately after the // active suspend point. Earlier phases will have put the suspend in its // own basic block, so just thread our jump directly to its successor. assert((Shape.ABI == coro::ABI::Async && isa(ActiveSuspend)) || ((Shape.ABI == coro::ABI::Retcon || - Shape.ABI == coro::ABI::RetconOnce) && + Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic) && isa(ActiveSuspend))); auto *MappedCS = cast(VMap[ActiveSuspend]); auto Branch = cast(MappedCS->getNextNode()); @@ -779,7 +786,8 @@ Value *coro::BaseCloner::deriveNewFramePointer() { } // In continuation-lowering, the argument is the opaque storage. case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { Argument *NewStorage = &*NewF->arg_begin(); auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext()); @@ -997,6 +1005,11 @@ void coro::BaseCloner::create() { /*NoAlias=*/true); break; + case coro::ABI::RetconOnceDynamic: + // If we have a continuation prototype, just use its attributes, + // full-stop. + NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); + break; } switch (Shape.ABI) { @@ -1006,6 +1019,7 @@ void coro::BaseCloner::create() { // this is fine because we can't suspend twice. case coro::ABI::Switch: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: // Remove old returns. for (ReturnInst *Return : Returns) changeToUnreachable(Return); @@ -1063,6 +1077,13 @@ void coro::BaseCloner::create() { if (OldVFrame != NewVFrame) OldVFrame->replaceAllUsesWith(NewVFrame); + // Remap allocator pointer. + if (Shape.ABI == coro::ABI::RetconOnceDynamic) { + Value *OldAllocatorPointer = VMap[Shape.RetconLowering.Allocator]; + Argument *NewAllocatorPointer = &*NewF->getArg(1); + OldAllocatorPointer->replaceAllUsesWith(NewAllocatorPointer); + } + // All uses of the arguments should have been resolved by this point, // so we can safely remove the dummy values. for (Instruction *DummyArg : DummyArgs) { @@ -1081,6 +1102,7 @@ void coro::BaseCloner::create() { case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: // Replace uses of the active suspend with the corresponding // continuation-function arguments. 
assert(ActiveSuspend != nullptr && @@ -1139,9 +1161,26 @@ static TypeSize getFrameSizeForShape(coro::Shape &Shape) { return DL.getTypeAllocSize(Shape.FrameTy); } +static void updateCoroFuncPointerContextSize(coro::Shape &Shape) { + assert(Shape.ABI == coro::ABI::RetconOnceDynamic); + + auto *FuncPtrStruct = cast( + Shape.RetconLowering.CoroFuncPointer->getInitializer()); + auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0); + auto *OrigContextSize = FuncPtrStruct->getOperand(1); + auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(), + Shape.RetconLowering.ContextSize); + auto *NewFuncPtrStruct = ConstantStruct::get( + FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize); + + Shape.RetconLowering.CoroFuncPointer->setInitializer(NewFuncPtrStruct); +} + static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); + if (Shape.ABI == coro::ABI::RetconOnceDynamic) + updateCoroFuncPointerContextSize(Shape); for (CoroAlignInst *CA : Shape.CoroAligns) { CA->replaceAllUsesWith( @@ -1200,6 +1239,7 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) { case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: CoroBegin->replaceAllUsesWith(PoisonValue::get(CoroBegin->getType())); break; } @@ -1795,7 +1835,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape, void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, TargetTransformInfo &TTI) { - assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); + assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic); assert(Clones.empty()); // Reset various things that the optimizer might have decided it @@ -1805,10 +1846,10 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, F.removeRetAttr(Attribute::NonNull); // Allocate the frame. - auto *Id = Shape.getRetconCoroId(); + auto *Id = Shape.CoroBegin->getId(); Value *RawFramePtr; if (Shape.RetconLowering.IsFrameInlineInStorage) { - RawFramePtr = Id->getStorage(); + RawFramePtr = Shape.RetconLowering.Storage; } else { IRBuilder<> Builder(Id); @@ -1824,7 +1865,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); // Stash the allocated frame pointer in the continuation storage. - Builder.CreateStore(RawFramePtr, Id->getStorage()); + Builder.CreateStore(RawFramePtr, Shape.RetconLowering.Storage); } // Map all uses of llvm.coro.begin to the allocated frame pointer. @@ -2039,6 +2080,7 @@ static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit( case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: // Each clone in the Async/Retcon lowering references of the other clones. // Let the LazyCallGraph know about all of them at once. 
if (!Clones.empty()) @@ -2137,6 +2179,8 @@ CreateNewABI(Function &F, coro::Shape &S, return std::make_unique(F, S, IsMatCallback); case coro::ABI::RetconOnce: return std::make_unique(F, S, IsMatCallback); + case coro::ABI::RetconOnceDynamic: + return std::make_unique(F, S, IsMatCallback); } llvm_unreachable("Unknown ABI"); } diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 71f2bdd50f210..1e73c658fce85 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -325,6 +325,23 @@ void coro::Shape::analyze(Function &F, AsyncLowering.AsyncCC = F.getCallingConv(); break; } + case Intrinsic::coro_id_retcon_once_dynamic: { + auto ContinuationId = cast(Id); + ABI = coro::ABI::RetconOnceDynamic; + auto Prototype = ContinuationId->getPrototype(); + RetconLowering.ResumePrototype = Prototype; + RetconLowering.Alloc = ContinuationId->getAllocFunction(); + RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + RetconLowering.Storage = ContinuationId->getStorage(); + RetconLowering.Allocator = ContinuationId->getAllocator(); + RetconLowering.ReturnBlock = nullptr; + RetconLowering.IsFrameInlineInStorage = false; + RetconLowering.ContextSize = 0; + RetconLowering.StorageSize = ContinuationId->getStorageSize(); + RetconLowering.StorageAlignment = ContinuationId->getStorageAlignment(); + RetconLowering.CoroFuncPointer = ContinuationId->getCoroFunctionPointer(); + break; + } case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: { ABI = IntrID == Intrinsic::coro_id_retcon ? coro::ABI::Retcon @@ -335,6 +352,7 @@ void coro::Shape::analyze(Function &F, RetconLowering.ResumePrototype = Prototype; RetconLowering.Alloc = ContinuationId->getAllocFunction(); RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + RetconLowering.Storage = ContinuationId->getStorage(); RetconLowering.ReturnBlock = nullptr; RetconLowering.IsFrameInlineInStorage = false; RetconLowering.TypeId = ContinuationId->getTypeId(); @@ -396,7 +414,8 @@ void coro::SwitchABI::init() { void coro::AsyncABI::init() { assert(Shape.ABI == coro::ABI::Async); } void coro::AnyRetconABI::init() { - assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); + assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic); { // Determine the result value types, and make sure they match up with // the values passed to the suspends. 
@@ -504,30 +523,39 @@ static void addCallToCallGraph(CallGraph *CG, CallInst *Call, Function *Callee){ Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const { + unsigned sizeParamIndex = UINT_MAX; switch (ABI) { case coro::ABI::Switch: llvm_unreachable("can't allocate memory in coro switch-lowering"); case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { - auto Alloc = RetconLowering.Alloc; - Size = Builder.CreateIntCast(Size, - Alloc->getFunctionType()->getParamType(0), - /*is signed*/ false); - ConstantInt* TypeId = RetconLowering.TypeId; - CallInst *Call; - if (TypeId == nullptr) - Call = Builder.CreateCall(Alloc, Size); - else - Call = Builder.CreateCall(Alloc, {Size, TypeId}); - propagateCallAttrsFromCallee(Call, Alloc); - addCallToCallGraph(CG, Call, Alloc); - return Call; - } + case coro::ABI::RetconOnce: + sizeParamIndex = 0; + break; + case coro::ABI::RetconOnceDynamic: + sizeParamIndex = 1; + break; case coro::ABI::Async: llvm_unreachable("can't allocate memory in coro async-lowering"); } - llvm_unreachable("Unknown coro::ABI enum"); + auto Alloc = RetconLowering.Alloc; + Size = Builder.CreateIntCast( + Size, Alloc->getFunctionType()->getParamType(sizeParamIndex), + /*is signed*/ false); + SmallVector Args; + if (ABI == coro::ABI::RetconOnceDynamic) { + Args.push_back(RetconLowering.Allocator); + } + Args.push_back(Size); + if (ABI == coro::ABI::RetconOnce) { + ConstantInt *TypeId = RetconLowering.TypeId; + if (TypeId != nullptr) + Args.push_back(TypeId); + } + auto *Call = Builder.CreateCall(Alloc, Args); + propagateCallAttrsFromCallee(Call, Alloc); + addCallToCallGraph(CG, Call, Alloc); + return Call; } void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, @@ -537,11 +565,19 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, llvm_unreachable("can't allocate memory in coro switch-lowering"); case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { auto Dealloc = RetconLowering.Dealloc; - Ptr = Builder.CreateBitCast(Ptr, - Dealloc->getFunctionType()->getParamType(0)); - auto *Call = Builder.CreateCall(Dealloc, Ptr); + SmallVector Args; + unsigned sizeParamIndex = 0; + if (ABI == coro::ABI::RetconOnceDynamic) { + sizeParamIndex = 1; + Args.push_back(RetconLowering.Allocator); + } + Ptr = Builder.CreateBitCast( + Ptr, Dealloc->getFunctionType()->getParamType(sizeParamIndex)); + Args.push_back(Ptr); + auto *Call = Builder.CreateCall(Dealloc, Args); propagateCallAttrsFromCallee(Call, Dealloc); addCallToCallGraph(CG, Call, Dealloc); return; @@ -567,7 +603,7 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, /// Check that the given value is a well-formed prototype for the /// llvm.coro.id.retcon.* intrinsics. -static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) { +static void checkWFRetconPrototype(const AnyCoroIdInst *I, Value *V) { auto F = dyn_cast(V->stripPointerCasts()); if (!F) fail(I, "llvm.coro.id.retcon.* prototype not a Function", V); @@ -594,7 +630,7 @@ static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) { fail(I, "llvm.coro.id.retcon prototype return type must be same as" "current function return type", F); } else { - // No meaningful validation to do here for llvm.coro.id.unique.once. + // No meaningful validation to do here for llvm.coro.id.retcon.once. 
} if (FT->getNumParams() == 0 || !FT->getParamType(0)->isPointerTy()) @@ -654,6 +690,29 @@ void AnyCoroIdRetconInst::checkWellFormed() const { checkWFDealloc(this, getArgOperand(DeallocArg)); } +static void checkCoroFuncPointer(const Instruction *I, Value *V) { + auto *CoroFuncPtrAddr = dyn_cast(V->stripPointerCasts()); + if (!CoroFuncPtrAddr) + fail(I, "coro.id.retcon.once.dynamic coro function pointer not a global", + V); +} + +void CoroIdRetconOnceDynamicInst::checkWellFormed() const { + checkConstantInt( + this, getArgOperand(SizeArg), + "size argument to coro.id.retcon.once.dynamic must be constant"); + checkConstantInt( + this, getArgOperand(AlignArg), + "alignment argument to coro.id.retcon.once.dynamic must be constant"); + checkConstantInt(this, getArgOperand(StorageArg), + "storage argument offset to coro.id.retcon.once.dynamic " + "must be constant"); + checkCoroFuncPointer(this, getArgOperand(CoroFuncPtrArg)); + checkWFRetconPrototype(this, getArgOperand(PrototypeArg)); + checkWFAlloc(this, getArgOperand(AllocArg)); + checkWFDealloc(this, getArgOperand(DeallocArg)); +} + static void checkAsyncFuncPointer(const Instruction *I, Value *V) { auto *AsyncFuncPtrAddr = dyn_cast(V->stripPointerCasts()); if (!AsyncFuncPtrAddr) diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp index b3e5b7fa6e0b5..f27640141379b 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -435,7 +435,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape, // code. bool ShouldUseLifetimeStartInfo = (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && - Shape.ABI != coro::ABI::RetconOnce); + Shape.ABI != coro::ABI::RetconOnce && + Shape.ABI != coro::ABI::RetconOnceDynamic); AllocaUseVisitor Visitor{AI->getDataLayout(), DT, Shape, Checker, ShouldUseLifetimeStartInfo}; Visitor.visitPtr(*AI); From 82d1bf3117b2bd02fc1cf0e653876a662c7eb345 Mon Sep 17 00:00:00 2001 From: Nate Chandler Date: Thu, 27 Feb 2025 17:24:53 -0800 Subject: [PATCH 03/14] [Coro] Popless return on null allocator. When the coroutine is using the swiftcorocc convention, branch on the nullness of the allocator argument. If it's null, use a popless return. Otherwise, use a regular return. (cherry picked from commit 570f7b45648661e345f7154d0e708e6374dde76e) --- llvm/include/llvm/IR/Intrinsics.td | 4 +++ .../lib/Transforms/Coroutines/CoroCleanup.cpp | 4 +++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 29 ++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 2bc85c08f32c0..a9c1902c2125d 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1817,6 +1817,10 @@ def int_coro_await_suspend_handle : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [Throws]>; +// FIXME: enforce musttail +// XXX: attrs; not throws, wb DefaultAttrsIntrinsic +def int_coro_return : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], []>; + // Coroutine Lowering Intrinsics. Used internally by coroutine passes. 
def int_coro_subfn_addr : DefaultAttrsIntrinsic< diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index 25eadd04e52ef..ff3ba9e3daa86 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -73,6 +73,10 @@ bool Lowerer::lower(Function &F) { case Intrinsic::coro_id_async: II->replaceAllUsesWith(ConstantTokenNone::get(Context)); break; + case Intrinsic::coro_return: + // FIXME: Remove this case with backend support. + II->replaceAllUsesWith(II->getArgOperand(0)); + break; case Intrinsic::coro_subfn_addr: lowerSubFn(Builder, cast(II)); break; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index cd7f5f174d31c..ce19e34f07c5d 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1942,7 +1942,34 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, RetV = Builder.CreateInsertValue(RetV, Phi, ValueIdx++); } - Builder.CreateRet(RetV); + if (Shape.ABI == coro::ABI::RetconOnceDynamic && + F.getCallingConv() == CallingConv::SwiftCoro) { + // %retval = ... + // %null_allocator = icmp %1, null + // br i1 %null_allocator, label %popless, label %normal + // popless: + // ret %retval + // normal: + // %popless_retval = musttail call i64 @llvm.coro.return(%retval) + // ret %popless_retval + auto *NullAllocator = Builder.CreateCmp( + CmpInst::Predicate::ICMP_EQ, Shape.RetconLowering.Allocator, + ConstantPointerNull::get( + cast(Shape.RetconLowering.Allocator->getType()))); + auto *PoplessReturnBB = BasicBlock::Create( + F.getContext(), "coro.return.popless", &F, NewSuspendBB); + auto *NormalReturnBB = BasicBlock::Create( + F.getContext(), "coro.return.normal", &F, NewSuspendBB); + Builder.CreateCondBr(NullAllocator, PoplessReturnBB, NormalReturnBB); + IRBuilder<> PoplessBuilder(PoplessReturnBB); + auto *WrapRetV = PoplessBuilder.CreateIntrinsic( + RetV->getType(), Intrinsic::coro_return, {RetV}); + PoplessBuilder.CreateRet(WrapRetV); + IRBuilder<> NormalBuilder(NormalReturnBB); + NormalBuilder.CreateRet(RetV); + } else { + Builder.CreateRet(RetV); + } } // Branch to the return block. From 92fd354d51d0e7d607fea99b1748eea8715f05a3 Mon Sep 17 00:00:00 2001 From: Nate Chandler Date: Wed, 5 Mar 2025 16:34:29 -0800 Subject: [PATCH 04/14] [Coro] Adopt ret.popless intrinsic. Delete provisional coro.return intrinsic. (cherry picked from commit 3026ca372263083b744878308617fb0cd894f2c3) --- llvm/include/llvm/IR/Intrinsics.td | 4 - .../lib/Transforms/Coroutines/CoroCleanup.cpp | 4 - llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 9 +- .../Coroutines/coro-retcon-once-dynamic.ll | 103 ++++++++++++++++++ 4 files changed, 109 insertions(+), 11 deletions(-) create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index a9c1902c2125d..2bc85c08f32c0 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1817,10 +1817,6 @@ def int_coro_await_suspend_handle : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [Throws]>; -// FIXME: enforce musttail -// XXX: attrs; not throws, wb DefaultAttrsIntrinsic -def int_coro_return : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], []>; - // Coroutine Lowering Intrinsics. Used internally by coroutine passes. 
def int_coro_subfn_addr : DefaultAttrsIntrinsic< diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index ff3ba9e3daa86..25eadd04e52ef 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -73,10 +73,6 @@ bool Lowerer::lower(Function &F) { case Intrinsic::coro_id_async: II->replaceAllUsesWith(ConstantTokenNone::get(Context)); break; - case Intrinsic::coro_return: - // FIXME: Remove this case with backend support. - II->replaceAllUsesWith(II->getArgOperand(0)); - break; case Intrinsic::coro_subfn_addr: lowerSubFn(Builder, cast(II)); break; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index ce19e34f07c5d..eef047026a764 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1962,9 +1962,12 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, F.getContext(), "coro.return.normal", &F, NewSuspendBB); Builder.CreateCondBr(NullAllocator, PoplessReturnBB, NormalReturnBB); IRBuilder<> PoplessBuilder(PoplessReturnBB); - auto *WrapRetV = PoplessBuilder.CreateIntrinsic( - RetV->getType(), Intrinsic::coro_return, {RetV}); - PoplessBuilder.CreateRet(WrapRetV); + auto &Context = F.getContext(); + auto *VoidTy = Type::getVoidTy(Context); + auto *RetPopless = + PoplessBuilder.CreateIntrinsic(VoidTy, Intrinsic::ret_popless, {}); + RetPopless->setTailCallKind(CallInst::TailCallKind::TCK_MustTail); + PoplessBuilder.CreateRet(RetV); IRBuilder<> NormalBuilder(NormalReturnBB); NormalBuilder.CreateRet(RetV); } else { diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll new file mode 100644 index 0000000000000..fd403845bccf8 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll @@ -0,0 +1,103 @@ +; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),module(coro-cleanup)' -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "arm64-apple-macos99.99" + +; CHECK-LABEL: %func.Frame = type { ptr } + +; CHECK-LABEL: @func_cfp = constant <{ i32, i32 }> +; CHECK-SAME: <{ +; CHECK-SAME: i32 trunc +; CHECK-SAME: i32 16 +; CHECK-SAME: }> +@func_cfp = constant <{ i32, i32 }> + <{ i32 trunc ( ; offset to @func from @func_cfp + i64 sub ( + i64 ptrtoint (ptr @func to i64), + i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @func_cfp, i32 0, i32 1) to i64) + ) + to i32), + i32 64 ; frame size +}> + + +; CHECK-LABEL: @func( +; CHECK-SAME: ptr %buffer, +; CHECK-SAME: ptr %allocator +; CHECK-SAME: ptr %array +; CHECK-SAME: ) { +; CHECK: %array.spill.addr = getelementptr inbounds %func.Frame, ptr %buffer, i32 0, i32 0 +; CHECK: store ptr %array, ptr %array.spill.addr +; CHECK: %load = load i32, ptr %array +; CHECK: %load.positive = icmp sgt i32 %load, 0 +; CHECK: [[CONTINUATION:%.*]] = select i1 %load.positive +; CHECK-SAME: ptr @func.resume.0 +; CHECK-SAME: ptr @func.resume.1 +; CHECK: [[RETVAL_1:%.*]] = insertvalue { ptr, i32 } poison, ptr [[CONTINUATION:%.*]], 0 +; CHECK: [[RETVAL_2:%.*]] = insertvalue { ptr, i32 } [[RETVAL_1:%.*]], i32 %load, 1 +; CHECK: [[DONT_POP:%.*]] = icmp eq ptr %allocator, null +; CHECK: br i1 [[DONT_POP:%[^,]+]], +; CHECK-SAME: label %coro.return.popless +; CHECK-SAME: label %coro.return.normal +; CHECK: coro.return.popless: +; CHECK: musttail call void @llvm.ret.popless() +; 
CHECK: ret { ptr, i32 } [[RETVAL_2:%.*]] +; CHECK: coro.return.normal: +; CHECK: ret { ptr, i32 } [[RETVAL_2:%.*]] +; CHECK: } + +; CHECK-LABEL: @func.resume.0( +; CHECK-SAME: ptr [[BUFFER:[^,]+]] +; CHECK-SAME: ptr [[ALLOCATOR:%[^)]+]] +; CHECK-SAME: ) { +; CHECK: %array.reload.addr3 = getelementptr inbounds %func.Frame, ptr [[BUFFER:%.*]], i32 0, i32 0 +; CHECK: %array.reload4 = load ptr, ptr %array.reload.addr3 +; CHECK: store i32 0, ptr %array.reload4 +; CHECK: ret void +; CHECK: } + +; CHECK-LABEL: @func.resume.1( +; CHECK-SAME: ptr [[BUFFER:[^,]+]] +; CHECK-SAME: ptr [[ALLOCATOR:%[^)]+]] +; CHECK-SAME: ) { +; CHECK: %array.reload.addr = getelementptr inbounds %func.Frame, ptr [[BUFFER:%.*]], i32 0, i32 0 +; CHECK: %array.reload = load ptr, ptr %array.reload.addr +; CHECK: store i32 10, ptr %array.reload +; CHECK: ret void +; CHECK: } +define swiftcorocc {ptr, i32} @func(ptr %buffer, ptr %allocator, ptr %array) { +entry: + %id = call token @llvm.coro.id.retcon.once.dynamic( + i32 -1, + i32 16, + ptr @func_cfp, + ptr %allocator, + ptr %buffer, + ptr @continuation_prototype, + ptr @allocate, + ptr @deallocate + ) + %handle = call ptr @llvm.coro.begin(token %id, ptr null) + %load = load i32, ptr %array + %load.positive = icmp sgt i32 %load, 0 + br i1 %load.positive, label %positive, label %negative + +positive: + call ptr (...) @llvm.coro.suspend.retcon.p0(i32 %load) + store i32 0, ptr %array, align 4 + br label %cleanup + +negative: + call ptr (...) @llvm.coro.suspend.retcon.p0(i32 %load) + store i32 10, ptr %array, align 4 + br label %cleanup + +cleanup: + call i1 @llvm.coro.end(ptr %handle, i1 0, token none) + unreachable +} + +declare void @continuation_prototype(ptr, ptr) + +declare swiftcorocc noalias ptr @allocate(i32 %size) +declare void @deallocate(ptr %ptr) From 3a43d18a864cc19c3a37931dac9951ad2a878fc1 Mon Sep 17 00:00:00 2001 From: Nate Chandler Date: Thu, 13 Mar 2025 12:00:35 -0700 Subject: [PATCH 05/14] [NFC] Coro: Re-cover this switch. It became uncovered in https://github.com/swiftlang/llvm-project/pull/10120 . Fix that here. 
(cherry picked from commit 82d3bcda4ad3b48b76105cd0b93572cca01a2147) --- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 1e73c658fce85..5092c77d3320a 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -523,39 +523,38 @@ static void addCallToCallGraph(CallGraph *CG, CallInst *Call, Function *Callee){ Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const { - unsigned sizeParamIndex = UINT_MAX; switch (ABI) { case coro::ABI::Switch: llvm_unreachable("can't allocate memory in coro switch-lowering"); case coro::ABI::Retcon: case coro::ABI::RetconOnce: - sizeParamIndex = 0; - break; - case coro::ABI::RetconOnceDynamic: - sizeParamIndex = 1; - break; + case coro::ABI::RetconOnceDynamic: { + unsigned sizeParamIndex = 0; + SmallVector Args; + if (ABI == coro::ABI::RetconOnceDynamic) { + sizeParamIndex = 1; + Args.push_back(RetconLowering.Allocator); + } + auto Alloc = RetconLowering.Alloc; + Size = Builder.CreateIntCast( + Size, Alloc->getFunctionType()->getParamType(sizeParamIndex), + /*is signed*/ false); + Args.push_back(Size); + if (ABI == coro::ABI::RetconOnce) { + ConstantInt *TypeId = RetconLowering.TypeId; + if (TypeId != nullptr) + Args.push_back(TypeId); + } + auto *Call = Builder.CreateCall(Alloc, Args); + propagateCallAttrsFromCallee(Call, Alloc); + addCallToCallGraph(CG, Call, Alloc); + return Call; + } case coro::ABI::Async: llvm_unreachable("can't allocate memory in coro async-lowering"); } - auto Alloc = RetconLowering.Alloc; - Size = Builder.CreateIntCast( - Size, Alloc->getFunctionType()->getParamType(sizeParamIndex), - /*is signed*/ false); - SmallVector Args; - if (ABI == coro::ABI::RetconOnceDynamic) { - Args.push_back(RetconLowering.Allocator); - } - Args.push_back(Size); - if (ABI == coro::ABI::RetconOnce) { - ConstantInt *TypeId = RetconLowering.TypeId; - if (TypeId != nullptr) - Args.push_back(TypeId); - } - auto *Call = Builder.CreateCall(Alloc, Args); - propagateCallAttrsFromCallee(Call, Alloc); - addCallToCallGraph(CG, Call, Alloc); - return Call; + llvm_unreachable("Unknown coro::ABI enum"); } void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, From 46cec3a446b6abc20adc58dcff96d166ac0bd6d1 Mon Sep 17 00:00:00 2001 From: Nate Chandler Date: Mon, 17 Mar 2025 12:31:04 -0700 Subject: [PATCH 06/14] [Coro] Delete ret.popless before rewritten return. When CoroSplit rewrites returns as unreachables, look for an llvm.ret.popless intrinsic before the return and delete it. Fixes a verification error. 
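For context (a sketch; value and block names are illustrative), the ramp function built by the previous patches ends its popless path with the marker immediately before the return:

  coro.return.popless:
    musttail call void @llvm.ret.popless()
    ret { ptr, i32 } %retval

The verifier requires a musttail call to be followed directly by a ret, so when the continuation clones turn that ret into an unreachable, the llvm.ret.popless marker must be erased first; the new test checks that the cloned block is reduced to a bare unreachable.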
(cherry picked from commit 8abe108d7e76e36dc1ab85e7cbc4fcec9e4b4af6) --- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 16 ++++- .../coro-retcon-once-dynamic-nocleanup.ll | 65 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index eef047026a764..a1055d3d829ce 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -881,6 +881,18 @@ static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context, Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); } +static void eraseIntrinsicRetPoplessBefore(ReturnInst *Return) { + if (Return == &*Return->getParent()->begin()) + return; + auto *Prev = &*std::prev(Return->getIterator()); + auto *Intr = dyn_cast(Prev); + if (!Intr) + return; + if (Intr->getIntrinsicID() != Intrinsic::ret_popless) + return; + Intr->eraseFromParent(); +} + /// Clone the body of the original function into a resume function of /// some sort. void coro::BaseCloner::create() { @@ -1021,8 +1033,10 @@ void coro::BaseCloner::create() { case coro::ABI::RetconOnce: case coro::ABI::RetconOnceDynamic: // Remove old returns. - for (ReturnInst *Return : Returns) + for (ReturnInst *Return : Returns) { + eraseIntrinsicRetPoplessBefore(Return); changeToUnreachable(Return); + } break; // With multi-suspend continuations, we'll already have eliminated the diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll new file mode 100644 index 0000000000000..786116a833265 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split)' -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "arm64-apple-macos99.99" + + +@func_cfp = constant <{ i32, i32 }> + <{ i32 trunc ( + i64 sub ( + i64 ptrtoint (ptr @func to i64), + i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @func_cfp, i32 0, i32 1) to i64) + ) + to i32), + i32 64 +}> + + +%func_int = type <{ i64 }> +%func_obj = type <{ %func_int, ptr }> +%func_guts = type <{ %func_obj }> +%func_impl = type <{ %func_guts }> +%func_self = type <{ %func_impl }> + +declare swiftcorocc void @func_continuation_prototype(ptr noalias, ptr) + +; CHECK-LABEL: @func.resume.0( +; CHECK-SAME: ptr noalias %0, +; CHECK-SAME: ptr %1 +; CHECK-SAME: ) { +; CHECK: coro.return.popless: +; CHECK-NEXT: unreachable +; CHECK: coro.return.normal: +; CHECK-NEXT: unreachable +; CHECK: } + +define swiftcorocc { ptr, ptr } @func(ptr noalias %buffer, ptr %allocator, ptr nocapture swiftself dereferenceable(16) %2) { +entry: + %3 = call token @llvm.coro.id.retcon.once.dynamic( + i32 -1, + i32 16, + ptr @func_cfp, + ptr %allocator, + ptr %buffer, + ptr @func_continuation_prototype, + ptr @allocate, + ptr @deallocate + ) + %handle = call ptr @llvm.coro.begin(token %3, ptr null) + %yielded = getelementptr inbounds %func_self, ptr %2, i32 0, i32 0 + call ptr (...) 
@llvm.coro.suspend.retcon.p0(ptr %yielded) + br i1 false, label %unwind, label %normal + +normal: + br label %coro.end + +unwind: + br label %coro.end + +coro.end: + %8 = call i1 @llvm.coro.end(ptr %handle, i1 false, token none) + unreachable +} + +declare swiftcorocc noalias ptr @allocate(i32 %size) +declare void @deallocate(ptr %ptr) From 1c25872cf6ffee2838a75ab706e323262aab2e52 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 3 Mar 2025 19:28:46 -0800 Subject: [PATCH 07/14] [IR] Define 'swiftcorocc' calling convention. The 'swiftcorocc' calling convention is a variant of 'swiftcc', but additionally allows the 'swiftcorocc' function to have popless returns. "popless" returns don't fully restore the stack, thereby allowing the caller to access some stack allocations made in the 'swiftcorocc' callee. Calls to these functions don't restore SP (but do restore FP). So the most important characteristic of a 'swiftcorocc' call is that it forces the caller function to access its stack through FP, like it does with e.g., variable-size allocas. This patch only implements the 'swiftcorocc' keyword and CallingConv, but doesn't implement its support on any target yet. (cherry picked from commit 1bbe5a233cc6c3e35501f82213bbcbb39e32474a) --- llvm/include/llvm/AsmParser/LLToken.h | 1 + llvm/include/llvm/IR/CallingConv.h | 4 ++++ llvm/lib/AsmParser/LLLexer.cpp | 1 + llvm/lib/AsmParser/LLParser.cpp | 2 ++ llvm/lib/IR/AsmWriter.cpp | 1 + llvm/test/Bitcode/compatibility.ll | 4 ++++ 6 files changed, 13 insertions(+) diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index a8f9c71781701..6560ae5fc5231 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -162,6 +162,7 @@ enum Kind { kw_anyregcc, kw_swiftcc, kw_swifttailcc, + kw_swiftcorocc, kw_preserve_mostcc, kw_preserve_allcc, kw_preserve_nonecc, diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index d68491eb5535c..d1fbe46e74682 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -284,6 +284,10 @@ namespace CallingConv { RISCV_VLSCall_32768 = 122, RISCV_VLSCall_65536 = 123, + /// This follows the Swift calling convention in how arguments are passed + /// but doesn't clean up the stack on a return. + SwiftCoro = 124, + /// The highest possible ID. Must be some 2^k - 1. 
MaxID = 1023 }; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 4d25b12c9ab06..0334f684cb94d 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -659,6 +659,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_regcallcc); KEYWORD(swiftcc); KEYWORD(swifttailcc); + KEYWORD(swiftcorocc); KEYWORD(anyregcc); KEYWORD(preserve_mostcc); KEYWORD(preserve_allcc); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 3b9b466ddeeab..a0969f9141629 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2188,6 +2188,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'ghccc' /// ::= 'swiftcc' /// ::= 'swifttailcc' +/// ::= 'swiftcorocc' /// ::= 'x86_intrcc' /// ::= 'hhvmcc' /// ::= 'hhvm_ccc' @@ -2254,6 +2255,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { case lltok::kw_ghccc: CC = CallingConv::GHC; break; case lltok::kw_swiftcc: CC = CallingConv::Swift; break; case lltok::kw_swifttailcc: CC = CallingConv::SwiftTail; break; + case lltok::kw_swiftcorocc: CC = CallingConv::SwiftCoro; break; case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break; case lltok::kw_hhvmcc: CC = CallingConv::DUMMY_HHVM; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index ac8aa0d35ea30..f57e65980e731 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -378,6 +378,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break; case CallingConv::Swift: Out << "swiftcc"; break; case CallingConv::SwiftTail: Out << "swifttailcc"; break; + case CallingConv::SwiftCoro: Out << "swiftcorocc"; break; case CallingConv::X86_INTR: Out << "x86_intrcc"; break; case CallingConv::DUMMY_HHVM: Out << "hhvmcc"; diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index d9e594abcd50c..2b137628fb39f 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -564,6 +564,10 @@ declare riscv_vls_cc(32768) void @riscv_vls_cc_32768() ; CHECK: declare riscv_vls_cc(32768) void @riscv_vls_cc_32768() declare riscv_vls_cc(65536) void @riscv_vls_cc_65536() ; CHECK: declare riscv_vls_cc(65536) void @riscv_vls_cc_65536() ++declare cc124 void @f.cc124() ++; CHECK: declare swiftcorocc void @f.cc124() ++declare swiftcorocc void @f.swiftcorocc() ++; CHECK: declare swiftcorocc void @f.swiftcorocc() declare cc1023 void @f.cc1023() ; CHECK: declare cc1023 void @f.cc1023() From d5f63affaebb7e00e7afe694410cd32e44499fd5 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 3 Mar 2025 19:32:41 -0800 Subject: [PATCH 08/14] [IR][AArch64] Add 'swiftcoro' parameter attribute. It doesn't have any really interesting treatment, other than being passed in a fixed register. In most of our AArch64 calling conventions, that's X23. In effect, this is mostly similar to swiftself. 
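In IR the attribute is spelled like the other Swift parameter attributes; an illustrative (not test-derived) declaration combining it with the swiftcorocc convention from the previous patch would be:

  declare swiftcorocc void @f(ptr swiftcoro)

With the calling-convention changes here, AArch64 then assigns the swiftcoro argument to the fixed register X23 in most of its calling conventions, much as swiftself and swiftasync are given their own dedicated registers.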
rdar://135984630 (cherry picked from commit d257da7cddb9744cc821ff8121487c5244fb21bb) --- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 + llvm/include/llvm/CodeGen/TargetCallingConv.h | 5 ++ llvm/include/llvm/CodeGen/TargetLowering.h | 3 +- llvm/include/llvm/IR/Attributes.td | 3 + llvm/include/llvm/Target/TargetCallingConv.td | 5 ++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 + llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 + llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 2 + llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 2 + .../SelectionDAG/SelectionDAGBuilder.cpp | 5 ++ .../CodeGen/SelectionDAG/TargetLowering.cpp | 1 + llvm/lib/IR/Verifier.cpp | 7 ++ .../AArch64/AArch64CallingConvention.td | 9 +++ llvm/lib/Target/AArch64/AArch64FastISel.cpp | 2 + .../Target/AArch64/AArch64ISelLowering.cpp | 4 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 2 + llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 + llvm/test/Bitcode/attributes.ll | 6 ++ llvm/test/Bitcode/compatibility.ll | 10 +-- llvm/test/CodeGen/AArch64/swiftcoro.ll | 70 +++++++++++++++++++ llvm/test/Verifier/swiftcoro.ll | 4 ++ 21 files changed, 139 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/swiftcoro.ll create mode 100644 llvm/test/Verifier/swiftcoro.ll diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 92b6e68d9d0a7..da62bf6554aa0 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -791,6 +791,7 @@ enum AttributeKindCodes { ATTR_KIND_NO_DIVERGENCE_SOURCE = 100, ATTR_KIND_SANITIZE_TYPE = 101, ATTR_KIND_CAPTURES = 102, + ATTR_KIND_SWIFT_CORO = 103, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index a28c7a99fb3b5..b0820b17aeb1d 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -41,6 +41,7 @@ namespace ISD { unsigned IsSplitEnd : 1; ///< Last part of a split unsigned IsSwiftSelf : 1; ///< Swift self parameter unsigned IsSwiftAsync : 1; ///< Swift async context parameter + unsigned IsSwiftCoro : 1; ///< Swift coro parameter unsigned IsSwiftError : 1; ///< Swift error parameter unsigned IsCFGuardTarget : 1; ///< Control Flow Guard target unsigned IsHva : 1; ///< HVA field for @@ -64,6 +65,7 @@ namespace ISD { : IsZExt(0), IsSExt(0), IsNoExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsByRef(0), IsNest(0), IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0), IsSplitEnd(0), IsSwiftSelf(0), IsSwiftAsync(0), + IsSwiftCoro(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0), IsSecArgPass(0), MemAlign(0), OrigAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), @@ -104,6 +106,9 @@ namespace ISD { bool isSwiftAsync() const { return IsSwiftAsync; } void setSwiftAsync() { IsSwiftAsync = 1; } + bool isSwiftCoro() const { return IsSwiftCoro; } + void setSwiftCoro() { IsSwiftCoro = 1; } + bool isSwiftError() const { return IsSwiftError; } void setSwiftError() { IsSwiftError = 1; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index b528079c2d594..50e6ec433039d 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -315,6 +315,7 @@ class TargetLoweringBase { bool IsSwiftSelf : 1; bool IsSwiftAsync : 1; bool IsSwiftError : 1; + bool IsSwiftCoro : 1; bool IsCFGuardTarget : 1; MaybeAlign Alignment = std::nullopt; Type *IndirectType = 
nullptr; @@ -324,7 +325,7 @@ class TargetLoweringBase { IsSRet(false), IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false), IsPreallocated(false), IsReturned(false), IsSwiftSelf(false), IsSwiftAsync(false), IsSwiftError(false), - IsCFGuardTarget(false) {} + IsSwiftCoro(false), IsCFGuardTarget(false) {} void setAttributes(const CallBase *Call, unsigned ArgIdx); }; diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index fb94926043fc7..44cebfdb48949 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -358,6 +358,9 @@ def SwiftSelf : EnumAttr<"swiftself", IntersectPreserve, [ParamAttr]>; /// Argument is swift async context. def SwiftAsync : EnumAttr<"swiftasync", IntersectPreserve, [ParamAttr]>; +/// Argument is swift coro allocator. +def SwiftCoro : EnumAttr<"swiftcoro", IntersectPreserve, [ParamAttr]>; + /// Function must be in a unwind table. def UWTable : IntAttr<"uwtable", IntersectPreserve, [FnAttr]>; diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td index 18b7ff4aec95f..6b3be7b6ed3fb 100644 --- a/llvm/include/llvm/Target/TargetCallingConv.td +++ b/llvm/include/llvm/Target/TargetCallingConv.td @@ -61,6 +61,11 @@ class CCIfSwiftAsync : CCIf<"ArgFlags.isSwiftAsync()", A> { class CCIfSwiftError : CCIf<"ArgFlags.isSwiftError()", A> { } +/// CCIfSwiftCoro - If the current argument has swiftcoro parameter attribute, +/// apply Action A. +class CCIfSwiftCoro : CCIf<"ArgFlags.isSwiftCoro()", A> { +} + /// CCIfCFGuardTarget - If the current argument has cfguardtarget parameter /// attribute, apply Action A. class CCIfCFGuardTarget : CCIf<"ArgFlags.isCFGuardTarget()", A> { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 5c62ef4ad8e4e..c055f7c296711 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2202,6 +2202,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::SwiftSelf; case bitc::ATTR_KIND_SWIFT_ASYNC: return Attribute::SwiftAsync; + case bitc::ATTR_KIND_SWIFT_CORO: + return Attribute::SwiftCoro; case bitc::ATTR_KIND_UW_TABLE: return Attribute::UWTable; case bitc::ATTR_KIND_VSCALE_RANGE: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1a39205d93e19..33c95f6b95650 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -875,6 +875,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_SWIFT_SELF; case Attribute::SwiftAsync: return bitc::ATTR_KIND_SWIFT_ASYNC; + case Attribute::SwiftCoro: + return bitc::ATTR_KIND_SWIFT_CORO; case Attribute::UWTable: return bitc::ATTR_KIND_UW_TABLE; case Attribute::VScaleRange: diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 0af70f333f864..79ada17b23a21 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -62,6 +62,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, Flags.setSwiftAsync(); if (AttrFn(Attribute::SwiftError)) Flags.setSwiftError(); + if (AttrFn(Attribute::SwiftCoro)) + Flags.setSwiftCoro(); } ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index fbc0264961bc7..85db1a064980d 100644 
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1050,6 +1050,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setSwiftAsync(); if (Arg.IsSwiftError) Flags.setSwiftError(); + if (Arg.IsSwiftCoro) + Flags.setSwiftCoro(); if (Arg.IsCFGuardTarget) Flags.setCFGuardTarget(); if (Arg.IsByVal) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f592e69d57fec..f3483d0b6c2f4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -11001,6 +11001,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSwiftSelf = false; Entry.IsSwiftAsync = false; Entry.IsSwiftError = false; + Entry.IsSwiftCoro = false; Entry.IsCFGuardTarget = false; Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); @@ -11120,6 +11121,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setSwiftAsync(); if (Args[i].IsSwiftError) Flags.setSwiftError(); + if (Args[i].IsSwiftCoro) + Flags.setSwiftCoro(); if (Args[i].IsCFGuardTarget) Flags.setCFGuardTarget(); if (Args[i].IsByVal) @@ -11651,6 +11654,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setSwiftAsync(); if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); + if (Arg.hasAttribute(Attribute::SwiftCoro)) + Flags.setSwiftCoro(); if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); if (Arg.hasAttribute(Attribute::ByRef)) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3995216e3d689..60ebf71b3dd3c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -128,6 +128,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); + IsSwiftCoro = Call->paramHasAttr(ArgIdx, Attribute::SwiftCoro); Alignment = Call->getParamStackAlign(ArgIdx); IndirectType = nullptr; assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 && diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index b0ccab120ccf0..04fa334af1cc6 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2239,6 +2239,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, bool SawSwiftSelf = false; bool SawSwiftAsync = false; bool SawSwiftError = false; + bool SawSwiftCoro = false; // Verify return value attributes. 
AttributeSet RetAttrs = Attrs.getRetAttrs(); @@ -2315,6 +2316,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, SawSwiftError = true; } + if (ArgAttrs.hasAttribute(Attribute::SwiftCoro)) { + Check(!SawSwiftCoro, "Cannot have multiple 'swiftcoro' parameters!", V); + SawSwiftCoro = true; + } + if (ArgAttrs.hasAttribute(Attribute::InAlloca)) { Check(i == FT->getNumParams() - 1, "inalloca isn't on the last parameter!", V); @@ -3953,6 +3959,7 @@ static AttrBuilder getParameterABIAttributes(LLVMContext& C, unsigned I, Attribu static const Attribute::AttrKind ABIAttrs[] = { Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, Attribute::InReg, Attribute::StackAlignment, Attribute::SwiftSelf, + Attribute::SwiftCoro, Attribute::SwiftAsync, Attribute::SwiftError, Attribute::Preallocated, Attribute::ByRef}; AttrBuilder Copy(C); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 7cca6d9bc6b9c..e071689dacddc 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -72,6 +72,9 @@ defvar AArch64_Common = [ // preserved for normal function calls. CCIfSwiftAsync>>, + // Pass SwiftCoro in X23. + CCIfSwiftCoro>>, + CCIfConsecutiveRegs>, CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, @@ -241,6 +244,9 @@ def CC_AArch64_Arm64EC_Thunk : CallingConv<[ // normal functions don't need to save it somewhere. CCIfSwiftAsync>>, + // Pass SwiftCoro in X22. + CCIfSwiftCoro>>, + // The 'CFGuardTarget' parameter, if any, is passed in RAX (R8). CCIfCFGuardTarget>, @@ -374,6 +380,9 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // preserved for normal function calls. CCIfSwiftAsync>>, + // Pass SwiftCoro in X23. 
+ CCIfSwiftCoro>>, + CCIfConsecutiveRegs>, CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 37241f3a70471..acecf9835f086 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2926,6 +2926,7 @@ bool AArch64FastISel::fastLowerArguments() { Arg.hasAttribute(Attribute::SwiftSelf) || Arg.hasAttribute(Attribute::SwiftAsync) || Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::SwiftCoro) || Arg.hasAttribute(Attribute::Nest)) return false; @@ -3193,6 +3194,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { for (auto Flag : CLI.OutFlags) if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || + Flag.isSwiftCoro() || Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dc5471f038043..c517b6ee28c14 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8391,7 +8391,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( if (CallConv == CallingConv::PreserveNone) { for (const ISD::InputArg &I : Ins) { if (I.Flags.isSwiftSelf() || I.Flags.isSwiftError() || - I.Flags.isSwiftAsync()) { + I.Flags.isSwiftAsync() || I.Flags.isSwiftCoro()) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose(DiagnosticInfoUnsupported( MF.getFunction(), @@ -9743,7 +9743,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, if (CallConv == CallingConv::PreserveNone) { for (const ISD::OutputArg &O : Outs) { if (O.Flags.isSwiftSelf() || O.Flags.isSwiftError() || - O.Flags.isSwiftAsync()) { + O.Flags.isSwiftAsync() || O.Flags.isSwiftCoro()) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose(DiagnosticInfoUnsupported( MF.getFunction(), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 794db887bd073..fe56c5c7a2a5f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1284,6 +1284,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue &OutVal = OutVals[I]; HasSwiftSelfArg |= Out.Flags.isSwiftSelf(); HasSwiftErrorArg |= Out.Flags.isSwiftError(); + if (Out.Flags.isSwiftCoro()) + fail(DL, DAG, "WebAssembly hasn't implemented swiftcoro arguments"); if (Out.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (Out.Flags.isInAlloca()) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 18af0972bc36d..8be1dbe49696f 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1011,6 +1011,7 @@ Function *CodeExtractor::constructFunctionDeclaration( case Attribute::SwiftError: case Attribute::SwiftSelf: case Attribute::SwiftAsync: + case Attribute::SwiftCoro: case Attribute::ZExt: case Attribute::ImmArg: case Attribute::ByRef: diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index 7dd86a8c0eb16..affcaef4593cd 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -292,6 +292,12 @@ define void @f50(ptr swiftself %0) ret void; } +; CHECK: define void @swiftcoro(ptr 
swiftcoro %0) +define void @swiftcoro(ptr swiftcoro %0) +{ + ret void; +} + ; CHECK: define i32 @f51(ptr swifterror %0) define i32 @f51(ptr swifterror %0) { diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 2b137628fb39f..afe247aae1549 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -564,10 +564,10 @@ declare riscv_vls_cc(32768) void @riscv_vls_cc_32768() ; CHECK: declare riscv_vls_cc(32768) void @riscv_vls_cc_32768() declare riscv_vls_cc(65536) void @riscv_vls_cc_65536() ; CHECK: declare riscv_vls_cc(65536) void @riscv_vls_cc_65536() -+declare cc124 void @f.cc124() -+; CHECK: declare swiftcorocc void @f.cc124() -+declare swiftcorocc void @f.swiftcorocc() -+; CHECK: declare swiftcorocc void @f.swiftcorocc() +declare cc124 void @f.cc124() +; CHECK: declare swiftcorocc void @f.cc124() +declare swiftcorocc void @f.swiftcorocc() +; CHECK: declare swiftcorocc void @f.swiftcorocc() declare cc1023 void @f.cc1023() ; CHECK: declare cc1023 void @f.cc1023() @@ -630,6 +630,8 @@ declare void @f.param.swiftasync(ptr swiftasync) ; CHECK: declare void @f.param.swiftasync(ptr swiftasync) declare void @f.param.swifterror(ptr swifterror) ; CHECK: declare void @f.param.swifterror(ptr swifterror) +declare void @f.param.swiftcoro(ptr swiftcoro) +; CHECK: declare void @f.param.swiftcoro(ptr swiftcoro) declare void @f.param.allocalign(i32 allocalign) ; CHECK: declare void @f.param.allocalign(i32 allocalign) declare void @f.param.allocptr(ptr allocptr) diff --git a/llvm/test/CodeGen/AArch64/swiftcoro.ll b/llvm/test/CodeGen/AArch64/swiftcoro.ll new file mode 100644 index 0000000000000..d03201d8bd013 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/swiftcoro.ll @@ -0,0 +1,70 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTARM64_32 %s + +; Parameter with swiftcoro should be allocated to x23. +; CHECK-LABEL: swiftcoro_param: +; CHECK: mov x0, x23 +; CHECK-NEXT: ret +define ptr @swiftcoro_param(ptr swiftcoro %addr0) { + ret ptr %addr0 +} + +; Check that x23 is used to pass a swiftcoro argument. +; CHECK-LABEL: call_swiftcoro: +; CHECK: mov x23, x0 +; CHECK: bl {{_?}}swiftcoro_param +; CHECK: ret +define ptr @call_swiftcoro(ptr %arg) { + %res = call ptr @swiftcoro_param(ptr swiftcoro %arg) + ret ptr %res +} + +; x23 should be saved by the callee even if used for swiftcoro +; CHECK-LABEL: swiftcoro_clobber: +; CHECK: {{stp|str}} {{.*}}x23{{.*}}sp +; ... +; CHECK: {{ldp|ldr}} {{.*}}x23{{.*}}sp +; CHECK: ret +define ptr @swiftcoro_clobber(ptr swiftcoro %addr0) { + call void asm sideeffect "", "~{x23}"() + ret ptr %addr0 +} + +; Demonstrate that we do not need any movs when calling multiple functions +; with swiftcoro argument. 
+; CHECK-LABEL: swiftcoro_passthrough: +; OPT-NOT: mov{{.*}}x23 +; OPT: bl {{_?}}swiftcoro_param +; OPT-NOT: mov{{.*}}x23 +; OPT-NEXT: bl {{_?}}swiftcoro_param +; OPT: ret +define void @swiftcoro_passthrough(ptr swiftcoro %addr0) { + call ptr @swiftcoro_param(ptr swiftcoro %addr0) + call ptr @swiftcoro_param(ptr swiftcoro %addr0) + ret void +} + +; We can use a tail call if the callee swiftcoro is the same as the caller one. +; This should also work with fast-isel. +; CHECK-LABEL: swiftcoro_tail: +; OPTAARCH64: b {{_?}}swiftcoro_param +; OPTAARCH64-NOT: ret +; OPTARM64_32: b {{_?}}swiftcoro_param +define ptr @swiftcoro_tail(ptr swiftcoro %addr0) { + call void asm sideeffect "", "~{x23}"() + %res = musttail call ptr @swiftcoro_param(ptr swiftcoro %addr0) + ret ptr %res +} + +; We can not use a tail call if the callee swiftcoro is not the same as the +; caller one. +; CHECK-LABEL: swiftcoro_notail: +; CHECK: mov x23, x0 +; CHECK: bl {{_?}}swiftcoro_param +; CHECK: ret +define ptr @swiftcoro_notail(ptr swiftcoro %addr0, ptr %addr1) nounwind { + %res = tail call ptr @swiftcoro_param(ptr swiftcoro %addr1) + ret ptr %res +} diff --git a/llvm/test/Verifier/swiftcoro.ll b/llvm/test/Verifier/swiftcoro.ll new file mode 100644 index 0000000000000..fcc980478a5bb --- /dev/null +++ b/llvm/test/Verifier/swiftcoro.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare void @a(ptr swiftcoro %a, ptr swiftcoro %b) +; CHECK: Cannot have multiple 'swiftcoro' parameters! From e319ce31b42715e5fa484520aadd057b7016834b Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 3 Mar 2025 19:40:51 -0800 Subject: [PATCH 09/14] [AArch64] Support 'swiftcorocc' "popless" calls. 'swiftcorocc' calls are allowed to have "popless" returns, which don't fully restore the stack, thereby allowing the caller to access some stack allocations made in the 'swiftcorocc' callee. Concretely, calls to these functions don't restore SP (but do restore FP). So the most important characteristic of a 'swiftcorocc' call is that it forces the caller function to access its stack through FP, like it does with e.g., variable-size allocas. Support this on AArch64 by marking the frame as having a popless call, which we generally honor when we decide whether the frame needs FP and FP-based addressing, as we do today for variably-sized allocas. rdar://135984630 (cherry picked from commit 6f2f9bda3e9e36e2bfb87dfce48afb15e26a2056) --- llvm/include/llvm/CodeGen/MachineFrameInfo.h | 7 + .../Target/AArch64/AArch64FrameLowering.cpp | 22 ++- .../Target/AArch64/AArch64ISelLowering.cpp | 4 + .../AArch64/GISel/AArch64CallLowering.cpp | 3 + llvm/test/CodeGen/AArch64/swiftcorocc-call.ll | 128 ++++++++++++++++++ 5 files changed, 158 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/swiftcorocc-call.ll diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 172c3e8c9a847..771fd90f32645 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -319,6 +319,10 @@ class MachineFrameInfo { /// instructions which manipulate the stack pointer. bool HasCopyImplyingStackAdjustment = false; + /// True if the function contains a call using a calling convention that + /// allows it to be "popless", i.e., to not restore SP when the call returns. + bool HasPoplessCall = false; + /// True if the function contains a call to the llvm.vastart intrinsic. 
bool HasVAStart = false; @@ -634,6 +638,9 @@ class MachineFrameInfo { HasCopyImplyingStackAdjustment = B; } + bool hasPoplessCall() const { return HasPoplessCall; } + void setHasPoplessCall(bool B = true) { HasPoplessCall = B; } + /// Returns true if the function calls the llvm.va_start intrinsic. bool hasVAStart() const { return HasVAStart; } void setHasVAStart(bool B) { HasVAStart = B; } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 642d59d06b123..dcf2e5ce08e70 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -350,7 +350,8 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( // Bail on stack adjustment needed on return for simplicity. const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)) + if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) || + MFI.hasPoplessCall()) return false; if (Exit && getArgumentStackToRestore(MF, *Exit)) return false; @@ -502,6 +503,7 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const { if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + MFI.hasPoplessCall() || MFI.hasStackMap() || MFI.hasPatchPoint() || RegInfo->hasStackRealignment(MF)) return true; @@ -1186,6 +1188,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( if (MFI.hasVarSizedObjects()) return false; + if (MFI.hasPoplessCall()) + return false; + if (RegInfo->hasStackRealignment(MF)) return false; @@ -2214,7 +2219,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes); allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || LocalsSize); + MFI.hasVarSizedObjects() || LocalsSize || + MFI.hasPoplessCall()); CFAOffset += SVECalleeSavesSize; if (EmitAsyncCFI) @@ -2231,7 +2237,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding, SVELocalsSize + StackOffset::getFixed(NumBytes), NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, - CFAOffset, MFI.hasVarSizedObjects()); + CFAOffset, + MFI.hasVarSizedObjects() || MFI.hasPoplessCall()); } // If we need a base pointer, set it up here. It's whatever the value of the @@ -2551,7 +2558,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // If we have stack realignment or variable sized objects on the stack, // restore the stack pointer from the frame pointer prior to SVE CSR // restoration. - if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) { + if (AFI->isStackRealigned() || MFI.hasVarSizedObjects() || + MFI.hasPoplessCall()) { if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { // Set SP to start of SVE callee-save area from which they can // be reloaded. The code below will deallocate the stack space @@ -2623,7 +2631,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. 
- if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { + if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned() || + MFI.hasPoplessCall())) { emitFrameOffset( MBB, LastPopI, DL, AArch64::SP, AArch64::FP, StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), @@ -2825,7 +2834,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( // If the FPOffset is positive, that'll always be best, as the SP/BP // will be even further away. UseFP = true; - } else if (MFI.hasVarSizedObjects()) { + } else if (MFI.hasVarSizedObjects() || MFI.hasPoplessCall()) { // If we have variable sized objects, we can use either FP or BP, as the // SP offset is unknown. We can use the base pointer if we have one and // FP is not preferred. If not, we're stuck with using FP. @@ -5082,6 +5091,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( // Go to common code if we cannot provide sp + offset. if (MFI.hasVarSizedObjects() || + MFI.hasPoplessCall() || MF.getInfo()->getStackSizeSVE() || MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF)) return getFrameIndexReference(MF, FI, FrameReg); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c517b6ee28c14..e5893132d9bfb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7869,6 +7869,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: case CallingConv::SwiftTail: + case CallingConv::SwiftCoro: case CallingConv::Tail: case CallingConv::GRAAL: if (Subtarget->isTargetWindows()) { @@ -9047,6 +9048,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, CallConv = CallingConv::AArch64_SVE_VectorCall; } + if (CallConv == CallingConv::SwiftCoro) + MF.getFrameInfo().setHasPoplessCall(); + if (IsTailCall) { // Check if it's really possible to do a tail call. IsTailCall = isEligibleForTailCallOptimization(CLI); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index e4719b26cab52..ac98a3233ce42 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -1455,6 +1455,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; } + if (Info.CallConv == CallingConv::SwiftCoro) + MF.getFrameInfo().setHasPoplessCall(); + if (Info.SwiftErrorVReg) { MIB.addDef(AArch64::X21, RegState::Implicit); MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21)); diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll new file mode 100644 index 0000000000000..117cbfd04a5df --- /dev/null +++ b/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \ +; RUN: | FileCheck %s + +; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \ +; RUN: -global-isel -global-isel-abort=2 | FileCheck %s + +declare i64 @g(ptr, ptr) + +define i64 @test_call_to_swiftcoro() #0 { +; CHECK-LABEL: test_call_to_swiftcoro: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: stp x26, x25, [sp, #-32]! 
; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _test_call +; CHECK-NEXT: sub x0, x29, #24 +; CHECK-NEXT: sub x1, x29, #32 +; CHECK-NEXT: bl _g +; CHECK-NEXT: sub sp, x29, #16 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call swiftcorocc i64 @test_call() + %v4 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v4 +} + +define i64 @test_call_to_normal() #0 { +; CHECK-LABEL: test_call_to_normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _test_call_normal +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @test_call_normal() + %v4 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v4 +} + +define swiftcorocc i64 @test_call() #0 { +; CHECK-LABEL: test_call: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v3 +} + +define i64 @test_call_normal() #0 { +; CHECK-LABEL: test_call_normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: retab + %v1 = alloca 
i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v3 +} + + +attributes #0 = { "ptrauth-returns" } From d8adbf7b72be40ce7162fee3197db569586cdd53 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 3 Feb 2025 19:20:23 -0800 Subject: [PATCH 10/14] [IR] Define @llvm.ret.popless intrinsic, a ret that doesn't restore SP. Marks the following ret instruction as a "popless" return, one that does not restore SP to its function-entry value (i.e., does not deallocate the stack frame), allowing allocations made in the function to be accessible by the caller. The function must be annotated with an appropriate target-specific calling convention, so the caller can generate stack accesses accordingly, generally by treating the call as a variably-sized alloca, so using FP-based addressing for its own frame rather than relying on statically known SP offsets. The single argument is forwarded as a return value, which must then be used as the operand to the following ret instruction. Calls to this intrinsic need to be musttail, but don't follow the other ABI requirements for musttail calls, since this is really annotating the ret. This doesn't implement any lowering, but only adds the intrinsic definition, basic verifier checks, and an inliner opt-out. rdar://135984630 (cherry picked from commit f563fd5dbd8628ab89ac9376154c336372a5d1bf) --- llvm/include/llvm/IR/Intrinsics.td | 14 ++++++++++++++ llvm/lib/Analysis/InlineCost.cpp | 1 + llvm/lib/IR/BasicBlock.cpp | 8 ++++++++ llvm/lib/IR/Verifier.cpp | 9 +++++++++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 ++++++ llvm/test/Verifier/ret_popless.ll | 14 ++++++++++++++ 6 files changed, 52 insertions(+) create mode 100644 llvm/test/Verifier/ret_popless.ll diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 2bc85c08f32c0..e6a59e70b4dbb 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -865,6 +865,20 @@ def int_localrecover : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; +// Marks the following ret instruction as a "popless" return, one that does not +// restore SP to its function-entry value (i.e., does not deallocate the +// stack frame), allowing allocations made in the function to be accessible +// by the caller. +// +// The function must be annotated with an appropriate target-specific calling +// convention, so the caller can generate stack accesses accordingly, generally +// by treating the call as a variably-sized alloca, so using FP-based addressing +// for its own frame rather than relying on statically known SP offsets. +// +// Calls to this intrinsic need to be musttail, but don't follow the other ABI +// requirements for musttail calls, since this is really annotating the ret. +def int_ret_popless : DefaultAttrsIntrinsic<[], [], [IntrNoMem]>; + // Given the frame pointer passed into an SEH filter function, returns a // pointer to the local variable area suitable for use with llvm.localrecover.
def int_eh_recoverfp : DefaultAttrsIntrinsic<[llvm_ptr_ty], diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 30e1af602667c..6b431f2f71594 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -2361,6 +2361,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { return false; case Intrinsic::icall_branch_funnel: case Intrinsic::localescape: + case Intrinsic::ret_popless: HasUninlineableIntrinsic = true; return false; case Intrinsic::vastart: diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index c632b1b2dc2ab..20050459b3c3e 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -311,6 +311,14 @@ const CallInst *BasicBlock::getTerminatingMustTailCall() const { if (!Prev) return nullptr; + // Some musttail intrinsic calls are special in being really simply ret + // annotations, and only need to be the last instruction before the ret. + // We don't need to look through the return value in those cases. + // FIXME: we should generalize getTerminatingDeoptimizeCall for this case. + if (auto *CI = dyn_cast(Prev)) + if (CI->isMustTailCall() && CI->getIntrinsicID() == Intrinsic::ret_popless) + return CI; + if (Value *RV = RI->getReturnValue()) { if (RV != Prev) return nullptr; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 04fa334af1cc6..336ab9b8acc0a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3994,6 +3994,15 @@ void Verifier::verifyMustTailCall(CallInst &CI) { &CI); } #endif + Check(CI.getIntrinsicID() != Intrinsic::ret_popless, + "llvm.ret.popless call must be musttail", &CI); + return; + } + + // Some musttail intrinsic calls are special, and don't have all the rules. + if (CI.getIntrinsicID() == Intrinsic::ret_popless) { + ReturnInst *Ret = dyn_cast_or_null(CI.getNextNode()); + Check(Ret, "musttail intrinsic call must precede a ret", &CI); return; } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7f53aa7d4f73d..29ffae9b53c51 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1505,6 +1505,12 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize) return false; + // Similarly for llvm.ret.popless (and likely generalizable to all musttail + // intrinsics). + if (auto *CB = dyn_cast(I)) + if (CB->getIntrinsicID() == Intrinsic::ret_popless) + return false; + // It's also unsafe/illegal to hoist an instruction above its instruction // operands BasicBlock *BB = I->getParent(); diff --git a/llvm/test/Verifier/ret_popless.ll b/llvm/test/Verifier/ret_popless.ll new file mode 100644 index 0000000000000..6747d3fd039ed --- /dev/null +++ b/llvm/test/Verifier/ret_popless.ll @@ -0,0 +1,14 @@ +; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s + +define void @test_ret_popless_not_musttail() { +; CHECK: llvm.ret.popless call must be musttail + call void @llvm.ret.popless() + ret void +} + +define i64 @test_ret_popless_not_returned(i64 %a) { +; CHECK: musttail intrinsic call must precede a ret + musttail call void @llvm.ret.popless() + %res = bitcast i64 %a to i64 + ret i64 %res +} From bc143ef60733e715c135e833e2a0604636b12737 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 3 Feb 2025 19:21:45 -0800 Subject: [PATCH 11/14] [AArch64] Lower @llvm.ret.popless in swiftcorocc functions. 
On AArch64, swiftcorocc functions are the only functions yet that can support popless returns. In the backend, that's done by recognizing the musttail call to llvm.ret.popless preceding a ret instruction, and asking the target to adjust that ret to be popless. Throughout most of the backend, that's not an interesting difference. In frame lowering, these popless rets now induce several special behaviors in their (never shrink-wrapped) epilogues, all consequences of not restoring SP: - they of course don't do the SP adjustment or restore itself. - most importantly, they force the epilogue callee-save restores to be FP-based rather than SP-based. - they restore FP/LR last, as we still need the old FP, pointing at the frame being destroyed, to do the CSR restoring. - with ptrauth-returns, they first derive the entry SP from FP, into X16, to use as a discriminator for a standalone AUTIB. rdar://135984630 (cherry picked from commit 52307abb836325aa77ba3d8675da140be1bc0695) --- .../llvm/CodeGen/GlobalISel/CallLowering.h | 9 + llvm/include/llvm/CodeGen/TargetLowering.h | 4 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 9 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 24 +++ .../Target/AArch64/AArch64FrameLowering.cpp | 145 ++++++++++++++++- .../Target/AArch64/AArch64ISelLowering.cpp | 13 ++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 + .../AArch64/AArch64MachineFunctionInfo.h | 11 ++ .../AArch64/GISel/AArch64CallLowering.cpp | 15 ++ .../AArch64/GISel/AArch64CallLowering.h | 2 + .../AArch64/swiftcorocc-ret-popless.ll | 154 ++++++++++++++++++ 12 files changed, 396 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 9c8226660e087..dd8c2469f4cba 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -538,6 +538,15 @@ class CallLowering { return false; } + /// This hook must be implemented to lower @llvm.ret.popless intrinsics, + /// which are required to be musttail, and are effectively annotating a + /// return instruction to mark it "popless", i.e., not restoring SP. + /// This "adjustment" step runs after lowerReturn, and is only meant to make + /// it a little less painful to maintain before we add this to the main hook. + virtual bool adjustReturnToPopless(MachineIRBuilder &MIRBuilder) const { + report_fatal_error("Popless returns not implemented for this target"); + } + virtual bool fallBackToDAGISel(const MachineFunction &MF) const { return false; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 50e6ec433039d..a9bef1df0040e 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4890,6 +4890,10 @@ class TargetLowering : public TargetLoweringBase { llvm_unreachable("Not Implemented"); } + virtual SDValue adjustReturnPopless(SDValue Chain, SelectionDAG &DAG) const { + report_fatal_error("Popless returns not implemented for this target"); + } + /// Return true if result of the specified node is used by a return node /// only. It also compute and return the input chain for the tail call. 
/// diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 931e4fe19e69a..80868e3f2f5f2 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -391,7 +391,14 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. - return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); + bool Success = + CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); + + if (auto *MustTailCI = RI.getParent()->getTerminatingMustTailCall()) + if (MustTailCI->getIntrinsicID() == Intrinsic::ret_popless) + Success &= CLI->adjustReturnToPopless(MIRBuilder); + + return Success; } void IRTranslator::emitBranchForMergedCondition( diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f3483d0b6c2f4..40455e6792ced 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2153,6 +2153,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { return; } + // Musttail calls to @llvm.ret.popless are used to annotate the ret as + // "popless". Keep track of it here, and ask the target to do so later. + bool IsPoplessReturn = false; + if (auto *MustTailCI = I.getParent()->getTerminatingMustTailCall()) + if (MustTailCI->getIntrinsicID() == Intrinsic::ret_popless) + IsPoplessReturn = true; + if (!FuncInfo.CanLowerReturn) { Register DemoteReg = FuncInfo.DemoteRegister; @@ -2287,6 +2294,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); + // If we did find this return instruction to be popless, make it so now. + // It's still a normal return in almost all regards, we just need to remember + // it's popless, for when we lower the return and emit the epilogue later. + // Ideally we'd ask LowerReturn to do that, but the API is enough of a pain + // as it is, and all targets would have to learn about that. + if (IsPoplessReturn) { + SDValue NewChain = + DAG.getTargetLoweringInfo().adjustReturnPopless(Chain, DAG); + DAG.RemoveDeadNode(Chain.getNode()); + Chain = NewChain; + } + // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && "LowerReturn didn't return a valid chain!"); @@ -8018,6 +8037,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask)); return; } + + case Intrinsic::ret_popless: + // We're handling this on the associated ret itself. 
+ return; + case Intrinsic::threadlocal_address: { setValue(&I, getValue(I.getOperand(0))); return; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dcf2e5ce08e70..b3f362b721aea 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -507,6 +507,11 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const { MFI.hasStackMap() || MFI.hasPatchPoint() || RegInfo->hasStackRealignment(MF)) return true; + + const AArch64FunctionInfo *AFI = MF.getInfo(); + if (AFI->hasPoplessEpilogue()) + return true; + // With large callframes around we may need to use FP to access the scavenging // emergency spillslot. // @@ -1125,6 +1130,12 @@ bool AArch64FrameLowering::canUseAsPrologue( return false; } + // If we have some return path that's popless, it needs its own very-special + // epilogue, so we can't shrink-wrap it away. + // FIXME: this and some of the below checks belong in enableShrinkWrapping. + if (AFI->hasPoplessEpilogue()) + return false; + // Certain stack probing sequences might clobber flags, then we can't use // the block as a prologue if the flags register is a live-in. if (MF->getInfo()->hasStackProbing() && @@ -1210,6 +1221,12 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue( MachineBasicBlock &MBB, uint64_t StackBumpBytes) const { + + MachineFunction &MF = *MBB.getParent(); + AArch64FunctionInfo *AFI = MF.getInfo(); + if (AFI->hasPoplessEpilogue()) + return false; + if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes)) return false; if (MBB.empty()) @@ -1577,6 +1594,47 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( return std::prev(MBB.erase(MBBI)); } +static void fixupCalleeSaveRestoreToFPBased(MachineInstr &MI, + uint64_t FPSPOffset) { + assert(!AArch64InstrInfo::isSEHInstruction(MI)); + + unsigned Opc = MI.getOpcode(); + unsigned Scale; + switch (Opc) { + case AArch64::STPXi: + case AArch64::STRXui: + case AArch64::STPDi: + case AArch64::STRDui: + case AArch64::LDPXi: + case AArch64::LDRXui: + case AArch64::LDPDi: + case AArch64::LDRDui: + Scale = 8; + break; + case AArch64::STPQi: + case AArch64::STRQui: + case AArch64::LDPQi: + case AArch64::LDRQui: + Scale = 16; + break; + default: + llvm_unreachable("Unexpected callee-save save/restore opcode!"); + } + + unsigned OffsetIdx = MI.getNumExplicitOperands() - 1; + + MachineOperand &BaseRegOpnd = MI.getOperand(OffsetIdx - 1); + assert(BaseRegOpnd.getReg() == AArch64::SP && + "Unexpected base register in callee-save save/restore instruction!"); + BaseRegOpnd.setReg(AArch64::FP); // XXX TRI + + // Last operand is immediate offset that needs fixing. + MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); + // All generated opcodes have scaled offsets. + assert(FPSPOffset % Scale == 0); + OffsetOpnd.setImm(OffsetOpnd.getImm() - FPSPOffset / Scale); +} + // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. 
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, @@ -2319,10 +2377,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF); bool HasWinCFI = false; bool IsFunclet = false; + bool IsSwiftCoroPartialReturn = false; if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); IsFunclet = isFuncletReturnInstr(*MBBI); + IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS; + } + + if (IsSwiftCoroPartialReturn) { + // The partial-return intrin/instr requires the swiftcoro cc + if (MF.getFunction().getCallingConv() != CallingConv::SwiftCoro) + report_fatal_error("llvm.ret.popless requires swiftcorocc"); + assert(MBBI->getOpcode() == AArch64::RET_POPLESS); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::RET_ReallyLR)) + .setMIFlag(MachineInstr::FrameDestroy); + MBB.erase(MBBI); } MachineBasicBlock::iterator EpilogStartI = MBB.end(); @@ -2371,6 +2441,39 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (Info.getReg() != AArch64::LR) continue; MachineBasicBlock::iterator TI = MBB.getFirstTerminator(); + + // When we're doing a popless ret (i.e., that doesn't restore SP), we + // can't rely on the exit SP being the same as the entry, but they need + // to match for the LR auth to succeed. Instead, derive the entry SP + // from our FP (using a -16 static offset for the size of the frame + // record itself), save that into X16, and use that as the discriminator + // in an AUTIB. + if (IsSwiftCoroPartialReturn) { + const auto *TRI = Subtarget.getRegisterInfo(); + + MachineBasicBlock::iterator EpilogStartI = MBB.getFirstTerminator(); + MachineBasicBlock::iterator Begin = MBB.begin(); + while (EpilogStartI != Begin) { + --EpilogStartI; + if (!EpilogStartI->getFlag(MachineInstr::FrameDestroy)) { + ++EpilogStartI; + break; + } + if (EpilogStartI->readsRegister(AArch64::X16, TRI) || + EpilogStartI->modifiesRegister(AArch64::X16, TRI)) + report_fatal_error("unable to use x16 for popless ret LR auth"); + } + + emitFrameOffset(MBB, EpilogStartI, DL, AArch64::X16, AArch64::FP, + StackOffset::getFixed(16), TII, + MachineInstr::FrameDestroy); + BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIB), AArch64::LR) + .addUse(AArch64::LR) + .addUse(AArch64::X16) + .setMIFlag(MachineInstr::FrameDestroy); + return; + } + if (TI != MBB.end() && TI->getOpcode() == AArch64::RET_ReallyLR) { // If there is a terminator and it's a RET, we can fold AUTH into it. // Be careful to keep the implicitly returned registers. @@ -2404,6 +2507,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, AFI->setLocalStackSize(NumBytes - PrologueSaveSize); if (homogeneousPrologEpilog(MF, &MBB)) { assert(!NeedsWinCFI); + assert(!IsSwiftCoroPartialReturn); auto LastPopI = MBB.getFirstTerminator(); if (LastPopI != MBB.begin()) { auto HomogeneousEpilog = std::prev(LastPopI); @@ -2424,7 +2528,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes); // Assume we can't combine the last pop with the sp restore. 
bool CombineAfterCSRBump = false; - if (!CombineSPBump && PrologueSaveSize != 0) { + if (!CombineSPBump && PrologueSaveSize != 0 && !IsSwiftCoroPartialReturn) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION || AArch64InstrInfo::isSEHInstruction(*Pop)) @@ -2460,6 +2564,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, IsSVECalleeSave(LastPopI)) { ++LastPopI; break; + } else if (IsSwiftCoroPartialReturn) { + assert(!EmitCFI); + assert(hasFP(MF)); + fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), + NeedsWinCFI, &HasWinCFI); + // if FP-based addressing, rewrite CSR restores from SP to FP + fixupCalleeSaveRestoreToFPBased( + *LastPopI, AFI->getCalleeSaveBaseToFrameRecordOffset()); } else if (CombineSPBump) fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); @@ -2479,6 +2591,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, } if (hasFP(MF) && AFI->hasSwiftAsyncContext()) { + assert(!IsSwiftCoroPartialReturn); switch (MF.getTarget().Options.SwiftAsyncFramePointer) { case SwiftAsyncFramePointerMode::DeploymentBased: // Avoid the reload as it is GOT relative, and instead fall back to the @@ -2512,6 +2625,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); + assert(!IsSwiftCoroPartialReturn); // When we are about to restore the CSRs, the CFA register is SP again. if (EmitCFI && hasFP(MF)) { @@ -2597,6 +2711,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, } if (!hasFP(MF)) { + assert(!IsSwiftCoroPartialReturn); bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the // stack pointer (but we may need to pop stack args for fastcc). @@ -2627,6 +2742,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, NumBytes = 0; } + if (IsSwiftCoroPartialReturn) + return; + // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to @@ -3504,9 +3622,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( DebugLoc DL; SmallVector RegPairs; bool NeedsWinCFI = needsWinCFI(MF); + bool IsSwiftCoroPartialReturn = false; - if (MBBI != MBB.end()) + if (MBBI != MBB.end()) { DL = MBBI->getDebugLoc(); + IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS; + } + + // The partial-return intrin/instr requires the swiftcoro cc + if (IsSwiftCoroPartialReturn && + MF.getFunction().getCallingConv() != CallingConv::SwiftCoro) + report_fatal_error("llvm.ret.popless requires swiftcorocc"); computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); if (homogeneousPrologEpilog(MF, &MBB)) { @@ -3519,6 +3645,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } + // If doing a partial/popless return, CSR restores are from FP, so do it last. 
+ if (IsSwiftCoroPartialReturn) { + auto IsFPLR = [](const RegPairInfo &c) { + return c.Reg1 == AArch64::LR && c.Reg2 == AArch64::FP; + }; + auto FPLRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsFPLR); + const RegPairInfo FPLRRPI = *FPLRBegin; + FPLRBegin = std::remove_if(RegPairs.begin(), RegPairs.end(), IsFPLR); + *FPLRBegin = FPLRRPI; + } + // For performance reasons restore SVE register in increasing order auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; }; auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); @@ -5199,6 +5336,10 @@ void AArch64FrameLowering::orderFrameObjects( const AArch64FunctionInfo &AFI = *MF.getInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (AFI.hasPoplessEpilogue()) + return; + std::vector FrameObjects(MFI.getObjectIndexEnd()); for (auto &Obj : ObjectsToAllocate) { FrameObjects[Obj].IsValid = true; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e5893132d9bfb..8bbf210195917 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2714,6 +2714,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::AUTH_CALL_RVMARKER) MAKE_CASE(AArch64ISD::LOADgot) MAKE_CASE(AArch64ISD::RET_GLUE) + MAKE_CASE(AArch64ISD::RET_POPLESS) MAKE_CASE(AArch64ISD::BRCOND) MAKE_CASE(AArch64ISD::CSEL) MAKE_CASE(AArch64ISD::CSINV) @@ -8856,6 +8857,18 @@ bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; } +SDValue AArch64TargetLowering::adjustReturnPopless(SDValue RetChain, + SelectionDAG &DAG) const { + if (RetChain.getOpcode() != AArch64ISD::RET_GLUE) + report_fatal_error("Unsupported aarch64 return for popless ret lowering"); + + auto *AFI = DAG.getMachineFunction().getInfo(); + AFI->setHasPoplessEpilogue(); + + return DAG.getNode(AArch64ISD::RET_POPLESS, SDLoc(RetChain), + MVT::Other, RetChain->ops()); +} + // Check if the value is zero-extended from i1 to i8 static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) { unsigned SizeInBits = Arg.getValueType().getSizeInBits(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 0d51ef2be8631..89a2f4aa05fee 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -90,6 +90,7 @@ enum NodeType : unsigned { LOADgot, // Load from automatically generated descriptor (e.g. Global // Offset Table, TLS record). RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. + RET_POPLESS, // Same as RET_GLUE, though "popless", = doesn't clean the stack. BRCOND, // Conditional branch instruction; "b.cond". CSEL, CSINV, // Conditional select invert. 
@@ -1104,6 +1105,9 @@ class AArch64TargetLowering : public TargetLowering { void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, SDValue &Chain) const; + SDValue adjustReturnPopless(SDValue RetChain, + SelectionDAG &DAG) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index fbeacaa6a498c..a1634ac33597f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -779,6 +779,8 @@ def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def AArch64ret_popless : SDNode<"AArch64ISD::RET_POPLESS", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, @@ -3312,6 +3314,13 @@ def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>, let isReturn = 1; } +def RET_POPLESS : Pseudo<(outs), (ins), [(AArch64ret_popless)]>, + Sched<[WriteBrReg]> { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; +} + // This is a directive-like pseudo-instruction. The purpose is to insert an // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction // (which in the usual case is a BLR). diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index d3026ca45c349..08985c038c29c 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -205,6 +205,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// The stack slot where the Swift asynchronous context is stored. int SwiftAsyncContextFrameIdx = std::numeric_limits::max(); + /// Whether this function has a swift coro return that doesn't restore + /// the stack. 
+ bool HasPoplessEpilogue = false; + bool IsMTETagged = false; /// The function has Scalable Vector or Scalable Predicate register argument @@ -549,6 +553,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { } int getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; } + bool hasPoplessEpilogue() const { + return HasPoplessEpilogue; + } + void setHasPoplessEpilogue(bool PE = true) { + HasPoplessEpilogue = PE; + } + bool needsDwarfUnwindInfo(const MachineFunction &MF) const; bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index ac98a3233ce42..ccae95c67fda5 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -483,6 +483,21 @@ bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv)); } +bool AArch64CallLowering::adjustReturnToPopless( + MachineIRBuilder &MIRBuilder) const { + MachineFunction &MF = MIRBuilder.getMF(); + + auto MBBI = std::prev(MIRBuilder.getMBB().end()); + assert(MBBI->getOpcode() == AArch64::RET_ReallyLR); + + auto *TII = MF.getSubtarget().getInstrInfo(); + MBBI->setDesc(TII->get(AArch64::RET_POPLESS)); + + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setHasPoplessEpilogue(); + return true; +} + /// Helper function to compute forwarded registers for musttail calls. Computes /// the forwarded registers, sets MBB liveness, and emits COPY instructions that /// can be used to save + restore registers later. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h index 9ae175274d5d9..34f233480b77d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h @@ -39,6 +39,8 @@ class AArch64CallLowering: public CallLowering { SmallVectorImpl &Outs, bool IsVarArg) const override; + bool adjustReturnToPopless(MachineIRBuilder &MIRBuilder) const override; + bool fallBackToDAGISel(const MachineFunction &MF) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll new file mode 100644 index 0000000000000..c4090e6bf2990 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -o - %s | FileCheck %s --check-prefixes=CHECK,SDISEL +; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=2 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL + +declare i64 @g(ptr, ptr) + +define swiftcorocc i64 @test_intrin_basic() #0 { +; CHECK-LABEL: test_intrin_basic: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: add x16, x29, #16 +; CHECK-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload +; CHECK-NEXT: autib x30, x16 +; CHECK-NEXT: ret + musttail call void @llvm.ret.popless() + ret i64 0 +} + +define swiftcorocc i64 @test_intrin() #0 { +; CHECK-LABEL: test_intrin: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #32 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: cbz x0, LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %else +; CHECK-NEXT: add x16, x29, #16 +; CHECK-NEXT: ldp x26, x25, [x29] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [x29, #16] ; 16-byte Folded Reload +; CHECK-NEXT: autib x30, x16 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_2: ; %then +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + %c = icmp eq i64 %v3, 0 + br i1 %c, label %then, label %else +then: + ret i64 %v3 +else: + musttail call void @llvm.ret.popless() + ret i64 %v3 +} + +define swiftcorocc i64 @test_vla(i32 %n) #0 { +; SDISEL-LABEL: test_vla: +; SDISEL: ; %bb.0: +; SDISEL-NEXT: pacibsp +; SDISEL-NEXT: stp x26, x25, [sp, #-32]! ; 16-byte Folded Spill +; SDISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; SDISEL-NEXT: add x29, sp, #16 +; SDISEL-NEXT: .cfi_def_cfa w29, 16 +; SDISEL-NEXT: .cfi_offset w30, -8 +; SDISEL-NEXT: .cfi_offset w29, -16 +; SDISEL-NEXT: .cfi_offset w25, -24 +; SDISEL-NEXT: .cfi_offset w26, -32 +; SDISEL-NEXT: ; kill: def $w0 killed $w0 def $x0 +; SDISEL-NEXT: ubfiz x8, x0, #3, #32 +; SDISEL-NEXT: add x8, x8, #15 +; SDISEL-NEXT: and x8, x8, #0xffffffff0 +; SDISEL-NEXT: mov x9, sp +; SDISEL-NEXT: sub x0, x9, x8 +; SDISEL-NEXT: mov sp, x0 +; SDISEL-NEXT: mov x9, sp +; SDISEL-NEXT: sub x1, x9, x8 +; SDISEL-NEXT: mov sp, x1 +; SDISEL-NEXT: ; InlineAsm Start +; SDISEL-NEXT: ; InlineAsm End +; SDISEL-NEXT: bl _g +; SDISEL-NEXT: cbz x0, LBB2_2 +; SDISEL-NEXT: ; %bb.1: ; %else +; SDISEL-NEXT: add x16, x29, #16 +; SDISEL-NEXT: ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload +; SDISEL-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload +; SDISEL-NEXT: autib x30, x16 +; SDISEL-NEXT: ret +; SDISEL-NEXT: LBB2_2: ; %then +; SDISEL-NEXT: sub sp, x29, #16 +; SDISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; SDISEL-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; SDISEL-NEXT: retab +; +; GISEL-LABEL: test_vla: +; GISEL: ; %bb.0: +; GISEL-NEXT: pacibsp +; GISEL-NEXT: stp x26, x25, [sp, #-32]! 
; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: add x29, sp, #16 +; GISEL-NEXT: .cfi_def_cfa w29, 16 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w25, -24 +; GISEL-NEXT: .cfi_offset w26, -32 +; GISEL-NEXT: mov x8, sp +; GISEL-NEXT: mov w9, w0 +; GISEL-NEXT: lsl x9, x9, #3 +; GISEL-NEXT: add x9, x9, #15 +; GISEL-NEXT: and x9, x9, #0xfffffffffffffff0 +; GISEL-NEXT: sub x0, x8, x9 +; GISEL-NEXT: mov sp, x0 +; GISEL-NEXT: mov x8, sp +; GISEL-NEXT: sub x1, x8, x9 +; GISEL-NEXT: mov sp, x1 +; GISEL-NEXT: ; InlineAsm Start +; GISEL-NEXT: ; InlineAsm End +; GISEL-NEXT: bl _g +; GISEL-NEXT: cbz x0, LBB2_2 +; GISEL-NEXT: ; %bb.1: ; %else +; GISEL-NEXT: add x16, x29, #16 +; GISEL-NEXT: ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload +; GISEL-NEXT: autib x30, x16 +; GISEL-NEXT: ret +; GISEL-NEXT: LBB2_2: ; %then +; GISEL-NEXT: sub sp, x29, #16 +; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; GISEL-NEXT: retab + %v1 = alloca i64, i32 %n + %v2 = alloca i64, i32 %n + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + %c = icmp eq i64 %v3, 0 + br i1 %c, label %then, label %else +then: + ret i64 %v3 +else: + musttail call void @llvm.ret.popless() + ret i64 %v3 +} + +attributes #0 = { "ptrauth-returns" } From a82efa9f5cea6e29bf5f8df3a02a83898813dff0 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 26 Mar 2025 16:35:31 -0700 Subject: [PATCH 12/14] [IR] Don't DCE llvm.ret.popless. We originally had the intrinsic forward its return value to the ret to have musttail-like behavior, which ensured it was always preserved. Now that the intrinsic call is musttail but doesn't have any forwarded operands, it needs to be kept alive through other means. It might make sense to mark it as having side effects, and not duplicable, but that shouldn't be necessary, and it's as duplicable as any musttail call+ret sequence would be. Because of this, we can't rely on it being DCE'd in ISel either, so drop it explicitly in IRTranslator for GISel. We already had to do it in SDISel anyway. While there, explicitly reject it in FastISel. rdar://147236255 (cherry picked from commit 384770e93e3d3c0677d091dd1563e3829fb5c280) --- llvm/include/llvm/IR/Intrinsics.td | 2 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 7 +++++++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 5 ++++- llvm/lib/Target/AArch64/AArch64FastISel.cpp | 3 +++ llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll | 2 +- llvm/test/Transforms/DCE/int_ret_popless.ll | 11 +++++++++++ 6 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/DCE/int_ret_popless.ll diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e6a59e70b4dbb..3728bb1769a2f 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -877,7 +877,7 @@ def int_localrecover : DefaultAttrsIntrinsic<[llvm_ptr_ty], // // Calls to this intrinsic need to be musttail, but don't follow the other ABI // requirements for musttail calls, since this is really annotating the ret. 
-def int_ret_popless : DefaultAttrsIntrinsic<[], [], [IntrNoMem]>; +def int_ret_popless : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly]>; // Given the frame pointer passed into an SEH filter function, returns a // pointer to the local variable area suitable for use with llvm.localrecover. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 80868e3f2f5f2..464dd11381a46 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2431,6 +2431,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, {getOrCreateVReg(*CI.getArgOperand(0))}); return true; } + case Intrinsic::ret_popless: { + // The ret.popless intrin call itself is only annotating the following ret. + // To achieve that, it does need to be musttail and reachable from the ret. + assert(CI.getParent()->getTerminatingMustTailCall() == &CI && + "llvm.ret.popless not in musttail position"); + return true; + } case Intrinsic::cttz: case Intrinsic::ctlz: { ConstantInt *Cst = cast(CI.getArgOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 40455e6792ced..8cc9be15e4859 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8039,7 +8039,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::ret_popless: - // We're handling this on the associated ret itself. + // The ret.popless intrin call itself is only annotating the following ret. + // To achieve that, it does need to be musttail and reachable from the ret. + assert(I.getParent()->getTerminatingMustTailCall() == &I && + "llvm.ret.popless not in musttail position"); return; case Intrinsic::threadlocal_address: { diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index acecf9835f086..b609fcf58eee7 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -3863,6 +3863,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (TLI.supportSplitCSR(FuncInfo.MF)) return false; + if (I->getParent()->getTerminatingMustTailCall()) + return false; + // Build a list of return value registers. 
SmallVector RetRegs; diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll index c4090e6bf2990..bb3f2f08875a0 100644 --- a/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll +++ b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -o - %s | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=2 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL declare i64 @g(ptr, ptr) diff --git a/llvm/test/Transforms/DCE/int_ret_popless.ll b/llvm/test/Transforms/DCE/int_ret_popless.ll new file mode 100644 index 0000000000000..4e6fd4d05e89e --- /dev/null +++ b/llvm/test/Transforms/DCE/int_ret_popless.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S < %s -passes=dce | FileCheck %s + +define void @test_ret_popless() { +; CHECK-LABEL: define void @test_ret_popless() { +; CHECK-NEXT: musttail call void @llvm.ret.popless() +; CHECK-NEXT: ret void +; + musttail call void @llvm.ret.popless() + ret void +} From 5e60cb2b63c275aa8ccdc913f1115930321e62fd Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 26 Mar 2025 17:11:27 -0700 Subject: [PATCH 13/14] [AArch64] Fix offset in FP-based epilogue restore for popless ret. In a swiftcorocc function, on the restoreless epilogue path (using llvm.ret.popless), we're using FP-based addressing to restore callee-saved registers, as we can't rely on SP having been restored to its initial value, since we're not restoring it at all. FP-based CSR restore is novel and bound to find interesting divergence from all of our existing epilogues. In this case, at least the problem is pretty simple, and was even visible in one of the original test case: we were missing the statically-sized locals. I haven't gotten to the point of convincing myself this is sufficient yet, and I'm confident I'm missing some other convoluted PEI-ism, but with this we can actually successfully run a bunch of end-to-end swift tests! While there, add an assert that checks that the FP/LR frame record itself is only ever loaded from FP+0, without an offset. If there's an offset from FP, we must have goofed somewhere, since that breaks the frame record linked list. 
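To make the offset arithmetic concrete, here is a minimal standalone sketch of
the immediate rewrite (not the in-tree code: the helper name and parameters are
invented for illustration; the real logic lives in
fixupCalleeSaveRestoreToFPBased in the diff below):

  // Minimal sketch, assuming the CSR reload immediate has already been made
  // relative to the current (un-restored) SP and is scaled, e.g. the operand
  // of an LDPXi with Scale == 8.
  #include <cassert>
  #include <cstdint>

  int64_t rewriteCSRRestoreImm(int64_t ScaledImm, int64_t Scale,
                               int64_t CalleeSaveBaseToFrameRecordOffset,
                               int64_t LocalStackSize, bool IsFrameRecordPair) {
    // The fix: the SP-to-FP distance must also cover the statically-sized
    // locals, not just the CSR-base-to-frame-record offset.
    int64_t FPSPOffset = CalleeSaveBaseToFrameRecordOffset + LocalStackSize;
    assert(FPSPOffset % Scale == 0 && "generated CSR opcodes use scaled offsets");
    int64_t NewImm = ScaledImm - FPSPOffset / Scale;
    // The FP/LR pair itself must land exactly at [fp, #0]; anything else
    // breaks the frame record linked list.
    assert((!IsFrameRecordPair || NewImm == 0) &&
           "FP/LR frame record should be restored from FP+0");
    return NewImm;
  }

This is also why the updated test below now expects "ldp x29, x30, [x29]" and
"ldp x26, x25, [x29, #-16]" on the popless path, rather than the previous
addresses that were off by the local stack size.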
rdar://147838968 (cherry picked from commit 8fc6907b29440527ddfdb99332ccd2b0348a6543) --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 15 +++++++++++---- .../CodeGen/AArch64/swiftcorocc-ret-popless.ll | 4 ++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index b3f362b721aea..546d2aaca4e98 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1595,7 +1595,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( } static void fixupCalleeSaveRestoreToFPBased(MachineInstr &MI, - uint64_t FPSPOffset) { + int64_t FPSPOffset) { assert(!AArch64InstrInfo::isSEHInstruction(MI)); unsigned Opc = MI.getOpcode(); @@ -1632,7 +1632,13 @@ static void fixupCalleeSaveRestoreToFPBased(MachineInstr &MI, MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); // All generated opcodes have scaled offsets. assert(FPSPOffset % Scale == 0); - OffsetOpnd.setImm(OffsetOpnd.getImm() - FPSPOffset / Scale); + int64_t ResidualOffset = OffsetOpnd.getImm() - (FPSPOffset / Scale); + OffsetOpnd.setImm(ResidualOffset); + + assert((!MI.getOperand(0).isReg() || + MI.getOperand(0).getReg() != AArch64::FP || ResidualOffset == 0) && + "FP/LR frame record should be restored from FP+0"); + } // Fixup callee-save register save/restore instructions to take into account @@ -2570,8 +2576,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); // if FP-based addressing, rewrite CSR restores from SP to FP - fixupCalleeSaveRestoreToFPBased( - *LastPopI, AFI->getCalleeSaveBaseToFrameRecordOffset()); + int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset() + + AFI->getLocalStackSize(); + fixupCalleeSaveRestoreToFPBased(*LastPopI, FPOffset); } else if (CombineSPBump) fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll index bb3f2f08875a0..3c6fb76d905e4 100644 --- a/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll +++ b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll @@ -42,8 +42,8 @@ define swiftcorocc i64 @test_intrin() #0 { ; CHECK-NEXT: cbz x0, LBB1_2 ; CHECK-NEXT: ; %bb.1: ; %else ; CHECK-NEXT: add x16, x29, #16 -; CHECK-NEXT: ldp x26, x25, [x29] ; 16-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [x29, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload ; CHECK-NEXT: autib x30, x16 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB1_2: ; %then From 2218c69838ca33797bdfdab3c90973e0d509c9fe Mon Sep 17 00:00:00 2001 From: Nate Chandler Date: Fri, 4 Apr 2025 08:58:47 -0700 Subject: [PATCH 14/14] [Coro] Retcon.Dynamic: Cap spill alignment. Apply to dynamic retcon coroutines the machinery added in 2937f8d14840f54bb10daf71c7af236f4d897884 for async coroutines. And test aligning of allocas with large alignments. 
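As a reference for what the capped frame layout falls back to, here is a
minimal sketch of the dynamic realignment that the new CHECK lines below
verify (the helper name is invented; the real sequence is emitted by the frame
builder as ptrtoint/add/and/inttoptr):

  #include <cstdint>

  // When a spilled alloca needs more alignment than the retcon storage
  // guarantees, the frame reserves extra padding and the field address is
  // realigned at runtime: bias by Align-1, then mask off the low bits
  // (Align is 32 for the <32 x i8> spill in the test).
  uint64_t realignSpillAddress(uint64_t FieldAddr, uint64_t Align) {
    return (FieldAddr + (Align - 1)) & ~(Align - 1);
  }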
rdar://148782254 (cherry picked from commit 9281ab22a7ed22b087827d35b75c63d94be18f4a) --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 2 + .../Coroutines/coro-retcon-once-dynamic.ll | 56 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 4259e37892df9..d65edb132165d 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -874,6 +874,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, std::optional MaxFrameAlignment; if (Shape.ABI == coro::ABI::Async) MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment(); + if (Shape.ABI == coro::ABI::RetconOnceDynamic) + MaxFrameAlignment = Shape.RetconLowering.getStorageAlignment(); FrameTypeBuilder B(C, DL, MaxFrameAlignment); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll index fd403845bccf8..6f491584c877e 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll @@ -4,6 +4,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "arm64-apple-macos99.99" ; CHECK-LABEL: %func.Frame = type { ptr } +; CHECK-LABEL: %big_types.Frame = type { <32 x i8>, [16 x i8], i64, ptr, %Integer8 } ; CHECK-LABEL: @func_cfp = constant <{ i32, i32 }> ; CHECK-SAME: <{ @@ -20,6 +21,16 @@ target triple = "arm64-apple-macos99.99" i32 64 ; frame size }> +@big_types_cfp = constant <{ i32, i32 }> + <{ i32 trunc ( ; offset to @func from @big_types_cfp + i64 sub ( + i64 ptrtoint (ptr @big_types to i64), + i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @big_types_cfp, i32 0, i32 1) to i64) + ) + to i32), + i32 64 ; frame size +}> + ; CHECK-LABEL: @func( ; CHECK-SAME: ptr %buffer, @@ -101,3 +112,48 @@ declare void @continuation_prototype(ptr, ptr) declare swiftcorocc noalias ptr @allocate(i32 %size) declare void @deallocate(ptr %ptr) + +%Integer8 = type { i8 } + +; CHECK-LABEL: @big_types( +; CHECK-SAME: ptr noalias %frame, +; CHECK-SAME: ptr swiftcoro %allocator, +; CHECK-SAME: i64 %index, +; CHECK-SAME: ptr swiftself dereferenceable(32) %vec_addr +; CHECK-SAME: ) { +; CHECK: [[VEC_STK_BASE_PTR:%.*]] = getelementptr inbounds %big_types.Frame, ptr %frame, i32 0, i32 0 +; CHECK: [[VEC_STK_BASE_INT:%.*]] = ptrtoint ptr [[VEC_STK_BASE_PTR]] to i64 +; CHECK: [[VEC_STK_BIASED_INT:%.*]] = add i64 [[VEC_STK_BASE_INT]], 31 +; CHECK: [[VEC_STK_ALIGNED_INT:%.*]] = and i64 [[VEC_STK_BIASED_INT]], -32 +; CHECK: %vec_stk = inttoptr i64 [[VEC_STK_ALIGNED_INT]] to ptr +define swiftcorocc { ptr, ptr } @big_types(ptr noalias %frame, ptr swiftcoro %allocator, i64 %index, ptr nocapture swiftself dereferenceable(32) %vec_addr) { + %element_addr = alloca %Integer8, align 1 + %id = tail call token @llvm.coro.id.retcon.once.dynamic( + i32 -1, + i32 16, + ptr nonnull @big_types_cfp, + ptr %allocator, + ptr %frame, + ptr @continuation_prototype, + ptr nonnull @allocate, + ptr nonnull @deallocate + ) + %handle = tail call ptr @llvm.coro.begin(token %id, ptr null) + call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %element_addr) + %vec_original = load <32 x i8>, ptr %vec_addr, align 32 + %vec_stk = alloca <32 x i8>, align 32 + store <32 x i8> %vec_original, ptr %vec_stk, align 32 + %vec_original_2 = load <32 x i8>, ptr %vec_stk, align 32 + %index32 = 
trunc i64 %index to i32
+  %element_original = extractelement <32 x i8> %vec_original_2, i32 %index32
+  store i8 %element_original, ptr %element_addr, align 1
+  call ptr (...) @llvm.coro.suspend.retcon.p0(ptr nonnull %element_addr)
+  %element_modified = load i8, ptr %element_addr, align 1
+  %vec_original_3 = load <32 x i8>, ptr %vec_stk, align 32
+  %vec_modified = insertelement <32 x i8> %vec_original_3, i8 %element_modified, i32 %index32
+  store <32 x i8> %vec_modified, ptr %vec_addr, align 32
+  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %element_addr)
+  call i1 @llvm.coro.end(ptr %handle, i1 false, token none)
+  unreachable
+}
+