diff --git a/clang/include/clang/AST/ASTConsumer.h b/clang/include/clang/AST/ASTConsumer.h index 447f2592d2359..6cf4504dcfa60 100644 --- a/clang/include/clang/AST/ASTConsumer.h +++ b/clang/include/clang/AST/ASTConsumer.h @@ -27,6 +27,7 @@ namespace clang { class VarDecl; class FunctionDecl; class ImportDecl; + class TargetInfo; /// ASTConsumer - This is an abstract interface that should be implemented by /// clients that read ASTs. This abstraction layer allows the client to be @@ -47,6 +48,14 @@ class ASTConsumer { /// ASTContext. virtual void Initialize(ASTContext &Context) {} + /// Initialize - This is called to initialize the consumer, providing the + /// ASTContext. 'CodeGenTargetInfo' specifies the code-generation configuration + /// for this compilation instance, which may differ from the one carried + /// by the Context itself only in the OS Version number - + /// for example when type-checking must be performed against an epoch OS version + /// while code-generation must run according to the user-specified OS version. + virtual void Initialize(ASTContext &Context, const TargetInfo &CodeGenTargetInfo) {} + /// HandleTopLevelDecl - Handle the specified top-level declaration. This is /// called by the parser to process every top-level Decl*. /// diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 14b2f4857d8fd..51c75455f21a6 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -335,12 +335,14 @@ CodeGenModule::CodeGenModule(ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, - const CodeGenOptions &CGO, llvm::Module &M, + const CodeGenOptions &CGO, + const TargetInfo &CGTI, + llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), - Target(C.getTargetInfo()), ABI(createCXXABI(*this)), + Target(CGTI), ABI(createCXXABI(*this)), VMContext(M.getContext()), VTables(*this), StackHandler(diags), SanitizerMD(new SanitizerMetadata(*this)), AtomicOpts(Target.getAtomicOpts()) { @@ -357,19 +359,19 @@ CodeGenModule::CodeGenModule(ASTContext &C, BFloatTy = llvm::Type::getBFloatTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); - PointerWidthInBits = C.getTargetInfo().getPointerWidth(LangAS::Default); + PointerWidthInBits = Target.getPointerWidth(LangAS::Default); PointerAlignInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(LangAS::Default)) + C.toCharUnitsFromBits(Target.getPointerAlign(LangAS::Default)) .getQuantity(); SizeSizeInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); + C.toCharUnitsFromBits(Target.getMaxPointerWidth()).getQuantity(); IntAlignInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity(); + C.toCharUnitsFromBits(Target.getIntAlign()).getQuantity(); CharTy = - llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getCharWidth()); - IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); + llvm::IntegerType::get(LLVMContext, Target.getCharWidth()); + IntTy = llvm::IntegerType::get(LLVMContext, Target.getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, - C.getTargetInfo().getMaxPointerWidth()); + Target.getMaxPointerWidth()); Int8PtrTy = llvm::PointerType::get(LLVMContext, 
C.getTargetAddressSpace(LangAS::Default)); const llvm::DataLayout &DL = M.getDataLayout(); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 87d62da87cc47..1c6dbaad1f0d3 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -690,7 +690,9 @@ class CodeGenModule : public CodeGenTypeCache { CodeGenModule(ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, const HeaderSearchOptions &headersearchopts, const PreprocessorOptions &ppopts, - const CodeGenOptions &CodeGenOpts, llvm::Module &M, + const CodeGenOptions &CodeGenOpts, + const TargetInfo &CodeGenTargetInfo, + llvm::Module &M, DiagnosticsEngine &Diags, CoverageSourceInfo *CoverageInfo = nullptr); diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp index 09a7d79ae4afb..e283777d1c092 100644 --- a/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/clang/lib/CodeGen/ModuleBuilder.cpp @@ -149,21 +149,26 @@ namespace { } void Initialize(ASTContext &Context) override { + Initialize(Context, Context.getTargetInfo()); + } + + void Initialize(ASTContext &Context, const TargetInfo &CodeGenTargetInfo) override { Ctx = &Context; - M->setTargetTriple(Ctx->getTargetInfo().getTriple()); - M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); - const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion(); + M->setTargetTriple(CodeGenTargetInfo.getTriple()); + M->setDataLayout(CodeGenTargetInfo.getDataLayoutString()); + const auto &SDKVersion = CodeGenTargetInfo.getSDKVersion(); if (!SDKVersion.empty()) M->setSDKVersion(SDKVersion); - if (const auto *TVT = Ctx->getTargetInfo().getDarwinTargetVariantTriple()) + if (const auto *TVT = CodeGenTargetInfo.getDarwinTargetVariantTriple()) M->setDarwinTargetVariantTriple(TVT->getTriple()); if (auto TVSDKVersion = - Ctx->getTargetInfo().getDarwinTargetVariantSDKVersion()) + CodeGenTargetInfo.getDarwinTargetVariantSDKVersion()) M->setDarwinTargetVariantSDKVersion(*TVSDKVersion); Builder.reset(new CodeGen::CodeGenModule(Context, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, - *M, Diags, CoverageInfo)); + CodeGenTargetInfo, *M, + Diags, CoverageInfo)); for (auto &&Lib : CodeGenOpts.DependentLibraries) Builder->AddDependentLib(Lib); diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp index 384685e3f1eec..8b54552623872 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp @@ -174,6 +174,10 @@ class PCHContainerGenerator : public ASTConsumer { ~PCHContainerGenerator() override = default; void Initialize(ASTContext &Context) override { + Initialize(Context, Context.getTargetInfo()); + } + + void Initialize(ASTContext &Context, const TargetInfo &CodeGenTargetInfo) override { assert(!Ctx && "initialized multiple times"); Ctx = &Context; @@ -181,7 +185,8 @@ class PCHContainerGenerator : public ASTConsumer { M.reset(new llvm::Module(MainFileName, *VMContext)); M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); Builder.reset(new CodeGen::CodeGenModule( - *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags)); + *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, + CodeGenTargetInfo, *M, Diags)); // Prepare CGDebugInfo to emit debug info for a clang module. 
auto *DI = Builder->getModuleDebugInfo(); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index a8f9c71781701..6560ae5fc5231 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -162,6 +162,7 @@ enum Kind { kw_anyregcc, kw_swiftcc, kw_swifttailcc, + kw_swiftcorocc, kw_preserve_mostcc, kw_preserve_allcc, kw_preserve_nonecc, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 92b6e68d9d0a7..da62bf6554aa0 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -791,6 +791,7 @@ enum AttributeKindCodes { ATTR_KIND_NO_DIVERGENCE_SOURCE = 100, ATTR_KIND_SANITIZE_TYPE = 101, ATTR_KIND_CAPTURES = 102, + ATTR_KIND_SWIFT_CORO = 103, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 9c8226660e087..dd8c2469f4cba 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -538,6 +538,15 @@ class CallLowering { return false; } + /// This hook must be implemented to lower @llvm.ret.popless intrinsics, + /// which are required to be musttail, and are effectively annotating a + /// return instruction to mark it "popless", i.e., not restoring SP. + /// This "adjustment" step runs after lowerReturn, and is only meant to make + /// it a little less painful to maintain before we add this to the main hook. + virtual bool adjustReturnToPopless(MachineIRBuilder &MIRBuilder) const { + report_fatal_error("Popless returns not implemented for this target"); + } + virtual bool fallBackToDAGISel(const MachineFunction &MF) const { return false; } diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 172c3e8c9a847..771fd90f32645 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -319,6 +319,10 @@ class MachineFrameInfo { /// instructions which manipulate the stack pointer. bool HasCopyImplyingStackAdjustment = false; + /// True if the function contains a call using a calling convention that + /// allows it to be "popless", i.e., to not restore SP when the call returns. + bool HasPoplessCall = false; + /// True if the function contains a call to the llvm.vastart intrinsic. bool HasVAStart = false; @@ -634,6 +638,9 @@ class MachineFrameInfo { HasCopyImplyingStackAdjustment = B; } + bool hasPoplessCall() const { return HasPoplessCall; } + void setHasPoplessCall(bool B = true) { HasPoplessCall = B; } + /// Returns true if the function calls the llvm.va_start intrinsic. 
bool hasVAStart() const { return HasVAStart; } void setHasVAStart(bool B) { HasVAStart = B; } diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index a28c7a99fb3b5..b0820b17aeb1d 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -41,6 +41,7 @@ namespace ISD { unsigned IsSplitEnd : 1; ///< Last part of a split unsigned IsSwiftSelf : 1; ///< Swift self parameter unsigned IsSwiftAsync : 1; ///< Swift async context parameter + unsigned IsSwiftCoro : 1; ///< Swift coro parameter unsigned IsSwiftError : 1; ///< Swift error parameter unsigned IsCFGuardTarget : 1; ///< Control Flow Guard target unsigned IsHva : 1; ///< HVA field for @@ -64,6 +65,7 @@ namespace ISD { : IsZExt(0), IsSExt(0), IsNoExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsByRef(0), IsNest(0), IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0), IsSplitEnd(0), IsSwiftSelf(0), IsSwiftAsync(0), + IsSwiftCoro(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0), IsSecArgPass(0), MemAlign(0), OrigAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), @@ -104,6 +106,9 @@ namespace ISD { bool isSwiftAsync() const { return IsSwiftAsync; } void setSwiftAsync() { IsSwiftAsync = 1; } + bool isSwiftCoro() const { return IsSwiftCoro; } + void setSwiftCoro() { IsSwiftCoro = 1; } + bool isSwiftError() const { return IsSwiftError; } void setSwiftError() { IsSwiftError = 1; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index b528079c2d594..a9bef1df0040e 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -315,6 +315,7 @@ class TargetLoweringBase { bool IsSwiftSelf : 1; bool IsSwiftAsync : 1; bool IsSwiftError : 1; + bool IsSwiftCoro : 1; bool IsCFGuardTarget : 1; MaybeAlign Alignment = std::nullopt; Type *IndirectType = nullptr; @@ -324,7 +325,7 @@ class TargetLoweringBase { IsSRet(false), IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false), IsPreallocated(false), IsReturned(false), IsSwiftSelf(false), IsSwiftAsync(false), IsSwiftError(false), - IsCFGuardTarget(false) {} + IsSwiftCoro(false), IsCFGuardTarget(false) {} void setAttributes(const CallBase *Call, unsigned ArgIdx); }; @@ -4889,6 +4890,10 @@ class TargetLowering : public TargetLoweringBase { llvm_unreachable("Not Implemented"); } + virtual SDValue adjustReturnPopless(SDValue Chain, SelectionDAG &DAG) const { + report_fatal_error("Popless returns not implemented for this target"); + } + /// Return true if result of the specified node is used by a return node /// only. It also compute and return the input chain for the tail call. /// diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index fb94926043fc7..44cebfdb48949 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -358,6 +358,9 @@ def SwiftSelf : EnumAttr<"swiftself", IntersectPreserve, [ParamAttr]>; /// Argument is swift async context. def SwiftAsync : EnumAttr<"swiftasync", IntersectPreserve, [ParamAttr]>; +/// Argument is swift coro allocator. +def SwiftCoro : EnumAttr<"swiftcoro", IntersectPreserve, [ParamAttr]>; + /// Function must be in a unwind table. 
def UWTable : IntAttr<"uwtable", IntersectPreserve, [FnAttr]>; diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index d68491eb5535c..d1fbe46e74682 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -284,6 +284,10 @@ namespace CallingConv { RISCV_VLSCall_32768 = 122, RISCV_VLSCall_65536 = 123, + /// This follows the Swift calling convention in how arguments are passed + /// but doesn't clean up the stack on a return. + SwiftCoro = 124, + /// The highest possible ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index fe83c9df5731e..3728bb1769a2f 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -865,6 +865,20 @@ def int_localrecover : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; +// Marks the following ret instruction as a "popless" return, one that does +// not restore SP to its function-entry value (i.e., does not deallocate the +// stack frame), allowing allocations made in the function to be accessible +// by the caller. +// +// The function must be annotated with an appropriate target-specific calling +// convention, so the caller can generate stack accesses accordingly, generally +// by treating the call as a variably-sized alloca, so using FP-based addressing +// for its own frame rather than relying on statically known SP offsets. +// +// Calls to this intrinsic need to be musttail, but don't follow the other ABI +// requirements for musttail calls, since this is really annotating the ret. +def int_ret_popless : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly]>; + // Given the frame pointer passed into an SEH filter function, returns a // pointer to the local variable area suitable for use with llvm.localrecover. def int_eh_recoverfp : DefaultAttrsIntrinsic<[llvm_ptr_ty], @@ -1742,6 +1756,10 @@ def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty], []>; +def int_coro_id_retcon_once_dynamic : Intrinsic<[llvm_token_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, + llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + []>; def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>; def int_coro_id_async : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td index 18b7ff4aec95f..6b3be7b6ed3fb 100644 --- a/llvm/include/llvm/Target/TargetCallingConv.td +++ b/llvm/include/llvm/Target/TargetCallingConv.td @@ -61,6 +61,11 @@ class CCIfSwiftAsync<CCAction A> : CCIf<"ArgFlags.isSwiftAsync()", A> { class CCIfSwiftError<CCAction A> : CCIf<"ArgFlags.isSwiftError()", A> { } +/// CCIfSwiftCoro - If the current argument has swiftcoro parameter attribute, +/// apply Action A. +class CCIfSwiftCoro<CCAction A> : CCIf<"ArgFlags.isSwiftCoro()", A> { +} + /// CCIfCFGuardTarget - If the current argument has cfguardtarget parameter /// attribute, apply Action A.
class CCIfCFGuardTarget<CCAction A> : CCIf<"ArgFlags.isCFGuardTarget()", A> { diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h index 1ad5f7fefc8cb..24f5da8d6993b 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h @@ -135,6 +135,7 @@ class AnyCoroIdInst : public IntrinsicInst { auto ID = I->getIntrinsicID(); return ID == Intrinsic::coro_id || ID == Intrinsic::coro_id_retcon || ID == Intrinsic::coro_id_retcon_once || + ID == Intrinsic::coro_id_retcon_once_dynamic || ID == Intrinsic::coro_id_async; } @@ -314,6 +315,72 @@ class CoroIdRetconOnceInst : public AnyCoroIdRetconInst { } }; +/// This represents the llvm.coro.id.retcon.once.dynamic instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconOnceDynamicInst + : public AnyCoroIdInst { + enum { + SizeArg, + AlignArg, + CoroFuncPtrArg, + AllocatorArg, + StorageArg, + PrototypeArg, + AllocArg, + DeallocArg + }; + +public: + void checkWellFormed() const; + + uint64_t getStorageSize() const { + return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue(); + } + + Align getStorageAlignment() const { + return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue(); + } + + Value *getStorage() const { return getArgOperand(StorageArg); } + + /// Return the coro function pointer address. This should be the address of + /// a coro function pointer struct for the current coro function. + /// struct coro_function_pointer { + /// uint32_t frame size; + /// uint32_t relative_pointer(coro_function); + /// }; + GlobalVariable *getCoroFunctionPointer() const { + return cast<GlobalVariable>( + getArgOperand(CoroFuncPtrArg)->stripPointerCasts()); + } + + /// Return the prototype for the continuation function. The type, + /// attributes, and calling convention of the continuation function(s) + /// are taken from this declaration. + Function *getPrototype() const { + return cast<Function>(getArgOperand(PrototypeArg)->stripPointerCasts()); + } + + /// Return the function to use for allocating memory. + Function *getAllocFunction() const { + return cast<Function>(getArgOperand(AllocArg)->stripPointerCasts()); + } + + /// Return the function to use for deallocating memory. + Function *getDeallocFunction() const { + return cast<Function>(getArgOperand(DeallocArg)->stripPointerCasts()); + } + + Value *getAllocator() const { return getArgOperand(AllocatorArg); } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon_once_dynamic; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + /// This represents the llvm.coro.id.async instruction. class CoroIdAsyncInst : public AnyCoroIdInst { enum { SizeArg, AlignArg, StorageArg, AsyncFuncPtrArg }; diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h index 2e98b089358bc..a60f813785a59 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h @@ -45,6 +45,10 @@ enum class ABI { /// single continuation function. The continuation function is available as an /// intrinsic. Async, + + /// The variant of RetconOnce which features a dynamically-sized caller + /// allocation. 
+ RetconOnceDynamic, }; // Holds structural Coroutine Intrinsics for a particular function and other @@ -127,9 +131,18 @@ struct Shape { Function *ResumePrototype; Function *Alloc; Function *Dealloc; + Value *Allocator; BasicBlock *ReturnBlock; bool IsFrameInlineInStorage; ConstantInt* TypeId; + GlobalVariable *CoroFuncPointer; + Value *Storage; + uint64_t StorageSize; + Align StorageAlignment; + // computed during splitting: + uint64_t ContextSize; + + Align getStorageAlignment() const { return Align(StorageAlignment); } }; struct AsyncLoweringStorage { @@ -194,6 +207,7 @@ struct Shape { /*IsVarArg=*/false); case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: return RetconLowering.ResumePrototype->getFunctionType(); case coro::ABI::Async: // Not used. The function type depends on the active suspend. @@ -204,7 +218,8 @@ struct Shape { } ArrayRef<Type *> getRetconResultTypes() const { - assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce); + assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce || + ABI == coro::ABI::RetconOnceDynamic); auto FTy = CoroBegin->getFunction()->getFunctionType(); // The safety of all this is checked by checkWFRetconPrototype. @@ -216,7 +231,8 @@ struct Shape { } ArrayRef<Type *> getRetconResumeTypes() const { - assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce); + assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce || + ABI == coro::ABI::RetconOnceDynamic); // The safety of all this is checked by checkWFRetconPrototype. auto FTy = RetconLowering.ResumePrototype->getFunctionType(); @@ -230,6 +246,7 @@ struct Shape { case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: return RetconLowering.ResumePrototype->getCallingConv(); case coro::ABI::Async: return AsyncLowering.AsyncCC; @@ -262,7 +279,7 @@ struct Shape { /// \param CG - if non-null, will be updated for the new call void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; - Shape() = default; + Shape() = delete; explicit Shape(Function &F) { SmallVector<CoroFrameInst *, 8> CoroFrames; SmallVector<CoroSaveInst *, 2> UnusedCoroSaves; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 30e1af602667c..6b431f2f71594 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -2361,6 +2361,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { return false; case Intrinsic::icall_branch_funnel: case Intrinsic::localescape: + case Intrinsic::ret_popless: HasUninlineableIntrinsic = true; return false; case Intrinsic::vastart: diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 4d25b12c9ab06..0334f684cb94d 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -659,6 +659,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_regcallcc); KEYWORD(swiftcc); KEYWORD(swifttailcc); + KEYWORD(swiftcorocc); KEYWORD(anyregcc); KEYWORD(preserve_mostcc); KEYWORD(preserve_allcc); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 3b9b466ddeeab..a0969f9141629 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2188,6 +2188,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'ghccc' /// ::= 'swiftcc' /// ::= 'swifttailcc' +/// ::= 'swiftcorocc' /// ::= 'x86_intrcc' /// ::= 'hhvmcc' /// ::= 'hhvm_ccc' @@ -2254,6 +2255,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { case 
lltok::kw_ghccc: CC = CallingConv::GHC; break; case lltok::kw_swiftcc: CC = CallingConv::Swift; break; case lltok::kw_swifttailcc: CC = CallingConv::SwiftTail; break; + case lltok::kw_swiftcorocc: CC = CallingConv::SwiftCoro; break; case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break; case lltok::kw_hhvmcc: CC = CallingConv::DUMMY_HHVM; diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 5c62ef4ad8e4e..c055f7c296711 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2202,6 +2202,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::SwiftSelf; case bitc::ATTR_KIND_SWIFT_ASYNC: return Attribute::SwiftAsync; + case bitc::ATTR_KIND_SWIFT_CORO: + return Attribute::SwiftCoro; case bitc::ATTR_KIND_UW_TABLE: return Attribute::UWTable; case bitc::ATTR_KIND_VSCALE_RANGE: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1a39205d93e19..33c95f6b95650 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -875,6 +875,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_SWIFT_SELF; case Attribute::SwiftAsync: return bitc::ATTR_KIND_SWIFT_ASYNC; + case Attribute::SwiftCoro: + return bitc::ATTR_KIND_SWIFT_CORO; case Attribute::UWTable: return bitc::ATTR_KIND_UW_TABLE; case Attribute::VScaleRange: diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 0af70f333f864..79ada17b23a21 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -62,6 +62,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, Flags.setSwiftAsync(); if (AttrFn(Attribute::SwiftError)) Flags.setSwiftError(); + if (AttrFn(Attribute::SwiftCoro)) + Flags.setSwiftCoro(); } ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 931e4fe19e69a..464dd11381a46 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -391,7 +391,14 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. - return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); + bool Success = + CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); + + if (auto *MustTailCI = RI.getParent()->getTerminatingMustTailCall()) + if (MustTailCI->getIntrinsicID() == Intrinsic::ret_popless) + Success &= CLI->adjustReturnToPopless(MIRBuilder); + + return Success; } void IRTranslator::emitBranchForMergedCondition( @@ -2424,6 +2431,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, {getOrCreateVReg(*CI.getArgOperand(0))}); return true; } + case Intrinsic::ret_popless: { + // The ret.popless intrin call itself is only annotating the following ret. + // To achieve that, it does need to be musttail and reachable from the ret. 
+ assert(CI.getParent()->getTerminatingMustTailCall() == &CI && + "llvm.ret.popless not in musttail position"); + return true; + } case Intrinsic::cttz: case Intrinsic::ctlz: { ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index fbc0264961bc7..85db1a064980d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1050,6 +1050,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setSwiftAsync(); if (Arg.IsSwiftError) Flags.setSwiftError(); + if (Arg.IsSwiftCoro) + Flags.setSwiftCoro(); if (Arg.IsCFGuardTarget) Flags.setCFGuardTarget(); if (Arg.IsByVal) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f592e69d57fec..8cc9be15e4859 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2153,6 +2153,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { return; } + // Musttail calls to @llvm.ret.popless are used to annotate the ret as + // "popless". Keep track of it here, and ask the target to do so later. + bool IsPoplessReturn = false; + if (auto *MustTailCI = I.getParent()->getTerminatingMustTailCall()) + if (MustTailCI->getIntrinsicID() == Intrinsic::ret_popless) + IsPoplessReturn = true; + if (!FuncInfo.CanLowerReturn) { Register DemoteReg = FuncInfo.DemoteRegister; @@ -2287,6 +2294,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); + // If we did find this return instruction to be popless, make it so now. + // It's still a normal return in almost all regards, we just need to remember + // it's popless, for when we lower the return and emit the epilogue later. + // Ideally we'd ask LowerReturn to do that, but the API is enough of a pain + // as it is, and all targets would have to learn about that. + if (IsPoplessReturn) { + SDValue NewChain = + DAG.getTargetLoweringInfo().adjustReturnPopless(Chain, DAG); + DAG.RemoveDeadNode(Chain.getNode()); + Chain = NewChain; + } + // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && "LowerReturn didn't return a valid chain!"); @@ -8018,6 +8037,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask)); return; } + + case Intrinsic::ret_popless: + // The ret.popless intrin call itself is only annotating the following ret. + // To achieve that, it does need to be musttail and reachable from the ret. 
+ assert(I.getParent()->getTerminatingMustTailCall() == &I && + "llvm.ret.popless not in musttail position"); + return; + case Intrinsic::threadlocal_address: { setValue(&I, getValue(I.getOperand(0))); return; @@ -11001,6 +11028,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSwiftSelf = false; Entry.IsSwiftAsync = false; Entry.IsSwiftError = false; + Entry.IsSwiftCoro = false; Entry.IsCFGuardTarget = false; Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); @@ -11120,6 +11148,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setSwiftAsync(); if (Args[i].IsSwiftError) Flags.setSwiftError(); + if (Args[i].IsSwiftCoro) + Flags.setSwiftCoro(); if (Args[i].IsCFGuardTarget) Flags.setCFGuardTarget(); if (Args[i].IsByVal) @@ -11651,6 +11681,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setSwiftAsync(); if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); + if (Arg.hasAttribute(Attribute::SwiftCoro)) + Flags.setSwiftCoro(); if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); if (Arg.hasAttribute(Attribute::ByRef)) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3995216e3d689..60ebf71b3dd3c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -128,6 +128,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); + IsSwiftCoro = Call->paramHasAttr(ArgIdx, Attribute::SwiftCoro); Alignment = Call->getParamStackAlign(ArgIdx); IndirectType = nullptr; assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 && diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index ac8aa0d35ea30..f57e65980e731 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -378,6 +378,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break; case CallingConv::Swift: Out << "swiftcc"; break; case CallingConv::SwiftTail: Out << "swifttailcc"; break; + case CallingConv::SwiftCoro: Out << "swiftcorocc"; break; case CallingConv::X86_INTR: Out << "x86_intrcc"; break; case CallingConv::DUMMY_HHVM: Out << "hhvmcc"; diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index c632b1b2dc2ab..20050459b3c3e 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -311,6 +311,14 @@ const CallInst *BasicBlock::getTerminatingMustTailCall() const { if (!Prev) return nullptr; + // Some musttail intrinsic calls are special in being really just ret + // annotations, and only need to be the last instruction before the ret. + // We don't need to look through the return value in those cases. + // FIXME: we should generalize getTerminatingDeoptimizeCall for this case.
+ if (auto *CI = dyn_cast<CallInst>(Prev)) + if (CI->isMustTailCall() && CI->getIntrinsicID() == Intrinsic::ret_popless) + return CI; + if (Value *RV = RI->getReturnValue()) { if (RV != Prev) return nullptr; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index b0ccab120ccf0..336ab9b8acc0a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2239,6 +2239,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, bool SawSwiftSelf = false; bool SawSwiftAsync = false; bool SawSwiftError = false; + bool SawSwiftCoro = false; // Verify return value attributes. AttributeSet RetAttrs = Attrs.getRetAttrs(); @@ -2315,6 +2316,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, SawSwiftError = true; } + if (ArgAttrs.hasAttribute(Attribute::SwiftCoro)) { + Check(!SawSwiftCoro, "Cannot have multiple 'swiftcoro' parameters!", V); + SawSwiftCoro = true; + } + if (ArgAttrs.hasAttribute(Attribute::InAlloca)) { Check(i == FT->getNumParams() - 1, "inalloca isn't on the last parameter!", V); @@ -3953,6 +3959,7 @@ static AttrBuilder getParameterABIAttributes(LLVMContext& C, unsigned I, Attribu static const Attribute::AttrKind ABIAttrs[] = { Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, Attribute::InReg, Attribute::StackAlignment, Attribute::SwiftSelf, + Attribute::SwiftCoro, Attribute::SwiftAsync, Attribute::SwiftError, Attribute::Preallocated, Attribute::ByRef}; AttrBuilder Copy(C); @@ -3987,6 +3994,15 @@ void Verifier::verifyMustTailCall(CallInst &CI) { &CI); } #endif + Check(CI.getIntrinsicID() != Intrinsic::ret_popless, + "llvm.ret.popless call must be musttail", &CI); + return; + } + + // Some musttail intrinsic calls are special, and don't have all the rules. + if (CI.getIntrinsicID() == Intrinsic::ret_popless) { + ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(CI.getNextNode()); + Check(Ret, "musttail intrinsic call must precede a ret", &CI); return; } diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 7cca6d9bc6b9c..e071689dacddc 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -72,6 +72,9 @@ defvar AArch64_Common = [ // preserved for normal function calls. CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>, + // Pass SwiftCoro in X23. + CCIfSwiftCoro<CCIfType<[i64], CCAssignToReg<[X23]>>>, + CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, @@ -241,6 +244,9 @@ def CC_AArch64_Arm64EC_Thunk : CallingConv<[ // normal functions don't need to save it somewhere. CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X21]>>>, + // Pass SwiftCoro in X22. + CCIfSwiftCoro<CCIfType<[i64], CCAssignToReg<[X22]>>>, + // The 'CFGuardTarget' parameter, if any, is passed in RAX (R8). CCIfCFGuardTarget<CCAssignToReg<[X8]>>, @@ -374,6 +380,9 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // preserved for normal function calls. CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>, + // Pass SwiftCoro in X23. 
+ CCIfSwiftCoro<CCIfType<[i64], CCAssignToReg<[X23]>>>, + CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 37241f3a70471..b609fcf58eee7 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2926,6 +2926,7 @@ bool AArch64FastISel::fastLowerArguments() { Arg.hasAttribute(Attribute::SwiftSelf) || Arg.hasAttribute(Attribute::SwiftAsync) || Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::SwiftCoro) || Arg.hasAttribute(Attribute::Nest)) return false; @@ -3193,6 +3194,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { for (auto Flag : CLI.OutFlags) if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || + Flag.isSwiftCoro() || Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) return false; @@ -3861,6 +3863,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (TLI.supportSplitCSR(FuncInfo.MF)) return false; + if (I->getParent()->getTerminatingMustTailCall()) + return false; + // Build a list of return value registers. SmallVector<Register, 4> RetRegs; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 642d59d06b123..546d2aaca4e98 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -350,7 +350,8 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( // Bail on stack adjustment needed on return for simplicity. const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)) + if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) || + MFI.hasPoplessCall()) return false; if (Exit && getArgumentStackToRestore(MF, *Exit)) return false; @@ -502,9 +503,15 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const { if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + MFI.hasPoplessCall() || MFI.hasStackMap() || MFI.hasPatchPoint() || RegInfo->hasStackRealignment(MF)) return true; + + const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + if (AFI->hasPoplessEpilogue()) + return true; + // With large callframes around we may need to use FP to access the scavenging // emergency spillslot. // @@ -1123,6 +1130,12 @@ bool AArch64FrameLowering::canUseAsPrologue( return false; } + // If we have some return path that's popless, it needs its own very-special + // epilogue, so we can't shrink-wrap it away. + // FIXME: this and some of the below checks belong in enableShrinkWrapping. + if (AFI->hasPoplessEpilogue()) + return false; + // Certain stack probing sequences might clobber flags, then we can't use // the block as a prologue if the flags register is a live-in. 
if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() && @@ -1186,6 +1199,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( if (MFI.hasVarSizedObjects()) return false; + if (MFI.hasPoplessCall()) + return false; + if (RegInfo->hasStackRealignment(MF)) return false; @@ -1205,6 +1221,12 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue( MachineBasicBlock &MBB, uint64_t StackBumpBytes) const { + + MachineFunction &MF = *MBB.getParent(); + AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + if (AFI->hasPoplessEpilogue()) + return false; + if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes)) return false; if (MBB.empty()) @@ -1572,6 +1594,53 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( return std::prev(MBB.erase(MBBI)); } +static void fixupCalleeSaveRestoreToFPBased(MachineInstr &MI, + int64_t FPSPOffset) { + assert(!AArch64InstrInfo::isSEHInstruction(MI)); + + unsigned Opc = MI.getOpcode(); + unsigned Scale; + switch (Opc) { + case AArch64::STPXi: + case AArch64::STRXui: + case AArch64::STPDi: + case AArch64::STRDui: + case AArch64::LDPXi: + case AArch64::LDRXui: + case AArch64::LDPDi: + case AArch64::LDRDui: + Scale = 8; + break; + case AArch64::STPQi: + case AArch64::STRQui: + case AArch64::LDPQi: + case AArch64::LDRQui: + Scale = 16; + break; + default: + llvm_unreachable("Unexpected callee-save save/restore opcode!"); + } + + unsigned OffsetIdx = MI.getNumExplicitOperands() - 1; + + MachineOperand &BaseRegOpnd = MI.getOperand(OffsetIdx - 1); + assert(BaseRegOpnd.getReg() == AArch64::SP && + "Unexpected base register in callee-save save/restore instruction!"); + BaseRegOpnd.setReg(AArch64::FP); // XXX TRI + + // Last operand is immediate offset that needs fixing. + MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); + // All generated opcodes have scaled offsets. + assert(FPSPOffset % Scale == 0); + int64_t ResidualOffset = OffsetOpnd.getImm() - (FPSPOffset / Scale); + OffsetOpnd.setImm(ResidualOffset); + + assert((!MI.getOperand(0).isReg() || + MI.getOperand(0).getReg() != AArch64::FP || ResidualOffset == 0) && + "FP/LR frame record should be restored from FP+0"); + +} + // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, @@ -2214,7 +2283,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes); allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || LocalsSize); + MFI.hasVarSizedObjects() || LocalsSize || + MFI.hasPoplessCall()); CFAOffset += SVECalleeSavesSize; if (EmitAsyncCFI) @@ -2231,7 +2301,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding, SVELocalsSize + StackOffset::getFixed(NumBytes), NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, - CFAOffset, MFI.hasVarSizedObjects()); + CFAOffset, + MFI.hasVarSizedObjects() || MFI.hasPoplessCall()); } // If we need a base pointer, set it up here. 
It's whatever the value of the @@ -2312,10 +2383,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF); bool HasWinCFI = false; bool IsFunclet = false; + bool IsSwiftCoroPartialReturn = false; if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); IsFunclet = isFuncletReturnInstr(*MBBI); + IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS; + } + + if (IsSwiftCoroPartialReturn) { + // The partial-return intrin/instr requires the swiftcoro cc + if (MF.getFunction().getCallingConv() != CallingConv::SwiftCoro) + report_fatal_error("llvm.ret.popless requires swiftcorocc"); + assert(MBBI->getOpcode() == AArch64::RET_POPLESS); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::RET_ReallyLR)) + .setMIFlag(MachineInstr::FrameDestroy); + MBB.erase(MBBI); } MachineBasicBlock::iterator EpilogStartI = MBB.end(); @@ -2364,6 +2447,39 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (Info.getReg() != AArch64::LR) continue; MachineBasicBlock::iterator TI = MBB.getFirstTerminator(); + + // When we're doing a popless ret (i.e., that doesn't restore SP), we + // can't rely on the exit SP being the same as the entry, but they need + // to match for the LR auth to succeed. Instead, derive the entry SP + // from our FP (using a -16 static offset for the size of the frame + // record itself), save that into X16, and use that as the discriminator + // in an AUTIB. + if (IsSwiftCoroPartialReturn) { + const auto *TRI = Subtarget.getRegisterInfo(); + + MachineBasicBlock::iterator EpilogStartI = MBB.getFirstTerminator(); + MachineBasicBlock::iterator Begin = MBB.begin(); + while (EpilogStartI != Begin) { + --EpilogStartI; + if (!EpilogStartI->getFlag(MachineInstr::FrameDestroy)) { + ++EpilogStartI; + break; + } + if (EpilogStartI->readsRegister(AArch64::X16, TRI) || + EpilogStartI->modifiesRegister(AArch64::X16, TRI)) + report_fatal_error("unable to use x16 for popless ret LR auth"); + } + + emitFrameOffset(MBB, EpilogStartI, DL, AArch64::X16, AArch64::FP, + StackOffset::getFixed(16), TII, + MachineInstr::FrameDestroy); + BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIB), AArch64::LR) + .addUse(AArch64::LR) + .addUse(AArch64::X16) + .setMIFlag(MachineInstr::FrameDestroy); + return; + } + if (TI != MBB.end() && TI->getOpcode() == AArch64::RET_ReallyLR) { // If there is a terminator and it's a RET, we can fold AUTH into it. // Be careful to keep the implicitly returned registers. @@ -2397,6 +2513,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, AFI->setLocalStackSize(NumBytes - PrologueSaveSize); if (homogeneousPrologEpilog(MF, &MBB)) { assert(!NeedsWinCFI); + assert(!IsSwiftCoroPartialReturn); auto LastPopI = MBB.getFirstTerminator(); if (LastPopI != MBB.begin()) { auto HomogeneousEpilog = std::prev(LastPopI); @@ -2417,7 +2534,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes); // Assume we can't combine the last pop with the sp restore. 
bool CombineAfterCSRBump = false; - if (!CombineSPBump && PrologueSaveSize != 0) { + if (!CombineSPBump && PrologueSaveSize != 0 && !IsSwiftCoroPartialReturn) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION || AArch64InstrInfo::isSEHInstruction(*Pop)) @@ -2453,6 +2570,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, IsSVECalleeSave(LastPopI)) { ++LastPopI; break; + } else if (IsSwiftCoroPartialReturn) { + assert(!EmitCFI); + assert(hasFP(MF)); + fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), + NeedsWinCFI, &HasWinCFI); + // if FP-based addressing, rewrite CSR restores from SP to FP + int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset() + + AFI->getLocalStackSize(); + fixupCalleeSaveRestoreToFPBased(*LastPopI, FPOffset); } else if (CombineSPBump) fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); @@ -2472,6 +2598,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, } if (hasFP(MF) && AFI->hasSwiftAsyncContext()) { + assert(!IsSwiftCoroPartialReturn); switch (MF.getTarget().Options.SwiftAsyncFramePointer) { case SwiftAsyncFramePointerMode::DeploymentBased: // Avoid the reload as it is GOT relative, and instead fall back to the @@ -2505,6 +2632,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); + assert(!IsSwiftCoroPartialReturn); // When we are about to restore the CSRs, the CFA register is SP again. if (EmitCFI && hasFP(MF)) { @@ -2551,7 +2679,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // If we have stack realignment or variable sized objects on the stack, // restore the stack pointer from the frame pointer prior to SVE CSR // restoration. - if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) { + if (AFI->isStackRealigned() || MFI.hasVarSizedObjects() || + MFI.hasPoplessCall()) { if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { // Set SP to start of SVE callee-save area from which they can // be reloaded. The code below will deallocate the stack space @@ -2589,6 +2718,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, } if (!hasFP(MF)) { + assert(!IsSwiftCoroPartialReturn); bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the // stack pointer (but we may need to pop stack args for fastcc). @@ -2619,11 +2749,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, NumBytes = 0; } + if (IsSwiftCoroPartialReturn) + return; + // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. - if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { + if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned() || + MFI.hasPoplessCall())) { emitFrameOffset( MBB, LastPopI, DL, AArch64::SP, AArch64::FP, StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), @@ -2825,7 +2959,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( // If the FPOffset is positive, that'll always be best, as the SP/BP // will be even further away. 
UseFP = true; - } else if (MFI.hasVarSizedObjects()) { + } else if (MFI.hasVarSizedObjects() || MFI.hasPoplessCall()) { // If we have variable sized objects, we can use either FP or BP, as the // SP offset is unknown. We can use the base pointer if we have one and // FP is not preferred. If not, we're stuck with using FP. @@ -3495,9 +3629,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; bool NeedsWinCFI = needsWinCFI(MF); + bool IsSwiftCoroPartialReturn = false; - if (MBBI != MBB.end()) + if (MBBI != MBB.end()) { DL = MBBI->getDebugLoc(); + IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS; + } + + // The partial-return intrin/instr requires the swiftcoro cc + if (IsSwiftCoroPartialReturn && + MF.getFunction().getCallingConv() != CallingConv::SwiftCoro) + report_fatal_error("llvm.ret.popless requires swiftcorocc"); computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); if (homogeneousPrologEpilog(MF, &MBB)) { @@ -3510,6 +3652,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } + // If doing a partial/popless return, CSR restores are from FP, so do it last. + if (IsSwiftCoroPartialReturn) { + auto IsFPLR = [](const RegPairInfo &c) { + return c.Reg1 == AArch64::LR && c.Reg2 == AArch64::FP; + }; + auto FPLRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsFPLR); + const RegPairInfo FPLRRPI = *FPLRBegin; + FPLRBegin = std::remove_if(RegPairs.begin(), RegPairs.end(), IsFPLR); + *FPLRBegin = FPLRRPI; + } + // For performance reasons restore SVE register in increasing order auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; }; auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); @@ -5082,6 +5235,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( // Go to common code if we cannot provide sp + offset. 
if (MFI.hasVarSizedObjects() || + MFI.hasPoplessCall() || MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() || MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF)) return getFrameIndexReference(MF, FI, FrameReg); @@ -5189,6 +5343,10 @@ void AArch64FrameLowering::orderFrameObjects( const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); const MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (AFI.hasPoplessEpilogue()) + return; + std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd()); for (auto &Obj : ObjectsToAllocate) { FrameObjects[Obj].IsValid = true; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dc5471f038043..8bbf210195917 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2714,6 +2714,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::AUTH_CALL_RVMARKER) MAKE_CASE(AArch64ISD::LOADgot) MAKE_CASE(AArch64ISD::RET_GLUE) + MAKE_CASE(AArch64ISD::RET_POPLESS) MAKE_CASE(AArch64ISD::BRCOND) MAKE_CASE(AArch64ISD::CSEL) MAKE_CASE(AArch64ISD::CSINV) @@ -7869,6 +7870,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: case CallingConv::SwiftTail: + case CallingConv::SwiftCoro: case CallingConv::Tail: case CallingConv::GRAAL: if (Subtarget->isTargetWindows()) { @@ -8391,7 +8393,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( if (CallConv == CallingConv::PreserveNone) { for (const ISD::InputArg &I : Ins) { if (I.Flags.isSwiftSelf() || I.Flags.isSwiftError() || - I.Flags.isSwiftAsync()) { + I.Flags.isSwiftAsync() || I.Flags.isSwiftCoro()) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose(DiagnosticInfoUnsupported( MF.getFunction(), @@ -8855,6 +8857,18 @@ bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; } +SDValue AArch64TargetLowering::adjustReturnPopless(SDValue RetChain, + SelectionDAG &DAG) const { + if (RetChain.getOpcode() != AArch64ISD::RET_GLUE) + report_fatal_error("Unsupported aarch64 return for popless ret lowering"); + + auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); + AFI->setHasPoplessEpilogue(); + + return DAG.getNode(AArch64ISD::RET_POPLESS, SDLoc(RetChain), + MVT::Other, RetChain->ops()); +} + // Check if the value is zero-extended from i1 to i8 static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) { unsigned SizeInBits = Arg.getValueType().getSizeInBits(); @@ -9047,6 +9061,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, CallConv = CallingConv::AArch64_SVE_VectorCall; } + if (CallConv == CallingConv::SwiftCoro) + MF.getFrameInfo().setHasPoplessCall(); + if (IsTailCall) { // Check if it's really possible to do a tail call. 
IsTailCall = isEligibleForTailCallOptimization(CLI); @@ -9743,7 +9760,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, if (CallConv == CallingConv::PreserveNone) { for (const ISD::OutputArg &O : Outs) { if (O.Flags.isSwiftSelf() || O.Flags.isSwiftError() || - O.Flags.isSwiftAsync()) { + O.Flags.isSwiftAsync() || O.Flags.isSwiftCoro()) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose(DiagnosticInfoUnsupported( MF.getFunction(), diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 0d51ef2be8631..89a2f4aa05fee 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -90,6 +90,7 @@ enum NodeType : unsigned { LOADgot, // Load from automatically generated descriptor (e.g. Global // Offset Table, TLS record). RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. + RET_POPLESS, // Same as RET_GLUE, though "popless", = doesn't clean the stack. BRCOND, // Conditional branch instruction; "b.cond". CSEL, CSINV, // Conditional select invert. @@ -1104,6 +1105,9 @@ class AArch64TargetLowering : public TargetLowering { void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, SDValue &Chain) const; + SDValue adjustReturnPopless(SDValue RetChain, + SelectionDAG &DAG) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index fbeacaa6a498c..a1634ac33597f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -779,6 +779,8 @@ def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def AArch64ret_popless : SDNode<"AArch64ISD::RET_POPLESS", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, @@ -3312,6 +3314,13 @@ def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>, let isReturn = 1; } +def RET_POPLESS : Pseudo<(outs), (ins), [(AArch64ret_popless)]>, + Sched<[WriteBrReg]> { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; +} + // This is a directive-like pseudo-instruction. The purpose is to insert an // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction // (which in the usual case is a BLR). diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index d3026ca45c349..08985c038c29c 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -205,6 +205,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// The stack slot where the Swift asynchronous context is stored. int SwiftAsyncContextFrameIdx = std::numeric_limits<int>::max(); + /// Whether this function has a swift coro return that doesn't restore + /// the stack. 
+ bool HasPoplessEpilogue = false; + bool IsMTETagged = false; /// The function has Scalable Vector or Scalable Predicate register argument @@ -549,6 +553,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { } int getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; } + bool hasPoplessEpilogue() const { + return HasPoplessEpilogue; + } + void setHasPoplessEpilogue(bool PE = true) { + HasPoplessEpilogue = PE; + } + bool needsDwarfUnwindInfo(const MachineFunction &MF) const; bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index e4719b26cab52..ccae95c67fda5 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -483,6 +483,21 @@ bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv)); } +bool AArch64CallLowering::adjustReturnToPopless( + MachineIRBuilder &MIRBuilder) const { + MachineFunction &MF = MIRBuilder.getMF(); + + auto MBBI = std::prev(MIRBuilder.getMBB().end()); + assert(MBBI->getOpcode() == AArch64::RET_ReallyLR); + + auto *TII = MF.getSubtarget().getInstrInfo(); + MBBI->setDesc(TII->get(AArch64::RET_POPLESS)); + + AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); + FuncInfo->setHasPoplessEpilogue(); + return true; +} + /// Helper function to compute forwarded registers for musttail calls. Computes /// the forwarded registers, sets MBB liveness, and emits COPY instructions that /// can be used to save + restore registers later. @@ -1455,6 +1470,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; } + if (Info.CallConv == CallingConv::SwiftCoro) + MF.getFrameInfo().setHasPoplessCall(); + if (Info.SwiftErrorVReg) { MIB.addDef(AArch64::X21, RegState::Implicit); MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21)); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h index 9ae175274d5d9..34f233480b77d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h @@ -39,6 +39,8 @@ class AArch64CallLowering: public CallLowering { SmallVectorImpl<BaseArgInfo> &Outs, bool IsVarArg) const override; + bool adjustReturnToPopless(MachineIRBuilder &MIRBuilder) const override; + bool fallBackToDAGISel(const MachineFunction &MF) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 794db887bd073..fe56c5c7a2a5f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1284,6 +1284,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue &OutVal = OutVals[I]; HasSwiftSelfArg |= Out.Flags.isSwiftSelf(); HasSwiftErrorArg |= Out.Flags.isSwiftError(); + if (Out.Flags.isSwiftCoro()) + fail(DL, DAG, "WebAssembly hasn't implemented swiftcoro arguments"); if (Out.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (Out.Flags.isInAlloca()) diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index a0a26827aa09d..25eadd04e52ef 100644 --- 
a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -69,6 +69,7 @@ bool Lowerer::lower(Function &F) { case Intrinsic::coro_id: case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: + case Intrinsic::coro_id_retcon_once_dynamic: case Intrinsic::coro_id_async: II->replaceAllUsesWith(ConstantTokenNone::get(Context)); break; diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index d1887980fb3bc..e2db4fdbe38f6 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -67,6 +67,7 @@ class BaseCloner { Builder(OrigF.getContext()), TTI(TTI), NewF(NewF), ActiveSuspend(ActiveSuspend) { assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic || Shape.ABI == ABI::Async); assert(NewF && "need existing function for continuation"); assert(ActiveSuspend && "need active suspend point for continuation"); @@ -86,6 +87,7 @@ class BaseCloner { AnyCoroSuspendInst *ActiveSuspend, TargetTransformInfo &TTI) { assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic || Shape.ABI == ABI::Async); TimeTraceScope FunctionScope("BaseCloner"); diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 5375448d2d2e2..3093323da67b8 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -209,6 +209,7 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) { break; case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: + case Intrinsic::coro_id_retcon_once_dynamic: case Intrinsic::coro_id_async: F.setPresplitCoroutine(); break; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index d08170438bd2a..d65edb132165d 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -874,6 +874,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, std::optional<Align> MaxFrameAlignment; if (Shape.ABI == coro::ABI::Async) MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment(); + if (Shape.ABI == coro::ABI::RetconOnceDynamic) + MaxFrameAlignment = Shape.RetconLowering.getStorageAlignment(); FrameTypeBuilder B(C, DL, MaxFrameAlignment); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); @@ -962,6 +964,19 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, B.getStructAlign() <= Id->getStorageAlignment()); break; } + case coro::ABI::RetconOnceDynamic: { + // In the dynamic retcon.once ABI, the frame is always inline in the + // storage. + Shape.RetconLowering.IsFrameInlineInStorage = true; + Shape.RetconLowering.ContextSize = + alignTo(Shape.FrameSize, Shape.RetconLowering.StorageAlignment); + if (Shape.RetconLowering.StorageAlignment < Shape.FrameAlign) { + report_fatal_error( + "The alignment requirement of frame variables cannot be higher than " + "the alignment of the coro function context"); + } + break; + } case coro::ABI::Async: { Shape.AsyncLowering.FrameOffset = alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign); @@ -1188,7 +1203,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { // retcon and retcon.once lowering assumes all uses have been sunk.
if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || - Shape.ABI == coro::ABI::Async) { + Shape.ABI == coro::ABI::Async || + Shape.ABI == coro::ABI::RetconOnceDynamic) { // If we found any allocas, replace all of their remaining uses with Geps. Builder.SetInsertPoint(SpillBlock, SpillBlock->begin()); for (const auto &P : FrameData.Allocas) { @@ -2078,7 +2094,8 @@ void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) { const DominatorTree DT(F); if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && - Shape.ABI != coro::ABI::RetconOnce) + Shape.ABI != coro::ABI::RetconOnce && + Shape.ABI != coro::ABI::RetconOnceDynamic) sinkLifetimeStartMarkers(F, Shape, Checker, DT); // All values (that are not allocas) that needs to be spilled to the frame. @@ -2098,7 +2115,8 @@ void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) { LLVM_DEBUG(dumpSpills("Spills", Spills)); if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || - Shape.ABI == coro::ABI::Async) + Shape.ABI == coro::ABI::Async || + Shape.ABI == coro::ABI::RetconOnceDynamic) sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas); // Build frame diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index f9a6c70fedc2d..a1055d3d829ce 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -157,7 +157,8 @@ static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) { static void maybeFreeRetconStorage(IRBuilder<> &Builder, const coro::Shape &Shape, Value *FramePtr, CallGraph *CG) { - assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); + assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic); if (Shape.RetconLowering.IsFrameInlineInStorage) return; @@ -239,7 +240,8 @@ static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, // In unique continuation lowering, the continuations always return void. // But we may have implicitly allocated storage. - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); auto *CoroEnd = cast<CoroEndInst>(End); auto *RetTy = Shape.getResumeFunctionType()->getReturnType(); @@ -368,6 +370,7 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, // In continuation-lowering, this frees the continuation storage. case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); break; } @@ -469,6 +472,7 @@ static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, /// This assumes that the builder has a meaningful insertion point. void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() { assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic || Shape.ABI == coro::ABI::Async); auto NewS = VMap[ActiveSuspend]; @@ -538,6 +542,7 @@ void coro::BaseCloner::replaceCoroSuspends() { // spilled. 
case coro::ABI::RetconOnce: case coro::ABI::Retcon: + case coro::ABI::RetconOnceDynamic: return; } @@ -707,14 +712,16 @@ void coro::BaseCloner::replaceEntryBlock() { } case coro::ABI::Async: case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { // In continuation ABIs, we want to branch to immediately after the // active suspend point. Earlier phases will have put the suspend in its // own basic block, so just thread our jump directly to its successor. assert((Shape.ABI == coro::ABI::Async && isa<CoroSuspendAsyncInst>(ActiveSuspend)) || ((Shape.ABI == coro::ABI::Retcon || - Shape.ABI == coro::ABI::RetconOnce) && + Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic) && isa<CoroSuspendRetconInst>(ActiveSuspend))); auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]); auto Branch = cast<BranchInst>(MappedCS->getNextNode()); @@ -779,7 +786,8 @@ Value *coro::BaseCloner::deriveNewFramePointer() { } // In continuation-lowering, the argument is the opaque storage. case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { Argument *NewStorage = &*NewF->arg_begin(); auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext()); @@ -873,6 +881,18 @@ static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context, Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); } +static void eraseIntrinsicRetPoplessBefore(ReturnInst *Return) { + if (Return == &*Return->getParent()->begin()) + return; + auto *Prev = &*std::prev(Return->getIterator()); + auto *Intr = dyn_cast<IntrinsicInst>(Prev); + if (!Intr) + return; + if (Intr->getIntrinsicID() != Intrinsic::ret_popless) + return; + Intr->eraseFromParent(); +} + /// Clone the body of the original function into a resume function of /// some sort. void coro::BaseCloner::create() { @@ -997,6 +1017,11 @@ void coro::BaseCloner::create() { /*NoAlias=*/true); break; + case coro::ABI::RetconOnceDynamic: + // If we have a continuation prototype, just use its attributes, + // full-stop. + NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); + break; } switch (Shape.ABI) { @@ -1006,9 +1031,12 @@ void coro::BaseCloner::create() { // this is fine because we can't suspend twice. case coro::ABI::Switch: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: // Remove old returns. - for (ReturnInst *Return : Returns) + for (ReturnInst *Return : Returns) { + eraseIntrinsicRetPoplessBefore(Return); changeToUnreachable(Return); + } break; // With multi-suspend continuations, we'll already have eliminated the @@ -1063,6 +1091,13 @@ void coro::BaseCloner::create() { if (OldVFrame != NewVFrame) OldVFrame->replaceAllUsesWith(NewVFrame); + // Remap allocator pointer. + if (Shape.ABI == coro::ABI::RetconOnceDynamic) { + Value *OldAllocatorPointer = VMap[Shape.RetconLowering.Allocator]; + Argument *NewAllocatorPointer = &*NewF->getArg(1); + OldAllocatorPointer->replaceAllUsesWith(NewAllocatorPointer); + } + // All uses of the arguments should have been resolved by this point, // so we can safely remove the dummy values. for (Instruction *DummyArg : DummyArgs) { @@ -1081,6 +1116,7 @@ void coro::BaseCloner::create() { case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: // Replace uses of the active suspend with the corresponding // continuation-function arguments. 
assert(ActiveSuspend != nullptr && @@ -1139,9 +1175,26 @@ static TypeSize getFrameSizeForShape(coro::Shape &Shape) { return DL.getTypeAllocSize(Shape.FrameTy); } +static void updateCoroFuncPointerContextSize(coro::Shape &Shape) { + assert(Shape.ABI == coro::ABI::RetconOnceDynamic); + + auto *FuncPtrStruct = cast<ConstantStruct>( + Shape.RetconLowering.CoroFuncPointer->getInitializer()); + auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0); + auto *OrigContextSize = FuncPtrStruct->getOperand(1); + auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(), + Shape.RetconLowering.ContextSize); + auto *NewFuncPtrStruct = ConstantStruct::get( + FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize); + + Shape.RetconLowering.CoroFuncPointer->setInitializer(NewFuncPtrStruct); +} + static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); + if (Shape.ABI == coro::ABI::RetconOnceDynamic) + updateCoroFuncPointerContextSize(Shape); for (CoroAlignInst *CA : Shape.CoroAligns) { CA->replaceAllUsesWith( @@ -1200,6 +1253,7 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) { case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: CoroBegin->replaceAllUsesWith(PoisonValue::get(CoroBegin->getType())); break; } @@ -1795,7 +1849,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape, void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl<Function *> &Clones, TargetTransformInfo &TTI) { - assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); + assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic); assert(Clones.empty()); // Reset various things that the optimizer might have decided it @@ -1805,10 +1860,10 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, F.removeRetAttr(Attribute::NonNull); // Allocate the frame. - auto *Id = Shape.getRetconCoroId(); + auto *Id = Shape.CoroBegin->getId(); Value *RawFramePtr; if (Shape.RetconLowering.IsFrameInlineInStorage) { - RawFramePtr = Id->getStorage(); + RawFramePtr = Shape.RetconLowering.Storage; } else { IRBuilder<> Builder(Id); @@ -1824,7 +1879,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); // Stash the allocated frame pointer in the continuation storage. - Builder.CreateStore(RawFramePtr, Id->getStorage()); + Builder.CreateStore(RawFramePtr, Shape.RetconLowering.Storage); } // Map all uses of llvm.coro.begin to the allocated frame pointer. @@ -1901,7 +1956,37 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, RetV = Builder.CreateInsertValue(RetV, Phi, ValueIdx++); } - Builder.CreateRet(RetV); + if (Shape.ABI == coro::ABI::RetconOnceDynamic && + F.getCallingConv() == CallingConv::SwiftCoro) { + // %retval = ... 
+ // %null_allocator = icmp eq ptr %allocator, null + // br i1 %null_allocator, label %coro.return.popless, label %coro.return.normal + // coro.return.popless: + // musttail call void @llvm.ret.popless() + // ret %retval + // coro.return.normal: + // ret %retval + auto *NullAllocator = Builder.CreateCmp( + CmpInst::Predicate::ICMP_EQ, Shape.RetconLowering.Allocator, + ConstantPointerNull::get( + cast<PointerType>(Shape.RetconLowering.Allocator->getType()))); + auto *PoplessReturnBB = BasicBlock::Create( + F.getContext(), "coro.return.popless", &F, NewSuspendBB); + auto *NormalReturnBB = BasicBlock::Create( + F.getContext(), "coro.return.normal", &F, NewSuspendBB); + Builder.CreateCondBr(NullAllocator, PoplessReturnBB, NormalReturnBB); + IRBuilder<> PoplessBuilder(PoplessReturnBB); + auto &Context = F.getContext(); + auto *VoidTy = Type::getVoidTy(Context); + auto *RetPopless = + PoplessBuilder.CreateIntrinsic(VoidTy, Intrinsic::ret_popless, {}); + RetPopless->setTailCallKind(CallInst::TailCallKind::TCK_MustTail); + PoplessBuilder.CreateRet(RetV); + IRBuilder<> NormalBuilder(NormalReturnBB); + NormalBuilder.CreateRet(RetV); + } else { + Builder.CreateRet(RetV); + } } // Branch to the return block. @@ -2039,6 +2124,7 @@ static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit( case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: // Each clone in the Async/Retcon lowering references of the other clones. // Let the LazyCallGraph know about all of them at once. if (!Clones.empty()) @@ -2137,6 +2223,8 @@ CreateNewABI(Function &F, coro::Shape &S, return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback); case coro::ABI::RetconOnce: return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback); + case coro::ABI::RetconOnceDynamic: + return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback); } llvm_unreachable("Unknown ABI"); } diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 71f2bdd50f210..5092c77d3320a 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -325,6 +325,23 @@ void coro::Shape::analyze(Function &F, AsyncLowering.AsyncCC = F.getCallingConv(); break; } + case Intrinsic::coro_id_retcon_once_dynamic: { + auto ContinuationId = cast<CoroIdRetconOnceDynamicInst>(Id); + ABI = coro::ABI::RetconOnceDynamic; + auto Prototype = ContinuationId->getPrototype(); + RetconLowering.ResumePrototype = Prototype; + RetconLowering.Alloc = ContinuationId->getAllocFunction(); + RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + RetconLowering.Storage = ContinuationId->getStorage(); + RetconLowering.Allocator = ContinuationId->getAllocator(); + RetconLowering.ReturnBlock = nullptr; + RetconLowering.IsFrameInlineInStorage = false; + RetconLowering.ContextSize = 0; + RetconLowering.StorageSize = ContinuationId->getStorageSize(); + RetconLowering.StorageAlignment = ContinuationId->getStorageAlignment(); + RetconLowering.CoroFuncPointer = ContinuationId->getCoroFunctionPointer(); + break; + } case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: { ABI = IntrID == Intrinsic::coro_id_retcon ?
coro::ABI::Retcon @@ -335,6 +352,7 @@ void coro::Shape::analyze(Function &F, RetconLowering.ResumePrototype = Prototype; RetconLowering.Alloc = ContinuationId->getAllocFunction(); RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + RetconLowering.Storage = ContinuationId->getStorage(); RetconLowering.ReturnBlock = nullptr; RetconLowering.IsFrameInlineInStorage = false; RetconLowering.TypeId = ContinuationId->getTypeId(); @@ -396,7 +414,8 @@ void coro::SwitchABI::init() { void coro::AsyncABI::init() { assert(Shape.ABI == coro::ABI::Async); } void coro::AnyRetconABI::init() { - assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); + assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::RetconOnceDynamic); { // Determine the result value types, and make sure they match up with // the values passed to the suspends. @@ -509,17 +528,25 @@ Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size, llvm_unreachable("can't allocate memory in coro switch-lowering"); case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { + unsigned sizeParamIndex = 0; + SmallVector<Value *, 2> Args; + if (ABI == coro::ABI::RetconOnceDynamic) { + sizeParamIndex = 1; + Args.push_back(RetconLowering.Allocator); + } auto Alloc = RetconLowering.Alloc; - Size = Builder.CreateIntCast(Size, - Alloc->getFunctionType()->getParamType(0), - /*is signed*/ false); - ConstantInt* TypeId = RetconLowering.TypeId; - CallInst *Call; - if (TypeId == nullptr) - Call = Builder.CreateCall(Alloc, Size); - else - Call = Builder.CreateCall(Alloc, {Size, TypeId}); + Size = Builder.CreateIntCast( + Size, Alloc->getFunctionType()->getParamType(sizeParamIndex), + /*is signed*/ false); + Args.push_back(Size); + if (ABI == coro::ABI::RetconOnce) { + ConstantInt *TypeId = RetconLowering.TypeId; + if (TypeId != nullptr) + Args.push_back(TypeId); + } + auto *Call = Builder.CreateCall(Alloc, Args); propagateCallAttrsFromCallee(Call, Alloc); addCallToCallGraph(CG, Call, Alloc); return Call; @@ -537,11 +564,19 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, llvm_unreachable("can't allocate memory in coro switch-lowering"); case coro::ABI::Retcon: - case coro::ABI::RetconOnce: { + case coro::ABI::RetconOnce: + case coro::ABI::RetconOnceDynamic: { auto Dealloc = RetconLowering.Dealloc; - Ptr = Builder.CreateBitCast(Ptr, - Dealloc->getFunctionType()->getParamType(0)); - auto *Call = Builder.CreateCall(Dealloc, Ptr); + SmallVector<Value *, 2> Args; + unsigned sizeParamIndex = 0; + if (ABI == coro::ABI::RetconOnceDynamic) { + sizeParamIndex = 1; + Args.push_back(RetconLowering.Allocator); + } + Ptr = Builder.CreateBitCast( + Ptr, Dealloc->getFunctionType()->getParamType(sizeParamIndex)); + Args.push_back(Ptr); + auto *Call = Builder.CreateCall(Dealloc, Args); propagateCallAttrsFromCallee(Call, Dealloc); addCallToCallGraph(CG, Call, Dealloc); return; @@ -567,7 +602,7 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, /// Check that the given value is a well-formed prototype for the /// llvm.coro.id.retcon.* intrinsics. 
-static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) { +static void checkWFRetconPrototype(const AnyCoroIdInst *I, Value *V) { auto F = dyn_cast<Function>(V->stripPointerCasts()); if (!F) fail(I, "llvm.coro.id.retcon.* prototype not a Function", V); @@ -594,7 +629,7 @@ static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) { fail(I, "llvm.coro.id.retcon prototype return type must be same as" "current function return type", F); } else { - // No meaningful validation to do here for llvm.coro.id.unique.once. + // No meaningful validation to do here for llvm.coro.id.retcon.once. } if (FT->getNumParams() == 0 || !FT->getParamType(0)->isPointerTy()) @@ -654,6 +689,29 @@ void AnyCoroIdRetconInst::checkWellFormed() const { checkWFDealloc(this, getArgOperand(DeallocArg)); } +static void checkCoroFuncPointer(const Instruction *I, Value *V) { + auto *CoroFuncPtrAddr = dyn_cast<GlobalVariable>(V->stripPointerCasts()); + if (!CoroFuncPtrAddr) + fail(I, "coro.id.retcon.once.dynamic coro function pointer not a global", + V); +} + +void CoroIdRetconOnceDynamicInst::checkWellFormed() const { + checkConstantInt( + this, getArgOperand(SizeArg), + "size argument to coro.id.retcon.once.dynamic must be constant"); + checkConstantInt( + this, getArgOperand(AlignArg), + "alignment argument to coro.id.retcon.once.dynamic must be constant"); + checkConstantInt(this, getArgOperand(StorageArg), + "storage argument offset to coro.id.retcon.once.dynamic " + "must be constant"); + checkCoroFuncPointer(this, getArgOperand(CoroFuncPtrArg)); + checkWFRetconPrototype(this, getArgOperand(PrototypeArg)); + checkWFAlloc(this, getArgOperand(AllocArg)); + checkWFDealloc(this, getArgOperand(DeallocArg)); +} + static void checkAsyncFuncPointer(const Instruction *I, Value *V) { auto *AsyncFuncPtrAddr = dyn_cast<GlobalVariable>(V->stripPointerCasts()); if (!AsyncFuncPtrAddr) diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp index b3e5b7fa6e0b5..f27640141379b 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -435,7 +435,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape, // code. 
bool ShouldUseLifetimeStartInfo = (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && - Shape.ABI != coro::ABI::RetconOnce); + Shape.ABI != coro::ABI::RetconOnce && + Shape.ABI != coro::ABI::RetconOnceDynamic); AllocaUseVisitor Visitor{AI->getDataLayout(), DT, Shape, Checker, ShouldUseLifetimeStartInfo}; Visitor.visitPtr(*AI); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 18af0972bc36d..8be1dbe49696f 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1011,6 +1011,7 @@ Function *CodeExtractor::constructFunctionDeclaration( case Attribute::SwiftError: case Attribute::SwiftSelf: case Attribute::SwiftAsync: + case Attribute::SwiftCoro: case Attribute::ZExt: case Attribute::ImmArg: case Attribute::ByRef: diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7f53aa7d4f73d..29ffae9b53c51 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1505,6 +1505,12 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize) return false; + // Similarly for llvm.ret.popless (and likely generalizable to all musttail + // intrinsics). + if (auto *CB = dyn_cast<CallBase>(I)) + if (CB->getIntrinsicID() == Intrinsic::ret_popless) + return false; + // It's also unsafe/illegal to hoist an instruction above its instruction // operands BasicBlock *BB = I->getParent(); diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index 7dd86a8c0eb16..affcaef4593cd 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -292,6 +292,12 @@ define void @f50(ptr swiftself %0) ret void; } +; CHECK: define void @swiftcoro(ptr swiftcoro %0) +define void @swiftcoro(ptr swiftcoro %0) +{ + ret void; +} + ; CHECK: define i32 @f51(ptr swifterror %0) define i32 @f51(ptr swifterror %0) { diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index d9e594abcd50c..afe247aae1549 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -564,6 +564,10 @@ declare riscv_vls_cc(32768) void @riscv_vls_cc_32768() ; CHECK: declare riscv_vls_cc(32768) void @riscv_vls_cc_32768() declare riscv_vls_cc(65536) void @riscv_vls_cc_65536() ; CHECK: declare riscv_vls_cc(65536) void @riscv_vls_cc_65536() +declare cc124 void @f.cc124() +; CHECK: declare swiftcorocc void @f.cc124() +declare swiftcorocc void @f.swiftcorocc() +; CHECK: declare swiftcorocc void @f.swiftcorocc() declare cc1023 void @f.cc1023() ; CHECK: declare cc1023 void @f.cc1023() @@ -626,6 +630,8 @@ declare void @f.param.swiftasync(ptr swiftasync) ; CHECK: declare void @f.param.swiftasync(ptr swiftasync) declare void @f.param.swifterror(ptr swifterror) ; CHECK: declare void @f.param.swifterror(ptr swifterror) +declare void @f.param.swiftcoro(ptr swiftcoro) +; CHECK: declare void @f.param.swiftcoro(ptr swiftcoro) declare void @f.param.allocalign(i32 allocalign) ; CHECK: declare void @f.param.allocalign(i32 allocalign) declare void @f.param.allocptr(ptr allocptr) diff --git a/llvm/test/CodeGen/AArch64/swiftcoro.ll b/llvm/test/CodeGen/AArch64/swiftcoro.ll new file mode 100644 index 0000000000000..d03201d8bd013 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/swiftcoro.ll @@ -0,0 +1,70 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck 
--check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTARM64_32 %s + +; Parameter with swiftcoro should be allocated to x23. +; CHECK-LABEL: swiftcoro_param: +; CHECK: mov x0, x23 +; CHECK-NEXT: ret +define ptr @swiftcoro_param(ptr swiftcoro %addr0) { + ret ptr %addr0 +} + +; Check that x23 is used to pass a swiftcoro argument. +; CHECK-LABEL: call_swiftcoro: +; CHECK: mov x23, x0 +; CHECK: bl {{_?}}swiftcoro_param +; CHECK: ret +define ptr @call_swiftcoro(ptr %arg) { + %res = call ptr @swiftcoro_param(ptr swiftcoro %arg) + ret ptr %res +} + +; x23 should be saved by the callee even if used for swiftcoro +; CHECK-LABEL: swiftcoro_clobber: +; CHECK: {{stp|str}} {{.*}}x23{{.*}}sp +; ... +; CHECK: {{ldp|ldr}} {{.*}}x23{{.*}}sp +; CHECK: ret +define ptr @swiftcoro_clobber(ptr swiftcoro %addr0) { + call void asm sideeffect "", "~{x23}"() + ret ptr %addr0 +} + +; Demonstrate that we do not need any movs when calling multiple functions +; with swiftcoro argument. +; CHECK-LABEL: swiftcoro_passthrough: +; OPT-NOT: mov{{.*}}x23 +; OPT: bl {{_?}}swiftcoro_param +; OPT-NOT: mov{{.*}}x23 +; OPT-NEXT: bl {{_?}}swiftcoro_param +; OPT: ret +define void @swiftcoro_passthrough(ptr swiftcoro %addr0) { + call ptr @swiftcoro_param(ptr swiftcoro %addr0) + call ptr @swiftcoro_param(ptr swiftcoro %addr0) + ret void +} + +; We can use a tail call if the callee swiftcoro is the same as the caller one. +; This should also work with fast-isel. +; CHECK-LABEL: swiftcoro_tail: +; OPTAARCH64: b {{_?}}swiftcoro_param +; OPTAARCH64-NOT: ret +; OPTARM64_32: b {{_?}}swiftcoro_param +define ptr @swiftcoro_tail(ptr swiftcoro %addr0) { + call void asm sideeffect "", "~{x23}"() + %res = musttail call ptr @swiftcoro_param(ptr swiftcoro %addr0) + ret ptr %res +} + +; We can not use a tail call if the callee swiftcoro is not the same as the +; caller one. +; CHECK-LABEL: swiftcoro_notail: +; CHECK: mov x23, x0 +; CHECK: bl {{_?}}swiftcoro_param +; CHECK: ret +define ptr @swiftcoro_notail(ptr swiftcoro %addr0, ptr %addr1) nounwind { + %res = tail call ptr @swiftcoro_param(ptr swiftcoro %addr1) + ret ptr %res +} diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll new file mode 100644 index 0000000000000..117cbfd04a5df --- /dev/null +++ b/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \ +; RUN: | FileCheck %s + +; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \ +; RUN: -global-isel -global-isel-abort=2 | FileCheck %s + +declare i64 @g(ptr, ptr) + +define i64 @test_call_to_swiftcoro() #0 { +; CHECK-LABEL: test_call_to_swiftcoro: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: stp x26, x25, [sp, #-32]! 
; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _test_call +; CHECK-NEXT: sub x0, x29, #24 +; CHECK-NEXT: sub x1, x29, #32 +; CHECK-NEXT: bl _g +; CHECK-NEXT: sub sp, x29, #16 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call swiftcorocc i64 @test_call() + %v4 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v4 +} + +define i64 @test_call_to_normal() #0 { +; CHECK-LABEL: test_call_to_normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _test_call_normal +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @test_call_normal() + %v4 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v4 +} + +define swiftcorocc i64 @test_call() #0 { +; CHECK-LABEL: test_call: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v3 +} + +define i64 @test_call_normal() #0 { +; CHECK-LABEL: test_call_normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: retab + %v1 = alloca 
i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + ret i64 %v3 +} + + +attributes #0 = { "ptrauth-returns" } diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll new file mode 100644 index 0000000000000..3c6fb76d905e4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -o - %s | FileCheck %s --check-prefixes=CHECK,SDISEL +; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL + +declare i64 @g(ptr, ptr) + +define swiftcorocc i64 @test_intrin_basic() #0 { +; CHECK-LABEL: test_intrin_basic: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: add x16, x29, #16 +; CHECK-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload +; CHECK-NEXT: autib x30, x16 +; CHECK-NEXT: ret + musttail call void @llvm.ret.popless() + ret i64 0 +} + +define swiftcorocc i64 @test_intrin() #0 { +; CHECK-LABEL: test_intrin: +; CHECK: ; %bb.0: +; CHECK-NEXT: pacibsp +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #32 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w25, -24 +; CHECK-NEXT: .cfi_offset w26, -32 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: bl _g +; CHECK-NEXT: cbz x0, LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %else +; CHECK-NEXT: add x16, x29, #16 +; CHECK-NEXT: ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload +; CHECK-NEXT: autib x30, x16 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_2: ; %then +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; CHECK-NEXT: retab + %v1 = alloca i64 + %v2 = alloca i64 + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + %c = icmp eq i64 %v3, 0 + br i1 %c, label %then, label %else +then: + ret i64 %v3 +else: + musttail call void @llvm.ret.popless() + ret i64 %v3 +} + +define swiftcorocc i64 @test_vla(i32 %n) #0 { +; SDISEL-LABEL: test_vla: +; SDISEL: ; %bb.0: +; SDISEL-NEXT: pacibsp +; SDISEL-NEXT: stp x26, x25, [sp, #-32]! 
; 16-byte Folded Spill +; SDISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; SDISEL-NEXT: add x29, sp, #16 +; SDISEL-NEXT: .cfi_def_cfa w29, 16 +; SDISEL-NEXT: .cfi_offset w30, -8 +; SDISEL-NEXT: .cfi_offset w29, -16 +; SDISEL-NEXT: .cfi_offset w25, -24 +; SDISEL-NEXT: .cfi_offset w26, -32 +; SDISEL-NEXT: ; kill: def $w0 killed $w0 def $x0 +; SDISEL-NEXT: ubfiz x8, x0, #3, #32 +; SDISEL-NEXT: add x8, x8, #15 +; SDISEL-NEXT: and x8, x8, #0xffffffff0 +; SDISEL-NEXT: mov x9, sp +; SDISEL-NEXT: sub x0, x9, x8 +; SDISEL-NEXT: mov sp, x0 +; SDISEL-NEXT: mov x9, sp +; SDISEL-NEXT: sub x1, x9, x8 +; SDISEL-NEXT: mov sp, x1 +; SDISEL-NEXT: ; InlineAsm Start +; SDISEL-NEXT: ; InlineAsm End +; SDISEL-NEXT: bl _g +; SDISEL-NEXT: cbz x0, LBB2_2 +; SDISEL-NEXT: ; %bb.1: ; %else +; SDISEL-NEXT: add x16, x29, #16 +; SDISEL-NEXT: ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload +; SDISEL-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload +; SDISEL-NEXT: autib x30, x16 +; SDISEL-NEXT: ret +; SDISEL-NEXT: LBB2_2: ; %then +; SDISEL-NEXT: sub sp, x29, #16 +; SDISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; SDISEL-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; SDISEL-NEXT: retab +; +; GISEL-LABEL: test_vla: +; GISEL: ; %bb.0: +; GISEL-NEXT: pacibsp +; GISEL-NEXT: stp x26, x25, [sp, #-32]! ; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: add x29, sp, #16 +; GISEL-NEXT: .cfi_def_cfa w29, 16 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w25, -24 +; GISEL-NEXT: .cfi_offset w26, -32 +; GISEL-NEXT: mov x8, sp +; GISEL-NEXT: mov w9, w0 +; GISEL-NEXT: lsl x9, x9, #3 +; GISEL-NEXT: add x9, x9, #15 +; GISEL-NEXT: and x9, x9, #0xfffffffffffffff0 +; GISEL-NEXT: sub x0, x8, x9 +; GISEL-NEXT: mov sp, x0 +; GISEL-NEXT: mov x8, sp +; GISEL-NEXT: sub x1, x8, x9 +; GISEL-NEXT: mov sp, x1 +; GISEL-NEXT: ; InlineAsm Start +; GISEL-NEXT: ; InlineAsm End +; GISEL-NEXT: bl _g +; GISEL-NEXT: cbz x0, LBB2_2 +; GISEL-NEXT: ; %bb.1: ; %else +; GISEL-NEXT: add x16, x29, #16 +; GISEL-NEXT: ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x29, x30, [x29] ; 16-byte Folded Reload +; GISEL-NEXT: autib x30, x16 +; GISEL-NEXT: ret +; GISEL-NEXT: LBB2_2: ; %then +; GISEL-NEXT: sub sp, x29, #16 +; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x26, x25, [sp], #32 ; 16-byte Folded Reload +; GISEL-NEXT: retab + %v1 = alloca i64, i32 %n + %v2 = alloca i64, i32 %n + call void asm sideeffect "", "~{x25},~{x26}"() + %v3 = call i64 @g(ptr %v1, ptr %v2) + %c = icmp eq i64 %v3, 0 + br i1 %c, label %then, label %else +then: + ret i64 %v3 +else: + musttail call void @llvm.ret.popless() + ret i64 %v3 +} + +attributes #0 = { "ptrauth-returns" } diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll new file mode 100644 index 0000000000000..786116a833265 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split)' -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "arm64-apple-macos99.99" + + +@func_cfp = constant <{ i32, i32 }> + <{ i32 trunc ( + i64 sub ( + i64 ptrtoint (ptr @func to i64), + i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @func_cfp, i32 0, i32 1) to i64) + ) + to i32), + i32 64 +}> + + 
+%func_int = type <{ i64 }> +%func_obj = type <{ %func_int, ptr }> +%func_guts = type <{ %func_obj }> +%func_impl = type <{ %func_guts }> +%func_self = type <{ %func_impl }> + +declare swiftcorocc void @func_continuation_prototype(ptr noalias, ptr) + +; CHECK-LABEL: @func.resume.0( +; CHECK-SAME: ptr noalias %0, +; CHECK-SAME: ptr %1 +; CHECK-SAME: ) { +; CHECK: coro.return.popless: +; CHECK-NEXT: unreachable +; CHECK: coro.return.normal: +; CHECK-NEXT: unreachable +; CHECK: } + +define swiftcorocc { ptr, ptr } @func(ptr noalias %buffer, ptr %allocator, ptr nocapture swiftself dereferenceable(16) %2) { +entry: + %3 = call token @llvm.coro.id.retcon.once.dynamic( + i32 -1, + i32 16, + ptr @func_cfp, + ptr %allocator, + ptr %buffer, + ptr @func_continuation_prototype, + ptr @allocate, + ptr @deallocate + ) + %handle = call ptr @llvm.coro.begin(token %3, ptr null) + %yielded = getelementptr inbounds %func_self, ptr %2, i32 0, i32 0 + call ptr (...) @llvm.coro.suspend.retcon.p0(ptr %yielded) + br i1 false, label %unwind, label %normal + +normal: + br label %coro.end + +unwind: + br label %coro.end + +coro.end: + %8 = call i1 @llvm.coro.end(ptr %handle, i1 false, token none) + unreachable +} + +declare swiftcorocc noalias ptr @allocate(i32 %size) +declare void @deallocate(ptr %ptr) diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll new file mode 100644 index 0000000000000..6f491584c877e --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll @@ -0,0 +1,159 @@ +; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),module(coro-cleanup)' -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "arm64-apple-macos99.99" + +; CHECK-LABEL: %func.Frame = type { ptr } +; CHECK-LABEL: %big_types.Frame = type { <32 x i8>, [16 x i8], i64, ptr, %Integer8 } + +; CHECK-LABEL: @func_cfp = constant <{ i32, i32 }> +; CHECK-SAME: <{ +; CHECK-SAME: i32 trunc +; CHECK-SAME: i32 16 +; CHECK-SAME: }> +@func_cfp = constant <{ i32, i32 }> + <{ i32 trunc ( ; offset to @func from @func_cfp + i64 sub ( + i64 ptrtoint (ptr @func to i64), + i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @func_cfp, i32 0, i32 1) to i64) + ) + to i32), + i32 64 ; frame size +}> + +@big_types_cfp = constant <{ i32, i32 }> + <{ i32 trunc ( ; offset to @func from @big_types_cfp + i64 sub ( + i64 ptrtoint (ptr @big_types to i64), + i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @big_types_cfp, i32 0, i32 1) to i64) + ) + to i32), + i32 64 ; frame size +}> + + +; CHECK-LABEL: @func( +; CHECK-SAME: ptr %buffer, +; CHECK-SAME: ptr %allocator +; CHECK-SAME: ptr %array +; CHECK-SAME: ) { +; CHECK: %array.spill.addr = getelementptr inbounds %func.Frame, ptr %buffer, i32 0, i32 0 +; CHECK: store ptr %array, ptr %array.spill.addr +; CHECK: %load = load i32, ptr %array +; CHECK: %load.positive = icmp sgt i32 %load, 0 +; CHECK: [[CONTINUATION:%.*]] = select i1 %load.positive +; CHECK-SAME: ptr @func.resume.0 +; CHECK-SAME: ptr @func.resume.1 +; CHECK: [[RETVAL_1:%.*]] = insertvalue { ptr, i32 } poison, ptr [[CONTINUATION:%.*]], 0 +; CHECK: [[RETVAL_2:%.*]] = insertvalue { ptr, i32 } [[RETVAL_1:%.*]], i32 %load, 1 +; CHECK: [[DONT_POP:%.*]] = icmp eq ptr %allocator, null +; CHECK: br i1 [[DONT_POP:%[^,]+]], +; CHECK-SAME: label %coro.return.popless +; CHECK-SAME: label %coro.return.normal +; CHECK: coro.return.popless: +; CHECK: musttail call void @llvm.ret.popless() +; 
CHECK: ret { ptr, i32 } [[RETVAL_2:%.*]] +; CHECK: coro.return.normal: +; CHECK: ret { ptr, i32 } [[RETVAL_2:%.*]] +; CHECK: } + +; CHECK-LABEL: @func.resume.0( +; CHECK-SAME: ptr [[BUFFER:[^,]+]] +; CHECK-SAME: ptr [[ALLOCATOR:%[^)]+]] +; CHECK-SAME: ) { +; CHECK: %array.reload.addr3 = getelementptr inbounds %func.Frame, ptr [[BUFFER:%.*]], i32 0, i32 0 +; CHECK: %array.reload4 = load ptr, ptr %array.reload.addr3 +; CHECK: store i32 0, ptr %array.reload4 +; CHECK: ret void +; CHECK: } + +; CHECK-LABEL: @func.resume.1( +; CHECK-SAME: ptr [[BUFFER:[^,]+]] +; CHECK-SAME: ptr [[ALLOCATOR:%[^)]+]] +; CHECK-SAME: ) { +; CHECK: %array.reload.addr = getelementptr inbounds %func.Frame, ptr [[BUFFER:%.*]], i32 0, i32 0 +; CHECK: %array.reload = load ptr, ptr %array.reload.addr +; CHECK: store i32 10, ptr %array.reload +; CHECK: ret void +; CHECK: } +define swiftcorocc {ptr, i32} @func(ptr %buffer, ptr %allocator, ptr %array) { +entry: + %id = call token @llvm.coro.id.retcon.once.dynamic( + i32 -1, + i32 16, + ptr @func_cfp, + ptr %allocator, + ptr %buffer, + ptr @continuation_prototype, + ptr @allocate, + ptr @deallocate + ) + %handle = call ptr @llvm.coro.begin(token %id, ptr null) + %load = load i32, ptr %array + %load.positive = icmp sgt i32 %load, 0 + br i1 %load.positive, label %positive, label %negative + +positive: + call ptr (...) @llvm.coro.suspend.retcon.p0(i32 %load) + store i32 0, ptr %array, align 4 + br label %cleanup + +negative: + call ptr (...) @llvm.coro.suspend.retcon.p0(i32 %load) + store i32 10, ptr %array, align 4 + br label %cleanup + +cleanup: + call i1 @llvm.coro.end(ptr %handle, i1 0, token none) + unreachable +} + +declare void @continuation_prototype(ptr, ptr) + +declare swiftcorocc noalias ptr @allocate(i32 %size) +declare void @deallocate(ptr %ptr) + +%Integer8 = type { i8 } + +; CHECK-LABEL: @big_types( +; CHECK-SAME: ptr noalias %frame, +; CHECK-SAME: ptr swiftcoro %allocator, +; CHECK-SAME: i64 %index, +; CHECK-SAME: ptr swiftself dereferenceable(32) %vec_addr +; CHECK-SAME: ) { +; CHECK: [[VEC_STK_BASE_PTR:%.*]] = getelementptr inbounds %big_types.Frame, ptr %frame, i32 0, i32 0 +; CHECK: [[VEC_STK_BASE_INT:%.*]] = ptrtoint ptr [[VEC_STK_BASE_PTR]] to i64 +; CHECK: [[VEC_STK_BIASED_INT:%.*]] = add i64 [[VEC_STK_BASE_INT]], 31 +; CHECK: [[VEC_STK_ALIGNED_INT:%.*]] = and i64 [[VEC_STK_BIASED_INT]], -32 +; CHECK: %vec_stk = inttoptr i64 [[VEC_STK_ALIGNED_INT]] to ptr +define swiftcorocc { ptr, ptr } @big_types(ptr noalias %frame, ptr swiftcoro %allocator, i64 %index, ptr nocapture swiftself dereferenceable(32) %vec_addr) { + %element_addr = alloca %Integer8, align 1 + %id = tail call token @llvm.coro.id.retcon.once.dynamic( + i32 -1, + i32 16, + ptr nonnull @big_types_cfp, + ptr %allocator, + ptr %frame, + ptr @continuation_prototype, + ptr nonnull @allocate, + ptr nonnull @deallocate + ) + %handle = tail call ptr @llvm.coro.begin(token %id, ptr null) + call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %element_addr) + %vec_original = load <32 x i8>, ptr %vec_addr, align 32 + %vec_stk = alloca <32 x i8>, align 32 + store <32 x i8> %vec_original, ptr %vec_stk, align 32 + %vec_original_2 = load <32 x i8>, ptr %vec_stk, align 32 + %index32 = trunc i64 %index to i32 + %element_original = extractelement <32 x i8> %vec_original_2, i32 %index32 + store i8 %element_original, ptr %element_addr, align 1 + call ptr (...) 
@llvm.coro.suspend.retcon.p0(ptr nonnull %element_addr) + %element_modified = load i8, ptr %element_addr, align 1 + %vec_original_3 = load <32 x i8>, ptr %vec_stk, align 32 + %vec_modified = insertelement <32 x i8> %vec_original_3, i8 %element_modified, i32 %index32 + store <32 x i8> %vec_modified, ptr %vec_addr, align 32 + call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %element_addr) + call i1 @llvm.coro.end(ptr %handle, i1 false, token none) + unreachable +} + diff --git a/llvm/test/Transforms/DCE/int_ret_popless.ll b/llvm/test/Transforms/DCE/int_ret_popless.ll new file mode 100644 index 0000000000000..4e6fd4d05e89e --- /dev/null +++ b/llvm/test/Transforms/DCE/int_ret_popless.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S < %s -passes=dce | FileCheck %s + +define void @test_ret_popless() { +; CHECK-LABEL: define void @test_ret_popless() { +; CHECK-NEXT: musttail call void @llvm.ret.popless() +; CHECK-NEXT: ret void +; + musttail call void @llvm.ret.popless() + ret void +} diff --git a/llvm/test/Verifier/ret_popless.ll b/llvm/test/Verifier/ret_popless.ll new file mode 100644 index 0000000000000..6747d3fd039ed --- /dev/null +++ b/llvm/test/Verifier/ret_popless.ll @@ -0,0 +1,14 @@ +; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s + +define void @test_ret_popless_not_musttail() { +; CHECK: llvm.ret.popless call must be musttail + call void @llvm.ret.popless() + ret void +} + +define i64 @test_ret_popless_not_returned(i64 %a) { +; CHECK: musttail intrinsic call must precede a ret + musttail call void @llvm.ret.popless() + %res = bitcast i64 %a to i64 + ret i64 %res +} diff --git a/llvm/test/Verifier/swiftcoro.ll b/llvm/test/Verifier/swiftcoro.ll new file mode 100644 index 0000000000000..fcc980478a5bb --- /dev/null +++ b/llvm/test/Verifier/swiftcoro.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare void @a(ptr swiftcoro %a, ptr swiftcoro %b) +; CHECK: Cannot have multiple 'swiftcoro' parameters!
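Taken together, the swiftcorocc calling convention and llvm.ret.popless let a callee return while leaving its stack frame allocated for its caller. A minimal IR sketch of the shape the new verifier rules accept, modeled on the tests above (the function and value names here are illustrative and not taken from the patch):

    define swiftcorocc i64 @keep_frame_alive(i64 %value) {
    entry:
      %slot = alloca i64                     ; remains addressable by the caller after the return
      store i64 %value, ptr %slot
      musttail call void @llvm.ret.popless() ; must be musttail and immediately precede the ret
      ret i64 %value
    }

On AArch64 such a return selects the new RET_POPLESS pseudo; as the swiftcorocc-ret-popless.ll checks show, the epilogue then restores fp/lr through x29 and returns without moving sp back past the frame.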