diff --git a/clang/include/clang/AST/ASTConsumer.h b/clang/include/clang/AST/ASTConsumer.h
index 447f2592d2359..6cf4504dcfa60 100644
--- a/clang/include/clang/AST/ASTConsumer.h
+++ b/clang/include/clang/AST/ASTConsumer.h
@@ -27,6 +27,7 @@ namespace clang {
   class VarDecl;
   class FunctionDecl;
   class ImportDecl;
+  class TargetInfo;
 
 /// ASTConsumer - This is an abstract interface that should be implemented by
 /// clients that read ASTs.  This abstraction layer allows the client to be
@@ -47,6 +48,14 @@ class ASTConsumer {
   /// ASTContext.
   virtual void Initialize(ASTContext &Context) {}
 
+  /// Initialize - This is called to initialize the consumer, providing the
+  /// ASTContext. 'CodeGenTargetInfo' specifies the code-generation
+  /// configuration for this compilation instance, which may differ from the
+  /// one carried by the Context itself, but only in the OS version number:
+  /// for example, when type checking must be performed against an epoch OS
+  /// version while code generation must target the user-specified OS version.
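+  ///
+  /// As an illustrative sketch, a consumer that cares about the code-gen
+  /// target typically overrides both overloads and forwards the
+  /// single-argument form:
+  /// \code
+  ///   void Initialize(ASTContext &Ctx) override {
+  ///     Initialize(Ctx, Ctx.getTargetInfo());
+  ///   }
+  /// \endcode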
+  virtual void Initialize(ASTContext &Context,
+                          const TargetInfo &CodeGenTargetInfo) {}
+
   /// HandleTopLevelDecl - Handle the specified top-level declaration.  This is
   /// called by the parser to process every top-level Decl*.
   ///
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 14b2f4857d8fd..51c75455f21a6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -335,12 +335,14 @@ CodeGenModule::CodeGenModule(ASTContext &C,
                              IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
                              const HeaderSearchOptions &HSO,
                              const PreprocessorOptions &PPO,
-                             const CodeGenOptions &CGO, llvm::Module &M,
+                             const CodeGenOptions &CGO,
+                             const TargetInfo &CGTI,
+                             llvm::Module &M,
                              DiagnosticsEngine &diags,
                              CoverageSourceInfo *CoverageInfo)
     : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO),
       PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
-      Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
+      Target(CGTI), ABI(createCXXABI(*this)),
       VMContext(M.getContext()), VTables(*this), StackHandler(diags),
       SanitizerMD(new SanitizerMetadata(*this)),
       AtomicOpts(Target.getAtomicOpts()) {
@@ -357,19 +359,19 @@ CodeGenModule::CodeGenModule(ASTContext &C,
   BFloatTy = llvm::Type::getBFloatTy(LLVMContext);
   FloatTy = llvm::Type::getFloatTy(LLVMContext);
   DoubleTy = llvm::Type::getDoubleTy(LLVMContext);
-  PointerWidthInBits = C.getTargetInfo().getPointerWidth(LangAS::Default);
+  PointerWidthInBits = Target.getPointerWidth(LangAS::Default);
   PointerAlignInBytes =
-      C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(LangAS::Default))
+      C.toCharUnitsFromBits(Target.getPointerAlign(LangAS::Default))
           .getQuantity();
   SizeSizeInBytes =
-    C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity();
+    C.toCharUnitsFromBits(Target.getMaxPointerWidth()).getQuantity();
   IntAlignInBytes =
-    C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity();
+    C.toCharUnitsFromBits(Target.getIntAlign()).getQuantity();
   CharTy =
-    llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getCharWidth());
-  IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth());
+    llvm::IntegerType::get(LLVMContext, Target.getCharWidth());
+  IntTy = llvm::IntegerType::get(LLVMContext, Target.getIntWidth());
   IntPtrTy = llvm::IntegerType::get(LLVMContext,
-    C.getTargetInfo().getMaxPointerWidth());
+    Target.getMaxPointerWidth());
   Int8PtrTy = llvm::PointerType::get(LLVMContext,
                                      C.getTargetAddressSpace(LangAS::Default));
   const llvm::DataLayout &DL = M.getDataLayout();
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 87d62da87cc47..1c6dbaad1f0d3 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -690,7 +690,9 @@ class CodeGenModule : public CodeGenTypeCache {
   CodeGenModule(ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
                 const HeaderSearchOptions &headersearchopts,
                 const PreprocessorOptions &ppopts,
-                const CodeGenOptions &CodeGenOpts, llvm::Module &M,
+                const CodeGenOptions &CodeGenOpts,
+                const TargetInfo &CodeGenTargetInfo,
+                llvm::Module &M,
                 DiagnosticsEngine &Diags,
                 CoverageSourceInfo *CoverageInfo = nullptr);
 
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index 09a7d79ae4afb..e283777d1c092 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -149,21 +149,26 @@ namespace {
     }
 
     void Initialize(ASTContext &Context) override {
+      Initialize(Context, Context.getTargetInfo());
+    }
+
+    void Initialize(ASTContext &Context,
+                    const TargetInfo &CodeGenTargetInfo) override {
       Ctx = &Context;
 
-      M->setTargetTriple(Ctx->getTargetInfo().getTriple());
-      M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
-      const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion();
+      M->setTargetTriple(CodeGenTargetInfo.getTriple());
+      M->setDataLayout(CodeGenTargetInfo.getDataLayoutString());
+      const auto &SDKVersion = CodeGenTargetInfo.getSDKVersion();
       if (!SDKVersion.empty())
         M->setSDKVersion(SDKVersion);
-      if (const auto *TVT = Ctx->getTargetInfo().getDarwinTargetVariantTriple())
+      if (const auto *TVT = CodeGenTargetInfo.getDarwinTargetVariantTriple())
         M->setDarwinTargetVariantTriple(TVT->getTriple());
       if (auto TVSDKVersion =
-              Ctx->getTargetInfo().getDarwinTargetVariantSDKVersion())
+          CodeGenTargetInfo.getDarwinTargetVariantSDKVersion())
         M->setDarwinTargetVariantSDKVersion(*TVSDKVersion);
       Builder.reset(new CodeGen::CodeGenModule(Context, FS, HeaderSearchOpts,
                                                PreprocessorOpts, CodeGenOpts,
-                                               *M, Diags, CoverageInfo));
+                                               CodeGenTargetInfo, *M,
+                                               Diags, CoverageInfo));
 
       for (auto &&Lib : CodeGenOpts.DependentLibraries)
         Builder->AddDependentLib(Lib);
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
index 384685e3f1eec..8b54552623872 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
@@ -174,6 +174,10 @@ class PCHContainerGenerator : public ASTConsumer {
   ~PCHContainerGenerator() override = default;
 
   void Initialize(ASTContext &Context) override {
+    Initialize(Context, Context.getTargetInfo());
+  }
+
+  void Initialize(ASTContext &Context,
+                  const TargetInfo &CodeGenTargetInfo) override {
     assert(!Ctx && "initialized multiple times");
 
     Ctx = &Context;
@@ -181,7 +185,8 @@ class PCHContainerGenerator : public ASTConsumer {
     M.reset(new llvm::Module(MainFileName, *VMContext));
     M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
     Builder.reset(new CodeGen::CodeGenModule(
-        *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags));
+        *Ctx, FS, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts,
+        CodeGenTargetInfo, *M, Diags));
 
     // Prepare CGDebugInfo to emit debug info for a clang module.
     auto *DI = Builder->getModuleDebugInfo();
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index a8f9c71781701..6560ae5fc5231 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -162,6 +162,7 @@ enum Kind {
   kw_anyregcc,
   kw_swiftcc,
   kw_swifttailcc,
+  kw_swiftcorocc,
   kw_preserve_mostcc,
   kw_preserve_allcc,
   kw_preserve_nonecc,
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 92b6e68d9d0a7..da62bf6554aa0 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -791,6 +791,7 @@ enum AttributeKindCodes {
   ATTR_KIND_NO_DIVERGENCE_SOURCE = 100,
   ATTR_KIND_SANITIZE_TYPE = 101,
   ATTR_KIND_CAPTURES = 102,
+  ATTR_KIND_SWIFT_CORO = 103,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 9c8226660e087..dd8c2469f4cba 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -538,6 +538,15 @@ class CallLowering {
     return false;
   }
 
+  /// This hook must be implemented to lower @llvm.ret.popless intrinsics,
+  /// which are required to be musttail and effectively annotate a return
+  /// instruction, marking it "popless", i.e., one that does not restore SP.
+  /// This "adjustment" step runs after lowerReturn, and is only meant to make
+  /// it a little less painful to maintain before we add this to the main hook.
+  virtual bool adjustReturnToPopless(MachineIRBuilder &MIRBuilder) const {
+    report_fatal_error("Popless returns not implemented for this target");
+  }
+
   virtual bool fallBackToDAGISel(const MachineFunction &MF) const {
     return false;
   }
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 172c3e8c9a847..771fd90f32645 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -319,6 +319,10 @@ class MachineFrameInfo {
   /// instructions which manipulate the stack pointer.
   bool HasCopyImplyingStackAdjustment = false;
 
+  /// True if the function contains a call using a calling convention that
+  /// allows it to be "popless", i.e., to not restore SP when the call returns.
+  bool HasPoplessCall = false;
+
   /// True if the function contains a call to the llvm.vastart intrinsic.
   bool HasVAStart = false;
 
@@ -634,6 +638,9 @@ class MachineFrameInfo {
     HasCopyImplyingStackAdjustment = B;
   }
 
+  bool hasPoplessCall() const { return HasPoplessCall; }
+  void setHasPoplessCall(bool B = true) { HasPoplessCall = B; }
+
   /// Returns true if the function calls the llvm.va_start intrinsic.
   bool hasVAStart() const { return HasVAStart; }
   void setHasVAStart(bool B) { HasVAStart = B; }
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index a28c7a99fb3b5..b0820b17aeb1d 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -41,6 +41,7 @@ namespace ISD {
     unsigned IsSplitEnd : 1;   ///< Last part of a split
     unsigned IsSwiftSelf : 1;  ///< Swift self parameter
     unsigned IsSwiftAsync : 1;  ///< Swift async context parameter
+    unsigned IsSwiftCoro : 1;  ///< Swift coro parameter
     unsigned IsSwiftError : 1; ///< Swift error parameter
     unsigned IsCFGuardTarget : 1; ///< Control Flow Guard target
     unsigned IsHva : 1;        ///< HVA field for
@@ -64,6 +65,7 @@ namespace ISD {
         : IsZExt(0), IsSExt(0), IsNoExt(0), IsInReg(0), IsSRet(0), IsByVal(0),
           IsByRef(0), IsNest(0), IsReturned(0), IsSplit(0), IsInAlloca(0),
           IsPreallocated(0), IsSplitEnd(0), IsSwiftSelf(0), IsSwiftAsync(0),
+          IsSwiftCoro(0),
           IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0),
           IsSecArgPass(0), MemAlign(0), OrigAlign(0),
           IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
@@ -104,6 +106,9 @@ namespace ISD {
     bool isSwiftAsync() const { return IsSwiftAsync; }
     void setSwiftAsync() { IsSwiftAsync = 1; }
 
+    bool isSwiftCoro() const { return IsSwiftCoro; }
+    void setSwiftCoro() { IsSwiftCoro = 1; }
+
     bool isSwiftError() const { return IsSwiftError; }
     void setSwiftError() { IsSwiftError = 1; }
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index b528079c2d594..a9bef1df0040e 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -315,6 +315,7 @@ class TargetLoweringBase {
     bool IsSwiftSelf : 1;
     bool IsSwiftAsync : 1;
     bool IsSwiftError : 1;
+    bool IsSwiftCoro : 1;
     bool IsCFGuardTarget : 1;
     MaybeAlign Alignment = std::nullopt;
     Type *IndirectType = nullptr;
@@ -324,7 +325,7 @@ class TargetLoweringBase {
           IsSRet(false), IsNest(false), IsByVal(false), IsByRef(false),
           IsInAlloca(false), IsPreallocated(false), IsReturned(false),
           IsSwiftSelf(false), IsSwiftAsync(false), IsSwiftError(false),
-          IsCFGuardTarget(false) {}
+          IsSwiftCoro(false), IsCFGuardTarget(false) {}
 
     void setAttributes(const CallBase *Call, unsigned ArgIdx);
   };
@@ -4889,6 +4890,10 @@ class TargetLowering : public TargetLoweringBase {
     llvm_unreachable("Not Implemented");
   }
 
+  virtual SDValue adjustReturnPopless(SDValue Chain, SelectionDAG &DAG) const {
+    report_fatal_error("Popless returns not implemented for this target");
+  }
+
   /// Return true if result of the specified node is used by a return node
   /// only. It also compute and return the input chain for the tail call.
   ///
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index fb94926043fc7..44cebfdb48949 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -358,6 +358,9 @@ def SwiftSelf : EnumAttr<"swiftself", IntersectPreserve, [ParamAttr]>;
 /// Argument is swift async context.
 def SwiftAsync : EnumAttr<"swiftasync", IntersectPreserve, [ParamAttr]>;
 
+/// Argument is swift coro allocator.
+def SwiftCoro : EnumAttr<"swiftcoro", IntersectPreserve, [ParamAttr]>;
+
 /// Function must be in a unwind table.
 def UWTable : IntAttr<"uwtable", IntersectPreserve, [FnAttr]>;
 
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index d68491eb5535c..d1fbe46e74682 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -284,6 +284,10 @@ namespace CallingConv {
     RISCV_VLSCall_32768 = 122,
     RISCV_VLSCall_65536 = 123,
 
+    /// This follows the Swift calling convention in how arguments are passed
+    /// but doesn't clean up the stack on a return.
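+    ///
+    /// For illustration, a function using this convention might look roughly
+    /// like the following in textual IR (a sketch only; names are made up):
+    ///   define swiftcorocc void @f(ptr swiftcoro %allocator) { ... }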
+    SwiftCoro = 124,
+
     /// The highest possible ID. Must be some 2^k - 1.
     MaxID = 1023
   };
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index fe83c9df5731e..3728bb1769a2f 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -865,6 +865,20 @@ def int_localrecover : DefaultAttrsIntrinsic<[llvm_ptr_ty],
                                  [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
                                  [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 
+// Marks the following ret instruction as a "popless" return, one that does
+// not restore SP to its function-entry value (i.e., does not deallocate the
+// stack frame), allowing allocations made in the function to remain
+// accessible to the caller.
+//
+// The function must be annotated with an appropriate target-specific calling
+// convention, so that its caller can generate stack accesses accordingly,
+// generally by treating the call like a variably-sized alloca and using
+// FP-based addressing for its own frame rather than relying on statically
+// known SP offsets.
+//
+// Calls to this intrinsic need to be musttail, but don't follow the other ABI
+// requirements for musttail calls, since this is really annotating the ret.
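+//
+// As a rough, illustrative sketch, a popless return is expected to look like:
+//
+//   musttail call void @llvm.ret.popless()
+//   ret void
+//
+// inside a function using a suitable calling convention such as swiftcorocc.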
+def int_ret_popless : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly]>;
+
 // Given the frame pointer passed into an SEH filter function, returns a
 // pointer to the local variable area suitable for use with llvm.localrecover.
 def int_eh_recoverfp : DefaultAttrsIntrinsic<[llvm_ptr_ty],
@@ -1742,6 +1756,10 @@ def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty],
     [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty,
      llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty],
     []>;
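+// The llvm.coro.id.retcon.once.dynamic variant features a dynamically-sized
+// caller allocation.  As an illustrative sketch (names are made up), a call
+// might look like:
+//   %id = call token @llvm.coro.id.retcon.once.dynamic(
+//       i32 64, i32 8, ptr @f_cfp, ptr %allocator, ptr %storage,
+//       ptr @prototype, ptr @allocate, ptr @deallocate)
+// with the operands being, in order: storage size, storage alignment, the coro
+// function pointer global, the allocator, the storage, the continuation
+// prototype, and the allocation/deallocation functions (see
+// CoroIdRetconOnceDynamicInst in CoroInstr.h).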
+def int_coro_id_retcon_once_dynamic : Intrinsic<[llvm_token_ty],
+    [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty,
+     llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+    []>;
 def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
 def int_coro_id_async : Intrinsic<[llvm_token_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td
index 18b7ff4aec95f..6b3be7b6ed3fb 100644
--- a/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/llvm/include/llvm/Target/TargetCallingConv.td
@@ -61,6 +61,11 @@ class CCIfSwiftAsync<CCAction A> : CCIf<"ArgFlags.isSwiftAsync()", A> {
 class CCIfSwiftError<CCAction A> : CCIf<"ArgFlags.isSwiftError()", A> {
 }
 
+/// CCIfSwiftCoro - If the current argument has swiftcoro parameter attribute,
+/// apply Action A.
+class CCIfSwiftCoro<CCAction A> : CCIf<"ArgFlags.isSwiftCoro()", A> {
+}
+
 /// CCIfCFGuardTarget - If the current argument has cfguardtarget parameter
 /// attribute, apply Action A.
 class CCIfCFGuardTarget<CCAction A> : CCIf<"ArgFlags.isCFGuardTarget()", A> {
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h
index 1ad5f7fefc8cb..24f5da8d6993b 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h
@@ -135,6 +135,7 @@ class AnyCoroIdInst : public IntrinsicInst {
     auto ID = I->getIntrinsicID();
     return ID == Intrinsic::coro_id || ID == Intrinsic::coro_id_retcon ||
            ID == Intrinsic::coro_id_retcon_once ||
+           ID == Intrinsic::coro_id_retcon_once_dynamic ||
            ID == Intrinsic::coro_id_async;
   }
 
@@ -314,6 +315,72 @@ class CoroIdRetconOnceInst : public AnyCoroIdRetconInst {
   }
 };
 
+/// This represents the llvm.coro.id.retcon.once.dynamic instruction.
+class LLVM_LIBRARY_VISIBILITY CoroIdRetconOnceDynamicInst
+    : public AnyCoroIdInst {
+  enum {
+    SizeArg,
+    AlignArg,
+    CoroFuncPtrArg,
+    AllocatorArg,
+    StorageArg,
+    PrototypeArg,
+    AllocArg,
+    DeallocArg
+  };
+
+public:
+  void checkWellFormed() const;
+
+  uint64_t getStorageSize() const {
+    return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue();
+  }
+
+  Align getStorageAlignment() const {
+    return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue();
+  }
+
+  Value *getStorage() const { return getArgOperand(StorageArg); }
+
+  /// Return the coro function pointer address. This should be the address of
+  /// a coro function pointer struct for the current coro function:
+  ///
+  ///   struct coro_function_pointer {
+  ///     uint32_t frame_size;
+  ///     uint32_t relative_pointer(coro_function);
+  ///   };
+  GlobalVariable *getCoroFunctionPointer() const {
+    return cast<GlobalVariable>(
+        getArgOperand(CoroFuncPtrArg)->stripPointerCasts());
+  }
+
+  /// Return the prototype for the continuation function.  The type,
+  /// attributes, and calling convention of the continuation function(s)
+  /// are taken from this declaration.
+  Function *getPrototype() const {
+    return cast<Function>(getArgOperand(PrototypeArg)->stripPointerCasts());
+  }
+
+  /// Return the function to use for allocating memory.
+  Function *getAllocFunction() const {
+    return cast<Function>(getArgOperand(AllocArg)->stripPointerCasts());
+  }
+
+  /// Return the function to use for deallocating memory.
+  Function *getDeallocFunction() const {
+    return cast<Function>(getArgOperand(DeallocArg)->stripPointerCasts());
+  }
+
+  Value *getAllocator() const { return getArgOperand(AllocatorArg); }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_id_retcon_once_dynamic;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 /// This represents the llvm.coro.id.async instruction.
 class CoroIdAsyncInst : public AnyCoroIdInst {
   enum { SizeArg, AlignArg, StorageArg, AsyncFuncPtrArg };
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h
index 2e98b089358bc..a60f813785a59 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h
@@ -45,6 +45,10 @@ enum class ABI {
   /// single continuation function. The continuation function is available as an
   /// intrinsic.
   Async,
+
+  /// The variant of RetconOnce which features a dynamically-sized caller
+  /// allocation.
+  RetconOnceDynamic,
 };
 
 // Holds structural Coroutine Intrinsics for a particular function and other
@@ -127,9 +131,18 @@ struct Shape {
     Function *ResumePrototype;
     Function *Alloc;
     Function *Dealloc;
+    Value *Allocator;
     BasicBlock *ReturnBlock;
     bool IsFrameInlineInStorage;
     ConstantInt* TypeId;
+    GlobalVariable *CoroFuncPointer;
+    Value *Storage;
+    uint64_t StorageSize;
+    Align StorageAlignment;
+    // computed during splitting:
+    uint64_t ContextSize;
+
+    Align getStorageAlignment() const { return Align(StorageAlignment); }
   };
 
   struct AsyncLoweringStorage {
@@ -194,6 +207,7 @@ struct Shape {
                                /*IsVarArg=*/false);
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
+    case coro::ABI::RetconOnceDynamic:
       return RetconLowering.ResumePrototype->getFunctionType();
     case coro::ABI::Async:
       // Not used. The function type depends on the active suspend.
@@ -204,7 +218,8 @@ struct Shape {
   }
 
   ArrayRef<Type *> getRetconResultTypes() const {
-    assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce);
+    assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce ||
+           ABI == coro::ABI::RetconOnceDynamic);
     auto FTy = CoroBegin->getFunction()->getFunctionType();
 
     // The safety of all this is checked by checkWFRetconPrototype.
@@ -216,7 +231,8 @@ struct Shape {
   }
 
   ArrayRef<Type *> getRetconResumeTypes() const {
-    assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce);
+    assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce ||
+           ABI == coro::ABI::RetconOnceDynamic);
 
     // The safety of all this is checked by checkWFRetconPrototype.
     auto FTy = RetconLowering.ResumePrototype->getFunctionType();
@@ -230,6 +246,7 @@ struct Shape {
 
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
+    case coro::ABI::RetconOnceDynamic:
       return RetconLowering.ResumePrototype->getCallingConv();
     case coro::ABI::Async:
       return AsyncLowering.AsyncCC;
@@ -262,7 +279,7 @@ struct Shape {
   /// \param CG - if non-null, will be updated for the new call
   void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
 
-  Shape() = default;
+  Shape() = delete;
   explicit Shape(Function &F) {
     SmallVector<CoroFrameInst *, 8> CoroFrames;
     SmallVector<CoroSaveInst *, 2> UnusedCoroSaves;
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 30e1af602667c..6b431f2f71594 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -2361,6 +2361,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
       return false;
     case Intrinsic::icall_branch_funnel:
     case Intrinsic::localescape:
+    case Intrinsic::ret_popless:
       HasUninlineableIntrinsic = true;
       return false;
     case Intrinsic::vastart:
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 4d25b12c9ab06..0334f684cb94d 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -659,6 +659,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(x86_regcallcc);
   KEYWORD(swiftcc);
   KEYWORD(swifttailcc);
+  KEYWORD(swiftcorocc);
   KEYWORD(anyregcc);
   KEYWORD(preserve_mostcc);
   KEYWORD(preserve_allcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 3b9b466ddeeab..a0969f9141629 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2188,6 +2188,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
 ///   ::= 'ghccc'
 ///   ::= 'swiftcc'
 ///   ::= 'swifttailcc'
+///   ::= 'swiftcorocc'
 ///   ::= 'x86_intrcc'
 ///   ::= 'hhvmcc'
 ///   ::= 'hhvm_ccc'
@@ -2254,6 +2255,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
   case lltok::kw_ghccc:          CC = CallingConv::GHC; break;
   case lltok::kw_swiftcc:        CC = CallingConv::Swift; break;
   case lltok::kw_swifttailcc:    CC = CallingConv::SwiftTail; break;
+  case lltok::kw_swiftcorocc:    CC = CallingConv::SwiftCoro; break;
   case lltok::kw_x86_intrcc:     CC = CallingConv::X86_INTR; break;
   case lltok::kw_hhvmcc:
     CC = CallingConv::DUMMY_HHVM;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 5c62ef4ad8e4e..c055f7c296711 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2202,6 +2202,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::SwiftSelf;
   case bitc::ATTR_KIND_SWIFT_ASYNC:
     return Attribute::SwiftAsync;
+  case bitc::ATTR_KIND_SWIFT_CORO:
+    return Attribute::SwiftCoro;
   case bitc::ATTR_KIND_UW_TABLE:
     return Attribute::UWTable;
   case bitc::ATTR_KIND_VSCALE_RANGE:
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 1a39205d93e19..33c95f6b95650 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -875,6 +875,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_SWIFT_SELF;
   case Attribute::SwiftAsync:
     return bitc::ATTR_KIND_SWIFT_ASYNC;
+  case Attribute::SwiftCoro:
+    return bitc::ATTR_KIND_SWIFT_CORO;
   case Attribute::UWTable:
     return bitc::ATTR_KIND_UW_TABLE;
   case Attribute::VScaleRange:
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 0af70f333f864..79ada17b23a21 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -62,6 +62,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
     Flags.setSwiftAsync();
   if (AttrFn(Attribute::SwiftError))
     Flags.setSwiftError();
+  if (AttrFn(Attribute::SwiftCoro))
+    Flags.setSwiftCoro();
 }
 
 ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 931e4fe19e69a..464dd11381a46 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -391,7 +391,14 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
+  bool Success =
+      CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
+
+  if (auto *MustTailCI = RI.getParent()->getTerminatingMustTailCall())
+    if (MustTailCI->getIntrinsicID() == Intrinsic::ret_popless)
+      Success &= CLI->adjustReturnToPopless(MIRBuilder);
+
+  return Success;
 }
 
 void IRTranslator::emitBranchForMergedCondition(
@@ -2424,6 +2431,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                           {getOrCreateVReg(*CI.getArgOperand(0))});
     return true;
   }
+  case Intrinsic::ret_popless: {
+    // The ret.popless intrinsic call itself only annotates the following ret.
+    // To achieve that, it does need to be musttail and reachable from the ret.
+    assert(CI.getParent()->getTerminatingMustTailCall() == &CI &&
+           "llvm.ret.popless not in musttail position");
+    return true;
+  }
   case Intrinsic::cttz:
   case Intrinsic::ctlz: {
     ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index fbc0264961bc7..85db1a064980d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1050,6 +1050,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
       Flags.setSwiftAsync();
     if (Arg.IsSwiftError)
       Flags.setSwiftError();
+    if (Arg.IsSwiftCoro)
+      Flags.setSwiftCoro();
     if (Arg.IsCFGuardTarget)
       Flags.setCFGuardTarget();
     if (Arg.IsByVal)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f592e69d57fec..8cc9be15e4859 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2153,6 +2153,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
     return;
   }
 
+  // Musttail calls to @llvm.ret.popless are used to annotate the ret as
+  // "popless".  Keep track of that here, and ask the target to lower the
+  // return accordingly later.
+  bool IsPoplessReturn = false;
+  if (auto *MustTailCI = I.getParent()->getTerminatingMustTailCall())
+    if (MustTailCI->getIntrinsicID() == Intrinsic::ret_popless)
+      IsPoplessReturn = true;
+
   if (!FuncInfo.CanLowerReturn) {
     Register DemoteReg = FuncInfo.DemoteRegister;
 
@@ -2287,6 +2294,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   Chain = DAG.getTargetLoweringInfo().LowerReturn(
       Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
 
+  // If we did find this return instruction to be popless, make it so now.
+  // It's still a normal return in almost all regards; we just need to remember
+  // that it's popless for when we lower the return and emit the epilogue
+  // later.  Ideally we'd ask LowerReturn to do this, but its API is enough of
+  // a pain as it is, and every target would have to learn about the change.
+  if (IsPoplessReturn) {
+    SDValue NewChain =
+        DAG.getTargetLoweringInfo().adjustReturnPopless(Chain, DAG);
+    DAG.RemoveDeadNode(Chain.getNode());
+    Chain = NewChain;
+  }
+
   // Verify that the target's LowerReturn behaved as expected.
   assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
          "LowerReturn didn't return a valid chain!");
@@ -8018,6 +8037,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask));
     return;
   }
+
+  case Intrinsic::ret_popless:
+    // The ret.popless intrinsic call itself only annotates the following ret.
+    // To achieve that, it does need to be musttail and reachable from the ret.
+    assert(I.getParent()->getTerminatingMustTailCall() == &I &&
+           "llvm.ret.popless not in musttail position");
+    return;
+
   case Intrinsic::threadlocal_address: {
     setValue(&I, getValue(I.getOperand(0)));
     return;
@@ -11001,6 +11028,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     Entry.IsSwiftSelf = false;
     Entry.IsSwiftAsync = false;
     Entry.IsSwiftError = false;
+    Entry.IsSwiftCoro = false;
     Entry.IsCFGuardTarget = false;
     Entry.Alignment = Alignment;
     CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
@@ -11120,6 +11148,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         Flags.setSwiftAsync();
       if (Args[i].IsSwiftError)
         Flags.setSwiftError();
+      if (Args[i].IsSwiftCoro)
+        Flags.setSwiftCoro();
       if (Args[i].IsCFGuardTarget)
         Flags.setCFGuardTarget();
       if (Args[i].IsByVal)
@@ -11651,6 +11681,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         Flags.setSwiftAsync();
       if (Arg.hasAttribute(Attribute::SwiftError))
         Flags.setSwiftError();
+      if (Arg.hasAttribute(Attribute::SwiftCoro))
+        Flags.setSwiftCoro();
       if (Arg.hasAttribute(Attribute::ByVal))
         Flags.setByVal();
       if (Arg.hasAttribute(Attribute::ByRef))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3995216e3d689..60ebf71b3dd3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -128,6 +128,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
+  IsSwiftCoro = Call->paramHasAttr(ArgIdx, Attribute::SwiftCoro);
   Alignment = Call->getParamStackAlign(ArgIdx);
   IndirectType = nullptr;
   assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ac8aa0d35ea30..f57e65980e731 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -378,6 +378,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::SPIR_KERNEL:   Out << "spir_kernel"; break;
   case CallingConv::Swift:         Out << "swiftcc"; break;
   case CallingConv::SwiftTail:     Out << "swifttailcc"; break;
+  case CallingConv::SwiftCoro:     Out << "swiftcorocc"; break;
   case CallingConv::X86_INTR:      Out << "x86_intrcc"; break;
   case CallingConv::DUMMY_HHVM:
     Out << "hhvmcc";
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index c632b1b2dc2ab..20050459b3c3e 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -311,6 +311,14 @@ const CallInst *BasicBlock::getTerminatingMustTailCall() const {
   if (!Prev)
     return nullptr;
 
+  // Some musttail intrinsic calls are special in that they are really just ret
+  // annotations, and only need to be the last instruction before the ret.
+  // We don't need to look through the return value in those cases.
+  // FIXME: we should generalize getTerminatingDeoptimizeCall for this case.
+  if (auto *CI = dyn_cast<CallInst>(Prev))
+    if (CI->isMustTailCall() && CI->getIntrinsicID() == Intrinsic::ret_popless)
+      return CI;
+
   if (Value *RV = RI->getReturnValue()) {
     if (RV != Prev)
       return nullptr;
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index b0ccab120ccf0..336ab9b8acc0a 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2239,6 +2239,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
   bool SawSwiftSelf = false;
   bool SawSwiftAsync = false;
   bool SawSwiftError = false;
+  bool SawSwiftCoro = false;
 
   // Verify return value attributes.
   AttributeSet RetAttrs = Attrs.getRetAttrs();
@@ -2315,6 +2316,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
       SawSwiftError = true;
     }
 
+    if (ArgAttrs.hasAttribute(Attribute::SwiftCoro)) {
+      Check(!SawSwiftCoro, "Cannot have multiple 'swiftcoro' parameters!", V);
+      SawSwiftCoro = true;
+    }
+
     if (ArgAttrs.hasAttribute(Attribute::InAlloca)) {
       Check(i == FT->getNumParams() - 1,
             "inalloca isn't on the last parameter!", V);
@@ -3953,6 +3959,7 @@ static AttrBuilder getParameterABIAttributes(LLVMContext& C, unsigned I, Attribu
   static const Attribute::AttrKind ABIAttrs[] = {
       Attribute::StructRet,  Attribute::ByVal,          Attribute::InAlloca,
       Attribute::InReg,      Attribute::StackAlignment, Attribute::SwiftSelf,
+      Attribute::SwiftCoro,
       Attribute::SwiftAsync, Attribute::SwiftError,     Attribute::Preallocated,
       Attribute::ByRef};
   AttrBuilder Copy(C);
@@ -3987,6 +3994,15 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
             &CI);
     }
 #endif
+    Check(CI.getIntrinsicID() != Intrinsic::ret_popless,
+          "llvm.ret.popless call must be musttail", &CI);
+    return;
+  }
+
+  // Some musttail intrinsic calls are special and are exempt from the usual
+  // musttail rules.
+  if (CI.getIntrinsicID() == Intrinsic::ret_popless) {
+    ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(CI.getNextNode());
+    Check(Ret, "musttail intrinsic call must precede a ret", &CI);
     return;
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 7cca6d9bc6b9c..e071689dacddc 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -72,6 +72,9 @@ defvar AArch64_Common = [
   // preserved for normal function calls.
   CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
 
+  // Pass SwiftCoro in X23.
+  CCIfSwiftCoro<CCIfType<[i64], CCAssignToReg<[X23]>>>,
+
   CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
 
   CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
@@ -241,6 +244,9 @@ def CC_AArch64_Arm64EC_Thunk : CallingConv<[
   // normal functions don't need to save it somewhere.
   CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X21]>>>,
 
+  // Pass SwiftCoro in X22.
+  CCIfSwiftCoro<CCIfType<[i64], CCAssignToReg<[X22]>>>,
+
   // The 'CFGuardTarget' parameter, if any, is passed in RAX (R8).
   CCIfCFGuardTarget<CCAssignToReg<[X8]>>,
 
@@ -374,6 +380,9 @@ def CC_AArch64_DarwinPCS : CallingConv<[
   // preserved for normal function calls.
   CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
 
+  // Pass SwiftCoro in X23.
+  CCIfSwiftCoro<CCIfType<[i64], CCAssignToReg<[X23]>>>,
+
   CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
 
   CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 37241f3a70471..b609fcf58eee7 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2926,6 +2926,7 @@ bool AArch64FastISel::fastLowerArguments() {
         Arg.hasAttribute(Attribute::SwiftSelf) ||
         Arg.hasAttribute(Attribute::SwiftAsync) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
+        Arg.hasAttribute(Attribute::SwiftCoro) ||
         Arg.hasAttribute(Attribute::Nest))
       return false;
 
@@ -3193,6 +3194,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   for (auto Flag : CLI.OutFlags)
     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
+        Flag.isSwiftCoro() ||
         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
       return false;
 
@@ -3861,6 +3863,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
   if (TLI.supportSplitCSR(FuncInfo.MF))
     return false;
 
+  if (I->getParent()->getTerminatingMustTailCall())
+    return false;
+
   // Build a list of return value registers.
   SmallVector<Register, 4> RetRegs;
 
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 642d59d06b123..546d2aaca4e98 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -350,7 +350,8 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
   // Bail on stack adjustment needed on return for simplicity.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-  if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
+  if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
+      MFI.hasPoplessCall())
     return false;
   if (Exit && getArgumentStackToRestore(MF, *Exit))
     return false;
@@ -502,9 +503,15 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
   if (MF.getTarget().Options.DisableFramePointerElim(MF))
     return true;
   if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+      MFI.hasPoplessCall() ||
       MFI.hasStackMap() || MFI.hasPatchPoint() ||
       RegInfo->hasStackRealignment(MF))
     return true;
+
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  if (AFI->hasPoplessEpilogue())
+    return true;
+
   // With large callframes around we may need to use FP to access the scavenging
   // emergency spillslot.
   //
@@ -1123,6 +1130,12 @@ bool AArch64FrameLowering::canUseAsPrologue(
       return false;
   }
 
+  // If we have some return path that's popless, it needs its own very special
+  // epilogue, so we can't shrink-wrap it away.
+  // FIXME: this and some of the below checks belong in enableShrinkWrapping.
+  if (AFI->hasPoplessEpilogue())
+    return false;
+
   // Certain stack probing sequences might clobber flags, then we can't use
   // the block as a prologue if the flags register is a live-in.
   if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() &&
@@ -1186,6 +1199,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
   if (MFI.hasVarSizedObjects())
     return false;
 
+  if (MFI.hasPoplessCall())
+    return false;
+
   if (RegInfo->hasStackRealignment(MF))
     return false;
 
@@ -1205,6 +1221,12 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
 
 bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
     MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
+
+  MachineFunction &MF = *MBB.getParent();
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  if (AFI->hasPoplessEpilogue())
+    return false;
+
   if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
     return false;
   if (MBB.empty())
@@ -1572,6 +1594,53 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   return std::prev(MBB.erase(MBBI));
 }
 
+static void fixupCalleeSaveRestoreToFPBased(MachineInstr &MI,
+                                            int64_t FPSPOffset) {
+  assert(!AArch64InstrInfo::isSEHInstruction(MI));
+
+  unsigned Opc = MI.getOpcode();
+  unsigned Scale;
+  switch (Opc) {
+  case AArch64::STPXi:
+  case AArch64::STRXui:
+  case AArch64::STPDi:
+  case AArch64::STRDui:
+  case AArch64::LDPXi:
+  case AArch64::LDRXui:
+  case AArch64::LDPDi:
+  case AArch64::LDRDui:
+    Scale = 8;
+    break;
+  case AArch64::STPQi:
+  case AArch64::STRQui:
+  case AArch64::LDPQi:
+  case AArch64::LDRQui:
+    Scale = 16;
+    break;
+  default:
+    llvm_unreachable("Unexpected callee-save save/restore opcode!");
+  }
+
+  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
+
+  MachineOperand &BaseRegOpnd = MI.getOperand(OffsetIdx - 1);
+  assert(BaseRegOpnd.getReg() == AArch64::SP &&
+         "Unexpected base register in callee-save save/restore instruction!");
+  BaseRegOpnd.setReg(AArch64::FP); // FIXME: get the FP register from TRI.
+
+  // Last operand is immediate offset that needs fixing.
+  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
+  // All generated opcodes have scaled offsets.
+  assert(FPSPOffset % Scale == 0);
+  int64_t ResidualOffset = OffsetOpnd.getImm() - (FPSPOffset / Scale);
+  OffsetOpnd.setImm(ResidualOffset);
+
+  assert((!MI.getOperand(0).isReg() ||
+          MI.getOperand(0).getReg() != AArch64::FP || ResidualOffset == 0) &&
+         "FP/LR frame record should be restored from FP+0");
+}
+
 // Fixup callee-save register save/restore instructions to take into account
 // combined SP bump by adding the local stack size to the stack offsets.
 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
@@ -2214,7 +2283,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
   allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
                      nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
-                     MFI.hasVarSizedObjects() || LocalsSize);
+                     MFI.hasVarSizedObjects() || LocalsSize ||
+                         MFI.hasPoplessCall());
   CFAOffset += SVECalleeSavesSize;
 
   if (EmitAsyncCFI)
@@ -2231,7 +2301,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                        SVELocalsSize + StackOffset::getFixed(NumBytes),
                        NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
-                       CFAOffset, MFI.hasVarSizedObjects());
+                       CFAOffset,
+                       MFI.hasVarSizedObjects() || MFI.hasPoplessCall());
   }
 
   // If we need a base pointer, set it up here. It's whatever the value of the
@@ -2312,10 +2383,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
   bool HasWinCFI = false;
   bool IsFunclet = false;
+  bool IsSwiftCoroPartialReturn = false;
 
   if (MBB.end() != MBBI) {
     DL = MBBI->getDebugLoc();
     IsFunclet = isFuncletReturnInstr(*MBBI);
+    IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS;
+  }
+
+  if (IsSwiftCoroPartialReturn) {
+    // The partial-return intrinsic/instruction requires the swiftcoro
+    // calling convention.
+    if (MF.getFunction().getCallingConv() != CallingConv::SwiftCoro)
+      report_fatal_error("llvm.ret.popless requires swiftcorocc");
+    assert(MBBI->getOpcode() == AArch64::RET_POPLESS);
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::RET_ReallyLR))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    MBB.erase(MBBI);
   }
 
   MachineBasicBlock::iterator EpilogStartI = MBB.end();
@@ -2364,6 +2447,39 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
         if (Info.getReg() != AArch64::LR)
           continue;
         MachineBasicBlock::iterator TI = MBB.getFirstTerminator();
+
+        // When we're doing a popless ret (i.e., one that doesn't restore SP),
+        // we can't rely on the exit SP matching the entry SP, but they need to
+        // match for the LR authentication to succeed.  Instead, derive the
+        // entry SP from our FP (the frame record itself is 16 bytes, so entry
+        // SP is FP + 16), save that into X16, and use it as the discriminator
+        // in an AUTIB.
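+        // For reference, the resulting sequence is roughly (assuming FP = x29
+        // and LR = x30):
+        //   add   x16, x29, #16    ; recover the entry SP from FP
+        //   autib x30, x16         ; authenticate LR with the entry SP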
+        if (IsSwiftCoroPartialReturn) {
+          const auto *TRI = Subtarget.getRegisterInfo();
+
+          MachineBasicBlock::iterator EpilogStartI = MBB.getFirstTerminator();
+          MachineBasicBlock::iterator Begin = MBB.begin();
+          while (EpilogStartI != Begin) {
+            --EpilogStartI;
+            if (!EpilogStartI->getFlag(MachineInstr::FrameDestroy)) {
+              ++EpilogStartI;
+              break;
+            }
+            if (EpilogStartI->readsRegister(AArch64::X16, TRI) ||
+                EpilogStartI->modifiesRegister(AArch64::X16, TRI))
+              report_fatal_error("unable to use x16 for popless ret LR auth");
+          }
+
+          emitFrameOffset(MBB, EpilogStartI, DL, AArch64::X16, AArch64::FP,
+                          StackOffset::getFixed(16), TII,
+                          MachineInstr::FrameDestroy);
+          BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIB), AArch64::LR)
+              .addUse(AArch64::LR)
+              .addUse(AArch64::X16)
+              .setMIFlag(MachineInstr::FrameDestroy);
+          return;
+        }
+
         if (TI != MBB.end() && TI->getOpcode() == AArch64::RET_ReallyLR) {
           // If there is a terminator and it's a RET, we can fold AUTH into it.
           // Be careful to keep the implicitly returned registers.
@@ -2397,6 +2513,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
   if (homogeneousPrologEpilog(MF, &MBB)) {
     assert(!NeedsWinCFI);
+    assert(!IsSwiftCoroPartialReturn);
     auto LastPopI = MBB.getFirstTerminator();
     if (LastPopI != MBB.begin()) {
       auto HomogeneousEpilog = std::prev(LastPopI);
@@ -2417,7 +2534,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
   // Assume we can't combine the last pop with the sp restore.
   bool CombineAfterCSRBump = false;
-  if (!CombineSPBump && PrologueSaveSize != 0) {
+  if (!CombineSPBump && PrologueSaveSize != 0 && !IsSwiftCoroPartialReturn) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
     while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
            AArch64InstrInfo::isSEHInstruction(*Pop))
@@ -2453,6 +2570,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
         IsSVECalleeSave(LastPopI)) {
       ++LastPopI;
       break;
+    } else if (IsSwiftCoroPartialReturn) {
+      assert(!EmitCFI);
+      assert(hasFP(MF));
+      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+                                        NeedsWinCFI, &HasWinCFI);
+      // With FP-based addressing, rewrite CSR restores from SP to FP.
+      int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset() +
+                         AFI->getLocalStackSize();
+      fixupCalleeSaveRestoreToFPBased(*LastPopI, FPOffset);
     } else if (CombineSPBump)
       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
                                         NeedsWinCFI, &HasWinCFI);
@@ -2472,6 +2598,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
+    assert(!IsSwiftCoroPartialReturn);
     switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
     case SwiftAsyncFramePointerMode::DeploymentBased:
       // Avoid the reload as it is GOT relative, and instead fall back to the
@@ -2505,6 +2632,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+    assert(!IsSwiftCoroPartialReturn);
 
     // When we are about to restore the CSRs, the CFA register is SP again.
     if (EmitCFI && hasFP(MF)) {
@@ -2551,7 +2679,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     // If we have stack realignment or variable sized objects on the stack,
     // restore the stack pointer from the frame pointer prior to SVE CSR
     // restoration.
-    if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
+    if (AFI->isStackRealigned() || MFI.hasVarSizedObjects() ||
+        MFI.hasPoplessCall()) {
       if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
         // Set SP to start of SVE callee-save area from which they can
         // be reloaded. The code below will deallocate the stack space
@@ -2589,6 +2718,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (!hasFP(MF)) {
+    assert(!IsSwiftCoroPartialReturn);
     bool RedZone = canUseRedZone(MF);
     // If this was a redzone leaf function, we don't need to restore the
     // stack pointer (but we may need to pop stack args for fastcc).
@@ -2619,11 +2749,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     NumBytes = 0;
   }
 
+  if (IsSwiftCoroPartialReturn)
+    return;
+
   // Restore the original stack pointer.
   // FIXME: Rather than doing the math here, we should instead just use
   // non-post-indexed loads for the restores if we aren't actually going to
   // be able to save any instructions.
-  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
+  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned() ||
+                     MFI.hasPoplessCall())) {
     emitFrameOffset(
         MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
         StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
@@ -2825,7 +2959,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
         // If the FPOffset is positive, that'll always be best, as the SP/BP
         // will be even further away.
         UseFP = true;
-      } else if (MFI.hasVarSizedObjects()) {
+      } else if (MFI.hasVarSizedObjects() || MFI.hasPoplessCall()) {
         // If we have variable sized objects, we can use either FP or BP, as the
         // SP offset is unknown. We can use the base pointer if we have one and
         // FP is not preferred. If not, we're stuck with using FP.
@@ -3495,9 +3629,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   DebugLoc DL;
   SmallVector<RegPairInfo, 8> RegPairs;
   bool NeedsWinCFI = needsWinCFI(MF);
+  bool IsSwiftCoroPartialReturn = false;
 
-  if (MBBI != MBB.end())
+  if (MBBI != MBB.end()) {
     DL = MBBI->getDebugLoc();
+    IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS;
+  }
+
+  // The partial-return intrinsic/instruction requires the swiftcoro calling
+  // convention.
+  if (IsSwiftCoroPartialReturn &&
+      MF.getFunction().getCallingConv() != CallingConv::SwiftCoro)
+    report_fatal_error("llvm.ret.popless requires swiftcorocc");
 
   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
   if (homogeneousPrologEpilog(MF, &MBB)) {
@@ -3510,6 +3652,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
     return true;
   }
 
+  // If doing a partial/popless return, CSR restores are FP-based, so the
+  // FP/LR pair must be restored last.
+  if (IsSwiftCoroPartialReturn) {
+    auto IsFPLR = [](const RegPairInfo &c) {
+      return c.Reg1 == AArch64::LR && c.Reg2 == AArch64::FP;
+    };
+    auto FPLRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsFPLR);
+    const RegPairInfo FPLRRPI = *FPLRBegin;
+    FPLRBegin = std::remove_if(RegPairs.begin(), RegPairs.end(), IsFPLR);
+    *FPLRBegin = FPLRRPI;
+  }
+
   // For performance reasons restore SVE register in increasing order
   auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
   auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
@@ -5082,6 +5235,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
 
   // Go to common code if we cannot provide sp + offset.
   if (MFI.hasVarSizedObjects() ||
+      MFI.hasPoplessCall() ||
       MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
       MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
     return getFrameIndexReference(MF, FI, FrameReg);
@@ -5189,6 +5343,10 @@ void AArch64FrameLowering::orderFrameObjects(
 
   const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (AFI.hasPoplessEpilogue())
+    return;
+
   std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
   for (auto &Obj : ObjectsToAllocate) {
     FrameObjects[Obj].IsValid = true;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc5471f038043..8bbf210195917 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2714,6 +2714,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::AUTH_CALL_RVMARKER)
     MAKE_CASE(AArch64ISD::LOADgot)
     MAKE_CASE(AArch64ISD::RET_GLUE)
+    MAKE_CASE(AArch64ISD::RET_POPLESS)
     MAKE_CASE(AArch64ISD::BRCOND)
     MAKE_CASE(AArch64ISD::CSEL)
     MAKE_CASE(AArch64ISD::CSINV)
@@ -7869,6 +7870,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::CXX_FAST_TLS:
   case CallingConv::Swift:
   case CallingConv::SwiftTail:
+  case CallingConv::SwiftCoro:
   case CallingConv::Tail:
   case CallingConv::GRAAL:
     if (Subtarget->isTargetWindows()) {
@@ -8391,7 +8393,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   if (CallConv == CallingConv::PreserveNone) {
     for (const ISD::InputArg &I : Ins) {
       if (I.Flags.isSwiftSelf() || I.Flags.isSwiftError() ||
-          I.Flags.isSwiftAsync()) {
+          I.Flags.isSwiftAsync() || I.Flags.isSwiftCoro()) {
         MachineFunction &MF = DAG.getMachineFunction();
         DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
             MF.getFunction(),
@@ -8855,6 +8857,18 @@ bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
          CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
 }
 
+SDValue AArch64TargetLowering::adjustReturnPopless(SDValue RetChain,
+                                                   SelectionDAG &DAG) const {
+  if (RetChain.getOpcode() != AArch64ISD::RET_GLUE)
+    report_fatal_error("Unsupported aarch64 return for popless ret lowering");
+
+  auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+  AFI->setHasPoplessEpilogue();
+
+  return DAG.getNode(AArch64ISD::RET_POPLESS, SDLoc(RetChain),
+                     MVT::Other, RetChain->ops());
+}
+
 // Check if the value is zero-extended from i1 to i8
 static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
   unsigned SizeInBits = Arg.getValueType().getSizeInBits();
@@ -9047,6 +9061,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
       CallConv = CallingConv::AArch64_SVE_VectorCall;
   }
 
+  if (CallConv == CallingConv::SwiftCoro)
+    MF.getFrameInfo().setHasPoplessCall();
+
   if (IsTailCall) {
     // Check if it's really possible to do a tail call.
     IsTailCall = isEligibleForTailCallOptimization(CLI);
@@ -9743,7 +9760,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (CallConv == CallingConv::PreserveNone) {
     for (const ISD::OutputArg &O : Outs) {
       if (O.Flags.isSwiftSelf() || O.Flags.isSwiftError() ||
-          O.Flags.isSwiftAsync()) {
+          O.Flags.isSwiftAsync() || O.Flags.isSwiftCoro()) {
         MachineFunction &MF = DAG.getMachineFunction();
         DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
             MF.getFunction(),
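
The ISel changes above tie together as follows: a call using the SwiftCoro
convention marks the caller's MachineFrameInfo via setHasPoplessCall(), the
swiftcoro argument flag is rejected under preserve_nonecc like the other Swift
ABI flags, and adjustReturnPopless rewrites RET_GLUE into RET_POPLESS. A minimal
sketch of IR that exercises the call path (names illustrative; the x23
assignment is what the swiftcoro.ll test added below checks for):

declare swiftcorocc ptr @swiftcoro_callee(ptr swiftcoro %ctx)

define ptr @swiftcoro_caller(ptr %ctx) {
  ; The swiftcoro argument is passed in a fixed register (x23 on AArch64), and
  ; lowering this swiftcorocc call sets hasPoplessCall() on the caller's frame.
  %r = call swiftcorocc ptr @swiftcoro_callee(ptr swiftcoro %ctx)
  ret ptr %r
}
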
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 0d51ef2be8631..89a2f4aa05fee 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -90,6 +90,7 @@ enum NodeType : unsigned {
   LOADgot,  // Load from automatically generated descriptor (e.g. Global
             // Offset Table, TLS record).
   RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
+  RET_POPLESS, // Like RET_GLUE, but "popless": it does not deallocate the stack.
   BRCOND,   // Conditional branch instruction; "b.cond".
   CSEL,
   CSINV, // Conditional select invert.
@@ -1104,6 +1105,9 @@ class AArch64TargetLowering : public TargetLowering {
   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                            SDValue &Chain) const;
 
+  SDValue adjustReturnPopless(SDValue RetChain,
+                              SelectionDAG &DAG) const override;
+
   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                       bool isVarArg,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fbeacaa6a498c..a1634ac33597f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -779,6 +779,8 @@ def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
 def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
 def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def AArch64ret_popless   : SDNode<"AArch64ISD::RET_POPLESS", SDTNone,
+                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn >;
 def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
 def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
@@ -3312,6 +3314,13 @@ def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
   let isReturn = 1;
 }
 
+def RET_POPLESS : Pseudo<(outs), (ins), [(AArch64ret_popless)]>,
+                   Sched<[WriteBrReg]> {
+  let isTerminator = 1;
+  let isBarrier = 1;
+  let isReturn = 1;
+}
+
 // This is a directive-like pseudo-instruction. The purpose is to insert an
 // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
 // (which in the usual case is a BLR).
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index d3026ca45c349..08985c038c29c 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -205,6 +205,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// The stack slot where the Swift asynchronous context is stored.
   int SwiftAsyncContextFrameIdx = std::numeric_limits<int>::max();
 
+  /// Whether this function has a Swift coroutine ("popless") return that
+  /// doesn't restore the stack pointer.
+  bool HasPoplessEpilogue = false;
+
   bool IsMTETagged = false;
 
   /// The function has Scalable Vector or Scalable Predicate register argument
@@ -549,6 +553,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   }
   int getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; }
 
+  bool hasPoplessEpilogue() const {
+    return HasPoplessEpilogue;
+  }
+  void setHasPoplessEpilogue(bool PE = true) {
+    HasPoplessEpilogue = PE;
+  }
+
   bool needsDwarfUnwindInfo(const MachineFunction &MF) const;
   bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const;
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index e4719b26cab52..ccae95c67fda5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -483,6 +483,21 @@ bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
   return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
 }
 
+bool AArch64CallLowering::adjustReturnToPopless(
+    MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+
+  auto MBBI = std::prev(MIRBuilder.getMBB().end());
+  assert(MBBI->getOpcode() == AArch64::RET_ReallyLR);
+
+  auto *TII = MF.getSubtarget().getInstrInfo();
+  MBBI->setDesc(TII->get(AArch64::RET_POPLESS));
+
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  FuncInfo->setHasPoplessEpilogue();
+  return true;
+}
+
 /// Helper function to compute forwarded registers for musttail calls. Computes
 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
 /// can be used to save + restore registers later.
@@ -1455,6 +1470,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       return false;
   }
 
+  if (Info.CallConv == CallingConv::SwiftCoro)
+    MF.getFrameInfo().setHasPoplessCall();
+
   if (Info.SwiftErrorVReg) {
     MIB.addDef(AArch64::X21, RegState::Implicit);
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
index 9ae175274d5d9..34f233480b77d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -39,6 +39,8 @@ class AArch64CallLowering: public CallLowering {
                       SmallVectorImpl<BaseArgInfo> &Outs,
                       bool IsVarArg) const override;
 
+  bool adjustReturnToPopless(MachineIRBuilder &MIRBuilder) const override;
+
   bool fallBackToDAGISel(const MachineFunction &MF) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 794db887bd073..fe56c5c7a2a5f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1284,6 +1284,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
     SDValue &OutVal = OutVals[I];
     HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
     HasSwiftErrorArg |= Out.Flags.isSwiftError();
+    if (Out.Flags.isSwiftCoro())
+      fail(DL, DAG, "WebAssembly hasn't implemented swiftcoro arguments");
     if (Out.Flags.isNest())
       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
     if (Out.Flags.isInAlloca())
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index a0a26827aa09d..25eadd04e52ef 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -69,6 +69,7 @@ bool Lowerer::lower(Function &F) {
       case Intrinsic::coro_id:
       case Intrinsic::coro_id_retcon:
       case Intrinsic::coro_id_retcon_once:
+      case Intrinsic::coro_id_retcon_once_dynamic:
       case Intrinsic::coro_id_async:
         II->replaceAllUsesWith(ConstantTokenNone::get(Context));
         break;
diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index d1887980fb3bc..e2db4fdbe38f6 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -67,6 +67,7 @@ class BaseCloner {
         Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
         ActiveSuspend(ActiveSuspend) {
     assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
+           Shape.ABI == coro::ABI::RetconOnceDynamic ||
            Shape.ABI == ABI::Async);
     assert(NewF && "need existing function for continuation");
     assert(ActiveSuspend && "need active suspend point for continuation");
@@ -86,6 +87,7 @@ class BaseCloner {
                                AnyCoroSuspendInst *ActiveSuspend,
                                TargetTransformInfo &TTI) {
     assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
+           Shape.ABI == coro::ABI::RetconOnceDynamic ||
            Shape.ABI == ABI::Async);
     TimeTraceScope FunctionScope("BaseCloner");
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 5375448d2d2e2..3093323da67b8 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -209,6 +209,7 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) {
         break;
       case Intrinsic::coro_id_retcon:
       case Intrinsic::coro_id_retcon_once:
+      case Intrinsic::coro_id_retcon_once_dynamic:
       case Intrinsic::coro_id_async:
         F.setPresplitCoroutine();
         break;
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index d08170438bd2a..d65edb132165d 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -874,6 +874,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
   std::optional<Align> MaxFrameAlignment;
   if (Shape.ABI == coro::ABI::Async)
     MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
+  if (Shape.ABI == coro::ABI::RetconOnceDynamic)
+    MaxFrameAlignment = Shape.RetconLowering.getStorageAlignment();
   FrameTypeBuilder B(C, DL, MaxFrameAlignment);
 
   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
@@ -962,6 +964,19 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
          B.getStructAlign() <= Id->getStorageAlignment());
     break;
   }
+  case coro::ABI::RetconOnceDynamic: {
+    // In the dynamic retcon.once ABI, the frame is always inline in the
+    // storage.
+    Shape.RetconLowering.IsFrameInlineInStorage = true;
+    Shape.RetconLowering.ContextSize =
+        alignTo(Shape.FrameSize, Shape.RetconLowering.StorageAlignment);
+    if (Shape.RetconLowering.StorageAlignment < Shape.FrameAlign) {
+      report_fatal_error(
+          "The alignment requirment of frame variables cannot be higher than "
+          "the alignment of the coro function context");
+    }
+    break;
+  }
   case coro::ABI::Async: {
     Shape.AsyncLowering.FrameOffset =
         alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
@@ -1188,7 +1203,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
 
   // retcon and retcon.once lowering assumes all uses have been sunk.
   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
-      Shape.ABI == coro::ABI::Async) {
+      Shape.ABI == coro::ABI::Async ||
+      Shape.ABI == coro::ABI::RetconOnceDynamic) {
     // If we found any allocas, replace all of their remaining uses with Geps.
     Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
     for (const auto &P : FrameData.Allocas) {
@@ -2078,7 +2094,8 @@ void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
 
   const DominatorTree DT(F);
   if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
-      Shape.ABI != coro::ABI::RetconOnce)
+      Shape.ABI != coro::ABI::RetconOnce &&
+      Shape.ABI != coro::ABI::RetconOnceDynamic)
     sinkLifetimeStartMarkers(F, Shape, Checker, DT);
 
   // All values (that are not allocas) that needs to be spilled to the frame.
@@ -2098,7 +2115,8 @@ void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
   LLVM_DEBUG(dumpSpills("Spills", Spills));
 
   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
-      Shape.ABI == coro::ABI::Async)
+      Shape.ABI == coro::ABI::Async ||
+      Shape.ABI == coro::ABI::RetconOnceDynamic)
     sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas);
 
   // Build frame
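
In the RetconOnceDynamic case added above, the frame is always placed inline in
the caller-provided storage, and the recorded context size is the frame size
rounded up to the storage alignment. For the %func.Frame = type { ptr } frame in
the split test below (8 bytes) with a storage alignment of 16, that is
alignTo(8, 16) = 16, the value the rewritten @func_cfp constant is later checked
against. A frame whose alignment exceeds the storage alignment is rejected with
the fatal error above.
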
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index f9a6c70fedc2d..a1055d3d829ce 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -157,7 +157,8 @@ static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
                                    const coro::Shape &Shape, Value *FramePtr,
                                    CallGraph *CG) {
-  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
+  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+         Shape.ABI == coro::ABI::RetconOnceDynamic);
   if (Shape.RetconLowering.IsFrameInlineInStorage)
     return;
 
@@ -239,7 +240,8 @@ static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
 
   // In unique continuation lowering, the continuations always return void.
   // But we may have implicitly allocated storage.
-  case coro::ABI::RetconOnce: {
+  case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic: {
     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
     auto *CoroEnd = cast<CoroEndInst>(End);
     auto *RetTy = Shape.getResumeFunctionType()->getReturnType();
@@ -368,6 +370,7 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
   // In continuation-lowering, this frees the continuation storage.
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic:
     maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
     break;
   }
@@ -469,6 +472,7 @@ static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
 /// This assumes that the builder has a meaningful insertion point.
 void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() {
   assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+         Shape.ABI == coro::ABI::RetconOnceDynamic ||
          Shape.ABI == coro::ABI::Async);
 
   auto NewS = VMap[ActiveSuspend];
@@ -538,6 +542,7 @@ void coro::BaseCloner::replaceCoroSuspends() {
   // spilled.
   case coro::ABI::RetconOnce:
   case coro::ABI::Retcon:
+  case coro::ABI::RetconOnceDynamic:
     return;
   }
 
@@ -707,14 +712,16 @@ void coro::BaseCloner::replaceEntryBlock() {
   }
   case coro::ABI::Async:
   case coro::ABI::Retcon:
-  case coro::ABI::RetconOnce: {
+  case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic: {
     // In continuation ABIs, we want to branch to immediately after the
     // active suspend point.  Earlier phases will have put the suspend in its
     // own basic block, so just thread our jump directly to its successor.
     assert((Shape.ABI == coro::ABI::Async &&
             isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
            ((Shape.ABI == coro::ABI::Retcon ||
-             Shape.ABI == coro::ABI::RetconOnce) &&
+             Shape.ABI == coro::ABI::RetconOnce ||
+             Shape.ABI == coro::ABI::RetconOnceDynamic) &&
             isa<CoroSuspendRetconInst>(ActiveSuspend)));
     auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
     auto Branch = cast<BranchInst>(MappedCS->getNextNode());
@@ -779,7 +786,8 @@ Value *coro::BaseCloner::deriveNewFramePointer() {
   }
   // In continuation-lowering, the argument is the opaque storage.
   case coro::ABI::Retcon:
-  case coro::ABI::RetconOnce: {
+  case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic: {
     Argument *NewStorage = &*NewF->arg_begin();
     auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext());
 
@@ -873,6 +881,18 @@ static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
   Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
 }
 
+static void eraseIntrinsicRetPoplessBefore(ReturnInst *Return) {
+  if (Return == &*Return->getParent()->begin())
+    return;
+  auto *Prev = &*std::prev(Return->getIterator());
+  auto *Intr = dyn_cast<IntrinsicInst>(Prev);
+  if (!Intr)
+    return;
+  if (Intr->getIntrinsicID() != Intrinsic::ret_popless)
+    return;
+  Intr->eraseFromParent();
+}
+
 /// Clone the body of the original function into a resume function of
 /// some sort.
 void coro::BaseCloner::create() {
@@ -997,6 +1017,11 @@ void coro::BaseCloner::create() {
                          /*NoAlias=*/true);
 
     break;
+  case coro::ABI::RetconOnceDynamic:
+    // If we have a continuation prototype, just use its attributes,
+    // full-stop.
+    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
+    break;
   }
 
   switch (Shape.ABI) {
@@ -1006,9 +1031,12 @@ void coro::BaseCloner::create() {
   // this is fine because we can't suspend twice.
   case coro::ABI::Switch:
   case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic:
     // Remove old returns.
-    for (ReturnInst *Return : Returns)
+    for (ReturnInst *Return : Returns) {
+      eraseIntrinsicRetPoplessBefore(Return);
       changeToUnreachable(Return);
+    }
     break;
 
   // With multi-suspend continuations, we'll already have eliminated the
@@ -1063,6 +1091,13 @@ void coro::BaseCloner::create() {
   if (OldVFrame != NewVFrame)
     OldVFrame->replaceAllUsesWith(NewVFrame);
 
+  // Remap allocator pointer.
+  if (Shape.ABI == coro::ABI::RetconOnceDynamic) {
+    Value *OldAllocatorPointer = VMap[Shape.RetconLowering.Allocator];
+    Argument *NewAllocatorPointer = &*NewF->getArg(1);
+    OldAllocatorPointer->replaceAllUsesWith(NewAllocatorPointer);
+  }
+
   // All uses of the arguments should have been resolved by this point,
   // so we can safely remove the dummy values.
   for (Instruction *DummyArg : DummyArgs) {
@@ -1081,6 +1116,7 @@ void coro::BaseCloner::create() {
   case coro::ABI::Async:
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic:
     // Replace uses of the active suspend with the corresponding
     // continuation-function arguments.
     assert(ActiveSuspend != nullptr &&
@@ -1139,9 +1175,26 @@ static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
   return DL.getTypeAllocSize(Shape.FrameTy);
 }
 
+static void updateCoroFuncPointerContextSize(coro::Shape &Shape) {
+  assert(Shape.ABI == coro::ABI::RetconOnceDynamic);
+
+  auto *FuncPtrStruct = cast<ConstantStruct>(
+      Shape.RetconLowering.CoroFuncPointer->getInitializer());
+  auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
+  auto *OrigContextSize = FuncPtrStruct->getOperand(1);
+  auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
+                                          Shape.RetconLowering.ContextSize);
+  auto *NewFuncPtrStruct = ConstantStruct::get(
+      FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);
+
+  Shape.RetconLowering.CoroFuncPointer->setInitializer(NewFuncPtrStruct);
+}
+
 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
   if (Shape.ABI == coro::ABI::Async)
     updateAsyncFuncPointerContextSize(Shape);
+  if (Shape.ABI == coro::ABI::RetconOnceDynamic)
+    updateCoroFuncPointerContextSize(Shape);
 
   for (CoroAlignInst *CA : Shape.CoroAligns) {
     CA->replaceAllUsesWith(
@@ -1200,6 +1253,7 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
   case coro::ABI::Async:
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic:
     CoroBegin->replaceAllUsesWith(PoisonValue::get(CoroBegin->getType()));
     break;
   }
@@ -1795,7 +1849,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
 void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                         SmallVectorImpl<Function *> &Clones,
                                         TargetTransformInfo &TTI) {
-  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
+  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+         Shape.ABI == coro::ABI::RetconOnceDynamic);
   assert(Clones.empty());
 
   // Reset various things that the optimizer might have decided it
@@ -1805,10 +1860,10 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
   F.removeRetAttr(Attribute::NonNull);
 
   // Allocate the frame.
-  auto *Id = Shape.getRetconCoroId();
+  auto *Id = Shape.CoroBegin->getId();
   Value *RawFramePtr;
   if (Shape.RetconLowering.IsFrameInlineInStorage) {
-    RawFramePtr = Id->getStorage();
+    RawFramePtr = Shape.RetconLowering.Storage;
   } else {
     IRBuilder<> Builder(Id);
 
@@ -1824,7 +1879,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
         Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
 
     // Stash the allocated frame pointer in the continuation storage.
-    Builder.CreateStore(RawFramePtr, Id->getStorage());
+    Builder.CreateStore(RawFramePtr, Shape.RetconLowering.Storage);
   }
 
   // Map all uses of llvm.coro.begin to the allocated frame pointer.
@@ -1901,7 +1956,37 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
           RetV = Builder.CreateInsertValue(RetV, Phi, ValueIdx++);
       }
 
-      Builder.CreateRet(RetV);
+      if (Shape.ABI == coro::ABI::RetconOnceDynamic &&
+          F.getCallingConv() == CallingConv::SwiftCoro) {
+        //  %retval = ...
+        //  %null_allocator = icmp eq ptr %allocator, null
+        //  br i1 %null_allocator, label %coro.return.popless,
+        //      label %coro.return.normal
+        // coro.return.popless:
+        //  musttail call void @llvm.ret.popless()
+        //  ret %retval
+        // coro.return.normal:
+        //  ret %retval
+        auto *NullAllocator = Builder.CreateCmp(
+            CmpInst::Predicate::ICMP_EQ, Shape.RetconLowering.Allocator,
+            ConstantPointerNull::get(
+                cast<PointerType>(Shape.RetconLowering.Allocator->getType())));
+        auto *PoplessReturnBB = BasicBlock::Create(
+            F.getContext(), "coro.return.popless", &F, NewSuspendBB);
+        auto *NormalReturnBB = BasicBlock::Create(
+            F.getContext(), "coro.return.normal", &F, NewSuspendBB);
+        Builder.CreateCondBr(NullAllocator, PoplessReturnBB, NormalReturnBB);
+        IRBuilder<> PoplessBuilder(PoplessReturnBB);
+        auto &Context = F.getContext();
+        auto *VoidTy = Type::getVoidTy(Context);
+        auto *RetPopless =
+            PoplessBuilder.CreateIntrinsic(VoidTy, Intrinsic::ret_popless, {});
+        RetPopless->setTailCallKind(CallInst::TailCallKind::TCK_MustTail);
+        PoplessBuilder.CreateRet(RetV);
+        IRBuilder<> NormalBuilder(NormalReturnBB);
+        NormalBuilder.CreateRet(RetV);
+      } else {
+        Builder.CreateRet(RetV);
+      }
     }
 
     // Branch to the return block.
@@ -2039,6 +2124,7 @@ static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit(
     case coro::ABI::Async:
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
+    case coro::ABI::RetconOnceDynamic:
       // Each clone in the Async/Retcon lowering references of the other clones.
       // Let the LazyCallGraph know about all of them at once.
       if (!Clones.empty())
@@ -2137,6 +2223,8 @@ CreateNewABI(Function &F, coro::Shape &S,
     return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
   case coro::ABI::RetconOnce:
     return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
+  case coro::ABI::RetconOnceDynamic:
+    return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
   }
   llvm_unreachable("Unknown ABI");
 }
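
After splitting under the dynamic once ABI, each continuation clone receives the
storage (and inline frame) pointer as its first argument and the allocator as
its second; uses of the ramp's allocator operand in the cloned body are remapped
to that second argument, matching the ResumePrototype. A hedged sketch of what a
resume clone ends up looking like (frame type, names, and linkage are
illustrative only):

%example.Frame = type { ptr }

define void @example.resume.0(ptr noalias %frame, ptr %allocator) {
  ; Spilled values are reloaded from the inline frame; any use of the original
  ; allocator now refers to the clone's second argument.
  %spill.addr = getelementptr inbounds %example.Frame, ptr %frame, i32 0, i32 0
  %spill = load ptr, ptr %spill.addr
  store i32 0, ptr %spill
  ret void
}
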
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 71f2bdd50f210..5092c77d3320a 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -325,6 +325,23 @@ void coro::Shape::analyze(Function &F,
     AsyncLowering.AsyncCC = F.getCallingConv();
     break;
   }
+  case Intrinsic::coro_id_retcon_once_dynamic: {
+    auto ContinuationId = cast<CoroIdRetconOnceDynamicInst>(Id);
+    ABI = coro::ABI::RetconOnceDynamic;
+    auto Prototype = ContinuationId->getPrototype();
+    RetconLowering.ResumePrototype = Prototype;
+    RetconLowering.Alloc = ContinuationId->getAllocFunction();
+    RetconLowering.Dealloc = ContinuationId->getDeallocFunction();
+    RetconLowering.Storage = ContinuationId->getStorage();
+    RetconLowering.Allocator = ContinuationId->getAllocator();
+    RetconLowering.ReturnBlock = nullptr;
+    RetconLowering.IsFrameInlineInStorage = false;
+    RetconLowering.ContextSize = 0;
+    RetconLowering.StorageSize = ContinuationId->getStorageSize();
+    RetconLowering.StorageAlignment = ContinuationId->getStorageAlignment();
+    RetconLowering.CoroFuncPointer = ContinuationId->getCoroFunctionPointer();
+    break;
+  }
   case Intrinsic::coro_id_retcon:
   case Intrinsic::coro_id_retcon_once: {
     ABI = IntrID == Intrinsic::coro_id_retcon ? coro::ABI::Retcon
@@ -335,6 +352,7 @@ void coro::Shape::analyze(Function &F,
     RetconLowering.ResumePrototype = Prototype;
     RetconLowering.Alloc = ContinuationId->getAllocFunction();
     RetconLowering.Dealloc = ContinuationId->getDeallocFunction();
+    RetconLowering.Storage = ContinuationId->getStorage();
     RetconLowering.ReturnBlock = nullptr;
     RetconLowering.IsFrameInlineInStorage = false;
     RetconLowering.TypeId = ContinuationId->getTypeId();
@@ -396,7 +414,8 @@ void coro::SwitchABI::init() {
 void coro::AsyncABI::init() { assert(Shape.ABI == coro::ABI::Async); }
 
 void coro::AnyRetconABI::init() {
-  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
+  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+         Shape.ABI == coro::ABI::RetconOnceDynamic);
   {
     // Determine the result value types, and make sure they match up with
     // the values passed to the suspends.
@@ -509,17 +528,25 @@ Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size,
     llvm_unreachable("can't allocate memory in coro switch-lowering");
 
   case coro::ABI::Retcon:
-  case coro::ABI::RetconOnce: {
+  case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic: {
+    unsigned SizeParamIndex = 0;
+    SmallVector<Value *, 2> Args;
+    if (ABI == coro::ABI::RetconOnceDynamic) {
+      SizeParamIndex = 1;
+      Args.push_back(RetconLowering.Allocator);
+    }
     auto Alloc = RetconLowering.Alloc;
-    Size = Builder.CreateIntCast(Size,
-                                 Alloc->getFunctionType()->getParamType(0),
-                                 /*is signed*/ false);
-    ConstantInt* TypeId = RetconLowering.TypeId;
-    CallInst *Call;
-    if (TypeId == nullptr)
-      Call = Builder.CreateCall(Alloc, Size);
-    else
-      Call = Builder.CreateCall(Alloc, {Size, TypeId});
+    Size = Builder.CreateIntCast(
+        Size, Alloc->getFunctionType()->getParamType(SizeParamIndex),
+        /*is signed*/ false);
+    Args.push_back(Size);
+    if (ABI == coro::ABI::RetconOnce) {
+      ConstantInt *TypeId = RetconLowering.TypeId;
+      if (TypeId != nullptr)
+        Args.push_back(TypeId);
+    }
+    auto *Call = Builder.CreateCall(Alloc, Args);
     propagateCallAttrsFromCallee(Call, Alloc);
     addCallToCallGraph(CG, Call, Alloc);
     return Call;
@@ -537,11 +564,19 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr,
     llvm_unreachable("can't allocate memory in coro switch-lowering");
 
   case coro::ABI::Retcon:
-  case coro::ABI::RetconOnce: {
+  case coro::ABI::RetconOnce:
+  case coro::ABI::RetconOnceDynamic: {
     auto Dealloc = RetconLowering.Dealloc;
-    Ptr = Builder.CreateBitCast(Ptr,
-                                Dealloc->getFunctionType()->getParamType(0));
-    auto *Call = Builder.CreateCall(Dealloc, Ptr);
+    SmallVector<Value *, 2> Args;
+    unsigned PtrParamIndex = 0;
+    if (ABI == coro::ABI::RetconOnceDynamic) {
+      PtrParamIndex = 1;
+      Args.push_back(RetconLowering.Allocator);
+    }
+    Ptr = Builder.CreateBitCast(
+        Ptr, Dealloc->getFunctionType()->getParamType(PtrParamIndex));
+    Args.push_back(Ptr);
+    auto *Call = Builder.CreateCall(Dealloc, Args);
     propagateCallAttrsFromCallee(Call, Dealloc);
     addCallToCallGraph(CG, Call, Dealloc);
     return;
@@ -567,7 +602,7 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr,
 
 /// Check that the given value is a well-formed prototype for the
 /// llvm.coro.id.retcon.* intrinsics.
-static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) {
+static void checkWFRetconPrototype(const AnyCoroIdInst *I, Value *V) {
   auto F = dyn_cast<Function>(V->stripPointerCasts());
   if (!F)
     fail(I, "llvm.coro.id.retcon.* prototype not a Function", V);
@@ -594,7 +629,7 @@ static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) {
       fail(I, "llvm.coro.id.retcon prototype return type must be same as"
               "current function return type", F);
   } else {
-    // No meaningful validation to do here for llvm.coro.id.unique.once.
+    // No meaningful validation to do here for llvm.coro.id.retcon.once.
   }
 
   if (FT->getNumParams() == 0 || !FT->getParamType(0)->isPointerTy())
@@ -654,6 +689,29 @@ void AnyCoroIdRetconInst::checkWellFormed() const {
   checkWFDealloc(this, getArgOperand(DeallocArg));
 }
 
+static void checkCoroFuncPointer(const Instruction *I, Value *V) {
+  auto *CoroFuncPtrAddr = dyn_cast<GlobalVariable>(V->stripPointerCasts());
+  if (!CoroFuncPtrAddr)
+    fail(I, "coro.id.retcon.once.dynamic coro function pointer not a global",
+         V);
+}
+
+void CoroIdRetconOnceDynamicInst::checkWellFormed() const {
+  checkConstantInt(
+      this, getArgOperand(SizeArg),
+      "size argument to coro.id.retcon.once.dynamic must be constant");
+  checkConstantInt(
+      this, getArgOperand(AlignArg),
+      "alignment argument to coro.id.retcon.once.dynamic must be constant");
+  checkConstantInt(this, getArgOperand(StorageArg),
+                   "storage argument offset to coro.id.retcon.once.dynamic "
+                   "must be constant");
+  checkCoroFuncPointer(this, getArgOperand(CoroFuncPtrArg));
+  checkWFRetconPrototype(this, getArgOperand(PrototypeArg));
+  checkWFAlloc(this, getArgOperand(AllocArg));
+  checkWFDealloc(this, getArgOperand(DeallocArg));
+}
+
 static void checkAsyncFuncPointer(const Instruction *I, Value *V) {
   auto *AsyncFuncPtrAddr = dyn_cast<GlobalVariable>(V->stripPointerCasts());
   if (!AsyncFuncPtrAddr)
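
The analysis above records, in addition to the retcon.once operands, the
caller-provided storage, the allocator, and the coro function pointer global. A
condensed sketch of the intrinsic as used by the tests added in this patch (the
operand-to-accessor mapping itself lives in CoroInstr.h, which is not part of
this hunk; @example_cfp is a simplified stand-in for the usual relative-pointer
descriptor):

@example_cfp = constant <{ i32, i32 }> <{ i32 0, i32 64 }>

declare void @continuation_prototype(ptr, ptr)
declare swiftcorocc noalias ptr @allocate(i32 %size)
declare void @deallocate(ptr %ptr)

define swiftcorocc { ptr, ptr } @example(ptr noalias %buffer, ptr %allocator, ptr %data) {
entry:
  %id = call token @llvm.coro.id.retcon.once.dynamic(
      i32 -1, i32 16,              ; constant size/alignment operands
      ptr @example_cfp,            ; coro function pointer global
      ptr %allocator,              ; allocator
      ptr %buffer,                 ; storage
      ptr @continuation_prototype, ; resume prototype
      ptr @allocate, ptr @deallocate)
  %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
  call ptr (...) @llvm.coro.suspend.retcon.p0(ptr %data)
  call i1 @llvm.coro.end(ptr %hdl, i1 false, token none)
  unreachable
}
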
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
index b3e5b7fa6e0b5..f27640141379b 100644
--- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
+++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
@@ -435,7 +435,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape,
   // code.
   bool ShouldUseLifetimeStartInfo =
       (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
-       Shape.ABI != coro::ABI::RetconOnce);
+       Shape.ABI != coro::ABI::RetconOnce &&
+       Shape.ABI != coro::ABI::RetconOnceDynamic);
   AllocaUseVisitor Visitor{AI->getDataLayout(), DT, Shape, Checker,
                            ShouldUseLifetimeStartInfo};
   Visitor.visitPtr(*AI);
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 18af0972bc36d..8be1dbe49696f 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1011,6 +1011,7 @@ Function *CodeExtractor::constructFunctionDeclaration(
       case Attribute::SwiftError:
       case Attribute::SwiftSelf:
       case Attribute::SwiftAsync:
+      case Attribute::SwiftCoro:
       case Attribute::ZExt:
       case Attribute::ImmArg:
       case Attribute::ByRef:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7f53aa7d4f73d..29ffae9b53c51 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1505,6 +1505,12 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
     if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
       return false;
 
+  // Similarly for llvm.ret.popless (and likely generalizable to all musttail
+  // intrinsics).
+  if (auto *CB = dyn_cast<CallBase>(I))
+    if (CB->getIntrinsicID() == Intrinsic::ret_popless)
+      return false;
+
   // It's also unsafe/illegal to hoist an instruction above its instruction
   // operands
   BasicBlock *BB = I->getParent();
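
llvm.ret.popless is only meaningful as a musttail call immediately preceding a
ret (the verifier tests below enforce this), so hoisting it out of its block
would detach it from the return it annotates. A minimal shape SimplifyCFG must
preserve, mirroring the swiftcorocc-ret-popless.ll test:

define swiftcorocc i64 @no_hoist(i64 %v) {
entry:
  %c = icmp eq i64 %v, 0
  br i1 %c, label %then, label %else

then:   ; normal return: the epilogue pops the stack
  ret i64 %v

else:   ; popless return: SP is not reset, the callee's stack stays allocated
  musttail call void @llvm.ret.popless()
  ret i64 %v
}
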
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 7dd86a8c0eb16..affcaef4593cd 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -292,6 +292,12 @@ define void @f50(ptr swiftself %0)
   ret void;
 }
 
+; CHECK: define void @swiftcoro(ptr swiftcoro %0)
+define void @swiftcoro(ptr swiftcoro %0)
+{
+  ret void;
+}
+
 ; CHECK: define i32 @f51(ptr swifterror %0)
 define i32 @f51(ptr swifterror %0)
 {
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index d9e594abcd50c..afe247aae1549 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -564,6 +564,10 @@ declare riscv_vls_cc(32768) void @riscv_vls_cc_32768()
 ; CHECK: declare riscv_vls_cc(32768) void @riscv_vls_cc_32768()
 declare riscv_vls_cc(65536) void @riscv_vls_cc_65536()
 ; CHECK: declare riscv_vls_cc(65536) void @riscv_vls_cc_65536()
+declare cc124 void @f.cc124()
+; CHECK: declare swiftcorocc void @f.cc124()
+declare swiftcorocc void @f.swiftcorocc()
+; CHECK: declare swiftcorocc void @f.swiftcorocc()
 declare cc1023 void @f.cc1023()
 ; CHECK: declare cc1023 void @f.cc1023()
 
@@ -626,6 +630,8 @@ declare void @f.param.swiftasync(ptr swiftasync)
 ; CHECK: declare void @f.param.swiftasync(ptr swiftasync)
 declare void @f.param.swifterror(ptr swifterror)
 ; CHECK: declare void @f.param.swifterror(ptr swifterror)
+declare void @f.param.swiftcoro(ptr swiftcoro)
+; CHECK: declare void @f.param.swiftcoro(ptr swiftcoro)
 declare void @f.param.allocalign(i32 allocalign)
 ; CHECK: declare void @f.param.allocalign(i32 allocalign)
 declare void @f.param.allocptr(ptr allocptr)
diff --git a/llvm/test/CodeGen/AArch64/swiftcoro.ll b/llvm/test/CodeGen/AArch64/swiftcoro.ll
new file mode 100644
index 0000000000000..d03201d8bd013
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/swiftcoro.ll
@@ -0,0 +1,70 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s
+; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTARM64_32 %s
+
+; Parameter with swiftcoro should be allocated to x23.
+; CHECK-LABEL: swiftcoro_param:
+; CHECK: mov x0, x23
+; CHECK-NEXT: ret
+define ptr @swiftcoro_param(ptr swiftcoro %addr0) {
+  ret ptr %addr0
+}
+
+; Check that x23 is used to pass a swiftcoro argument.
+; CHECK-LABEL: call_swiftcoro:
+; CHECK: mov x23, x0
+; CHECK: bl {{_?}}swiftcoro_param
+; CHECK: ret
+define ptr @call_swiftcoro(ptr %arg) {
+  %res = call ptr @swiftcoro_param(ptr swiftcoro %arg)
+  ret ptr %res
+}
+
+; x23 should be saved by the callee even if used for swiftcoro
+; CHECK-LABEL: swiftcoro_clobber:
+; CHECK: {{stp|str}} {{.*}}x23{{.*}}sp
+; ...
+; CHECK: {{ldp|ldr}} {{.*}}x23{{.*}}sp
+; CHECK: ret
+define ptr @swiftcoro_clobber(ptr swiftcoro %addr0) {
+  call void asm sideeffect "", "~{x23}"()
+  ret ptr %addr0
+}
+
+; Demonstrate that we do not need any movs when calling multiple functions
+; with a swiftcoro argument.
+; CHECK-LABEL: swiftcoro_passthrough:
+; OPT-NOT: mov{{.*}}x23
+; OPT: bl {{_?}}swiftcoro_param
+; OPT-NOT: mov{{.*}}x23
+; OPT-NEXT: bl {{_?}}swiftcoro_param
+; OPT: ret
+define void @swiftcoro_passthrough(ptr swiftcoro %addr0) {
+  call ptr @swiftcoro_param(ptr swiftcoro %addr0)
+  call ptr @swiftcoro_param(ptr swiftcoro %addr0)
+  ret void
+}
+
+; We can use a tail call if the callee's swiftcoro argument is the same as the caller's.
+; This should also work with fast-isel.
+; CHECK-LABEL: swiftcoro_tail:
+; OPTAARCH64: b {{_?}}swiftcoro_param
+; OPTAARCH64-NOT: ret
+; OPTARM64_32: b {{_?}}swiftcoro_param
+define ptr @swiftcoro_tail(ptr swiftcoro %addr0) {
+  call void asm sideeffect "", "~{x23}"()
+  %res = musttail call ptr @swiftcoro_param(ptr swiftcoro %addr0)
+  ret ptr %res
+}
+
+; We cannot use a tail call if the callee's swiftcoro argument is not the same
+; as the caller's.
+; CHECK-LABEL: swiftcoro_notail:
+; CHECK: mov x23, x0
+; CHECK: bl {{_?}}swiftcoro_param
+; CHECK: ret
+define ptr @swiftcoro_notail(ptr swiftcoro %addr0, ptr %addr1) nounwind {
+  %res = tail call ptr @swiftcoro_param(ptr swiftcoro %addr1)
+  ret ptr %res
+}
diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll
new file mode 100644
index 0000000000000..117cbfd04a5df
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/swiftcorocc-call.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \
+; RUN:   | FileCheck %s
+
+; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \
+; RUN:   -global-isel -global-isel-abort=2 | FileCheck %s
+
+declare i64 @g(ptr, ptr)
+
+define i64 @test_call_to_swiftcoro() #0 {
+; CHECK-LABEL: test_call_to_swiftcoro:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    stp x26, x25, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _test_call
+; CHECK-NEXT:    sub x0, x29, #24
+; CHECK-NEXT:    sub x1, x29, #32
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    sub sp, x29, #16
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp], #32 ; 16-byte Folded Reload
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call swiftcorocc i64 @test_call()
+  %v4 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v4
+}
+
+define i64 @test_call_to_normal() #0 {
+; CHECK-LABEL: test_call_to_normal:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _test_call_normal
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    mov x1, sp
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @test_call_normal()
+  %v4 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v4
+}
+
+define swiftcorocc i64 @test_call() #0 {
+; CHECK-LABEL: test_call:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    mov x1, sp
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v3
+}
+
+define i64 @test_call_normal() #0 {
+; CHECK-LABEL: test_call_normal:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    mov x1, sp
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v3
+}
+
+attributes #0 = { "ptrauth-returns" }
diff --git a/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll
new file mode 100644
index 0000000000000..3c6fb76d905e4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/swiftcorocc-ret-popless.ll
@@ -0,0 +1,154 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -o - %s | FileCheck %s --check-prefixes=CHECK,SDISEL
+; RUN: llc -verify-machineinstrs -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL
+
+declare i64 @g(ptr, ptr)
+
+define swiftcorocc i64 @test_intrin_basic() #0 {
+; CHECK-LABEL: test_intrin_basic:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    add x16, x29, #16
+; CHECK-NEXT:    ldp x29, x30, [x29] ; 16-byte Folded Reload
+; CHECK-NEXT:    autib x30, x16
+; CHECK-NEXT:    ret
+  musttail call void @llvm.ret.popless()
+  ret i64 0
+}
+
+define swiftcorocc i64 @test_intrin() #0 {
+; CHECK-LABEL: test_intrin:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    mov x1, sp
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    cbz x0, LBB1_2
+; CHECK-NEXT:  ; %bb.1: ; %else
+; CHECK-NEXT:    add x16, x29, #16
+; CHECK-NEXT:    ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [x29] ; 16-byte Folded Reload
+; CHECK-NEXT:    autib x30, x16
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB1_2: ; %then
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp], #32 ; 16-byte Folded Reload
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @g(ptr %v1, ptr %v2)
+  %c = icmp eq i64 %v3, 0
+  br i1 %c, label %then, label %else
+then:
+  ret i64 %v3
+else:
+  musttail call void @llvm.ret.popless()
+  ret i64 %v3
+}
+
+define swiftcorocc i64 @test_vla(i32 %n) #0 {
+; SDISEL-LABEL: test_vla:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    pacibsp
+; SDISEL-NEXT:    stp x26, x25, [sp, #-32]! ; 16-byte Folded Spill
+; SDISEL-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; SDISEL-NEXT:    add x29, sp, #16
+; SDISEL-NEXT:    .cfi_def_cfa w29, 16
+; SDISEL-NEXT:    .cfi_offset w30, -8
+; SDISEL-NEXT:    .cfi_offset w29, -16
+; SDISEL-NEXT:    .cfi_offset w25, -24
+; SDISEL-NEXT:    .cfi_offset w26, -32
+; SDISEL-NEXT:    ; kill: def $w0 killed $w0 def $x0
+; SDISEL-NEXT:    ubfiz x8, x0, #3, #32
+; SDISEL-NEXT:    add x8, x8, #15
+; SDISEL-NEXT:    and x8, x8, #0xffffffff0
+; SDISEL-NEXT:    mov x9, sp
+; SDISEL-NEXT:    sub x0, x9, x8
+; SDISEL-NEXT:    mov sp, x0
+; SDISEL-NEXT:    mov x9, sp
+; SDISEL-NEXT:    sub x1, x9, x8
+; SDISEL-NEXT:    mov sp, x1
+; SDISEL-NEXT:    ; InlineAsm Start
+; SDISEL-NEXT:    ; InlineAsm End
+; SDISEL-NEXT:    bl _g
+; SDISEL-NEXT:    cbz x0, LBB2_2
+; SDISEL-NEXT:  ; %bb.1: ; %else
+; SDISEL-NEXT:    add x16, x29, #16
+; SDISEL-NEXT:    ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload
+; SDISEL-NEXT:    ldp x29, x30, [x29] ; 16-byte Folded Reload
+; SDISEL-NEXT:    autib x30, x16
+; SDISEL-NEXT:    ret
+; SDISEL-NEXT:  LBB2_2: ; %then
+; SDISEL-NEXT:    sub sp, x29, #16
+; SDISEL-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; SDISEL-NEXT:    ldp x26, x25, [sp], #32 ; 16-byte Folded Reload
+; SDISEL-NEXT:    retab
+;
+; GISEL-LABEL: test_vla:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    pacibsp
+; GISEL-NEXT:    stp x26, x25, [sp, #-32]! ; 16-byte Folded Spill
+; GISEL-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; GISEL-NEXT:    add x29, sp, #16
+; GISEL-NEXT:    .cfi_def_cfa w29, 16
+; GISEL-NEXT:    .cfi_offset w30, -8
+; GISEL-NEXT:    .cfi_offset w29, -16
+; GISEL-NEXT:    .cfi_offset w25, -24
+; GISEL-NEXT:    .cfi_offset w26, -32
+; GISEL-NEXT:    mov x8, sp
+; GISEL-NEXT:    mov w9, w0
+; GISEL-NEXT:    lsl x9, x9, #3
+; GISEL-NEXT:    add x9, x9, #15
+; GISEL-NEXT:    and x9, x9, #0xfffffffffffffff0
+; GISEL-NEXT:    sub x0, x8, x9
+; GISEL-NEXT:    mov sp, x0
+; GISEL-NEXT:    mov x8, sp
+; GISEL-NEXT:    sub x1, x8, x9
+; GISEL-NEXT:    mov sp, x1
+; GISEL-NEXT:    ; InlineAsm Start
+; GISEL-NEXT:    ; InlineAsm End
+; GISEL-NEXT:    bl _g
+; GISEL-NEXT:    cbz x0, LBB2_2
+; GISEL-NEXT:  ; %bb.1: ; %else
+; GISEL-NEXT:    add x16, x29, #16
+; GISEL-NEXT:    ldp x26, x25, [x29, #-16] ; 16-byte Folded Reload
+; GISEL-NEXT:    ldp x29, x30, [x29] ; 16-byte Folded Reload
+; GISEL-NEXT:    autib x30, x16
+; GISEL-NEXT:    ret
+; GISEL-NEXT:  LBB2_2: ; %then
+; GISEL-NEXT:    sub sp, x29, #16
+; GISEL-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; GISEL-NEXT:    ldp x26, x25, [sp], #32 ; 16-byte Folded Reload
+; GISEL-NEXT:    retab
+  %v1 = alloca i64, i32 %n
+  %v2 = alloca i64, i32 %n
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @g(ptr %v1, ptr %v2)
+  %c = icmp eq i64 %v3, 0
+  br i1 %c, label %then, label %else
+then:
+  ret i64 %v3
+else:
+  musttail call void @llvm.ret.popless()
+  ret i64 %v3
+}
+
+attributes #0 = { "ptrauth-returns" }
diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll
new file mode 100644
index 0000000000000..786116a833265
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic-nocleanup.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split)' -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "arm64-apple-macos99.99"
+
+
+@func_cfp = constant <{ i32, i32 }>
+  <{ i32 trunc (
+       i64 sub (
+         i64 ptrtoint (ptr @func to i64),
+         i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @func_cfp, i32 0, i32 1) to i64)
+       )
+     to i32),
+     i32 64
+}>
+
+
+%func_int = type <{ i64 }>
+%func_obj = type <{ %func_int, ptr }>
+%func_guts = type <{ %func_obj }>
+%func_impl = type <{ %func_guts }>
+%func_self = type <{ %func_impl }>
+
+declare swiftcorocc void @func_continuation_prototype(ptr noalias, ptr)
+
+; CHECK-LABEL: @func.resume.0(
+; CHECK-SAME:      ptr noalias %0, 
+; CHECK-SAME:      ptr %1
+; CHECK-SAME:  ) {
+; CHECK:       coro.return.popless:
+; CHECK-NEXT:    unreachable
+; CHECK:       coro.return.normal:
+; CHECK-NEXT:    unreachable
+; CHECK:       }
+
+define swiftcorocc { ptr, ptr } @func(ptr noalias %buffer, ptr %allocator, ptr nocapture swiftself dereferenceable(16) %2) {
+entry:
+  %3 = call token @llvm.coro.id.retcon.once.dynamic(
+    i32 -1, 
+    i32 16,
+    ptr @func_cfp,
+    ptr %allocator,
+    ptr %buffer,
+    ptr @func_continuation_prototype,
+    ptr @allocate, 
+    ptr @deallocate
+  )
+  %handle = call ptr @llvm.coro.begin(token %3, ptr null)
+  %yielded = getelementptr inbounds %func_self, ptr %2, i32 0, i32 0
+  call ptr (...) @llvm.coro.suspend.retcon.p0(ptr %yielded)
+  br i1 false, label %unwind, label %normal
+
+normal:
+  br label %coro.end
+
+unwind:
+  br label %coro.end
+
+coro.end:
+  %8 = call i1 @llvm.coro.end(ptr %handle, i1 false, token none)
+  unreachable
+}
+
+declare swiftcorocc noalias ptr @allocate(i32 %size)
+declare void @deallocate(ptr %ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll
new file mode 100644
index 0000000000000..6f491584c877e
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-dynamic.ll
@@ -0,0 +1,159 @@
+; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),module(coro-cleanup)' -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "arm64-apple-macos99.99"
+
+; CHECK-LABEL: %func.Frame = type { ptr }
+; CHECK-LABEL: %big_types.Frame = type { <32 x i8>, [16 x i8], i64, ptr, %Integer8 }
+
+; CHECK-LABEL: @func_cfp = constant <{ i32, i32 }> 
+; CHECK-SAME:  <{ 
+; CHECK-SAME:    i32 trunc
+; CHECK-SAME:    i32 16
+; CHECK-SAME:  }>
+@func_cfp = constant <{ i32, i32 }>
+  <{ i32 trunc ( ; offset to @func from @func_cfp
+       i64 sub (
+         i64 ptrtoint (ptr @func to i64),
+         i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @func_cfp, i32 0, i32 1) to i64)
+       )
+     to i32),
+     i32 64 ; frame size
+}>
+
+@big_types_cfp = constant <{ i32, i32 }>
+  <{ i32 trunc ( ; offset to @big_types from @big_types_cfp
+       i64 sub (
+         i64 ptrtoint (ptr @big_types to i64),
+         i64 ptrtoint (ptr getelementptr inbounds (<{ i32, i32 }>, ptr @big_types_cfp, i32 0, i32 1) to i64)
+       )
+     to i32),
+     i32 64 ; frame size
+}>
+
+
+; CHECK-LABEL: @func(
+; CHECK-SAME:      ptr %buffer,
+; CHECK-SAME:      ptr %allocator
+; CHECK-SAME:      ptr %array
+; CHECK-SAME:  ) {
+; CHECK:           %array.spill.addr = getelementptr inbounds %func.Frame, ptr %buffer, i32 0, i32 0
+; CHECK:           store ptr %array, ptr %array.spill.addr
+; CHECK:           %load = load i32, ptr %array
+; CHECK:           %load.positive = icmp sgt i32 %load, 0
+; CHECK:           [[CONTINUATION:%.*]] = select i1 %load.positive
+; CHECK-SAME:          ptr @func.resume.0
+; CHECK-SAME:          ptr @func.resume.1
+; CHECK:           [[RETVAL_1:%.*]] = insertvalue { ptr, i32 } poison, ptr [[CONTINUATION:%.*]], 0
+; CHECK:           [[RETVAL_2:%.*]] = insertvalue { ptr, i32 } [[RETVAL_1:%.*]], i32 %load, 1
+; CHECK:           [[DONT_POP:%.*]] = icmp eq ptr %allocator, null
+; CHECK:           br i1 [[DONT_POP:%[^,]+]],
+; CHECK-SAME:          label %coro.return.popless
+; CHECK-SAME:          label %coro.return.normal
+; CHECK:         coro.return.popless:
+; CHECK:           musttail call void @llvm.ret.popless()
+; CHECK:           ret { ptr, i32 } [[RETVAL_2:%.*]]
+; CHECK:         coro.return.normal:
+; CHECK:           ret { ptr, i32 } [[RETVAL_2:%.*]]
+; CHECK:       }
+
+; CHECK-LABEL: @func.resume.0(
+; CHECK-SAME:      ptr [[BUFFER:[^,]+]]
+; CHECK-SAME:      ptr [[ALLOCATOR:%[^)]+]]
+; CHECK-SAME:  ) {
+; CHECK:           %array.reload.addr3 = getelementptr inbounds %func.Frame, ptr [[BUFFER:%.*]], i32 0, i32 0
+; CHECK:           %array.reload4 = load ptr, ptr %array.reload.addr3
+; CHECK:           store i32 0, ptr %array.reload4
+; CHECK:           ret void
+; CHECK:       }
+
+; CHECK-LABEL: @func.resume.1(
+; CHECK-SAME:      ptr [[BUFFER:[^,]+]]
+; CHECK-SAME:      ptr [[ALLOCATOR:%[^)]+]]
+; CHECK-SAME:  ) {
+; CHECK:           %array.reload.addr = getelementptr inbounds %func.Frame, ptr [[BUFFER:%.*]], i32 0, i32 0
+; CHECK:           %array.reload = load ptr, ptr %array.reload.addr
+; CHECK:           store i32 10, ptr %array.reload
+; CHECK:           ret void
+; CHECK:       }
+define swiftcorocc {ptr, i32} @func(ptr %buffer, ptr %allocator, ptr %array) {
+entry:
+  %id = call token @llvm.coro.id.retcon.once.dynamic(
+    i32 -1, 
+    i32 16, 
+    ptr @func_cfp, 
+    ptr %allocator, 
+    ptr %buffer, 
+    ptr @continuation_prototype, 
+    ptr @allocate, 
+    ptr @deallocate
+  )
+  %handle = call ptr @llvm.coro.begin(token %id, ptr null)
+  %load = load i32, ptr %array
+  %load.positive = icmp sgt i32 %load, 0
+  br i1 %load.positive, label %positive, label %negative
+
+positive:
+  call ptr (...) @llvm.coro.suspend.retcon.p0(i32 %load)
+  store i32 0, ptr %array, align 4
+  br label %cleanup
+
+negative:
+  call ptr (...) @llvm.coro.suspend.retcon.p0(i32 %load)
+  store i32 10, ptr %array, align 4
+  br label %cleanup
+
+cleanup:
+  call i1 @llvm.coro.end(ptr %handle, i1 0, token none)
+  unreachable
+}
+
+declare void @continuation_prototype(ptr, ptr)
+
+declare swiftcorocc noalias ptr @allocate(i32 %size)
+declare void @deallocate(ptr %ptr)
+
+%Integer8 = type { i8 }
+
+; CHECK-LABEL: @big_types(
+; CHECK-SAME:      ptr noalias %frame,
+; CHECK-SAME:      ptr swiftcoro %allocator,
+; CHECK-SAME:      i64 %index,
+; CHECK-SAME:      ptr swiftself dereferenceable(32) %vec_addr
+; CHECK-SAME:  ) {
+; CHECK:         [[VEC_STK_BASE_PTR:%.*]] = getelementptr inbounds %big_types.Frame, ptr %frame, i32 0, i32 0
+; CHECK:         [[VEC_STK_BASE_INT:%.*]] = ptrtoint ptr [[VEC_STK_BASE_PTR]] to i64
+; CHECK:         [[VEC_STK_BIASED_INT:%.*]] = add i64 [[VEC_STK_BASE_INT]], 31
+; CHECK:         [[VEC_STK_ALIGNED_INT:%.*]] = and i64 [[VEC_STK_BIASED_INT]], -32
+; CHECK:         %vec_stk = inttoptr i64 [[VEC_STK_ALIGNED_INT]] to ptr
+define swiftcorocc { ptr, ptr } @big_types(ptr noalias %frame, ptr swiftcoro %allocator, i64 %index, ptr nocapture swiftself dereferenceable(32) %vec_addr) {
+  %element_addr = alloca %Integer8, align 1
+  %id = tail call token @llvm.coro.id.retcon.once.dynamic(
+    i32 -1, 
+    i32 16, 
+    ptr nonnull @big_types_cfp, 
+    ptr %allocator, 
+    ptr %frame, 
+    ptr @continuation_prototype, 
+    ptr nonnull @allocate, 
+    ptr nonnull @deallocate
+  )
+  %handle = tail call ptr @llvm.coro.begin(token %id, ptr null)
+  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %element_addr)
+  %vec_original = load <32 x i8>, ptr %vec_addr, align 32
+  %vec_stk = alloca <32 x i8>, align 32
+  store <32 x i8> %vec_original, ptr %vec_stk, align 32
+  %vec_original_2 = load <32 x i8>, ptr %vec_stk, align 32
+  %index32 = trunc i64 %index to i32
+  %element_original = extractelement <32 x i8> %vec_original_2, i32 %index32
+  store i8 %element_original, ptr %element_addr, align 1
+  call ptr (...) @llvm.coro.suspend.retcon.p0(ptr nonnull %element_addr)
+  %element_modified = load i8, ptr %element_addr, align 1
+  %vec_original_3 = load <32 x i8>, ptr %vec_stk, align 32
+  %vec_modified = insertelement <32 x i8> %vec_original_3, i8 %element_modified, i32 %index32
+  store <32 x i8> %vec_modified, ptr %vec_addr, align 32
+  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %element_addr)
+  call i1 @llvm.coro.end(ptr %handle, i1 false, token none)
+  unreachable
+}
+
diff --git a/llvm/test/Transforms/DCE/int_ret_popless.ll b/llvm/test/Transforms/DCE/int_ret_popless.ll
new file mode 100644
index 0000000000000..4e6fd4d05e89e
--- /dev/null
+++ b/llvm/test/Transforms/DCE/int_ret_popless.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+define void @test_ret_popless() {
+; CHECK-LABEL: define void @test_ret_popless() {
+; CHECK-NEXT:    musttail call void @llvm.ret.popless()
+; CHECK-NEXT:    ret void
+;
+  musttail call void @llvm.ret.popless()
+  ret void
+}
diff --git a/llvm/test/Verifier/ret_popless.ll b/llvm/test/Verifier/ret_popless.ll
new file mode 100644
index 0000000000000..6747d3fd039ed
--- /dev/null
+++ b/llvm/test/Verifier/ret_popless.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+define void @test_ret_popless_not_musttail() {
+; CHECK: llvm.ret.popless call must be musttail
+  call void @llvm.ret.popless()
+  ret void
+}
+
+define i64 @test_ret_popless_not_returned(i64 %a) {
+; CHECK: musttail intrinsic call must precede a ret
+  musttail call void @llvm.ret.popless()
+  %res = bitcast i64 %a to i64
+  ret i64 %res
+}
diff --git a/llvm/test/Verifier/swiftcoro.ll b/llvm/test/Verifier/swiftcoro.ll
new file mode 100644
index 0000000000000..fcc980478a5bb
--- /dev/null
+++ b/llvm/test/Verifier/swiftcoro.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @a(ptr swiftcoro %a, ptr swiftcoro %b)
+; CHECK: Cannot have multiple 'swiftcoro' parameters!