release/20.x: [clang-repl] : Fix clang-repl crash with --cuda flag (#136404) #137615
Conversation
@vgvassilev What do you think about merging this PR to the release branch?
@llvm/pr-subscribers-clang

Author: None (llvmbot)

Changes

Backport 21fb19f

Requested by: @anutosh491

Full diff: https://github.com/llvm/llvm-project/pull/137615.diff

4 Files Affected:
- clang/include/clang/Interpreter/Interpreter.h
- clang/lib/Interpreter/DeviceOffload.cpp
- clang/lib/Interpreter/DeviceOffload.h
- clang/lib/Interpreter/Interpreter.cpp
diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index b1b63aedf86ab..56213f88b9e30 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -41,6 +41,7 @@ class CXXRecordDecl;
class Decl;
class IncrementalExecutor;
class IncrementalParser;
+class IncrementalCUDADeviceParser;
/// Create a pre-configured \c CompilerInstance for incremental processing.
class IncrementalCompilerBuilder {
@@ -93,7 +94,10 @@ class Interpreter {
std::unique_ptr<IncrementalExecutor> IncrExecutor;
// An optional parser for CUDA offloading
- std::unique_ptr<IncrementalParser> DeviceParser;
+ std::unique_ptr<IncrementalCUDADeviceParser> DeviceParser;
+
+ // An optional action for CUDA offloading
+ std::unique_ptr<IncrementalAction> DeviceAct;
/// List containing information about each incrementally parsed piece of code.
std::list<PartialTranslationUnit> PTUs;
@@ -175,10 +179,11 @@ class Interpreter {
llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
- CodeGenerator *getCodeGen() const;
- std::unique_ptr<llvm::Module> GenModule();
+ CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const;
+ std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr);
PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU,
- std::unique_ptr<llvm::Module> M = {});
+ std::unique_ptr<llvm::Module> M = {},
+ IncrementalAction *Action = nullptr);
// A cache for the compiled destructors used to for de-allocation of managed
// clang::Values.
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 1999d63d1aa04..7d0125403ea52 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -31,10 +31,9 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
: IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
CodeGenOpts(HostInstance.getCodeGenOpts()),
- TargetOpts(HostInstance.getTargetOpts()) {
+ TargetOpts(DeviceInstance->getTargetOpts()) {
if (Err)
return;
- DeviceCI = std::move(DeviceInstance);
StringRef Arch = TargetOpts.CPU;
if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
@@ -42,34 +41,7 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
llvm::inconvertibleErrorCode()));
return;
}
-}
-
-llvm::Expected<TranslationUnitDecl *>
-IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
- auto PTU = IncrementalParser::Parse(Input);
- if (!PTU)
- return PTU.takeError();
-
- auto PTX = GeneratePTX();
- if (!PTX)
- return PTX.takeError();
-
- auto Err = GenerateFatbinary();
- if (Err)
- return std::move(Err);
-
- std::string FatbinFileName =
- "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
- VFS->addFile(FatbinFileName, 0,
- llvm::MemoryBuffer::getMemBuffer(
- llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
- "", false));
-
- CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-
- FatbinContent.clear();
-
- return PTU;
+ DeviceCI = std::move(DeviceInstance);
}
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
@@ -172,6 +144,19 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
FatbinContent.append(PTXCode.begin(), PTXCode.end());
+ const PartialTranslationUnit &PTU = PTUs.back();
+
+ std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
+
+ VFS->addFile(FatbinFileName, 0,
+ llvm::MemoryBuffer::getMemBuffer(
+ llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+ "", false));
+
+ CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+
+ FatbinContent.clear();
+
return llvm::Error::success();
}
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index b9a1acab004c3..43645033c4840 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -33,8 +33,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs);
- llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
-
// Generate PTX for the last PTU.
llvm::Expected<llvm::StringRef> GeneratePTX();
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index fa4c1439c9261..bd21f82081712 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -480,20 +480,34 @@ Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
OverlayVFS->pushOverlay(IMVFS);
CI->createFileManager(OverlayVFS);
- auto Interp = Interpreter::create(std::move(CI));
- if (auto E = Interp.takeError())
- return std::move(E);
+ llvm::Expected<std::unique_ptr<Interpreter>> InterpOrErr =
+ Interpreter::create(std::move(CI));
+ if (!InterpOrErr)
+ return InterpOrErr;
+
+ std::unique_ptr<Interpreter> Interp = std::move(*InterpOrErr);
llvm::Error Err = llvm::Error::success();
- auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
- std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
- (*Interp)->PTUs);
+ llvm::LLVMContext &LLVMCtx = *Interp->TSCtx->getContext();
+
+ auto DeviceAct =
+ std::make_unique<IncrementalAction>(*DCI, LLVMCtx, Err, *Interp);
+
if (Err)
return std::move(Err);
- (*Interp)->DeviceParser = std::move(DeviceParser);
+ Interp->DeviceAct = std::move(DeviceAct);
+
+ DCI->ExecuteAction(*Interp->DeviceAct);
+
+ auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+ std::move(DCI), *Interp->getCompilerInstance(), IMVFS, Err, Interp->PTUs);
+
+ if (Err)
+ return std::move(Err);
- return Interp;
+ Interp->DeviceParser = std::move(DeviceParser);
+ return std::move(Interp);
}
const CompilerInstance *Interpreter::getCompilerInstance() const {
@@ -531,15 +545,17 @@ size_t Interpreter::getEffectivePTUSize() const {
PartialTranslationUnit &
Interpreter::RegisterPTU(TranslationUnitDecl *TU,
- std::unique_ptr<llvm::Module> M /*={}*/) {
+ std::unique_ptr<llvm::Module> M /*={}*/,
+ IncrementalAction *Action) {
PTUs.emplace_back(PartialTranslationUnit());
PartialTranslationUnit &LastPTU = PTUs.back();
LastPTU.TUPart = TU;
if (!M)
- M = GenModule();
+ M = GenModule(Action);
- assert((!getCodeGen() || M) && "Must have a llvm::Module at this point");
+ assert((!getCodeGen(Action) || M) &&
+ "Must have a llvm::Module at this point");
LastPTU.TheModule = std::move(M);
LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1
@@ -559,6 +575,16 @@ Interpreter::Parse(llvm::StringRef Code) {
llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
if (auto E = DeviceTU.takeError())
return std::move(E);
+
+ RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());
+
+ llvm::Expected<llvm::StringRef> PTX = DeviceParser->GeneratePTX();
+ if (!PTX)
+ return PTX.takeError();
+
+ llvm::Error Err = DeviceParser->GenerateFatbinary();
+ if (Err)
+ return std::move(Err);
}
// Tell the interpreter sliently ignore unused expressions since value
@@ -726,9 +752,10 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) {
return llvm::Error::success();
}
-std::unique_ptr<llvm::Module> Interpreter::GenModule() {
+std::unique_ptr<llvm::Module>
+Interpreter::GenModule(IncrementalAction *Action) {
static unsigned ID = 0;
- if (CodeGenerator *CG = getCodeGen()) {
+ if (CodeGenerator *CG = getCodeGen(Action)) {
// Clang's CodeGen is designed to work with a single llvm::Module. In many
// cases for convenience various CodeGen parts have a reference to the
// llvm::Module (TheModule or Module) which does not change when a new
@@ -750,8 +777,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() {
return nullptr;
}
-CodeGenerator *Interpreter::getCodeGen() const {
- FrontendAction *WrappedAct = Act->getWrapped();
+CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const {
+ if (!Action)
+ Action = Act.get();
+ FrontendAction *WrappedAct = Action->getWrapped();
if (!WrappedAct->hasIRSupport())
return nullptr;
return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
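Taken together, the hunks above move the per-input device-side work out of `IncrementalCUDADeviceParser::Parse` and into `Interpreter::Parse`, where the host already owns the PTU list. A condensed sketch of the sequence now run for each CUDA input, assembled from the diff (error handling abbreviated, surrounding code omitted):

```cpp
// Inside Interpreter::Parse(llvm::StringRef Code), when DeviceParser is set:
// 1. Parse the input for the device target.
llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
if (auto E = DeviceTU.takeError())
  return std::move(E);

// 2. Register the device PTU against the device-side IncrementalAction, so
//    GenModule()/getCodeGen() pull the module from the device CodeGenerator
//    rather than the host one.
RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());

// 3. Lower the freshly registered device module to PTX.
llvm::Expected<llvm::StringRef> PTX = DeviceParser->GeneratePTX();
if (!PTX)
  return PTX.takeError();

// 4. Wrap the PTX into a fatbinary; GenerateFatbinary() now also publishes it
//    into the in-memory VFS and points CodeGenOpts.CudaGpuBinaryFileName at it
//    so the subsequent host compilation can embed it.
llvm::Error Err = DeviceParser->GenerateFatbinary();
if (Err)
  return std::move(Err);
```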
Lgtm!
`clang-repl --cuda` was previously crashing with a segmentation fault instead of reporting a clean error:

```
(base) anutosh491@Anutoshs-MacBook-Air bin % ./clang-repl --cuda
 #0 0x0000000111da4fbc llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x150fbc)
 #1 0x0000000111da31dc llvm::sys::RunSignalHandlers() (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x14f1dc)
 #2 0x0000000111da5628 SignalHandler(int) (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x151628)
 #3 0x000000019b242de4 (/usr/lib/system/libsystem_platform.dylib+0x180482de4)
 #4 0x0000000107f638d0 clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>, clang::CompilerInstance&, llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>, llvm::Error&, std::__1::list<clang::PartialTranslationUnit, std::__1::allocator<clang::PartialTranslationUnit>> const&) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0)
 #5 0x0000000107f638d0 clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>, clang::CompilerInstance&, llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>, llvm::Error&, std::__1::list<clang::PartialTranslationUnit, std::__1::allocator<clang::PartialTranslationUnit>> const&) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0)
 #6 0x0000000107f6bac8 clang::Interpreter::createWithCUDA(std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>, std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x2173ac8)
 #7 0x000000010206f8a8 main (/opt/local/libexec/llvm-20/bin/clang-repl+0x1000038a8)
 #8 0x000000019ae8c274
Segmentation fault: 11
```

The underlying issue was that the `DeviceCompilerInstance` (used for device-side CUDA compilation) was never initialized with a `Sema`, which is required before constructing the `IncrementalCUDADeviceParser`:

https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/DeviceOffload.cpp#L32
https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/IncrementalParser.cpp#L31

Unlike the host-side `CompilerInstance`, which runs `ExecuteAction` inside the Interpreter constructor (thereby setting up `Sema`), the device-side CI was passed into the parser uninitialized, leading to an assertion or crash when accessing its internals.

To fix this, I refactored the `Interpreter::create` method to include an optional `DeviceCI` parameter. If provided, we know we need to take care of this instance too. Only then do we construct the `IncrementalCUDADeviceParser`.

(cherry picked from commit 21fb19f)
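The heart of the fix is the new setup order in `createWithCUDA`; a condensed sketch taken from the diff above (error propagation and VFS setup trimmed):

```cpp
// Host interpreter is created first, exactly as before.
std::unique_ptr<Interpreter> Interp = std::move(*InterpOrErr);

llvm::Error Err = llvm::Error::success();
llvm::LLVMContext &LLVMCtx = *Interp->TSCtx->getContext();

// 1. Build a dedicated IncrementalAction for the device CompilerInstance.
auto DeviceAct =
    std::make_unique<IncrementalAction>(*DCI, LLVMCtx, Err, *Interp);
if (Err)
  return std::move(Err);
Interp->DeviceAct = std::move(DeviceAct);

// 2. Execute the action: this is what sets up the device CI's Sema, the step
//    the previous code skipped and the source of the crash above.
DCI->ExecuteAction(*Interp->DeviceAct);

// 3. Only now is it safe to hand DCI to the device parser.
auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
    std::move(DCI), *Interp->getCompilerInstance(), IMVFS, Err, Interp->PTUs);
if (Err)
  return std::move(Err);
Interp->DeviceParser = std::move(DeviceParser);
```

Running `ExecuteAction` on the device CI mirrors what the host-side Interpreter constructor already does, so both compiler instances reach the parser in the same, fully initialized state.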
@anutosh491 (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR.
Backport 21fb19f
Requested by: @anutosh491