Fix cuda flag with clang-repl

anutosh491 · anutosh491 · commit 1f7205615f8d · 2025-04-24T12:48:29.000+05:30
diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
@@ -95,6 +95,9 @@ class Interpreter {
   // An optional parser for CUDA offloading
   std::unique_ptr<IncrementalParser> DeviceParser;
 
+  // An optional action for CUDA offloading
+  std::unique_ptr<IncrementalAction> DeviceAct;
+
   /// List containing information about each incrementally parsed piece of code.
   std::list<PartialTranslationUnit> PTUs;
 
@@ -129,7 +132,8 @@ class Interpreter {
 public:
   virtual ~Interpreter();
   static llvm::Expected<std::unique_ptr<Interpreter>>
-  create(std::unique_ptr<CompilerInstance> CI);
+  create(std::unique_ptr<CompilerInstance> CI,
+         std::unique_ptr<CompilerInstance> DeviceCI = nullptr);
   static llvm::Expected<std::unique_ptr<Interpreter>>
   createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                  std::unique_ptr<CompilerInstance> DCI);
@@ -175,10 +179,11 @@ class Interpreter {
   llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
   llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
 
-  CodeGenerator *getCodeGen() const;
-  std::unique_ptr<llvm::Module> GenModule();
+  CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const;
+  std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr);
   PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU,
-                                      std::unique_ptr<llvm::Module> M = {});
+                                      std::unique_ptr<llvm::Module> M = {},
+                                      IncrementalAction *Action = nullptr);
 
   // A cache for the compiled destructors used to for de-allocation of managed
   // clang::Values.
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -28,20 +28,21 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
     std::unique_ptr<CompilerInstance> DeviceInstance,
     CompilerInstance &HostInstance,
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
-    llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
+    llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
       CodeGenOpts(HostInstance.getCodeGenOpts()),
-      TargetOpts(HostInstance.getTargetOpts()) {
+      TargetOpts(DeviceInstance->getTargetOpts()) {
   if (Err)
     return;
-  DeviceCI = std::move(DeviceInstance);
   StringRef Arch = TargetOpts.CPU;
   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
+    DeviceInstance.release();
     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
                                                "Invalid CUDA architecture",
                                                llvm::inconvertibleErrorCode()));
     return;
   }
+  DeviceCI = std::move(DeviceInstance);
 }
 
 llvm::Expected<TranslationUnitDecl *>
@@ -50,25 +51,6 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
   if (!PTU)
     return PTU.takeError();
 
-  auto PTX = GeneratePTX();
-  if (!PTX)
-    return PTX.takeError();
-
-  auto Err = GenerateFatbinary();
-  if (Err)
-    return std::move(Err);
-
-  std::string FatbinFileName =
-      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-  VFS->addFile(FatbinFileName, 0,
-               llvm::MemoryBuffer::getMemBuffer(
-                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-                   "", false));
-
-  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-
-  FatbinContent.clear();
-
   return PTU;
 }
 
@@ -78,9 +60,11 @@ llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
 
   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
       PTU.TheModule->getTargetTriple(), Error);
-  if (!Target)
+  if (!Target) {
     return llvm::make_error<llvm::StringError>(std::move(Error),
                                                std::error_code());
+  }
+
   llvm::TargetOptions TO = llvm::TargetOptions();
   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
       PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
@@ -172,6 +156,19 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
 
   FatbinContent.append(PTXCode.begin(), PTXCode.end());
 
+  auto &PTU = PTUs.back();
+
+  std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
+
+  VFS->addFile(FatbinFileName, 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+                   "", false));
+
+  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+
+  FatbinContent.clear();
+
   return llvm::Error::success();
 }
 
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
@@ -24,14 +24,14 @@ class CodeGenOptions;
 class TargetOptions;
 
 class IncrementalCUDADeviceParser : public IncrementalParser {
-  const std::list<PartialTranslationUnit> &PTUs;
+  std::list<PartialTranslationUnit> &PTUs;
 
 public:
   IncrementalCUDADeviceParser(
       std::unique_ptr<CompilerInstance> DeviceInstance,
       CompilerInstance &HostInstance,
       llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
-      llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs);
+      llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs);
 
   llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
 
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
@@ -451,13 +451,44 @@ const char *const Runtimes = R"(
 )";
 
 llvm::Expected<std::unique_ptr<Interpreter>>
-Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
+Interpreter::create(std::unique_ptr<CompilerInstance> CI,
+                    std::unique_ptr<CompilerInstance> DeviceCI) {
   llvm::Error Err = llvm::Error::success();
   auto Interp =
       std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err));
   if (Err)
     return std::move(Err);
 
+  CompilerInstance &HostCI = *(Interp->getCompilerInstance());
+
+  if (DeviceCI) {
+    Interp->DeviceAct = std::make_unique<IncrementalAction>(
+        *DeviceCI, *Interp->TSCtx->getContext(), Err, *Interp);
+
+    if (Err)
+      return std::move(Err);
+
+    DeviceCI->ExecuteAction(*Interp->DeviceAct);
+
+    // avoid writing fat binary to disk using an in-memory virtual file system
+    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
+        std::make_unique<llvm::vfs::InMemoryFileSystem>();
+    llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
+        std::make_unique<llvm::vfs::OverlayFileSystem>(
+            llvm::vfs::getRealFileSystem());
+    OverlayVFS->pushOverlay(IMVFS);
+    HostCI.createFileManager(OverlayVFS);
+
+    auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+        std::move(DeviceCI), HostCI, IMVFS, Err,
+        Interp->PTUs);
+
+    if (Err)
+      return std::move(Err);
+
+    Interp->DeviceParser = std::move(DeviceParser);
+  }
+
   // Add runtime code and set a marker to hide it from user code. Undo will not
   // go through that.
   auto PTU = Interp->Parse(Runtimes);
@@ -472,29 +503,7 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
 llvm::Expected<std::unique_ptr<Interpreter>>
 Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                             std::unique_ptr<CompilerInstance> DCI) {
-  // avoid writing fat binary to disk using an in-memory virtual file system
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
-      std::make_unique<llvm::vfs::InMemoryFileSystem>();
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
-      std::make_unique<llvm::vfs::OverlayFileSystem>(
-          llvm::vfs::getRealFileSystem());
-  OverlayVFS->pushOverlay(IMVFS);
-  CI->createFileManager(OverlayVFS);
-
-  auto Interp = Interpreter::create(std::move(CI));
-  if (auto E = Interp.takeError())
-    return std::move(E);
-
-  llvm::Error Err = llvm::Error::success();
-  auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-      std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
-      (*Interp)->PTUs);
-  if (Err)
-    return std::move(Err);
-
-  (*Interp)->DeviceParser = std::move(DeviceParser);
-
-  return Interp;
+  return Interpreter::create(std::move(CI), std::move(DCI));
 }
 
 const CompilerInstance *Interpreter::getCompilerInstance() const {
@@ -532,15 +541,16 @@ size_t Interpreter::getEffectivePTUSize() const {
 
 PartialTranslationUnit &
 Interpreter::RegisterPTU(TranslationUnitDecl *TU,
-                         std::unique_ptr<llvm::Module> M /*={}*/) {
+                         std::unique_ptr<llvm::Module> M /*={}*/,
+                         IncrementalAction *Action) {
   PTUs.emplace_back(PartialTranslationUnit());
   PartialTranslationUnit &LastPTU = PTUs.back();
   LastPTU.TUPart = TU;
 
   if (!M)
-    M = GenModule();
+    M = GenModule(Action);
 
-  assert((!getCodeGen() || M) && "Must have a llvm::Module at this point");
+  assert((!getCodeGen(Action) || M) && "Must have a llvm::Module at this point");
 
   LastPTU.TheModule = std::move(M);
   LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1
@@ -558,8 +568,21 @@ Interpreter::Parse(llvm::StringRef Code) {
   // included in the host compilation
   if (DeviceParser) {
     llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
-    if (auto E = DeviceTU.takeError())
+    if (auto E = DeviceTU.takeError()) {
       return std::move(E);
+    }
+
+    auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
+
+    PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());
+
+    llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
+    if (!PTX)
+      return PTX.takeError();
+
+    llvm::Error Err = CudaParser->GenerateFatbinary();
+    if (Err)
+      return std::move(Err);
   }
 
   // Tell the interpreter sliently ignore unused expressions since value
@@ -736,9 +759,9 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) {
   return llvm::Error::success();
 }
 
-std::unique_ptr<llvm::Module> Interpreter::GenModule() {
+std::unique_ptr<llvm::Module> Interpreter::GenModule(IncrementalAction *Action) {
   static unsigned ID = 0;
-  if (CodeGenerator *CG = getCodeGen()) {
+  if (CodeGenerator *CG = getCodeGen(Action)) {
     // Clang's CodeGen is designed to work with a single llvm::Module. In many
     // cases for convenience various CodeGen parts have a reference to the
     // llvm::Module (TheModule or Module) which does not change when a new
@@ -760,8 +783,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() {
   return nullptr;
 }
 
-CodeGenerator *Interpreter::getCodeGen() const {
-  FrontendAction *WrappedAct = Act->getWrapped();
+CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const {
+  if (!Action)
+    Action = Act.get();
+  FrontendAction *WrappedAct = Action->getWrapped();
   if (!WrappedAct->hasIRSupport())
     return nullptr;
   return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();