Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions clang/test/Driver/Inputs/SYCL/two-kernels.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
target triple = "spirv64"

define spir_func i32 @helper_shared(i32 %a) {
entry:
%r = add nsw i32 %a, 1
ret i32 %r
}

define spir_kernel void @kernel_a(ptr addrspace(1) %out, i32 %a) {
entry:
%r = tail call spir_func i32 @helper_shared(i32 %a)
store i32 %r, ptr addrspace(1) %out, align 4
ret void
}

define spir_kernel void @kernel_b(ptr addrspace(1) %out, i32 %a, i32 %b) {
entry:
%h = tail call spir_func i32 @helper_shared(i32 %a)
%r = mul nsw i32 %h, %b
store i32 %r, ptr addrspace(1) %out, align 4
ret void
}
32 changes: 30 additions & 2 deletions clang/test/Driver/clang-sycl-linker-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
// REQUIRES: spirv-registered-target
//
// Test the dry run of a simple case to link two input files.
// Also verifies the default split mode ("none").
// RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_1.bc
// RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_2.bc
// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-spirv.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=SIMPLE-FO
// SIMPLE-FO: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// SIMPLE-FO: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none
// SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv
//
// Test that IMG_SPIRV image kind is set for non-AOT compilation.
Expand All @@ -20,7 +22,8 @@
// RUN: touch %t.dir/lib2.bc
// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc --library-path=%t.dir --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \
// RUN: | FileCheck %s --check-prefix=DEVLIBS
// DEVLIBS: sycl-device-link: inputs: {{.*}}.bc libfiles: {{.*}}lib1.bc, {{.*}}lib2.bc output: [[LLVMLINKOUT:.*]].bc
// DEVLIBS: sycl-device-link: inputs: {{.*}}.bc libfiles: {{.*}}lib1.bc, {{.*}}lib2.bc output: [[LLVMLINKOUT:.*]].bc
// DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none
// DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv
//
// Test a simple case with a random file (not bitcode) as input.
Expand All @@ -42,6 +45,7 @@
// RUN: --ocloc-options="-a -b" \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU
// AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none
// AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv
// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b {{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv
//
Expand All @@ -54,6 +58,7 @@
// RUN: --opencl-aot-options="-a -b" \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU
// AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none
// AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv
// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv
//
Expand All @@ -69,3 +74,26 @@
// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=NOTARGET
// NOTARGET: Target triple must be specified
//
// Test the split mode ("none"): no extra splits are produced.
// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --sycl-module-split-mode=none %t_1.bc %t_2.bc -o %t-split-none.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=SPLIT-NONE
// SPLIT-NONE: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// SPLIT-NONE-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none
// SPLIT-NONE-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv
// SPLIT-NONE-NOT: LLVM backend: input: {{.*}}.bc, output: {{.*}}_1.spv
//
// Test per-kernel split: a module with two SPIR_KERNEL functions produces two
// device images.
// RUN: llvm-as %S/Inputs/SYCL/two-kernels.ll -o %t-two.bc
// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --sycl-module-split-mode=kernel %t-two.bc -o %t-split-kernel.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=SPLIT-KERNEL
// SPLIT-KERNEL: sycl-device-link: inputs: {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
// SPLIT-KERNEL-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[SPLIT0:.*]].bc, [[SPLIT1:.*]].bc, mode: kernel
// SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT0]].bc, output: {{.*}}_0.spv
// SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT1]].bc, output: {{.*}}_1.spv
//
// Test that an invalid split mode is rejected.
// RUN: not clang-sycl-linker --dry-run -triple=spirv64 --sycl-module-split-mode=bogus %t_1.bc -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=SPLIT-INVALID
// SPLIT-INVALID: sycl-module-split-mode value isn't recognized: bogus
1 change: 1 addition & 0 deletions clang/tools/clang-sycl-linker/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ set(LLVM_LINK_COMPONENTS
Support
Target
TargetParser
TransformUtils
)

set(LLVM_TARGET_DEFINITIONS SYCLLinkOpts.td)
Expand Down
187 changes: 148 additions & 39 deletions clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "clang/Basic/Version.h"

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/CommandFlags.h"
Expand Down Expand Up @@ -48,6 +49,7 @@
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SplitModuleByCategory.h"

using namespace llvm;
using namespace llvm::opt;
Expand Down Expand Up @@ -242,14 +244,19 @@ Expected<SmallVector<std::string>> getSYCLDeviceLibs(const ArgList &Args) {
return DeviceLibFiles;
}

struct LinkResult {
std::unique_ptr<Module> LinkedModule;
SmallString<256> BitcodeFile;
};

/// Following tasks are performed:
/// 1. Link all SYCL device bitcode images into one image. Device linking is
/// performed using the linkInModule API.
/// 2. Gather all SYCL device library bitcode images.
/// 3. Link all the images gathered in Step 2 with the output of Step 1 using
/// linkInModule API. LinkOnlyNeeded flag is used.
Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
const ArgList &Args, LLVMContext &C) {
Expected<LinkResult> linkDeviceCode(ArrayRef<std::string> InputFiles,
const ArgList &Args, LLVMContext &C) {
llvm::TimeTraceScope TimeScope("SYCL link device code");

assert(InputFiles.size() && "No inputs to link");
Expand Down Expand Up @@ -309,7 +316,7 @@ Expected<StringRef> linkDeviceCode(ArrayRef<std::string> InputFiles,
LibInputs, *BitcodeOutput);
}

return *BitcodeOutput;
return LinkResult{std::move(LinkerOutput), SmallString<256>(*BitcodeOutput)};
}

/// Run Code Generation using LLVM backend.
Expand Down Expand Up @@ -461,6 +468,107 @@ static Error runAOTCompile(StringRef InputFile, StringRef OutputFile,
return createStringError(inconvertibleErrorCode(), "Unsupported arch");
}

/// SYCL device code module split mode.
enum IRSplitMode {
Comment thread
YuriPlyakhin marked this conversation as resolved.
Outdated
SPLIT_PER_KERNEL, // one module per kernel
SPLIT_NONE // no splitting
};

/// Parses the value of \p -sycl-module-split-mode.
static std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) {
return StringSwitch<std::optional<IRSplitMode>>(S)
.Case("kernel", SPLIT_PER_KERNEL)
.Case("none", SPLIT_NONE)
.Default(std::nullopt);
}

/// Result of splitting a device module: the bitcode file path and the
/// serialized symbol table for each device image.
struct SplitModule {
SmallString<256> ModuleFilePath;
SmallString<0> Symbols;
};

static bool isEntryPoint(const Function &F) {
return !F.isDeclaration() && F.hasKernelCallingConv();
}

/// Collect kernel names from \p M and serialize them into a symbol table.
static SmallString<0> collectSymbols(const Module &M) {
SmallVector<StringRef> KernelNames;
for (const Function &F : M)
if (isEntryPoint(F))
KernelNames.push_back(F.getName());
SmallString<0> SymbolData;
llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData);
return SymbolData;
}

/// Splits the fully linked device \p M into one bitcode file per device image
/// according to \p Mode and returns the list of split images with their symbol
/// tables.
///
/// For SPLIT_NONE, \p LinkedBitcodeFile is returned as-is.
/// For SPLIT_PER_KERNEL, the module is split into parts such that each part
/// contains exactly one kernel entry point and its transitive dependencies;
/// each part is written to a fresh temporary bitcode file.
static Expected<SmallVector<SplitModule, 0>>
splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile,
IRSplitMode Mode, const ArgList &Args) {
SmallVector<SplitModule, 0> SplitModules;

if (Mode == SPLIT_NONE) {
SplitModules.push_back(
{SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)});
return SplitModules;
}

assert(Mode == SPLIT_PER_KERNEL);

// splitModuleTransitiveFromEntryPoints asserts that at least one entry point
// was categorized. If the linked module contains no kernel definitions at
// all, there is nothing to split; fall back to shipping the linked module
// as a single image.
bool HasKernel = llvm::any_of(M->functions(), isEntryPoint);
if (!HasKernel) {
SplitModules.push_back(
{SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)});
return SplitModules;
}

// Categorize each kernel function into its own group. Non-kernels and
// declarations return std::nullopt so they are pulled into whichever split
// transitively needs them.
int NextCategory = 0;
auto EntryPointCategorizer =
[&NextCategory](const Function &F) -> std::optional<int> {
if (!isEntryPoint(F))
return std::nullopt;
return NextCategory++;
};

if (Error Err = splitModuleTransitiveFromEntryPoints(
std::move(M), EntryPointCategorizer,
[&](std::unique_ptr<Module> Part) -> Error {
Comment thread
YuriPlyakhin marked this conversation as resolved.
Outdated
Expected<StringRef> BitcodeFileOrErr =
createTempFile(Args, sys::path::filename(OutputFile), "bc");
if (!BitcodeFileOrErr)
return BitcodeFileOrErr.takeError();
int FD = -1;
if (std::error_code EC =
sys::fs::openFileForWrite(*BitcodeFileOrErr, FD))
return errorCodeToError(EC);
raw_fd_ostream OS(FD, /*shouldClose=*/true);
WriteBitcodeToFile(*Part, OS);
SplitModules.push_back(
{SmallString<256>(*BitcodeFileOrErr), collectSymbols(*Part)});
return Error::success();
}))
return Err;

return SplitModules;
}

/// Performs the following steps:
/// 1. Link input device code (user code and SYCL device library code).
/// 2. Run SPIR-V code generation.
Expand All @@ -470,34 +578,35 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
LLVMContext C;

// Link all input bitcode files and SYCL device library files, if any.
auto LinkedFile = linkDeviceCode(Files, Args, C);
if (!LinkedFile)
return LinkedFile.takeError();

// TODO: SYCL post link functionality involves device code splitting and will
// result in multiple bitcode codes.
// The following lines are placeholders to represent multiple files and will
// be refactored once SYCL post link support is available.
SmallVector<std::string> SplitModules;
SplitModules.emplace_back(*LinkedFile);

// Generate symbol table.
SmallVector<SmallString<0>> SymbolTable;
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
Expected<std::unique_ptr<Module>> ModOrErr =
getBitcodeModule(SplitModules[I], C);
if (!ModOrErr)
return ModOrErr.takeError();
Expected<LinkResult> LinkedOrErr = linkDeviceCode(Files, Args, C);
if (!LinkedOrErr)
return LinkedOrErr.takeError();
auto &[LinkedModule, LinkedFile] = *LinkedOrErr;

// Determine the requested module split mode.
IRSplitMode SplitMode = SPLIT_NONE;
if (Arg *A = Args.getLastArg(OPT_sycl_module_split_mode_EQ)) {
std::optional<IRSplitMode> ModeOrNone =
convertStringToSplitMode(A->getValue());
if (!ModeOrNone)
return createStringError(formatv(
"sycl-module-split-mode value isn't recognized: {0}", A->getValue()));
SplitMode = *ModeOrNone;
}

SmallVector<StringRef> KernelNames;
for (Function &F : **ModOrErr) {
// TODO: Consider using LLVM-IR metadata to identify globals of interest
if (F.hasKernelCallingConv())
KernelNames.push_back(F.getName());
}
SmallString<0> SymbolData;
llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData);
SymbolTable.emplace_back(std::move(SymbolData));
// Split the linked module into one or more device images.
Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr =
splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, Args);
if (!SplitModulesOrErr)
return SplitModulesOrErr.takeError();
SmallVector<SplitModule, 0> &SplitModules = *SplitModulesOrErr;
if (Verbose) {
SmallVector<StringRef> SplitFiles;
for (const SplitModule &SI : SplitModules)
SplitFiles.push_back(SI.ModuleFilePath);
errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: {2}\n",
LinkedFile, llvm::join(SplitFiles, ", "),
SplitMode == SPLIT_PER_KERNEL ? "kernel" : "none");
}

bool IsAOTCompileNeeded = IsIntelOffloadArch(
Expand All @@ -510,31 +619,31 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
StringRef Stem = OutputFile.rsplit('.').first;
std::string CodeGenFile = (Stem + "_" + Twine(I) + OutputFileNameExt).str();

if (Error Err = runCodeGen(SplitModules[I], Args, CodeGenFile, C))
if (Error Err =
runCodeGen(SplitModules[I].ModuleFilePath, Args, CodeGenFile, C))
return Err;

SplitModules[I] = CodeGenFile;
SplitModules[I].ModuleFilePath = CodeGenFile;
if (IsAOTCompileNeeded) {
std::string AOTFile = (Stem + "_" + Twine(I) + ".out").str();
if (Error Err = runAOTCompile(CodeGenFile, AOTFile, Args))
return Err;
SplitModules[I] = AOTFile;
SplitModules[I].ModuleFilePath = AOTFile;
}
}

// Collect all images to be packed into a single OffloadBinary.
SmallVector<OffloadingImage> Images;
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
if (SymbolTable[I].empty())
for (SplitModule &SI : SplitModules) {
if (SI.Symbols.empty())
continue;
auto File = SplitModules[I];
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(File);
llvm::MemoryBuffer::getFileOrSTDIN(SI.ModuleFilePath);
if (std::error_code EC = FileOrErr.getError()) {
if (DryRun)
FileOrErr = MemoryBuffer::getMemBuffer("");
else
return createFileError(File, EC);
return createFileError(SI.ModuleFilePath, EC);
}
OffloadingImage TheImage{};
TheImage.TheImageKind = IsAOTCompileNeeded ? IMG_Object : IMG_SPIRV;
Expand All @@ -543,7 +652,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
Args.MakeArgString(Args.getLastArgValue(OPT_triple_EQ));
TheImage.StringData["arch"] =
Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ));
TheImage.StringData["symbols"] = SymbolTable[I];
TheImage.StringData["symbols"] = SI.Symbols;
TheImage.Image = std::move(*FileOrErr);
Images.emplace_back(std::move(TheImage));
}
Expand Down
6 changes: 6 additions & 0 deletions clang/tools/clang-sycl-linker/SYCLLinkOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,9 @@ def ocloc_options_EQ : Joined<["--", "-"], "ocloc-options=">,
def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">,
Flags<[LinkerOnlyOption]>,
HelpText<"Options passed to opencl-aot for Intel CPU AOT compilation">;

def sycl_module_split_mode_EQ : Joined<["--", "-"], "sycl-module-split-mode=">,
Comment thread
YuriPlyakhin marked this conversation as resolved.
Outdated
Flags<[LinkerOnlyOption]>, MetaVarName<"<mode>">,
HelpText<"SYCL device code module split mode. Valid values: 'none' (default) "
"emits a single device image; 'kernel' emits one device image per "
"kernel function.">;
5 changes: 3 additions & 2 deletions llvm/include/llvm/Transforms/Utils/SplitModuleByCategory.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"

#include <memory>
#include <optional>
Expand Down Expand Up @@ -54,10 +55,10 @@ class Function;
///
/// FIXME: For now, the algorithm assumes no recursion in the input Module. This
/// will be addressed in the near future.
LLVM_ABI void splitModuleTransitiveFromEntryPoints(
LLVM_ABI Error splitModuleTransitiveFromEntryPoints(
std::unique_ptr<Module> M,
function_ref<std::optional<int>(const Function &F)> EntryPointCategorizer,
function_ref<void(std::unique_ptr<Module> Part)> Callback);
function_ref<Error(std::unique_ptr<Module> Part)> Callback);

} // namespace llvm

Expand Down
Loading