Skip to content

[LLD][COFF] Check both mangled and demangled symbols before adding a lazy archive symbol to the symbol table on ARM64EC #113284

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions lld/COFF/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Parallel.h"
Expand Down Expand Up @@ -631,8 +632,47 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
return s;
}

// On ARM64EC, a function symbol may appear in both mangled and demangled forms:
// - ARM64EC archives contain only the mangled name, while the demangled symbol
// is defined by the object file as an alias.
// - x86_64 archives contain only the demangled name (the mangled name is
// usually defined by an object referencing the symbol as an alias to a guess
// exit thunk).
// - ARM64EC import files contain both the mangled and demangled names for
// thunks.
// If more than one archive defines the same function, this could lead
// to different libraries being used for the same function depending on how they
// are referenced. Avoid this by checking if the paired symbol is already
// defined before adding a symbol to the table.
template <typename T>
bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
if (name.starts_with("__imp_"))
return true;
std::string pairName;
if (std::optional<std::string> mangledName =
getArm64ECMangledFunctionName(name))
pairName = std::move(*mangledName);
else
pairName = *getArm64ECDemangledFunctionName(name);

Symbol *sym = symtab->find(pairName);
if (!sym)
return true;
if (sym->pendingArchiveLoad)
return false;
if (auto u = dyn_cast<Undefined>(sym))
return !u->weakAlias || u->isAntiDep;
// If the symbol is lazy, allow it only if it originates from the same
// archive.
auto lazy = dyn_cast<T>(sym);
return lazy && lazy->file == f;
}

void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
StringRef name = sym.getName();
if (isArm64EC(ctx.config.machine) &&
!checkLazyECPair<LazyArchive>(this, name, f))
return;
auto [s, wasInserted] = insert(name);
if (wasInserted) {
replaceSymbol<LazyArchive>(s, f, sym);
Expand All @@ -648,6 +688,8 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {

void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
assert(f->lazy);
if (isArm64EC(ctx.config.machine) && !checkLazyECPair<LazyObject>(this, n, f))
return;
auto [s, wasInserted] = insert(n, f);
if (wasInserted) {
replaceSymbol<LazyObject>(s, f, n);
Expand Down
20 changes: 20 additions & 0 deletions lld/test/COFF/arm64ec-lib.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ RUN: llvm-lib -machine:arm64ec -out:sym-arm64ec.lib sym-arm64ec.obj nsym-aarch64
RUN: llvm-lib -machine:amd64 -out:sym-x86_64.lib sym-x86_64.obj
RUN: llvm-lib -machine:arm64ec -out:func.lib func.obj
RUN: llvm-lib -machine:arm64ec -out:func-x86_64.lib func-x86_64.obj
RUN: llvm-lib -machine:arm64ec -out:func-imp.lib -def:func.def

Verify that a symbol can be referenced from ECSYMBOLS.
RUN: lld-link -machine:arm64ec -dll -noentry -out:test.dll symref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj
Expand Down Expand Up @@ -57,6 +58,15 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-2.dll func.lib ref-t
RUN: llvm-objdump -d ref-thunk-2.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-2.dll | FileCheck -check-prefix=TESTSEC %s

Pass multiple libraries containing `func` with different manglings and ensure they don't conflict with each other.
RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-3.dll func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib ref-thunk.obj
RUN: llvm-objdump -d ref-thunk-3.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-3.dll | FileCheck -check-prefix=TESTSEC %s

RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-4.dll ref-thunk.obj func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib
RUN: llvm-objdump -d ref-thunk-4.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-4.dll | FileCheck -check-prefix=TESTSEC %s

Test linking against an x86_64 library (which uses a demangled function name).
RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-x86-1.dll ref-thunk.obj func-x86_64.lib loadconfig-arm64ec.obj
RUN: llvm-objdump -d ref-x86-1.dll | FileCheck -check-prefix=DISASM-X86 %s
Expand All @@ -80,6 +90,11 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-1.dll ref-thunk.obj
RUN: llvm-objdump -d start-lib-1.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test start-lib-1.dll | FileCheck -check-prefix=TESTSEC %s

RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-2.dll ref-thunk.obj -start-lib func.obj -end-lib loadconfig-arm64ec.obj \
RUN: -start-lib func-x86_64.obj -end-lib func-imp.lib
RUN: llvm-objdump -d ref-thunk-3.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-3.dll | FileCheck -check-prefix=TESTSEC %s

#--- symref.s
.data
.rva sym
Expand Down Expand Up @@ -135,3 +150,8 @@ thunksym:
.globl func
func:
ret

#--- func.def
LIBRARY func.dll
EXPORTS
func
Loading