Skip to content

Mirror intel/llvm commits #2792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/intel-llvm-mirror-base-commit
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e84de949d8bad654483634664bf799a23bb4f460
72829cdeb123d5b7f0171903b7aac4d14fc963ec
14 changes: 14 additions & 0 deletions source/adapters/native_cpu/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,20 @@ inline void *aligned_malloc(size_t alignment, size_t size) {
return ptr;
}

// In many cases we require aligned memory without being told what the alignment
// requirement is. This helper function returns maximally aligned memory based
// on the size: the alignment used is the largest power of two that exceeds
// neither the requested size nor 16 * sizeof(double).
//
// The alignment is never reduced below 1, so a zero-byte request is forwarded
// as aligned_malloc(1, 0) rather than with an alignment of 0, which is not a
// power of two.
//
// The returned pointer comes from aligned_malloc(alignment, size).
inline void *aligned_malloc(size_t size) {
  constexpr size_t max_alignment = 16 * sizeof(double);
  size_t alignment = max_alignment;
  // Stop at 1: shifting once more would produce 0, which is not a valid
  // alignment and would turn the round-up mask below into a no-op.
  while (alignment > 1 && alignment > size) {
    alignment >>= 1;
  }
  // aligned_malloc requires size to be a multiple of alignment; round up.
  size = (size + alignment - 1) & ~(alignment - 1);
  return aligned_malloc(alignment, size);
}

inline void aligned_free(void *ptr) {
#ifdef _MSC_VER
_aligned_free(ptr);
Expand Down
13 changes: 4 additions & 9 deletions source/adapters/native_cpu/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct ur_kernel_handle_t_ : RefCounted {

~ur_kernel_handle_t_() {
removeArgReferences();
free(_localMemPool);
native_cpu::aligned_free(_localMemPool);
}

ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *name,
Expand All @@ -59,7 +59,6 @@ struct ur_kernel_handle_t_ : RefCounted {
args_index_t Indices;
std::vector<size_t> ParamSizes;
std::vector<bool> OwnsMem;
static constexpr size_t MaxAlign = 16 * sizeof(double);

arguments() = default;

Expand Down Expand Up @@ -109,11 +108,7 @@ struct ur_kernel_handle_t_ : RefCounted {
}
}
if (NeedAlloc) {
size_t Align = MaxAlign;
while (Align > Size) {
Align >>= 1;
}
Indices[Index] = native_cpu::aligned_malloc(Align, Size);
Indices[Index] = native_cpu::aligned_malloc(Size);
ParamSizes[Index] = Size;
OwnsMem[Index] = true;
}
Expand Down Expand Up @@ -158,8 +153,8 @@ struct ur_kernel_handle_t_ : RefCounted {
if (reqSize == 0 || reqSize == _localMemPoolSize) {
return;
}
// realloc handles nullptr case
_localMemPool = (char *)realloc(_localMemPool, reqSize);
native_cpu::aligned_free(_localMemPool);
_localMemPool = static_cast<char *>(native_cpu::aligned_malloc(reqSize));
_localMemPoolSize = reqSize;
}

Expand Down
9 changes: 4 additions & 5 deletions source/adapters/native_cpu/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@

struct ur_mem_handle_t_ : ur_object {
ur_mem_handle_t_(size_t Size, bool _IsImage)
: _mem{static_cast<char *>(malloc(Size))}, _ownsMem{true},
IsImage{_IsImage} {}
: _mem{static_cast<char *>(native_cpu::aligned_malloc(Size))},
_ownsMem{true}, IsImage{_IsImage} {}

ur_mem_handle_t_(void *HostPtr, size_t Size, bool _IsImage)
: _mem{static_cast<char *>(malloc(Size))}, _ownsMem{true},
IsImage{_IsImage} {
: ur_mem_handle_t_(Size, _IsImage) {
memcpy(_mem, HostPtr, Size);
}

Expand All @@ -34,7 +33,7 @@ struct ur_mem_handle_t_ : ur_object {

~ur_mem_handle_t_() {
if (_ownsMem) {
free(_mem);
native_cpu::aligned_free(_mem);
}
}

Expand Down
82 changes: 82 additions & 0 deletions source/adapters/offload/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,85 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(

return UR_RESULT_SUCCESS;
}

// Stand-in for an enqueue operation that has no work to submit.
//
// Offload currently cannot create arbitrary events and the last real event in
// the queue is not known here, so no genuine event can be produced. Instead
// the caller is handed an empty (implicitly finished) event through phEvent,
// and the whole queue is drained with urQueueFinish. The result of that wait
// is what this function returns.
//
// phEvent is written unconditionally and must therefore not be null.
ur_result_t enqueueNoOp(ur_queue_handle_t hQueue, ur_event_handle_t *phEvent) {
  ur_event_handle_t EmptyEvent = ur_event_handle_t_::createEmptyEvent();
  *phEvent = EmptyEvent;

  const ur_result_t FinishResult = urQueueFinish(hQueue);
  return FinishResult;
}

// Maps a region of a buffer into host-accessible memory.
//
// The host pointer for the region is obtained from BufferMem::mapToPtr and is
// written to *ppRetMap. For buffers that are not backed by pinned host memory
// (AllocMode::AllocHostPtr) and are mapped with READ or WRITE access, the
// current buffer contents are copied into the mapping with
// urEnqueueMemBufferRead. Otherwise there is nothing to transfer; if the
// caller asked for an event, enqueueNoOp supplies an empty one.
//
// Returns UR_RESULT_ERROR_INVALID_MEM_OBJECT when no mapping pointer could be
// obtained, otherwise the result of the read or of enqueueNoOp. Note that
// *ppRetMap is written even when that result is an error.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap,
    ur_map_flags_t mapFlags, size_t offset, size_t size,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent, void **ppRetMap) {

  auto &BufferImpl = std::get<BufferMem>(hBuffer->Mem);
  auto MapPtr = BufferImpl.mapToPtr(size, offset, mapFlags);

  if (!MapPtr) {
    return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
  }

  const bool IsPinned =
      BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;

  ur_result_t Result = UR_RESULT_SUCCESS;
  if (!IsPinned &&
      ((mapFlags & UR_MAP_FLAG_READ) || (mapFlags & UR_MAP_FLAG_WRITE))) {
    // Pinned host memory is already on host so it doesn't need to be read.
    Result = urEnqueueMemBufferRead(hQueue, hBuffer, blockingMap, offset, size,
                                    MapPtr, numEventsInWaitList,
                                    phEventWaitList, phEvent);
  } else {
    if (IsPinned) {
      // TODO: Ignore the event waits list for now. When urEnqueueEventsWait is
      // implemented we can call it on the wait list.
    }

    if (phEvent) {
      // enqueueNoOp waits on the queue; report a failed wait to the caller
      // instead of silently returning success.
      Result = enqueueNoOp(hQueue, phEvent);
    }
  }
  *ppRetMap = MapPtr;

  return Result;
}

// Releases a mapping previously created for this buffer.
//
// The mapping is looked up by pMappedPtr; an unknown pointer fails the
// UR_ASSERT with UR_RESULT_ERROR_INVALID_MEM_OBJECT. For buffers that are not
// backed by pinned host memory (AllocMode::AllocHostPtr) and were mapped with
// WRITE or WRITE_INVALIDATE_REGION access, the mapped bytes are copied back
// with a blocking urEnqueueMemBufferWrite. Otherwise there is nothing to
// transfer; if the caller asked for an event, enqueueNoOp supplies an empty
// one.
//
// The mapping record is removed regardless of the outcome, and the result of
// the write or of enqueueNoOp is returned.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
    ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  auto &BufferImpl = std::get<BufferMem>(hMem->Mem);

  auto *Map = BufferImpl.getMapDetails(pMappedPtr);
  UR_ASSERT(Map != nullptr, UR_RESULT_ERROR_INVALID_MEM_OBJECT);

  const bool IsPinned =
      BufferImpl.MemAllocMode == BufferMem::AllocMode::AllocHostPtr;

  ur_result_t Result = UR_RESULT_SUCCESS;
  if (!IsPinned && ((Map->MapFlags & UR_MAP_FLAG_WRITE) ||
                    (Map->MapFlags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION))) {
    // Pinned host memory is only on host so it doesn't need to be written to.
    // The write is blocking because the mapping is erased right after it.
    Result = urEnqueueMemBufferWrite(
        hQueue, hMem, true, Map->MapOffset, Map->MapSize, pMappedPtr,
        numEventsInWaitList, phEventWaitList, phEvent);
  } else {
    if (IsPinned) {
      // TODO: Ignore the event waits list for now. When urEnqueueEventsWait is
      // implemented we can call it on the wait list.
    }

    if (phEvent) {
      // enqueueNoOp waits on the queue; report a failed wait to the caller
      // instead of silently returning success.
      Result = enqueueNoOp(hQueue, phEvent);
    }
  }
  // Map points into the record erased here and must not be used afterwards.
  BufferImpl.unmap(pMappedPtr);

  return Result;
}
12 changes: 7 additions & 5 deletions source/adapters/offload/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "event.hpp"
#include "ur2offload.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hKernel,
UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
ur_event_info_t propName,
size_t propSize,
void *pPropValue,
Expand All @@ -23,7 +23,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hKernel,

switch (propName) {
case UR_EVENT_INFO_REFERENCE_COUNT:
return ReturnValue(hKernel->RefCount.load());
return ReturnValue(hEvent->RefCount.load());
default:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
}
Expand All @@ -42,9 +42,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(ur_event_handle_t,
UR_APIEXPORT ur_result_t UR_APICALL
urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) {
for (uint32_t i = 0; i < numEvents; i++) {
auto Res = olWaitEvent(phEventWaitList[i]->OffloadEvent);
if (Res) {
return offloadResultToUR(Res);
if (phEventWaitList[i]->OffloadEvent) {
auto Res = olWaitEvent(phEventWaitList[i]->OffloadEvent);
if (Res) {
return offloadResultToUR(Res);
}
}
}
return UR_RESULT_SUCCESS;
Expand Down
9 changes: 9 additions & 0 deletions source/adapters/offload/event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,13 @@

// Event object of the offload adapter: a reference-counted wrapper around an
// Offload event handle plus the command type associated with the event.
struct ur_event_handle_t_ : RefCounted {
  // Underlying Offload event; nullptr marks an "empty" event.
  ol_event_handle_t OffloadEvent;
  // Command type recorded for this event.
  ur_command_t Type;

  // Allocates a new event that wraps no Offload event at all. Such an empty
  // event stands for already-finished work, so waiting on it does nothing.
  // The result is created with plain `new`.
  static ur_event_handle_t createEmptyEvent() {
    auto *EmptyEvent = new ur_event_handle_t_();
    EmptyEvent->OffloadEvent = nullptr;
    return EmptyEvent;
  }
};
49 changes: 49 additions & 0 deletions source/adapters/offload/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,23 @@ struct BufferMem {
AllocHostPtr,
};

// Bookkeeping record for one active host mapping of a buffer.
struct BufferMap {
  // Number of bytes covered by the mapping.
  size_t MapSize;
  // Offset of the mapped region from the start of the buffer.
  size_t MapOffset;
  // Access flags the mapping was requested with.
  ur_map_flags_t MapFlags;
  // Allocated host memory used exclusively for this map. Null when the
  // mapping does not own any host memory.
  std::unique_ptr<unsigned char[]> MapMem;

  // Creates a record that takes ownership of the host allocation MapMem.
  BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags,
            std::unique_ptr<unsigned char[]> &&MapMem)
      : MapSize(MapSize), MapOffset(MapOffset), MapFlags(MapFlags),
        MapMem(std::move(MapMem)) {}

  // Creates a record that owns no host memory (MapMem stays null).
  BufferMap(size_t MapSize, size_t MapOffset, ur_map_flags_t MapFlags)
      : BufferMap(MapSize, MapOffset, MapFlags, nullptr) {}
};

ur_mem_handle_t Parent;
// Underlying device pointer
void *Ptr;
Expand All @@ -30,6 +47,7 @@ struct BufferMem {
size_t Size;

AllocMode MemAllocMode;
std::unordered_map<void *, BufferMap> PtrToBufferMap;

BufferMem(ur_mem_handle_t Parent, BufferMem::AllocMode Mode, void *Ptr,
void *HostPtr, size_t Size)
Expand All @@ -38,6 +56,37 @@ struct BufferMem {

void *get() const noexcept { return Ptr; }
size_t getSize() const noexcept { return Size; }

// Looks up the mapping record registered for the host pointer Map.
// Returns a pointer to the record stored in PtrToBufferMap, or nullptr when
// no mapping is registered under that pointer.
BufferMap *getMapDetails(void *Map) {
  const auto Found = PtrToBufferMap.find(Map);
  return Found == PtrToBufferMap.end() ? nullptr : &Found->second;
}

// Produces a host pointer for the region [MapOffset, MapOffset + MapSize) of
// this buffer and records the mapping in PtrToBufferMap under that pointer.
//
// When the buffer has a host pointer, the mapping simply points into it at
// MapOffset. Otherwise a fresh host allocation of MapSize bytes is created
// and owned by the mapping record.
//
// NOTE(review): the function is noexcept, yet std::make_unique and the map
// insertion can throw on allocation failure; confirm that terminating in
// that situation is intended.
void *mapToPtr(size_t MapSize, size_t MapOffset,
               ur_map_flags_t MapFlags) noexcept {
  if (HostPtr != nullptr) {
    // Reuse the existing host memory; the record owns nothing.
    void *const Mapped = static_cast<char *>(HostPtr) + MapOffset;
    PtrToBufferMap.insert({Mapped, BufferMap(MapSize, MapOffset, MapFlags)});
    return Mapped;
  }

  // No host pointer available: back the mapping with its own allocation.
  auto Backing = std::make_unique<unsigned char[]>(MapSize);
  // Grab the address before ownership moves into the record.
  void *const Mapped = Backing.get();
  PtrToBufferMap.insert(
      {Mapped, BufferMap(MapSize, MapOffset, MapFlags, std::move(Backing))});
  return Mapped;
}

// Drops the mapping record registered under MapPtr, destroying the record
// and with it any host allocation it owns. Does nothing when no record is
// registered for MapPtr. MapPtr must not be null.
void unmap(void *MapPtr) noexcept {
  assert(MapPtr != nullptr);
  const auto Entry = PtrToBufferMap.find(MapPtr);
  if (Entry != PtrToBufferMap.end()) {
    PtrToBufferMap.erase(Entry);
  }
}
};

struct ur_mem_handle_t_ : RefCounted {
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/offload/ur_interface_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,15 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
pDdiTable->pfnMemBufferCopy = nullptr;
pDdiTable->pfnMemBufferCopyRect = nullptr;
pDdiTable->pfnMemBufferFill = nullptr;
pDdiTable->pfnMemBufferMap = nullptr;
pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap;
pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead;
pDdiTable->pfnMemBufferReadRect = nullptr;
pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite;
pDdiTable->pfnMemBufferWriteRect = nullptr;
pDdiTable->pfnMemImageCopy = nullptr;
pDdiTable->pfnMemImageRead = nullptr;
pDdiTable->pfnMemImageWrite = nullptr;
pDdiTable->pfnMemUnmap = nullptr;
pDdiTable->pfnMemUnmap = urEnqueueMemUnmap;
pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D;
pDdiTable->pfnUSMFill = nullptr;
pDdiTable->pfnUSMAdvise = nullptr;
Expand Down
2 changes: 1 addition & 1 deletion source/common/linux/ur_lib_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ LibLoader::loadAdapterLibrary(const char *name) {
bool deepbind = getenv_tobool(DEEP_BIND_ENV);
if (deepbind) {
#if defined(SANITIZER_ANY)
UR_LOG(WARN
UR_LOG(WARN,
"Enabling RTLD_DEEPBIND while running under a sanitizer is likely "
"to cause issues. Consider disabling {} environment variable.",
DEEP_BIND_ENV);
Expand Down
Loading