Skip to content

Mirror intel/llvm commits #2799

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/intel-llvm-mirror-base-commit
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5074ba7fe951d1b3c750da769c52259eac8d6775
156429e96783c484554d4594723dc180eeef8c1c
11 changes: 4 additions & 7 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,6 @@ ur_result_t createSyncPointAndGetZeEvents(
if (CommandBuffer->IsInOrderCmdList) {
UR_CALL(createSyncPointBetweenCopyAndCompute(CommandBuffer, ZeCommandList,
ZeEventList));
if (!ZeEventList.empty()) {
NumSyncPointsInWaitList = ZeEventList.size();
}
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -1330,9 +1327,9 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp(
CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
SyncPointWaitList, true, RetSyncPoint, ZeEventList, ZeLaunchEvent));

if (NumSyncPointsInWaitList) {
if (!ZeEventList.empty()) {
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
(CommandBuffer->ZeComputeCommandList, ZeEventList.size(),
ZeEventList.data()));
}

Expand Down Expand Up @@ -1394,9 +1391,9 @@ ur_result_t urCommandBufferAppendUSMAdviseExp(
NumSyncPointsInWaitList, SyncPointWaitList, true, RetSyncPoint,
ZeEventList, ZeLaunchEvent));

if (NumSyncPointsInWaitList) {
if (!ZeEventList.empty()) {
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
(CommandBuffer->ZeComputeCommandList, ZeEventList.size(),
ZeEventList.data()));
}

Expand Down
6 changes: 6 additions & 0 deletions source/adapters/offload/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#pragma once

#include "ur/ur.hpp"
#include "ur2offload.hpp"
#include <atomic>

namespace ur::offload {
Expand All @@ -23,3 +24,8 @@ using handle_base = ur::handle_base<ur::offload::ddi_getter>;
// Handle base type that additionally carries an atomic reference counter.
struct RefCounted : ur::offload::handle_base {
  // A freshly constructed object starts with a count of one.
  std::atomic_uint32_t RefCount{1};
};

// Evaluates `call` exactly once. If the result converts to true it is treated
// as an error: it is translated with offloadResultToUR() and returned from the
// enclosing function. Otherwise execution continues after the macro.
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement that requires a trailing semicolon. This keeps it safe inside
// unbraced if/else (no dangling-else, no stray empty statement). The argument
// is parenthesised so any expression can be passed.
#define OL_RETURN_ON_ERR(call)                                                 \
  do {                                                                         \
    if (auto OlRes = (call)) {                                                 \
      return offloadResultToUR(OlRes);                                         \
    }                                                                          \
  } while (0)
17 changes: 7 additions & 10 deletions source/adapters/offload/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
}

if (pPropSizeRet) {
if (auto Res =
olGetDeviceInfoSize(hDevice->OffloadDevice, olInfo, pPropSizeRet)) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(
olGetDeviceInfoSize(hDevice->OffloadDevice, olInfo, pPropSizeRet));
}

if (pPropValue) {
if (auto Res = olGetDeviceInfo(hDevice->OffloadDevice, olInfo, propSize,
pPropValue)) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(
olGetDeviceInfo(hDevice->OffloadDevice, olInfo, propSize, pPropValue));
// Need to explicitly map this type
if (olInfo == OL_DEVICE_INFO_TYPE) {
auto urPropPtr = reinterpret_cast<ur_device_type_t *>(pPropValue);
Expand Down Expand Up @@ -149,8 +145,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary(
uint32_t NumBinaries, uint32_t *pSelectedBinary) {

ol_platform_backend_t Backend;
olGetPlatformInfo(hDevice->Platform->OffloadPlatform,
OL_PLATFORM_INFO_BACKEND, sizeof(Backend), &Backend);
OL_RETURN_ON_ERR(olGetPlatformInfo(hDevice->Platform->OffloadPlatform,
OL_PLATFORM_INFO_BACKEND, sizeof(Backend),
&Backend));

const char *ImageTarget = UR_DEVICE_BINARY_TARGET_UNKNOWN;
if (Backend == OL_PLATFORM_BACKEND_CUDA) {
Expand Down
29 changes: 10 additions & 19 deletions source/adapters/offload/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
LaunchArgs.DynSharedMemory = 0;

ol_event_handle_t EventOut;
auto Ret =
OL_RETURN_ON_ERR(
olLaunchKernel(hQueue->OffloadQueue, hQueue->OffloadDevice,
hKernel->OffloadKernel, hKernel->Args.getStorage(),
hKernel->Args.getStorageSize(), &LaunchArgs, &EventOut);

if (Ret != OL_SUCCESS) {
return offloadResultToUR(Ret);
}
hKernel->Args.getStorageSize(), &LaunchArgs, &EventOut));

if (phEvent) {
auto *Event = new ur_event_handle_t_();
Expand Down Expand Up @@ -112,11 +108,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
char *DevPtr =
reinterpret_cast<char *>(std::get<BufferMem>(hBuffer->Mem).Ptr);

olMemcpy(hQueue->OffloadQueue, pDst, Adapter->HostDevice, DevPtr + offset,
hQueue->OffloadDevice, size, phEvent ? &EventOut : nullptr);
OL_RETURN_ON_ERR(olMemcpy(hQueue->OffloadQueue, pDst, Adapter->HostDevice,
DevPtr + offset, hQueue->OffloadDevice, size,
phEvent ? &EventOut : nullptr));

if (blockingRead) {
olWaitQueue(hQueue->OffloadQueue);
OL_RETURN_ON_ERR(olWaitQueue(hQueue->OffloadQueue));
}

if (phEvent) {
Expand All @@ -143,18 +140,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
char *DevPtr =
reinterpret_cast<char *>(std::get<BufferMem>(hBuffer->Mem).Ptr);

auto Res =
olMemcpy(hQueue->OffloadQueue, DevPtr + offset, hQueue->OffloadDevice,
pSrc, Adapter->HostDevice, size, phEvent ? &EventOut : nullptr);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olMemcpy(hQueue->OffloadQueue, DevPtr + offset,
hQueue->OffloadDevice, pSrc, Adapter->HostDevice,
size, phEvent ? &EventOut : nullptr));

if (blockingWrite) {
auto Res = olWaitQueue(hQueue->OffloadQueue);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olWaitQueue(hQueue->OffloadQueue));
}

if (phEvent) {
Expand Down
5 changes: 1 addition & 4 deletions source/adapters/offload/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,7 @@ UR_APIEXPORT ur_result_t UR_APICALL
urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) {
for (uint32_t i = 0; i < numEvents; i++) {
if (phEventWaitList[i]->OffloadEvent) {
auto Res = olWaitEvent(phEventWaitList[i]->OffloadEvent);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olWaitEvent(phEventWaitList[i]->OffloadEvent));
}
}
return UR_RESULT_SUCCESS;
Expand Down
25 changes: 8 additions & 17 deletions source/adapters/offload/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
auto AllocMode = BufferMem::AllocMode::Default;

if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) {
auto Res = olMemAlloc(OffloadDevice, OL_ALLOC_TYPE_HOST, size, &HostPtr);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(
olMemAlloc(OffloadDevice, OL_ALLOC_TYPE_HOST, size, &HostPtr));

// TODO: We (probably) need something like cuMemHostGetDevicePointer
// for this to work everywhere. For now assume the managed host pointer is
// device-accessible.
Ptr = HostPtr;
AllocMode = BufferMem::AllocMode::AllocHostPtr;
} else {
auto Res = olMemAlloc(OffloadDevice, OL_ALLOC_TYPE_DEVICE, size, &Ptr);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(
olMemAlloc(OffloadDevice, OL_ALLOC_TYPE_DEVICE, size, &Ptr));
if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) {
AllocMode = BufferMem::AllocMode::CopyIn;
}
Expand All @@ -59,11 +56,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
hContext, ParentBuffer, flags, AllocMode, Ptr, HostPtr, size});

if (PerformInitialCopy) {
auto Res = olMemcpy(nullptr, Ptr, OffloadDevice, HostPtr,
Adapter->HostDevice, size, nullptr);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olMemcpy(nullptr, Ptr, OffloadDevice, HostPtr,
Adapter->HostDevice, size, nullptr));
}

*phBuffer = URMemObj.release();
Expand All @@ -85,10 +79,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) {
if (hMem->MemType == ur_mem_handle_t_::Type::Buffer) {
// TODO: Handle registered host memory
auto &BufferImpl = std::get<BufferMem>(MemObjPtr->Mem);
auto Res = olMemFree(BufferImpl.Ptr);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olMemFree(BufferImpl.Ptr));
}

return UR_RESULT_SUCCESS;
Expand Down
12 changes: 4 additions & 8 deletions source/adapters/offload/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,13 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName,
}

if (pPropSizeRet) {
if (auto Res = olGetPlatformInfoSize(hPlatform->OffloadPlatform, olInfo,
pPropSizeRet)) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olGetPlatformInfoSize(hPlatform->OffloadPlatform, olInfo,
pPropSizeRet));
}

if (pPropValue) {
if (auto Res = olGetPlatformInfo(hPlatform->OffloadPlatform, olInfo,
propSize, pPropValue)) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olGetPlatformInfo(hPlatform->OffloadPlatform, olInfo,
propSize, pPropValue));
}

return UR_RESULT_SUCCESS;
Expand Down
9 changes: 5 additions & 4 deletions source/adapters/offload/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
if (auto Parser = HipOffloadBundleParser::load(RealBinary, RealLength)) {
std::string DevName{};
size_t DevNameLength;
olGetDeviceInfoSize(phDevices[0]->OffloadDevice, OL_DEVICE_INFO_NAME,
&DevNameLength);
OL_RETURN_ON_ERR(olGetDeviceInfoSize(phDevices[0]->OffloadDevice,
OL_DEVICE_INFO_NAME, &DevNameLength));
DevName.resize(DevNameLength);
olGetDeviceInfo(phDevices[0]->OffloadDevice, OL_DEVICE_INFO_NAME,
DevNameLength, DevName.data());
OL_RETURN_ON_ERR(olGetDeviceInfo(phDevices[0]->OffloadDevice,
OL_DEVICE_INFO_NAME, DevNameLength,
DevName.data()));

auto Res = Parser->extract(DevName, RealBinary, RealLength);
if (Res != UR_RESULT_SUCCESS) {
Expand Down
5 changes: 1 addition & 4 deletions source/adapters/offload/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) {

UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) {
if (--hQueue->RefCount == 0) {
auto Res = olDestroyQueue(hQueue->OffloadQueue);
if (Res) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olDestroyQueue(hQueue->OffloadQueue));
delete hQueue;
}

Expand Down
1 change: 1 addition & 0 deletions source/adapters/offload/ur2offload.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <OffloadAPI.h>
#include <ur_api.h>
Expand Down
24 changes: 6 additions & 18 deletions source/adapters/offload/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext,
const ur_usm_desc_t *,
ur_usm_pool_handle_t,
size_t size, void **ppMem) {
auto Res = olMemAlloc(hContext->Device->OffloadDevice, OL_ALLOC_TYPE_HOST,
size, ppMem);

if (Res != OL_SUCCESS) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olMemAlloc(hContext->Device->OffloadDevice,
OL_ALLOC_TYPE_HOST, size, ppMem));

hContext->AllocTypeMap.insert_or_assign(*ppMem, OL_ALLOC_TYPE_HOST);
return UR_RESULT_SUCCESS;
Expand All @@ -34,12 +30,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext,
UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(
ur_context_handle_t hContext, ur_device_handle_t, const ur_usm_desc_t *,
ur_usm_pool_handle_t, size_t size, void **ppMem) {
auto Res = olMemAlloc(hContext->Device->OffloadDevice, OL_ALLOC_TYPE_DEVICE,
size, ppMem);

if (Res != OL_SUCCESS) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olMemAlloc(hContext->Device->OffloadDevice,
OL_ALLOC_TYPE_DEVICE, size, ppMem));

hContext->AllocTypeMap.insert_or_assign(*ppMem, OL_ALLOC_TYPE_DEVICE);
return UR_RESULT_SUCCESS;
Expand All @@ -48,12 +40,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(
UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(
ur_context_handle_t hContext, ur_device_handle_t, const ur_usm_desc_t *,
ur_usm_pool_handle_t, size_t size, void **ppMem) {
auto Res = olMemAlloc(hContext->Device->OffloadDevice, OL_ALLOC_TYPE_MANAGED,
size, ppMem);

if (Res != OL_SUCCESS) {
return offloadResultToUR(Res);
}
OL_RETURN_ON_ERR(olMemAlloc(hContext->Device->OffloadDevice,
OL_ALLOC_TYPE_MANAGED, size, ppMem));

hContext->AllocTypeMap.insert_or_assign(*ppMem, OL_ALLOC_TYPE_MANAGED);
return UR_RESULT_SUCCESS;
Expand Down
14 changes: 14 additions & 0 deletions source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,17 @@ ur_result_t TsanRuntimeDataWrapper::syncToDevice(ur_queue_handle_t Queue) {
return UR_RESULT_SUCCESS;
}

// Returns true when the device-side runtime data holds at least one recorded
// report.
//
// Only sizeof(TsanRuntimeData::RecordedReportCount) bytes are copied, with a
// blocking USM memcpy, from getDevicePtr() to the address of Host; the result
// is then read from Host.RecordedReportCount.
// NOTE(review): this presumably relies on RecordedReportCount being the first
// member of Host's type, so the copied prefix lands on that field — confirm.
//
// If the copy fails, the error is logged and false is returned.
bool TsanRuntimeDataWrapper::hasReport(ur_queue_handle_t Queue) {
  ur_result_t CopyStatus = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
      Queue, /*blocking=*/true, /*pDst=*/ur_cast<void *>(&Host),
      /*pSrc=*/getDevicePtr(),
      /*size=*/sizeof(TsanRuntimeData::RecordedReportCount),
      /*numEventsInWaitList=*/0, /*phEventWaitList=*/nullptr,
      /*phEvent=*/nullptr);

  if (CopyStatus == UR_RESULT_SUCCESS) {
    return Host.RecordedReportCount != 0;
  }

  UR_LOG(ERR, "Failed to sync runtime data to host: {}", CopyStatus);
  return false;
}

ur_result_t TsanRuntimeDataWrapper::importLocalArgsInfo(
ur_queue_handle_t Queue, const std::vector<TsanLocalArgsInfo> &LocalArgs) {
assert(!LocalArgs.empty());
Expand Down Expand Up @@ -310,6 +321,9 @@ ur_result_t TsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
// urEventSetCallback
UR_CALL(getContext()->urDdiTable.Queue.pfnFinish(Queue));

if (!LaunchInfo.Data.hasReport(Queue))
return UR_RESULT_SUCCESS;

UR_CALL(LaunchInfo.Data.syncFromDevice(Queue));

for (uptr ReportIndex = 0;
Expand Down
2 changes: 2 additions & 0 deletions source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ struct TsanRuntimeDataWrapper {

ur_result_t syncToDevice(ur_queue_handle_t Queue);

bool hasReport(ur_queue_handle_t Queue);

ur_result_t
importLocalArgsInfo(ur_queue_handle_t Queue,
const std::vector<TsanLocalArgsInfo> &LocalArgs);
Expand Down
4 changes: 2 additions & 2 deletions source/loader/layers/sanitizer/tsan/tsan_libdevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ struct TsanLocalArgsInfo {
constexpr uint64_t TSAN_MAX_NUM_REPORTS = 128;

struct TsanRuntimeData {
uint32_t RecordedReportCount = 0;

uintptr_t GlobalShadowOffset = 0;

uintptr_t GlobalShadowOffsetEnd = 0;
Expand All @@ -103,8 +105,6 @@ struct TsanRuntimeData {

int Lock = 0;

uint32_t RecordedReportCount = 0;

TsanErrorReport Report[TSAN_MAX_NUM_REPORTS];
};

Expand Down
9 changes: 1 addition & 8 deletions source/loader/layers/sanitizer/tsan/tsan_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,19 +169,12 @@ ur_result_t ShadowMemoryGPU::CleanShadow(ur_queue_handle_t Queue, uptr Ptr,
UR_LOG_L(getContext()->logger, DEBUG, "urVirtualMemMap: {} ~ {}",
(void *)MappedPtr, (void *)(MappedPtr + PageSize - 1));

// Initialize to zero
URes = EnqueueUSMSet(Queue, (void *)MappedPtr, (char)0, PageSize);
if (URes != UR_RESULT_SUCCESS) {
UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}",
URes);
return URes;
}

VirtualMemMaps[MappedPtr] = PhysicalMem;
}
}
}

// Initialize to zero
auto URes = EnqueueUSMSet(Queue, (void *)Begin, (char)0,
Size / kShadowCell * kShadowCnt * kShadowSize);
if (URes != UR_RESULT_SUCCESS) {
Expand Down
4 changes: 0 additions & 4 deletions test/conformance/exp_command_buffer/in-order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ struct urInOrderCommandBufferExpTest
virtual void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp());

// Level-Zero bug https://github.com/intel/llvm/issues/18544
// Re-enable these tests once fixed
UUR_KNOWN_FAILURE_ON(uur::LevelZero{});

ur_exp_command_buffer_desc_t desc{
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype
nullptr, // pnext
Expand Down