Skip to content

Commit 26f73c9

Browse files
Gasoonjia and facebook-github-bot
authored and committed
prefetch data blob before .so loading
Differential Revision: D96948202
1 parent b73ca05 commit 26f73c9

File tree

1 file changed

+46
-3
lines changed

1 file changed

+46
-3
lines changed

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 46 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,11 @@
1313
#include <executorch/runtime/core/evalue.h>
1414
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
1515
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
16+
#if !defined(_WIN32)
17+
#include <sys/mman.h>
18+
#include <unistd.h>
19+
#include <cerrno>
20+
#endif
1621
#include <cctype>
1722
#include <cstdio>
1823

@@ -275,6 +280,45 @@ class ET_EXPERIMENTAL CudaBackend final
275280
method_name.empty() ? "so_blob" : method_name + "_so_blob";
276281

277282
const NamedDataMap* named_data_map = context.get_named_data_map();
283+
ET_CHECK_OR_RETURN_ERROR(
284+
named_data_map != nullptr,
285+
Internal,
286+
"CudaBackend requires a NamedDataMap for weight loading");
287+
288+
// Prefetch the weights blob — trigger async readahead so pages are
289+
// resident by the time update_constants_from_blob memcpy's them.
290+
// This overlaps disk I/O with the .so write + dlopen.
291+
std::string weights_blob_key =
292+
method_name.empty() ? "weights_blob" : method_name + "_weights_blob";
293+
#if !defined(_WIN32)
294+
{
295+
auto prefetch_buf = named_data_map->get_data(weights_blob_key.c_str());
296+
if (prefetch_buf.ok() && prefetch_buf->data() != nullptr) {
297+
uintptr_t addr = reinterpret_cast<uintptr_t>(prefetch_buf->data());
298+
size_t page_size = getpagesize();
299+
uintptr_t aligned_addr = addr & ~(page_size - 1);
300+
size_t aligned_size = prefetch_buf->size() + (addr - aligned_addr);
301+
int ret = madvise(
302+
reinterpret_cast<void*>(aligned_addr),
303+
aligned_size,
304+
MADV_WILLNEED);
305+
if (ret != 0) {
306+
ET_LOG(
307+
Info,
308+
"CudaBackend::init - madvise(MADV_WILLNEED) failed for %s: %s",
309+
weights_blob_key.c_str(),
310+
strerror(errno));
311+
} else {
312+
ET_LOG(
313+
Info,
314+
"CudaBackend::init - Prefetching %s (%.1f MB)",
315+
weights_blob_key.c_str(),
316+
prefetch_buf->size() / (1024.0 * 1024.0));
317+
}
318+
}
319+
}
320+
#endif
321+
278322
auto aoti_dso_buffer = named_data_map->get_data(so_blob_key.c_str());
279323
ET_CHECK_OR_RETURN_ERROR(
280324
aoti_dso_buffer.ok(),
@@ -338,9 +382,8 @@ class ET_EXPERIMENTAL CudaBackend final
338382

339383
handle->container_handle = container_handle;
340384

341-
// Look into named data map for constant data
342-
std::string weights_blob_key =
343-
method_name.empty() ? "weights_blob" : method_name + "_weights_blob";
385+
// Look into named data map for constant data (key computed above for
386+
// prefetch)
344387
auto buffer_res = named_data_map->get_data(weights_blob_key.c_str());
345388
if (buffer_res.ok() && handle->update_constants_from_blob != nullptr) {
346389
ET_LOG(Info, "Found %s in named data map", weights_blob_key.c_str());

0 commit comments

Comments
 (0)