|
13 | 13 | #include <executorch/runtime/core/evalue.h> |
14 | 14 | #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h> |
15 | 15 | #include <executorch/runtime/core/exec_aten/util/tensor_util.h> |
| 16 | +#if !defined(_WIN32) |
| 17 | +#include <sys/mman.h> |
| 18 | +#include <unistd.h> |
| 19 | +#include <cerrno> |
| 20 | +#endif |
16 | 21 | #include <cctype> |
17 | 22 | #include <cstdio> |
18 | 23 |
|
@@ -275,6 +280,45 @@ class ET_EXPERIMENTAL CudaBackend final |
275 | 280 | method_name.empty() ? "so_blob" : method_name + "_so_blob"; |
276 | 281 |
|
277 | 282 | const NamedDataMap* named_data_map = context.get_named_data_map(); |
| 283 | + ET_CHECK_OR_RETURN_ERROR( |
| 284 | + named_data_map != nullptr, |
| 285 | + Internal, |
| 286 | + "CudaBackend requires a NamedDataMap for weight loading"); |
| 287 | + |
| 288 | + // Prefetch the weights blob: ask the kernel to start reading its pages now |
| 289 | + // so they are likely resident when update_constants_from_blob copies them. |
| 290 | + // This overlaps disk I/O with the .so write + dlopen; failure is only logged. |
| 291 | + std::string weights_blob_key = |
| 292 | + method_name.empty() ? "weights_blob" : method_name + "_weights_blob"; |
| 293 | +#if !defined(_WIN32) |
| 294 | + { |
| 295 | + auto prefetch_buf = named_data_map->get_data(weights_blob_key.c_str()); |
| 296 | + if (prefetch_buf.ok() && prefetch_buf->data() != nullptr) { |
| 297 | + uintptr_t addr = reinterpret_cast<uintptr_t>(prefetch_buf->data()); |
| 298 | + size_t page_size = getpagesize(); |
| 299 | + uintptr_t aligned_addr = addr & ~(page_size - 1); |
| 300 | + size_t aligned_size = prefetch_buf->size() + (addr - aligned_addr); |
| 301 | + int ret = madvise( |
| 302 | + reinterpret_cast<void*>(aligned_addr), |
| 303 | + aligned_size, |
| 304 | + MADV_WILLNEED); |
| 305 | + if (ret != 0) { |
| 306 | + ET_LOG( |
| 307 | + Info, |
| 308 | + "CudaBackend::init - madvise(MADV_WILLNEED) failed for %s: %s", |
| 309 | + weights_blob_key.c_str(), |
| 310 | + strerror(errno)); |
| 311 | + } else { |
| 312 | + ET_LOG( |
| 313 | + Info, |
| 314 | + "CudaBackend::init - Prefetching %s (%.1f MB)", |
| 315 | + weights_blob_key.c_str(), |
| 316 | + prefetch_buf->size() / (1024.0 * 1024.0)); |
| 317 | + } |
| 318 | + } |
| 319 | + } |
| 320 | +#endif |
| 321 | + |
278 | 322 | auto aoti_dso_buffer = named_data_map->get_data(so_blob_key.c_str()); |
279 | 323 | ET_CHECK_OR_RETURN_ERROR( |
280 | 324 | aoti_dso_buffer.ok(), |
@@ -338,9 +382,8 @@ class ET_EXPERIMENTAL CudaBackend final |
338 | 382 |
|
339 | 383 | handle->container_handle = container_handle; |
340 | 384 |
|
341 | | - // Look into named data map for constant data |
342 | | - std::string weights_blob_key = |
343 | | - method_name.empty() ? "weights_blob" : method_name + "_weights_blob"; |
| 385 | + // Look up the constant data in the named data map (weights_blob_key was |
| 386 | + // computed above for the prefetch) |
344 | 387 | auto buffer_res = named_data_map->get_data(weights_blob_key.c_str()); |
345 | 388 | if (buffer_res.ok() && handle->update_constants_from_blob != nullptr) { |
346 | 389 | ET_LOG(Info, "Found %s in named data map", weights_blob_key.c_str()); |
|
0 commit comments