File tree 2 files changed +13
-1
lines changed 2 files changed +13
-1
lines changed Original file line number Diff line number Diff line change @@ -1303,10 +1303,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
1303
1303
const int act_gpu_layers = devices.empty () ? 0 : std::min (n_gpu_layers, (int )n_layer + 1 );
1304
1304
// Maps a layer index `il` to the device that should hold it, paired with a
// pointer to that device's buffer-type list. Layers outside the offloaded
// window [i_gpu_start, i_gpu_start + act_gpu_layers) are given cpu_dev; the
// others are distributed over `devices` according to `splits`.
// Every assignment is reported through LLAMA_LOG_DEBUG.
auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
1305
1305
// Not offloaded: the layer is before the first offloaded layer or past the
// offload budget. This guard also means the division below is only reached
// with act_gpu_layers >= 1 and 0 <= (il - i_gpu_start) < act_gpu_layers.
if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
1306
+ LLAMA_LOG_DEBUG (" load_tensors: layer %3d assigned to device %s\n " , il, ggml_backend_dev_name (cpu_dev));
1306
1307
return {cpu_dev, &pimpl->cpu_buft_list };
1307
1308
}
1308
1309
// Relative position of the layer inside the offloaded window, in [0, 1).
// upper_bound returns the index of the first of the first n_devices() entries
// of `splits` that is strictly greater than that position.
// NOTE(review): presumably `splits` holds ascending cumulative fractions
// normalized so the last entry is 1 - confirm where `splits` is built.
const int layer_gpu = std::upper_bound (splits.begin (), splits.begin () + n_devices (), float (il - i_gpu_start)/act_gpu_layers) - splits.begin ();
1309
1310
// Uses .at() rather than operator[] for the lookup (bounds-checked if
// `devices` is a standard container - its type is not visible here).
auto * dev = devices.at (layer_gpu);
1311
+ LLAMA_LOG_DEBUG (" load_tensors: layer %3d assigned to device %s\n " , il, ggml_backend_dev_name (dev));
1310
1312
// The buffer-type list is looked up by device in pimpl->gpu_buft_list.
return {dev, &pimpl->gpu_buft_list .at (dev)};
1311
1313
};
1312
1314
Original file line number Diff line number Diff line change @@ -9405,6 +9405,7 @@ static struct llama_model * llama_model_load_from_file_impl(
9405
9405
model->devices.push_back(*dev);
9406
9406
}
9407
9407
} else {
9408
+ std::vector<ggml_backend_dev_t> rpc_servers;
9408
9409
// use all available devices
9409
9410
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
9410
9411
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
@@ -9415,10 +9416,19 @@ static struct llama_model * llama_model_load_from_file_impl(
9415
9416
break;
9416
9417
9417
9418
case GGML_BACKEND_DEVICE_TYPE_GPU:
9418
- model->devices.push_back(dev);
9419
+ ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
9420
+ if (ggml_backend_reg_name(reg) == std::string("RPC")) {
9421
+ rpc_servers.push_back(dev);
9422
+ } else {
9423
+ model->devices.push_back(dev);
9424
+ }
9419
9425
break;
9420
9426
}
9421
9427
}
9428
+ // add RPC servers at the front of the list
9429
+ if (!rpc_servers.empty()) {
9430
+ model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
9431
+ }
9422
9432
}
9423
9433
9424
9434
// if using single GPU mode, remove all except the main GPU
You can’t perform that action at this time.
0 commit comments