Skip to content

Commit 4d115ea

Browse files
committed
wip
1 parent 792d1a1 commit 4d115ea

File tree

1 file changed

+33
-9
lines changed

1 file changed

+33
-9
lines changed

llama.cpp

+33-9
Original file line number | Diff line number | Diff line change
@@ -5085,10 +5085,10 @@ static struct ggml_cgraph * llama_build_graph(
50855085
{ OFFLOAD_FUNC_NOP, "CPU" },
50865086
{ OFFLOAD_FUNC_OUT, "CPU" },
50875087
#ifdef GGML_USE_CUBLAS
5088-
{ OFFLOAD_FUNC, "GPU (CUDA)" },
5089-
{ OFFLOAD_FUNC_KQ, "GPU (CUDA) KQ" },
5090-
{ OFFLOAD_FUNC_V, "GPU (CUDA) V" },
5091-
{ OFFLOAD_FUNC_NR, "GPU (CUDA) NR" },
5088+
{ OFFLOAD_FUNC, "GPU (CUDA)" },
5089+
{ OFFLOAD_FUNC_KQ, "GPU (CUDA) KQ" },
5090+
{ OFFLOAD_FUNC_V, "GPU (CUDA) V" },
5091+
{ OFFLOAD_FUNC_NR, "GPU (CUDA) NR" },
50925092
{ OFFLOAD_FUNC_EMB, "GPU (CUDA) EMB" },
50935093
#else
50945094
{ OFFLOAD_FUNC, "CPU" },
@@ -5103,11 +5103,11 @@ static struct ggml_cgraph * llama_build_graph(
51035103
llm_offload_func_e func_e = k_offload_func_trie.find(name);
51045104

51055105
if (func_e == OFFLOAD_FUNC_NOP) {
5106-
// if a tensor hasn't been offloaded, we warn the user
5107-
if (worst_case) {
5108-
LLAMA_LOG_WARN("%s: %32s: not offloaded (ref: %s)\n", __func__,
5109-
cur->name, "https://github.com/ggerganov/llama.cpp/pull/3837");
5110-
}
5106+
//// if a tensor hasn't been offloaded, we warn the user
5107+
//if (worst_case) {
5108+
// LLAMA_LOG_WARN("%s: %32s: not offloaded (ref: %s)\n", __func__,
5109+
// cur->name, "https://github.com/ggerganov/llama.cpp/pull/3837");
5110+
//}
51115111

51125112
return;
51135113
}
@@ -5214,6 +5214,30 @@ static struct ggml_cgraph * llama_build_graph(
52145214
GGML_ASSERT(false);
52155215
}
52165216

5217+
#ifdef GGML_USE_CUBLAS
5218+
// TODO: tmp code to help find tensors that haven't been offloaded
5219+
if (worst_case) {
5220+
for (int i = 0; i < result->n_nodes; ++i) {
5221+
struct ggml_tensor * cur = result->nodes[i];
5222+
5223+
if (cur->view_src != nullptr) {
5224+
continue;
5225+
}
5226+
5227+
// check the global map for what offload function to use for this tensor
5228+
llm_offload_func_e func_e = k_offload_func_trie.find(cur->name);
5229+
5230+
if (func_e == OFFLOAD_FUNC_NOP && cur->backend == GGML_BACKEND_CPU) {
5231+
// if a tensor hasn't been offloaded, we warn the user
5232+
if (worst_case) {
5233+
LLAMA_LOG_WARN("%s: %32s: not offloaded (ref: %s)\n", __func__,
5234+
cur->name, "https://github.com/ggerganov/llama.cpp/pull/3837");
5235+
}
5236+
}
5237+
}
5238+
}
5239+
#endif
5240+
52175241
return result;
52185242
}
52195243

0 commit comments

Comments (0)