@@ -5085,10 +5085,10 @@ static struct ggml_cgraph * llama_build_graph(
         { OFFLOAD_FUNC_NOP, "CPU" },
         { OFFLOAD_FUNC_OUT, "CPU" },
 #ifdef GGML_USE_CUBLAS
-        { OFFLOAD_FUNC,     "GPU (CUDA)" },
-        { OFFLOAD_FUNC_KQ,  "GPU (CUDA) KQ" },
-        { OFFLOAD_FUNC_V,   "GPU (CUDA) V" },
-        { OFFLOAD_FUNC_NR,  "GPU (CUDA) NR" },
+        { OFFLOAD_FUNC,      "GPU (CUDA)" },
+        { OFFLOAD_FUNC_KQ,   "GPU (CUDA) KQ" },
+        { OFFLOAD_FUNC_V,    "GPU (CUDA) V" },
+        { OFFLOAD_FUNC_NR,   "GPU (CUDA) NR" },
         { OFFLOAD_FUNC_EMB, "GPU (CUDA) EMB" },
 #else
         { OFFLOAD_FUNC,     "CPU" },
@@ -5103,11 +5103,11 @@ static struct ggml_cgraph * llama_build_graph(
         llm_offload_func_e func_e = k_offload_func_trie.find(name);
 
         if (func_e == OFFLOAD_FUNC_NOP) {
-            // if a tensor hasn't been offloaded, we warn the user
-            if (worst_case) {
-                LLAMA_LOG_WARN("%s: %32s: not offloaded (ref: %s)\n", __func__,
-                        cur->name, "https://github.com/ggerganov/llama.cpp/pull/3837");
-            }
+            // // if a tensor hasn't been offloaded, we warn the user
+            // if (worst_case) {
+            //     LLAMA_LOG_WARN("%s: %32s: not offloaded (ref: %s)\n", __func__,
+            //             cur->name, "https://github.com/ggerganov/llama.cpp/pull/3837");
+            // }
 
             return;
         }
@@ -5214,6 +5214,30 @@ static struct ggml_cgraph * llama_build_graph(
             GGML_ASSERT(false);
     }
 
+#ifdef GGML_USE_CUBLAS
+    // TODO: tmp code to help find tensors that haven't been offloaded
+    if (worst_case) {
+        for (int i = 0; i < result->n_nodes; ++i) {
+            struct ggml_tensor * cur = result->nodes[i];
+
+            if (cur->view_src != nullptr) {
+                continue;
+            }
+
+            // check the global map for what offload function to use for this tensor
+            llm_offload_func_e func_e = k_offload_func_trie.find(cur->name);
+
+            if (func_e == OFFLOAD_FUNC_NOP && cur->backend == GGML_BACKEND_CPU) {
+                // if a tensor hasn't been offloaded, we warn the user
+                if (worst_case) {
+                    LLAMA_LOG_WARN("%s: %32s: not offloaded (ref: %s)\n", __func__,
+                            cur->name, "https://github.com/ggerganov/llama.cpp/pull/3837");
+                }
+            }
+        }
+    }
+#endif
+
     return result;
 }
 
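In short, the change above silences the per-tensor warning inside the build callback and instead scans the finished graph once, warning only about non-view tensors that both the name lookup and the final graph leave on the CPU. The following is a minimal standalone sketch of that post-build scan pattern, not llama.cpp code: the types Node, Backend, OffloadFunc and the helper offload_lookup are invented stand-ins for ggml_tensor, its backend field, and k_offload_func_trie.

// Standalone sketch of the "scan the built graph and warn about tensors left on the CPU"
// pattern added in the hunk above. All names here are illustrative stand-ins, not ggml APIs.
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

enum class Backend     { CPU, GPU };
enum class OffloadFunc { NOP, GPU_FUNC };

struct Node {
    std::string name;
    Backend     backend;
    bool        is_view;   // stand-in for cur->view_src != nullptr
};

// stand-in for k_offload_func_trie: map a tensor name to an offload decision
static OffloadFunc offload_lookup(const std::unordered_map<std::string, OffloadFunc> & table,
                                  const std::string & name) {
    const auto it = table.find(name);
    return it == table.end() ? OffloadFunc::NOP : it->second;
}

// one pass over the finished graph, after all callbacks have run
static void warn_not_offloaded(const std::vector<Node> & nodes,
                               const std::unordered_map<std::string, OffloadFunc> & table) {
    for (const Node & cur : nodes) {
        if (cur.is_view) {
            continue; // views share data with their source tensor, skip them
        }
        // warn only when the lookup says "no offload function" AND the node really stayed on the CPU
        if (offload_lookup(table, cur.name) == OffloadFunc::NOP && cur.backend == Backend::CPU) {
            fprintf(stderr, "%32s: not offloaded\n", cur.name.c_str());
        }
    }
}

int main() {
    const std::unordered_map<std::string, OffloadFunc> table = {
        { "kq",   OffloadFunc::GPU_FUNC },
        { "norm", OffloadFunc::NOP      },
    };
    const std::vector<Node> nodes = {
        { "kq",   Backend::GPU, false },
        { "norm", Backend::CPU, false },  // this one triggers the warning
    };
    warn_not_offloaded(nodes, table);
    return 0;
}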