From 2471d56a2e202c7ed83877d2b246ea3903880cbb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 22 Oct 2023 09:22:54 +0300 Subject: [PATCH 1/3] llama : profiling the attention compute --- llama.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/llama.cpp b/llama.cpp index 3653493355234..4bd6ffd8008d8 100644 --- a/llama.cpp +++ b/llama.cpp @@ -5815,6 +5815,24 @@ static struct ggml_cgraph * llama_build_graph( GGML_ASSERT(false); } +#if 1 + for (int i = 0; i < result->n_nodes; ++i) { + struct ggml_tensor * node = result->nodes[i]; + if (getenv("SKIP_KQ_ALL")) { + if ( + strcmp(node->name, "KQ") == 0 || + strcmp(node->name, "KQ_scaled") == 0 || + strcmp(node->name, "KQ_masked") == 0 || + strcmp(node->name, "KQ_soft_max") == 0 || + strcmp(node->name, "KQV") == 0 || + false) { + //printf("skipping %s\n", dst->name); + node->op = GGML_OP_NONE; + } + } + } +#endif + return result; } From ed9fde7a1e15ac588b316d163f426fa3ecc87b81 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 22 Oct 2023 09:55:37 +0300 Subject: [PATCH 2/3] ggml : skip nops --- ggml.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ggml.c b/ggml.c index 49f3b7aba31f5..17f0ce4877592 100644 --- a/ggml.c +++ b/ggml.c @@ -16602,6 +16602,10 @@ static void ggml_compute_forward_cross_entropy_loss_back( static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) { GGML_ASSERT(params); + if (tensor->op == GGML_OP_NONE) { + return; + } + #ifdef GGML_USE_CUBLAS bool skip_cpu = ggml_cuda_compute_forward(params, tensor); if (skip_cpu) { From cb79f8a2d8c0c36f59e1d5445411acb01645f2f1 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 22 Oct 2023 09:58:29 +0300 Subject: [PATCH 3/3] llama : add SKIP_KQ_KQV option --- llama.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llama.cpp b/llama.cpp index 4bd6ffd8008d8..76558059c4194 100644 --- a/llama.cpp +++ b/llama.cpp @@ -5830,6 +5830,15 @@ static struct ggml_cgraph * llama_build_graph( node->op = GGML_OP_NONE; } } + if (getenv("SKIP_KQ_KQV")) { + if ( + strcmp(node->name, "KQ") == 0 || + strcmp(node->name, "KQV") == 0 || + false) { + //printf("skipping %s\n", dst->name); + node->op = GGML_OP_NONE; + } + } } #endif