From 2a2f39ef4523357cb7558526e579c399a50e4e73 Mon Sep 17 00:00:00 2001
From: Robyn
Date: Sat, 17 Jun 2023 15:55:57 +1000
Subject: [PATCH 1/2] #1869 Fix null reference errors when training from
 scratch with CUDA build

Calling ggml_compute_forward when node->src0 was null was causing
train-text-from-scratch.exe to terminate unexpectedly.
---
 ggml.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/ggml.c b/ggml.c
index 0eda7f338e6..9a5f23c83de 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16012,7 +16012,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
             /*.wdata =*/ cgraph->work ? cgraph->work->data : NULL,
         };
 
-        ggml_compute_forward(&params, node);
+        if (node->src0) {
+            ggml_compute_forward(&params, node);
+        }
 
         // COMPUTE
         if (node->n_tasks > 1) {
@@ -16048,7 +16050,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         }
 
         params.type = GGML_TASK_COMPUTE;
-        ggml_compute_forward(&params, node);
+        if (node->src0) {
+            ggml_compute_forward(&params, node);
+        }
 
         // wait for thread pool
         if (node->n_tasks > 1) {
@@ -16103,7 +16107,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         }
 
         params.type = GGML_TASK_FINALIZE;
-        ggml_compute_forward(&params, node);
+        if (node->src0) {
+            ggml_compute_forward(&params, node);
+        }
 
         // wait for thread pool
         if (node->n_tasks > 1) {
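
[Reviewer note, not part of the patch series] Background on the crash: in
ggml, leaf tensors (model weights and inputs) have no operands, i.e. they are
created with src0 == NULL, and train-text-from-scratch builds its graphs from
freshly created parameter tensors. Any unconditional tensor->src0->...
dereference therefore faults on the first such node. A minimal standalone
sketch of the unsafe vs. null-safe access pattern (the struct and enum below
are simplified stand-ins for illustration, not the real ggml types):

    #include <stdbool.h>
    #include <stddef.h>

    /* Simplified stand-ins for the real ggml structs. */
    enum backend { BACKEND_CPU, BACKEND_GPU };

    struct tensor {
        struct tensor * src0;   /* first operand; NULL for leaf tensors      */
        struct tensor * src1;   /* second operand; NULL when the op has none */
        enum backend    backend;
    };

    /* Unsafe: faults when t->src0 == NULL (e.g. a leaf/parameter tensor). */
    static bool src0_on_gpu_unsafe(const struct tensor * t) {
        return t->src0->backend == BACKEND_GPU;
    }

    /* Null-safe: && short-circuits before the dereference; this is the shape
     * of the check patch 2 installs in ggml_cuda_compute_forward. */
    static bool src0_on_gpu(const struct tensor * t) {
        return t->src0 != NULL && t->src0->backend == BACKEND_GPU;
    }

Patch 1 above avoids the fault by skipping ggml_compute_forward at the call
sites; patch 2 below moves the NULL check to the dereferences themselves.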
From c2ccd541e93e184a1fb19df1780087e9dd9dc65e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sat, 24 Jun 2023 11:36:10 +0300
Subject: [PATCH 2/2] ggml : do not dereference src0 if NULL
---
 ggml-cuda.cu |  2 +-
 ggml.c       | 14 ++++----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 16488b9f906..d10cf58e8ea 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -2542,7 +2542,7 @@ void ggml_cuda_free_scratch() {
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
-        || tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT
+        || (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
         || (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
 
     switch (tensor->op) {
diff --git a/ggml.c b/ggml.c
index a179f6586ed..ff3067461ad 100644
--- a/ggml.c
+++ b/ggml.c
@@ -14335,7 +14335,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     if (skip_cpu) {
         return;
     }
-    GGML_ASSERT(tensor->src0->backend == GGML_BACKEND_CPU);
+    GGML_ASSERT(tensor->src0 == NULL || tensor->src0->backend == GGML_BACKEND_CPU);
     GGML_ASSERT(tensor->src1 == NULL || tensor->src1->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_CUBLAS
 
@@ -16032,9 +16032,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
             /*.wdata =*/ cgraph->work ? cgraph->work->data : NULL,
         };
 
-        if (node->src0) {
-            ggml_compute_forward(&params, node);
-        }
+        ggml_compute_forward(&params, node);
 
         // COMPUTE
         if (node->n_tasks > 1) {
@@ -16070,9 +16068,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         }
 
         params.type = GGML_TASK_COMPUTE;
-        if (node->src0) {
-            ggml_compute_forward(&params, node);
-        }
+        ggml_compute_forward(&params, node);
 
         // wait for thread pool
         if (node->n_tasks > 1) {
@@ -16127,9 +16123,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         }
 
         params.type = GGML_TASK_FINALIZE;
-        if (node->src0) {
-            ggml_compute_forward(&params, node);
-        }
+        ggml_compute_forward(&params, node);
 
         // wait for thread pool
         if (node->n_tasks > 1) {
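
[Reviewer note] On how the two patches relate: patch 1 guarded the three call
sites in ggml_graph_compute, so nodes without a src0 were never handed to
ggml_compute_forward at all. Patch 2 reverts those guards and instead makes
the two places that actually dereference src0 null-safe: the any_on_device
condition in ggml_cuda_compute_forward and the backend assertion in
ggml_compute_forward. The call sites stay uniform and the NULL handling lives
next to the pointer use, which is why the second commit supersedes the first.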