Commit a3d0aa7

ggml : add error handling to graph_compute (#1714)
1 parent 14c5795

10 files changed: +38 −23 lines
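
This commit changes the return type of graph_compute across the backend interface from void to bool: each backend now reports success or failure, the result is propagated through ggml_backend_graph_compute() and whisper.cpp's ggml_graph_compute_helper(), and the Metal backend returns false on a failed command buffer instead of asserting. A minimal sketch of the new calling convention (the backend and graph setup here are assumed, not part of the diff):

    // hedged sketch: check the boolean status introduced by this commit
    if (!ggml_backend_graph_compute(backend, gf)) {
        fprintf(stderr, "%s: graph compute failed\n", __func__);
        return false;
    }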

bindings/ruby/ext/ggml-backend-impl.h

Lines changed: 1 addition & 1 deletion

@@ -70,7 +70,7 @@ extern "C" {
         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);

bindings/ruby/ext/ggml-backend.c

Lines changed: 2 additions & 2 deletions

@@ -156,8 +156,8 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     backend->iface.graph_plan_compute(backend, plan);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    return backend->iface.graph_compute(backend, cgraph);
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {

bindings/ruby/ext/ggml-backend.h

Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends

ggml-backend-impl.h

Lines changed: 1 addition & 1 deletion

@@ -90,7 +90,7 @@ extern "C" {
         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
        // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
        // check if the backend supports an operation
        bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);

ggml-backend.c

Lines changed: 7 additions & 3 deletions

@@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     ggml_backend_synchronize(backend);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    if (!backend->iface.graph_compute(backend, cgraph)) {
+        return false;
+    }
 
     // TODO: optional sync
     ggml_backend_synchronize(backend);
+    return true;
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     cplan.work_data = cpu_ctx->work_data;
 
     ggml_graph_compute(cgraph, &cplan);
+    return true;
 }
 
 static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
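
For out-of-tree backends, the interface change above means the graph_compute callback must now return a status. A hedged sketch of the updated callback shape (the ggml_backend_dummy_* name is illustrative, not part of the commit):

    // hedged sketch: a backend callback under the new bool-returning interface
    static bool ggml_backend_dummy_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        // run the graph on the device; report failure instead of asserting
        bool ok = true; // illustrative: set this from the device's status
        if (!ok) {
            return false; // surfaced to callers by ggml_backend_graph_compute()
        }
        return true;
    }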

ggml-backend.h

Lines changed: 1 addition & 1 deletion

@@ -58,7 +58,7 @@ extern "C" {
 
    GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
    GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
    GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
 
    // tensor copy between different backends

ggml-cuda.cu

Lines changed: 3 additions & 1 deletion

@@ -9910,7 +9910,7 @@ static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_ba
     UNUSED(plan);
 }
 
-static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context;
 
     ggml_cuda_set_main_device(cuda_ctx->device);
@@ -9967,6 +9967,8 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph
     }
 
     UNUSED(backend);
+
+    return true;
 }
 
 static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {

ggml-metal.h

Lines changed: 1 addition & 1 deletion

@@ -87,7 +87,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
 
 // same as ggml_graph_compute but uses Metal
 // creates gf->n_threads command buffers in parallel
-void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
 
 //
 // backend API

ggml-metal.m

Lines changed: 5 additions & 4 deletions

@@ -977,7 +977,7 @@ static bool ggml_metal_supports_op(const struct ggml_tensor * op) {
             return false;
         }
     }
-void ggml_metal_graph_compute(
+bool ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
                struct ggml_cgraph * gf) {
     @autoreleasepool {
@@ -2405,10 +2405,11 @@ void ggml_metal_graph_compute(
             MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
             if (status != MTLCommandBufferStatusCompleted) {
                 GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
-                GGML_ASSERT(false);
+                return false;
             }
         }
 
+        return true;
     }
 }
 
@@ -2688,10 +2689,10 @@ static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggm
     UNUSED(backend);
 }
 
-static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
 
-    ggml_metal_graph_compute(metal_ctx, cgraph);
+    return ggml_metal_graph_compute(metal_ctx, cgraph);
 }
 
 static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
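
Note that the Metal hunks change behavior as well as the signature: a command buffer that finishes in any state other than MTLCommandBufferStatusCompleted previously aborted the process via GGML_ASSERT(false); it now logs the status and returns false. A hedged sketch from the caller's side:

    // hedged sketch: a Metal command-buffer failure now propagates instead of aborting
    if (!ggml_metal_graph_compute(metal_ctx, cgraph)) {
        // handle or report the failed command buffer here (illustrative)
    }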

whisper.cpp

Lines changed: 16 additions & 8 deletions

@@ -152,7 +152,7 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
 // ggml helpers
 //
 
-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
         struct ggml_cgraph * graph,
        std::vector<uint8_t> & buf,
                         int   n_threads,
@@ -168,10 +168,10 @@ static void ggml_graph_compute_helper(
         plan.work_data = buf.data();
     }
 
-    ggml_graph_compute(graph, &plan);
+    return ggml_graph_compute(graph, &plan);
 }
 
-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
        struct ggml_backend * backend,
         struct ggml_cgraph * graph,
                        int   n_threads) {
@@ -183,7 +183,7 @@ static void ggml_graph_compute_helper(
         ggml_backend_metal_set_n_cb(backend, n_threads);
     }
 #endif
-    ggml_backend_graph_compute(backend, graph);
+    return ggml_backend_graph_compute(backend, graph);
 }
 
 // faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
@@ -2103,7 +2103,9 @@ static bool whisper_encode_internal(
         ggml_allocr_alloc_graph(alloc, gf);
 
         if (!whisper_encode_external(wstate)) {
-            ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+            if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+                return false;
+            }
         }
     }
 
@@ -2117,7 +2119,9 @@
 
         ggml_allocr_alloc_graph(alloc, gf);
 
-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }
 
     // cross
@@ -2130,7 +2134,9 @@ static bool whisper_encode_internal(
 
         ggml_allocr_alloc_graph(alloc, gf);
 
-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }
 
     wstate.t_encode_us += ggml_time_us() - t_start_us;
@@ -2552,7 +2558,9 @@ static bool whisper_decode_internal(
 
         logits = gf->nodes[gf->n_nodes - 1];
 
-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }
 
     logits_out.resize(n_tokens*n_vocab);
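
With the whisper.cpp hunks, a failed graph compute bubbles up as a false return from whisper_encode_internal() and whisper_decode_internal() (both already declared bool, per the hunk headers above), so the existing error paths of the public API report it instead of the process crashing. A hedged sketch from an application's point of view (the error-reporting details are an assumption, not shown in this diff):

    // hedged sketch: application code sees the failure as a whisper_full() error
    if (whisper_full(ctx, params, samples, n_samples) != 0) {
        fprintf(stderr, "whisper_full() failed\n");
    }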
