Skip to content

Commit 5fbfe38

Browse files
authored
server : improve error reporting (#13680)
1 parent c76532e commit 5fbfe38

File tree

1 file changed

+22
-7
lines changed

1 file changed

+22
-7
lines changed

tools/server/server.cpp

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3366,14 +3366,29 @@ struct server_context {
3366 3366
metrics.on_decoded(slots);
3367 3367

3368 3368
if (ret != 0) {
3369-
if (n_batch == 1 || ret < 0) {
3370-
// if you get here, it means the KV cache is full - try increasing it via the context size
3371-
SRV_ERR("failed to decode the batch: KV cache is full - try increasing it via the context size, i = %d, n_batch = %d, ret = %d\n", i, n_batch, ret);
3372-
for (auto & slot : slots) {
3373-
slot.release();
3374-
send_error(slot, "Input prompt is too big compared to KV size. Please try increasing KV size.");
3369+
{
3370+
std::string err;
3371+
3372+
if (n_batch == 1 && ret == 1) {
3373+
err = "Context size has been exceeded.";
3374+
}
3375+
3376+
if (ret == -1) {
3377+
err = "Invalid input batch.";
3378+
}
3379+
3380+
if (ret < -1) {
3381+
err = "Compute error.";
3382+
}
3383+
3384+
if (!err.empty()) {
3385+
SRV_ERR("%s, i = %d, n_batch = %d, ret = %d\n", err.c_str(), i, n_batch, ret);
3386+
for (auto & slot : slots) {
3387+
slot.release();
3388+
send_error(slot, err);
3389+
}
3390+
break;
3375 3391
}
3376-
break; // break loop of n_batch
3377 3392
}
3378 3393

3379 3394
// retry with half the batch size to try to find a free slot in the KV cache

0 commit comments

Comments
 (0)