From 5248b16abc0882888ba3b56559864b70a1d0a8a7 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 4 Jun 2025 14:14:19 +0300
Subject: [PATCH] context : fix pos_min initialization upon error decode

ggml-ci
---
 src/llama-context.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 4ab5743879400..b73da2e63aec7 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1019,7 +1019,10 @@ int llama_context::decode(llama_batch & inp_batch) {
 
     if (!res) {
         // the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache
-        llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES] = { std::numeric_limits<llama_pos>::max() };
+        llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES];
+        for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+            pos_min[s] = std::numeric_limits<llama_pos>::max();
+        }
 
         for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
            const auto & seq_id = ubatch.seq_id[i][0];
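
Note on the bug being fixed: the removed line relied on C++ aggregate initialization, which applies the supplied value only to element 0 and value-initializes the remaining elements to 0. Every sequence other than 0 therefore started with pos_min == 0 rather than max(), so the error path could remove the wrong range of positions from the KV cache. Below is a minimal standalone sketch of that pitfall, not code from llama.cpp itself; the value chosen for LLAMA_MAX_PARALLEL_SEQUENCES is illustrative, and llama_pos is typedef'd to int32_t as in llama.h.

    #include <cassert>
    #include <cstdint>
    #include <limits>

    using llama_pos = int32_t;                     // as in llama.h
    constexpr int LLAMA_MAX_PARALLEL_SEQUENCES = 64;  // illustrative value

    int main() {
        // Buggy form: aggregate initialization assigns max() to element 0
        // only; all remaining elements are value-initialized to 0.
        llama_pos buggy[LLAMA_MAX_PARALLEL_SEQUENCES] =
            { std::numeric_limits<llama_pos>::max() };
        assert(buggy[0] == std::numeric_limits<llama_pos>::max());
        assert(buggy[1] == 0);                     // not max() -- the bug

        // Fixed form, mirroring the patch: set every element explicitly.
        llama_pos fixed[LLAMA_MAX_PARALLEL_SEQUENCES];
        for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
            fixed[s] = std::numeric_limits<llama_pos>::max();
        }
        assert(fixed[LLAMA_MAX_PARALLEL_SEQUENCES - 1] ==
               std::numeric_limits<llama_pos>::max());

        return 0;
    }

The explicit loop guarantees every slot starts at the sentinel max() value, so the subsequent per-token scan over ubatch.seq_id can safely take minimums into pos_min for any sequence id.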