@@ -30,6 +30,8 @@ bool llama_batch_allocr::init(
 
     batch = batch_inp;
 
+    this->vocab = &vocab;
+
     GGML_ASSERT(batch.n_tokens > 0);
 
     //
@@ -172,67 +174,39 @@ bool llama_batch_allocr::init(
 
     if (debug > 0) {
         LLAMA_LOG_DEBUG("%s: input batch info:\n", __func__);
-        LLAMA_LOG_DEBUG("%s: n_tokens  = %d\n", __func__, batch.n_tokens);
-        LLAMA_LOG_DEBUG("%s: token     = %p\n", __func__, (void *) batch.token);
-        LLAMA_LOG_DEBUG("%s: embd      = %p\n", __func__, (void *) batch.embd);
-        LLAMA_LOG_DEBUG("%s: pos       = %p\n", __func__, (void *) batch.pos);
-        LLAMA_LOG_DEBUG("%s: n_seq_id  = %p\n", __func__, (void *) batch.n_seq_id);
-        LLAMA_LOG_DEBUG("%s: seq_id    = %p\n", __func__, (void *) batch.seq_id);
-        LLAMA_LOG_DEBUG("%s: logits    = %p\n", __func__, (void *) batch.logits);
-        LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs);
 
-        if (debug > 1) {
-            int seq_id_max = 0;
-            for (int32_t i = 0; i < batch.n_tokens; ++i) {
-                for (int s = 0; s < batch.n_seq_id[i]; ++s) {
-                    for (int s = 0; s < batch.n_seq_id[i]; ++s) {
-                        seq_id_max = std::max(seq_id_max, batch.seq_id[i][s]);
-                    }
-                }
+        llama_ubatch ubatch {
+            /*.equal_seqs   =*/ false,
+            /*.n_tokens     =*/ (uint32_t) batch.n_tokens,
+            /*.n_seq_tokens =*/ (uint32_t) 1,
+            /*.n_seqs       =*/ (uint32_t) batch.n_tokens,
+            /*.token        =*/ batch.token,
+            /*.embd         =*/ batch.embd,
+            /*.pos          =*/ batch.pos,
+            /*.n_seq_id     =*/ batch.n_seq_id,
+            /*.seq_id       =*/ batch.seq_id,
+            /*.output       =*/ batch.logits,
+        };
+
+        ubatch_print(ubatch, debug);
+
+        LLAMA_LOG_DEBUG("%s: seq = [\n", __func__);
+        for (int s0 = 0; s0 < (int) seq_pos.size(); ++s0) {
+            if (seq_pos[s0].empty()) {
+                continue;
             }
-            ++seq_id_max;
 
-            LLAMA_LOG_DEBUG("%s: token = [\n", __func__);
-            for (int32_t i = 0; i < batch.n_tokens; ++i) {
-                std::vector<int8_t> seq_id(seq_id_max);
-
-                for (int s = 0; s < batch.n_seq_id[i]; ++s) {
-                    seq_id[batch.seq_id[i][s]] = 1;
-                }
-
-                std::stringstream ss;
-                for (int s = 0; s < seq_id_max; ++s) {
-                    if (seq_id[s]) {
-                        ss << s%10;
-                    } else {
-                        ss << ".";
-                    }
+            std::stringstream ss;
+            for (int s1 = 0; s1 < (int) seq_cpl[s0].size(); ++s1) {
+                if (seq_cpl[s0][s1]) {
+                    ss << s1 << " ";
                 }
-
-                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%16s), pos = %4d, n_seq_id = %2d, seq_id = [%s], output = %d\n",
-                        __func__, i, batch.token[i], vocab.token_to_piece(batch.token[i]).c_str(),
-                        batch.pos[i], batch.n_seq_id[i], ss.str().c_str(), batch.logits[i]);
             }
-            LLAMA_LOG_DEBUG("%s: ]\n", __func__);
-
-            LLAMA_LOG_DEBUG("%s: seq = [\n", __func__);
-            for (int s0 = 0; s0 < (int) seq_pos.size(); ++s0) {
-                if (seq_pos[s0].empty()) {
-                    continue;
-                }
 
-                std::stringstream ss;
-                for (int s1 = 0; s1 < (int) seq_cpl[s0].size(); ++s1) {
-                    if (seq_cpl[s0][s1]) {
-                        ss << s1 << " ";
-                    }
-                }
-
-                LLAMA_LOG_DEBUG("%s: %4d: pos = [%4d, %4d], cpl = %s\n",
-                        __func__, s0, seq_pos_min(s0), seq_pos_max(s0), ss.str().empty() ? "-" : ss.str().c_str());
-            }
-            LLAMA_LOG_DEBUG("%s: ]\n", __func__);
+            LLAMA_LOG_DEBUG("%s: %4d: pos = [%4d, %4d], cpl = %s\n",
+                    __func__, s0, seq_pos_min(s0), seq_pos_max(s0), ss.str().empty() ? "-" : ss.str().c_str());
         }
+        LLAMA_LOG_DEBUG("%s: ]\n", __func__);
     }
 
     //
@@ -296,7 +270,7 @@ bool llama_batch_allocr::init(
     return true;
 }
 
-llama_ubatch llama_batch_allocr::reserve_one(uint32_t n_tokens) {
+llama_ubatch llama_batch_allocr::ubatch_reserve(uint32_t n_tokens) {
     clear();
     split_reset();
 
@@ -389,7 +363,7 @@ llama_ubatch llama_batch_allocr::split_simple(uint32_t n_ubatch) {
         }
     }
 
-    return add_ubatch(idxs, idxs.size(), false);
+    return ubatch_add(idxs, idxs.size(), false);
 }
 
 llama_ubatch llama_batch_allocr::split_equal(uint32_t n_ubatch) {
@@ -470,7 +444,7 @@ llama_ubatch llama_batch_allocr::split_equal(uint32_t n_ubatch) {
         idxs.insert(idxs.end(), idxs_per_seq[s].begin(), idxs_per_seq[s].end());
     }
 
-    return add_ubatch(idxs, n_seqs, true);
+    return ubatch_add(idxs, n_seqs, true);
 }
 
 llama_ubatch llama_batch_allocr::split_seq(uint32_t n_ubatch) {
@@ -507,7 +481,7 @@ llama_ubatch llama_batch_allocr::split_seq(uint32_t n_ubatch) {
         cur_seq_set = seq_set[cur_idx];
     }
 
-    return add_ubatch(idxs, 1, true);
+    return ubatch_add(idxs, 1, true);
 }
 
 void llama_batch_allocr::clear() {
@@ -533,11 +507,9 @@ void llama_batch_allocr::clear() {
     seq_set_map.clear();
 }
 
-llama_ubatch llama_batch_allocr::add_ubatch(const std::vector<int32_t> & idxs, uint32_t n_seqs, bool equal_seqs) {
+llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, uint32_t n_seqs, bool equal_seqs) {
     const uint32_t n_tokens = idxs.size();
 
-    LLAMA_LOG_DEBUG("add_ubatch: n_tokens = %d, n_seqs = %d, equal_seqs = %d", n_tokens, n_seqs, equal_seqs);
-
     assert(n_tokens%n_seqs == 0);
 
     ubatches.emplace_back();
@@ -584,11 +556,67 @@ llama_ubatch llama_batch_allocr::add_ubatch(const std::vector<int32_t> & idxs, u
         /*.output       =*/ ubatch.output.data(),
     };
 
-    LLAMA_LOG_DEBUG("%s: added ubatch of size %d\n", __func__, res.n_tokens);
+    LLAMA_LOG_DEBUG("%s: added ubatch %d in split\n", __func__, (int) ubatches.size() - 1);
+
+    if (debug > 0) {
+        ubatch_print(res, debug);
+    }
 
     return res;
 }
 
+void llama_batch_allocr::ubatch_print(const llama_ubatch & ubatch, int debug) {
+    if (debug > 0) {
+        LLAMA_LOG_DEBUG("%s: equal_seqs   = %d\n", __func__, ubatch.equal_seqs);
+        LLAMA_LOG_DEBUG("%s: n_tokens     = %d\n", __func__, ubatch.n_tokens);
+        LLAMA_LOG_DEBUG("%s: n_seq_tokens = %d\n", __func__, ubatch.n_seq_tokens);
+        LLAMA_LOG_DEBUG("%s: n_seqs       = %d\n", __func__, ubatch.n_seqs);
+
+        LLAMA_LOG_DEBUG("%s: token     = %p\n", __func__, (void *) ubatch.token);
+        LLAMA_LOG_DEBUG("%s: embd      = %p\n", __func__, (void *) ubatch.embd);
+        LLAMA_LOG_DEBUG("%s: pos       = %p\n", __func__, (void *) ubatch.pos);
+        LLAMA_LOG_DEBUG("%s: n_seq_id  = %p\n", __func__, (void *) ubatch.n_seq_id);
+        LLAMA_LOG_DEBUG("%s: seq_id    = %p\n", __func__, (void *) ubatch.seq_id);
+        LLAMA_LOG_DEBUG("%s: output    = %p\n", __func__, (void *) ubatch.output);
+        LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs);
+
+        if (debug > 1) {
+            int seq_id_max = 0;
+            for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
+                for (int s = 0; s < ubatch.n_seq_id[i]; ++s) {
+                    for (int s = 0; s < ubatch.n_seq_id[i]; ++s) {
+                        seq_id_max = std::max(seq_id_max, ubatch.seq_id[i][s]);
+                    }
+                }
+            }
+            ++seq_id_max;
+
+            LLAMA_LOG_DEBUG("%s: token = [\n", __func__);
+            for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
+                std::vector<int8_t> seq_id(seq_id_max);
+
+                for (int s = 0; s < ubatch.n_seq_id[i]; ++s) {
+                    seq_id[ubatch.seq_id[i][s]] = 1;
+                }
+
+                std::stringstream ss;
+                for (int s = 0; s < seq_id_max; ++s) {
+                    if (seq_id[s]) {
+                        ss << s%10;
+                    } else {
+                        ss << ".";
+                    }
+                }
+
+                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%16s), pos = %4d, n_seq_id = %2d, seq_id = [%s], output = %d\n",
+                        __func__, i, ubatch.token[i], vocab->token_to_piece(ubatch.token[i]).c_str(),
+                        ubatch.pos[i], ubatch.n_seq_id[i], ss.str().c_str(), ubatch.output[i]);
+            }
+            LLAMA_LOG_DEBUG("%s: ]\n", __func__);
+        }
+    }
+}
+
 //
 // interface implementation
 //
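
Note: the following is a minimal, self-contained sketch, not part of the patch and not llama.cpp code. It only illustrates the seq_id formatting technique used by ubatch_print() above: for every token, the sequences it belongs to are marked with their id modulo 10 and absent sequences with '.'. The input container and variable names below are hypothetical.

#include <algorithm>
#include <cstdio>
#include <sstream>
#include <vector>

int main() {
    // hypothetical input: token i belongs to the sequences listed in seq_id[i]
    std::vector<std::vector<int>> seq_id = { {0}, {0, 2}, {1}, {0, 1, 2} };

    // find the largest sequence id, then size the per-token mask accordingly
    int seq_id_max = 0;
    for (const auto & ids : seq_id) {
        for (int s : ids) {
            seq_id_max = std::max(seq_id_max, s);
        }
    }
    ++seq_id_max;

    for (size_t i = 0; i < seq_id.size(); ++i) {
        // mark which sequences this token belongs to
        std::vector<int8_t> mask(seq_id_max, 0);
        for (int s : seq_id[i]) {
            mask[s] = 1;
        }

        // build the same compact visualization as ubatch_print(): digit or '.'
        std::stringstream ss;
        for (int s = 0; s < seq_id_max; ++s) {
            ss << (mask[s] ? char('0' + s % 10) : '.');
        }

        printf("token %zu: seq_id = [%s]\n", i, ss.str().c_str());
    }

    return 0;
}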