move hash to user-defined

ngxson · ngxson · commit bfbabea92cd5 · 2025-04-12T10:52:54.000+02:00
diff --git a/examples/llava/gemma3-cli.cpp b/examples/llava/gemma3-cli.cpp
@@ -89,7 +89,6 @@ struct gemma3_context {
         ctx_vision.reset(mtmd_init_from_file(clip_path, model, mtmd_context_params{
             /* use_gpu */   true,
             /* timings */   true,
-            /* hash */      false,
             /* n_threads */ params.cpuparams.n_threads,
             /* verbosity */ GGML_LOG_LEVEL_INFO,
         }));
diff --git a/examples/llava/mtmd.cpp b/examples/llava/mtmd.cpp
@@ -29,8 +29,7 @@ struct mtmd_context {
                    const mtmd_context_params & ctx_params) :
         print_timings  (ctx_params.print_timings),
         n_threads      (ctx_params.n_threads),
-        image_marker   (ctx_params.image_marker),
-        calc_image_hash(ctx_params.calc_image_hash)
+        image_marker   (ctx_params.image_marker)
     {
         clip_context_params ctx_clip_params;
         ctx_clip_params.use_gpu   = ctx_params.use_gpu;
@@ -56,7 +55,7 @@ struct mtmd_image_tokens {
     uint32_t ny; // number of tokens in y direction
     uint32_t n_tokens() const { return nx * ny; }
     clip_image_f32_batch batch_f32; // preprocessed image patches
-    size_t image_hash = 0; // hash of the image, useful for KV cache tracking
+    std::string hash; // optional user-defined hash, useful for KV cache tracking
 };
 
 mtmd_context * mtmd_init_from_file(const char * mmproj_fname,
@@ -96,16 +95,6 @@ static std::vector<llama_token> mtmd_tokenize_text_internal(
     return result;
 }
 
-static uint64_t hash_vector_float(const std::vector<float> & vec) {
-    uint64_t seed = vec.size();
-    std::hash<float> hasher;
-    for (float val : vec) {
-        // inspired by boost::hash_combine
-        seed ^= hasher(val) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-    }
-    return seed;
-}
-
 int32_t mtmd_tokenize(mtmd_context * ctx,
                         std::vector<mtmd_input_chunk> & output,
                         const mtmd_input_text & text,
@@ -170,11 +159,7 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
             image_tokens->nx = clip_n_patches(ctx->ctx_clip); // TODO @ngxson : use clip_n_patches_by_image
             image_tokens->ny = 1; // TODO
             image_tokens->batch_f32 = std::move(batch_f32);
-
-            // optionally calculate the hash
-            if (ctx->calc_image_hash) {
-                image_tokens->image_hash = hash_vector_float(image_tokens->batch_f32.entries[0]->buf);
-            }
+            image_tokens->hash = bitmaps[i_img].hash;
 
             mtmd_input_chunk chunk{
                 MTMD_INPUT_CHUNK_TYPE_IMAGE,
@@ -207,8 +192,8 @@ size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens) {
     return image_tokens->ny;
 }
 
-uint64_t mtmd_image_tokens_get_hash(const mtmd_image_tokens * image_tokens) {
-    return image_tokens->image_hash;
+std::string mtmd_image_tokens_get_hash(const mtmd_image_tokens * image_tokens) {
+    return image_tokens->hash;
 }
 
 int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) {
diff --git a/examples/llava/mtmd.h b/examples/llava/mtmd.h
@@ -39,6 +39,7 @@ struct mtmd_bitmap {
     uint32_t nx;
     uint32_t ny;
     std::vector<unsigned char> data;
+    std::string hash; // optional user-defined hash, useful for KV cache tracking
 };
 
 struct mtmd_image_tokens_deleter {
@@ -57,9 +58,6 @@ using mtmd_input_chunks = std::vector<mtmd_input_chunk>;
 struct mtmd_context_params {
     bool use_gpu = true;
     bool print_timings = true;
-    // calc_image_hash is useful for tracking KV cache
-    // if not set, mtmd_image_tokens_get_hash will return 0
-    bool calc_image_hash = false;
     int n_threads = 4;
     enum ggml_log_level verbosity = GGML_LOG_LEVEL_INFO;
     const char * image_marker = "<__image__>";
@@ -100,11 +98,11 @@ MTMD_API int32_t mtmd_tokenize(mtmd_context * ctx,
                                 const std::vector<mtmd_bitmap> & bitmaps);
 
 // access mtmd_image_tokens
-MTMD_API size_t   mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens);
-MTMD_API size_t   mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens);
-MTMD_API size_t   mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens);
-MTMD_API uint64_t mtmd_image_tokens_get_hash(const mtmd_image_tokens * image_tokens);
-MTMD_API void     mtmd_image_tokens_free(mtmd_image_tokens * image_tokens);
+MTMD_API size_t      mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens);
+MTMD_API size_t      mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens);
+MTMD_API size_t      mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens);
+MTMD_API std::string mtmd_image_tokens_get_hash(const mtmd_image_tokens * image_tokens);
+MTMD_API void        mtmd_image_tokens_free(mtmd_image_tokens * image_tokens);
 
 // returns 0 on success
 MTMD_API int32_t mtmd_encode(mtmd_context * ctx,