Skip to content

Commit e709d20

Browse files
authored
Merge pull request #6416 from menloresearch/enhancement/experimental-label
enhancement: add label experimental for optimize setting
2 parents 4856cfb + 19aa15f commit e709d20

File tree

4 files changed

+76
-21
lines changed

4 files changed

+76
-21
lines changed

extensions/llamacpp-extension/src/index.ts

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ type ModelPlan = {
8080
gpuLayers: number
8181
maxContextLength: number
8282
noOffloadKVCache: boolean
83-
noOffloadMmproj?: boolean
83+
offloadMmproj?: boolean
8484
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
8585
}
8686

@@ -328,7 +328,8 @@ export default class llamacpp_extension extends AIEngine {
328328
await this.determineBestBackend(version_backends)
329329
}
330330
} else {
331-
bestAvailableBackendString = await this.determineBestBackend(version_backends)
331+
bestAvailableBackendString =
332+
await this.determineBestBackend(version_backends)
332333
}
333334

334335
let settings = structuredClone(SETTINGS)
@@ -2047,11 +2048,25 @@ export default class llamacpp_extension extends AIEngine {
20472048
return { layerSize: modelSize / totalLayers, totalLayers }
20482049
}
20492050

2051+
private isAbsolutePath(p: string): boolean {
2052+
// Normalize back‑slashes to forward‑slashes first.
2053+
const norm = p.replace(/\\/g, '/')
2054+
return (
2055+
norm.startsWith('/') || // POSIX absolute
2056+
/^[a-zA-Z]:/.test(norm) || // Drive‑letter Windows (C: or D:)
2057+
/^\/\/[^/]+/.test(norm) // UNC path //server/share
2058+
)
2059+
}
2060+
20502061
async planModelLoad(
20512062
path: string,
2052-
requestedCtx?: number,
2053-
mmprojPath?: string
2063+
mmprojPath?: string,
2064+
requestedCtx?: number
20542065
): Promise<ModelPlan> {
2066+
if (!this.isAbsolutePath(path))
2067+
path = await joinPath([await getJanDataFolderPath(), path])
2068+
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
2069+
mmprojPath = await joinPath([await getJanDataFolderPath(), mmprojPath])
20552070
const modelSize = await this.getModelSize(path)
20562071
const memoryInfo = await this.getTotalSystemMemory()
20572072
const gguf = await readGgufMetadata(path)
@@ -2138,12 +2153,12 @@ export default class llamacpp_extension extends AIEngine {
21382153
)
21392154

21402155
// --- Priority 1: Allocate mmproj (if exists) ---
2141-
let noOffloadMmproj = false
2156+
let offloadMmproj = false
21422157
let remainingVRAM = usableVRAM
21432158

21442159
if (mmprojSize > 0) {
21452160
if (mmprojSize <= remainingVRAM) {
2146-
noOffloadMmproj = true
2161+
offloadMmproj = true
21472162
remainingVRAM -= mmprojSize
21482163
logger.info(`MMProj allocated to VRAM: ${mmprojSize} bytes`)
21492164
} else {
@@ -2217,8 +2232,7 @@ export default class llamacpp_extension extends AIEngine {
22172232
// Calculate available system RAM for KV cache
22182233
const cpuLayers = totalLayers - gpuLayers
22192234
const modelCPUSize = cpuLayers * layerSize
2220-
const mmprojCPUSize =
2221-
mmprojSize > 0 && !noOffloadMmproj ? mmprojSize : 0
2235+
const mmprojCPUSize = mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
22222236
const systemRAMUsed = modelCPUSize + mmprojCPUSize
22232237
const availableSystemRAMForKVCache = Math.max(
22242238
0,
@@ -2277,7 +2291,7 @@ export default class llamacpp_extension extends AIEngine {
22772291
const estimatedGPUUsage =
22782292
gpuLayers * layerSize +
22792293
maxContextLength * kvCachePerToken +
2280-
(noOffloadMmproj ? mmprojSize : 0)
2294+
(offloadMmproj ? mmprojSize : 0)
22812295

22822296
if (estimatedGPUUsage > memoryInfo.totalVRAM * 0.9) {
22832297
logger.warn(
@@ -2293,7 +2307,7 @@ export default class llamacpp_extension extends AIEngine {
22932307
const newEstimate =
22942308
gpuLayers * layerSize +
22952309
maxContextLength * kvCachePerToken +
2296-
(noOffloadMmproj ? mmprojSize : 0)
2310+
(offloadMmproj ? mmprojSize : 0)
22972311
if (newEstimate <= memoryInfo.totalVRAM * 0.9) break
22982312
}
22992313

@@ -2329,7 +2343,7 @@ export default class llamacpp_extension extends AIEngine {
23292343

23302344
// Log final plan
23312345
const mmprojInfo = mmprojPath
2332-
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, noOffloadMmproj=${noOffloadMmproj}`
2346+
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, offloadMmproj=${offloadMmproj}`
23332347
: ''
23342348

23352349
logger.info(
@@ -2343,7 +2357,7 @@ export default class llamacpp_extension extends AIEngine {
23432357
maxContextLength,
23442358
noOffloadKVCache,
23452359
mode,
2346-
noOffloadMmproj,
2360+
offloadMmproj,
23472361
}
23482362
}
23492363

web-app/src/containers/ModelSetting.tsx

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,16 @@ export function ModelSetting({
4646
}
4747
setIsPlanning(true)
4848
try {
49-
// Read the model config to get the actual model path
49+
// Read the model config to get the actual model path and mmproj path
5050
const modelConfig = await serviceHub.app().readYaml<{
5151
model_path: string
52+
mmproj_path?: string
5253
}>(`llamacpp/models/${model.id}/model.yml`)
5354

5455
if (modelConfig && modelConfig.model_path) {
5556
const result = await serviceHub
5657
.models()
57-
.planModelLoad(modelConfig.model_path)
58+
.planModelLoad(modelConfig.model_path, modelConfig.mmproj_path)
5859

5960
// Apply the recommended settings to the model sequentially to avoid race conditions
6061
const settingsToUpdate: Array<{
@@ -73,6 +74,15 @@ export function ModelSetting({
7374
})
7475
}
7576

77+
// NOTE(review): this added block duplicated the pre-existing no_kv_offload
78+
// check immediately below, pushing the setting into settingsToUpdate twice;
79+
// the duplicate has been removed.
7686
if (
7787
model.settings?.no_kv_offload &&
7888
result.noOffloadKVCache !== undefined
@@ -83,6 +93,16 @@ export function ModelSetting({
8393
})
8494
}
8595

96+
if (
97+
model.settings?.mmproj_offload &&
98+
result.offloadMmproj !== undefined
99+
) {
100+
settingsToUpdate.push({
101+
key: 'mmproj_offload',
102+
value: result.offloadMmproj,
103+
})
104+
}
105+
86106
// Apply all settings in a single update to avoid race conditions
87107
if (settingsToUpdate.length > 0) {
88108
handleMultipleSettingsChange(settingsToUpdate)
@@ -242,11 +262,18 @@ export function ModelSetting({
242262
{provider.provider === 'llamacpp' && (
243263
<div className="pb-4 border-b border-main-view-fg/10 my-4">
244264
<div>
245-
<h3 className="font-medium mb-1">Optimize Settings</h3>
246-
<p className="text-main-view-fg/70 text-xs mb-3">
247-
Analyze your system and model, then apply optimal loading
248-
settings automatically
249-
</p>
265+
<div>
266+
<div className="flex items-center gap-2 mb-1">
267+
<h3 className="font-medium">Optimize Settings</h3>
268+
<div className="text-xs bg-main-view-fg/10 border border-main-view-fg/20 text-main-view-fg/70 rounded-full py-0.5 px-2">
269+
<span>{t('mcp-servers:experimental')}</span>
270+
</div>
271+
</div>
272+
<p className="text-main-view-fg/70 text-xs mb-3">
273+
Analyze your system and model, then apply optimal loading
274+
settings automatically
275+
</p>
276+
</div>
250277
<Button
251278
onClick={handlePlanModelLoad}
252279
disabled={isPlanning}

web-app/src/services/models/default.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,12 +495,14 @@ export class DefaultModelsService implements ModelsService {
495495

496496
async planModelLoad(
497497
modelPath: string,
498+
mmprojPath?: string,
498499
requestedCtx?: number
499500
): Promise<ModelPlan> {
500501
try {
501502
const engine = this.getEngine('llamacpp') as AIEngine & {
502503
planModelLoad?: (
503504
path: string,
505+
mmprojPath?: string,
504506
requestedCtx?: number
505507
) => Promise<ModelPlan>
506508
}
@@ -514,7 +516,12 @@ export class DefaultModelsService implements ModelsService {
514516
(core) => core.joinPath
515517
)
516518
const fullModelPath = await joinPath([janDataFolderPath, modelPath])
517-
return await engine.planModelLoad(fullModelPath, requestedCtx)
519+
// Forward mmprojPath so the engine can decide whether the multimodal projector fits in VRAM
520+
return await engine.planModelLoad(
521+
fullModelPath,
522+
mmprojPath,
523+
requestedCtx
524+
)
518525
}
519526

520527
// Fallback if method is not available
@@ -523,6 +530,7 @@ export class DefaultModelsService implements ModelsService {
523530
gpuLayers: 0,
524531
maxContextLength: 2048,
525532
noOffloadKVCache: true,
533+
offloadMmproj: false,
526534
mode: 'Unsupported',
527535
}
528536
} catch (error) {
@@ -531,6 +539,7 @@ export class DefaultModelsService implements ModelsService {
531539
gpuLayers: 0,
532540
maxContextLength: 2048,
533541
noOffloadKVCache: true,
542+
offloadMmproj: false,
534543
mode: 'Unsupported',
535544
}
536545
}

web-app/src/services/models/types.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ export interface ModelPlan {
8585
gpuLayers: number
8686
maxContextLength: number
8787
noOffloadKVCache: boolean
88+
offloadMmproj: boolean
8889
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
8990
}
9091

@@ -136,5 +137,9 @@ export interface ModelsService {
136137
ctxSize?: number
137138
): Promise<'RED' | 'YELLOW' | 'GREEN' | 'GREY'>
138139
validateGgufFile(filePath: string): Promise<ModelValidationResult>
139-
planModelLoad(modelPath: string, requestedCtx?: number): Promise<ModelPlan>
140+
planModelLoad(
141+
modelPath: string,
142+
mmprojPath?: string,
143+
requestedCtx?: number
144+
): Promise<ModelPlan>
140145
}

0 commit comments

Comments
 (0)