Skip to content

Commit e709d20

Browse files
authored
Merge pull request #6416 from menloresearch/enhancement/experimental-label
enhancement: add label experimental for optimize setting
2 parents 4856cfb + 19aa15f commit e709d20

File tree

4 files changed

+76
-21
lines changed

4 files changed

+76
-21
lines changed

extensions/llamacpp-extension/src/index.ts

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ type ModelPlan = {
8080
gpuLayers: number
8181
maxContextLength: number
8282
noOffloadKVCache: boolean
83-
noOffloadMmproj?: boolean
83+
offloadMmproj?: boolean
8484
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
8585
}
8686

@@ -328,7 +328,8 @@ export default class llamacpp_extension extends AIEngine {
328328
await this.determineBestBackend(version_backends)
329329
}
330330
} else {
331-
bestAvailableBackendString = await this.determineBestBackend(version_backends)
331+
bestAvailableBackendString =
332+
await this.determineBestBackend(version_backends)
332333
}
333334

334335
let settings = structuredClone(SETTINGS)
@@ -2047,11 +2048,25 @@ export default class llamacpp_extension extends AIEngine {
20472048
return { layerSize: modelSize / totalLayers, totalLayers }
20482049
}
20492050

2051+
private isAbsolutePath(p: string): boolean {
2052+
// Normalize back‑slashes to forward‑slashes first.
2053+
const norm = p.replace(/\\/g, '/')
2054+
return (
2055+
norm.startsWith('/') || // POSIX absolute
2056+
/^[a-zA-Z]:/.test(norm) || // Drive‑letter Windows (C: or D:)
2057+
/^\/\/[^/]+/.test(norm) // UNC path //server/share
2058+
)
2059+
}
2060+
20502061
async planModelLoad(
20512062
path: string,
2052-
requestedCtx?: number,
2053-
mmprojPath?: string
2063+
mmprojPath?: string,
2064+
requestedCtx?: number
20542065
): Promise<ModelPlan> {
2066+
if (!this.isAbsolutePath(path))
2067+
path = await joinPath([await getJanDataFolderPath(), path])
2068+
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
2069+
mmprojPath = await joinPath([await getJanDataFolderPath(), mmprojPath])
20552070
const modelSize = await this.getModelSize(path)
20562071
const memoryInfo = await this.getTotalSystemMemory()
20572072
const gguf = await readGgufMetadata(path)
@@ -2138,12 +2153,12 @@ export default class llamacpp_extension extends AIEngine {
21382153
)
21392154

21402155
// --- Priority 1: Allocate mmproj (if exists) ---
2141-
let noOffloadMmproj = false
2156+
let offloadMmproj = false
21422157
let remainingVRAM = usableVRAM
21432158

21442159
if (mmprojSize > 0) {
21452160
if (mmprojSize <= remainingVRAM) {
2146-
noOffloadMmproj = true
2161+
offloadMmproj = true
21472162
remainingVRAM -= mmprojSize
21482163
logger.info(`MMProj allocated to VRAM: ${mmprojSize} bytes`)
21492164
} else {
@@ -2217,8 +2232,7 @@ export default class llamacpp_extension extends AIEngine {
22172232
// Calculate available system RAM for KV cache
22182233
const cpuLayers = totalLayers - gpuLayers
22192234
const modelCPUSize = cpuLayers * layerSize
2220-
const mmprojCPUSize =
2221-
mmprojSize > 0 && !noOffloadMmproj ? mmprojSize : 0
2235+
const mmprojCPUSize = mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
22222236
const systemRAMUsed = modelCPUSize + mmprojCPUSize
22232237
const availableSystemRAMForKVCache = Math.max(
22242238
0,
@@ -2277,7 +2291,7 @@ export default class llamacpp_extension extends AIEngine {
22772291
const estimatedGPUUsage =
22782292
gpuLayers * layerSize +
22792293
maxContextLength * kvCachePerToken +
2280-
(noOffloadMmproj ? mmprojSize : 0)
2294+
(offloadMmproj ? mmprojSize : 0)
22812295

22822296
if (estimatedGPUUsage > memoryInfo.totalVRAM * 0.9) {
22832297
logger.warn(
@@ -2293,7 +2307,7 @@ export default class llamacpp_extension extends AIEngine {
22932307
const newEstimate =
22942308
gpuLayers * layerSize +
22952309
maxContextLength * kvCachePerToken +
2296-
(noOffloadMmproj ? mmprojSize : 0)
2310+
(offloadMmproj ? mmprojSize : 0)
22972311
if (newEstimate <= memoryInfo.totalVRAM * 0.9) break
22982312
}
22992313

@@ -2329,7 +2343,7 @@ export default class llamacpp_extension extends AIEngine {
23292343

23302344
// Log final plan
23312345
const mmprojInfo = mmprojPath
2332-
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, noOffloadMmproj=${noOffloadMmproj}`
2346+
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, offloadMmproj=${offloadMmproj}`
23332347
: ''
23342348

23352349
logger.info(
@@ -2343,7 +2357,7 @@ export default class llamacpp_extension extends AIEngine {
23432357
maxContextLength,
23442358
noOffloadKVCache,
23452359
mode,
2346-
noOffloadMmproj,
2360+
offloadMmproj,
23472361
}
23482362
}
23492363

web-app/src/containers/ModelSetting.tsx

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,16 @@ export function ModelSetting({
4646
}
4747
setIsPlanning(true)
4848
try {
49-
// Read the model config to get the actual model path
49+
// Read the model config to get the actual model path and mmproj path
5050
const modelConfig = await serviceHub.app().readYaml<{
5151
model_path: string
52+
mmproj_path?: string
5253
}>(`llamacpp/models/${model.id}/model.yml`)
5354

5455
if (modelConfig && modelConfig.model_path) {
5556
const result = await serviceHub
5657
.models()
57-
.planModelLoad(modelConfig.model_path)
58+
.planModelLoad(modelConfig.model_path, modelConfig.mmproj_path)
5859

5960
// Apply the recommended settings to the model sequentially to avoid race conditions
6061
const settingsToUpdate: Array<{
@@ -73,6 +74,15 @@ export function ModelSetting({
7374
})
7475
}
7576

77+
// NOTE(review): this added block duplicated the pre-existing no_kv_offload
78+
// check immediately below, pushing the setting into settingsToUpdate twice;
79+
// the duplicate has been removed.
7686
if (
7787
model.settings?.no_kv_offload &&
7888
result.noOffloadKVCache !== undefined
@@ -83,6 +93,16 @@ export function ModelSetting({
8393
})
8494
}
8595

96+
if (
97+
model.settings?.mmproj_offload &&
98+
result.offloadMmproj !== undefined
99+
) {
100+
settingsToUpdate.push({
101+
key: 'mmproj_offload',
102+
value: result.offloadMmproj,
103+
})
104+
}
105+
86106
// Apply all settings in a single update to avoid race conditions
87107
if (settingsToUpdate.length > 0) {
88108
handleMultipleSettingsChange(settingsToUpdate)
@@ -242,11 +262,18 @@ export function ModelSetting({
242262
{provider.provider === 'llamacpp' && (
243263
<div className="pb-4 border-b border-main-view-fg/10 my-4">
244264
<div>
245-
<h3 className="font-medium mb-1">Optimize Settings</h3>
246-
<p className="text-main-view-fg/70 text-xs mb-3">
247-
Analyze your system and model, then apply optimal loading
248-
settings automatically
249-
</p>
265+
<div>
266+
<div className="flex items-center gap-2 mb-1">
267+
<h3 className="font-medium">Optimize Settings</h3>
268+
<div className="text-xs bg-main-view-fg/10 border border-main-view-fg/20 text-main-view-fg/70 rounded-full py-0.5 px-2">
269+
<span>{t('mcp-servers:experimental')}</span>
270+
</div>
271+
</div>
272+
<p className="text-main-view-fg/70 text-xs mb-3">
273+
Analyze your system and model, then apply optimal loading
274+
settings automatically
275+
</p>
276+
</div>
250277
<Button
251278
onClick={handlePlanModelLoad}
252279
disabled={isPlanning}

web-app/src/services/models/default.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,12 +495,14 @@ export class DefaultModelsService implements ModelsService {
495495

496496
async planModelLoad(
497497
modelPath: string,
498+
mmprojPath?: string,
498499
requestedCtx?: number
499500
): Promise<ModelPlan> {
500501
try {
501502
const engine = this.getEngine('llamacpp') as AIEngine & {
502503
planModelLoad?: (
503504
path: string,
505+
mmprojPath?: string,
504506
requestedCtx?: number
505507
) => Promise<ModelPlan>
506508
}
@@ -514,7 +516,12 @@ export class DefaultModelsService implements ModelsService {
514516
(core) => core.joinPath
515517
)
516518
const fullModelPath = await joinPath([janDataFolderPath, modelPath])
517-
return await engine.planModelLoad(fullModelPath, requestedCtx)
519+
// Forward mmprojPath so the engine can decide whether the multimodal projector fits in VRAM
520+
return await engine.planModelLoad(
521+
fullModelPath,
522+
mmprojPath,
523+
requestedCtx
524+
)
518525
}
519526

520527
// Fallback if method is not available
@@ -523,6 +530,7 @@ export class DefaultModelsService implements ModelsService {
523530
gpuLayers: 0,
524531
maxContextLength: 2048,
525532
noOffloadKVCache: true,
533+
offloadMmproj: false,
526534
mode: 'Unsupported',
527535
}
528536
} catch (error) {
@@ -531,6 +539,7 @@ export class DefaultModelsService implements ModelsService {
531539
gpuLayers: 0,
532540
maxContextLength: 2048,
533541
noOffloadKVCache: true,
542+
offloadMmproj: false,
534543
mode: 'Unsupported',
535544
}
536545
}

web-app/src/services/models/types.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ export interface ModelPlan {
8585
gpuLayers: number
8686
maxContextLength: number
8787
noOffloadKVCache: boolean
88+
offloadMmproj: boolean
8889
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
8990
}
9091

@@ -136,5 +137,9 @@ export interface ModelsService {
136137
ctxSize?: number
137138
): Promise<'RED' | 'YELLOW' | 'GREEN' | 'GREY'>
138139
validateGgufFile(filePath: string): Promise<ModelValidationResult>
139-
planModelLoad(modelPath: string, requestedCtx?: number): Promise<ModelPlan>
140+
planModelLoad(
141+
modelPath: string,
142+
mmprojPath?: string,
143+
requestedCtx?: number
144+
): Promise<ModelPlan>
140145
}

0 commit comments

Comments
 (0)