Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 12 additions & 28 deletions core/application/config_file_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,33 +185,6 @@ func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHan
return handler
}

// runtimeSettings is the decoding target for the contents of
// runtime_settings.json.
//
// Every field is a pointer tagged with omitempty: a key that is absent from the
// file leaves the field nil, which lets the handler tell "not configured in the
// file" apart from an explicit zero value (false, 0, "") before it decides
// whether to overwrite the application config.
type runtimeSettings struct {
// Watchdog switches and timeouts.
// NOTE(review): the timeouts are carried as strings — presumably Go duration
// strings such as "15m"; confirm against the code that parses them.
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
// Backend concurrency limits.
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
// General runtime tuning and diagnostics.
Threads *int `json:"threads,omitempty"`
ContextSize *int `json:"context_size,omitempty"`
F16 *bool `json:"f16,omitempty"`
Debug *bool `json:"debug,omitempty"`
// HTTP API protection settings.
CORS *bool `json:"cors,omitempty"`
CSRF *bool `json:"csrf,omitempty"`
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
// Peer-to-peer / federation settings.
P2PToken *string `json:"p2p_token,omitempty"`
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
Federated *bool `json:"federated,omitempty"`
// Gallery lists and their autoload switches.
Galleries *[]config.Gallery `json:"galleries,omitempty"`
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
// API keys supplied through the settings file.
ApiKeys *[]string `json:"api_keys,omitempty"`
// Retention period, in days, for agent jobs.
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
}

func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing runtime_settings.json")
Expand All @@ -227,6 +200,8 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends
envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled
envMemoryReclaimerThreshold := appConfig.MemoryReclaimerThreshold == startupAppConfig.MemoryReclaimerThreshold
envThreads := appConfig.Threads == startupAppConfig.Threads
envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
envF16 := appConfig.F16 == startupAppConfig.F16
Expand All @@ -242,7 +217,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays

if len(fileContent) > 0 {
var settings runtimeSettings
var settings config.RuntimeSettings
err := json.Unmarshal(fileContent, &settings)
if err != nil {
return err
Expand Down Expand Up @@ -294,6 +269,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
if settings.ParallelBackendRequests != nil && !envParallelRequests {
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
}
if settings.MemoryReclaimerEnabled != nil && !envMemoryReclaimerEnabled {
appConfig.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
if appConfig.MemoryReclaimerEnabled {
appConfig.WatchDog = true // Memory reclaimer requires watchdog
}
}
if settings.MemoryReclaimerThreshold != nil && !envMemoryReclaimerThreshold {
appConfig.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
}
if settings.Threads != nil && !envThreads {
appConfig.Threads = *settings.Threads
}
Expand Down
62 changes: 41 additions & 21 deletions core/application/startup.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,17 +218,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
return
}

var settings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited)
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
}
var settings config.RuntimeSettings

if err := json.Unmarshal(fileContent, &settings); err != nil {
log.Warn().Err(err).Msg("failed to parse runtime_settings.json")
Expand Down Expand Up @@ -281,6 +271,16 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
}
}
}
if settings.WatchdogInterval != nil {
if options.WatchDogInterval == 0 {
dur, err := time.ParseDuration(*settings.WatchdogInterval)
if err == nil {
options.WatchDogInterval = dur
} else {
log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json")
}
}
}
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
if settings.MaxActiveBackends != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
Expand All @@ -303,6 +303,21 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
options.ParallelBackendRequests = *settings.ParallelBackendRequests
}
}
if settings.MemoryReclaimerEnabled != nil {
// Only apply if current value is default (false), suggesting it wasn't set from env var
if !options.MemoryReclaimerEnabled {
options.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
if options.MemoryReclaimerEnabled {
options.WatchDog = true // Memory reclaimer requires watchdog
}
}
}
if settings.MemoryReclaimerThreshold != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.MemoryReclaimerThreshold == 0 {
options.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
}
}
if settings.AgentJobRetentionDays != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.AgentJobRetentionDays == 0 {
Expand All @@ -323,19 +338,24 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
lruLimit := options.GetEffectiveMaxActiveBackends()

// Create watchdog if enabled OR if LRU limit is set
if options.WatchDog || lruLimit > 0 {
// Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled
if options.WatchDog || lruLimit > 0 || options.MemoryReclaimerEnabled {
wd := model.NewWatchDog(
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle,
lruLimit)
model.WithProcessManager(application.ModelLoader()),
model.WithBusyTimeout(options.WatchDogBusyTimeout),
model.WithIdleTimeout(options.WatchDogIdleTimeout),
model.WithWatchdogInterval(options.WatchDogInterval),
model.WithBusyCheck(options.WatchDogBusy),
model.WithIdleCheck(options.WatchDogIdle),
model.WithLRULimit(lruLimit),
model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold),
)
application.ModelLoader().SetWatchDog(wd)

// Start watchdog goroutine only if busy/idle checks are enabled
if options.WatchDogBusy || options.WatchDogIdle {
// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
// But memory reclaimer needs the Run() loop for periodic checking
if options.WatchDogBusy || options.WatchDogIdle || options.MemoryReclaimerEnabled {
go wd.Run()
}

Expand Down
33 changes: 22 additions & 11 deletions core/application/watchdog.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,28 @@ func (a *Application) startWatchdog() error {
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
lruLimit := appConfig.GetEffectiveMaxActiveBackends()

// Create watchdog if enabled OR if LRU limit is set
// Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled
// LRU eviction requires watchdog infrastructure even without busy/idle checks
if appConfig.WatchDog || lruLimit > 0 {
if appConfig.WatchDog || lruLimit > 0 || appConfig.MemoryReclaimerEnabled {
wd := model.NewWatchDog(
a.modelLoader,
appConfig.WatchDogBusyTimeout,
appConfig.WatchDogIdleTimeout,
appConfig.WatchDogBusy,
appConfig.WatchDogIdle,
lruLimit)
model.WithProcessManager(a.modelLoader),
model.WithBusyTimeout(appConfig.WatchDogBusyTimeout),
model.WithIdleTimeout(appConfig.WatchDogIdleTimeout),
model.WithWatchdogInterval(appConfig.WatchDogInterval),
model.WithBusyCheck(appConfig.WatchDogBusy),
model.WithIdleCheck(appConfig.WatchDogIdle),
model.WithLRULimit(lruLimit),
model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold),
)
a.modelLoader.SetWatchDog(wd)

// Create new stop channel
a.watchdogStop = make(chan bool, 1)

// Start watchdog goroutine only if busy/idle checks are enabled
// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
if appConfig.WatchDogBusy || appConfig.WatchDogIdle {
// But memory reclaimer needs the Run() loop for periodic checking
if appConfig.WatchDogBusy || appConfig.WatchDogIdle || appConfig.MemoryReclaimerEnabled {
go wd.Run()
}

Expand All @@ -56,7 +60,14 @@ func (a *Application) startWatchdog() error {
}
}()

log.Info().Int("lruLimit", lruLimit).Bool("busyCheck", appConfig.WatchDogBusy).Bool("idleCheck", appConfig.WatchDogIdle).Msg("Watchdog started with new settings")
log.Info().
Int("lruLimit", lruLimit).
Bool("busyCheck", appConfig.WatchDogBusy).
Bool("idleCheck", appConfig.WatchDogIdle).
Bool("memoryReclaimer", appConfig.MemoryReclaimerEnabled).
Float64("memoryThreshold", appConfig.MemoryReclaimerThreshold).
Dur("interval", appConfig.WatchDogInterval).
Msg("Watchdog started with new settings")
} else {
log.Info().Msg("Watchdog disabled")
}
Expand Down
8 changes: 8 additions & 0 deletions core/cli/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ type RunCMD struct {
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
EnableMemoryReclaimer bool `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"`
MemoryReclaimerThreshold float64 `env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
Expand Down Expand Up @@ -200,6 +202,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.SetWatchDogBusyTimeout(dur))
}
}

// Handle memory reclaimer (uses GPU VRAM if available, otherwise RAM)
if r.EnableMemoryReclaimer {
opts = append(opts, config.WithMemoryReclaimer(true, r.MemoryReclaimerThreshold))
}

if r.ParallelRequests {
opts = append(opts, config.EnableParallelBackendRequests)
}
Expand Down
Loading
Loading