Skip to content

Commit 4d39630

Browse files
authored
add priority to admit request (#2251)
* add priority to admit request
* add inference obj spec to llmrequest
* make requestObjective an internal type
* make requestObjective an internal type 2
* make requestObjective an internal type 3
* make requestObjective an internal type 4
* make requestObjective an internal type 5
1 parent 1f62512 commit 4d39630

3 files changed

Lines changed: 12 additions & 0 deletions

File tree

pkg/epp/framework/interface/scheduling/types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ import (
2929

3030
const nilString = "<nil>"
3131

32+
// RequestObjectives represents the scheduling objectives parsed from the InferenceObjectiveSpec, to be used in scheduling decisions.
33+
type RequestObjectives struct {
34+
Priority int
35+
}
36+
3237
// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
3338
type LLMRequest struct {
3439
// RequestId is the Envoy-generated ID for the request being processed
@@ -39,6 +44,8 @@ type LLMRequest struct {
3944
Body *LLMRequestBody
4045
// Headers is a map of the request headers.
4146
Headers map[string]string
47+
// Objectives holds the scheduling objectives (currently the priority) that apply to this request.
48+
Objectives RequestObjectives
4249
}
4350

4451
func (r *LLMRequest) String() string {

pkg/epp/requestcontrol/director.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,15 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
149149

150150
// Parse inference objective.
151151
infObjective := d.getInferenceObjective(ctx, reqCtx)
152+
requestObjectives := fwksched.RequestObjectives{Priority: *infObjective.Spec.Priority}
152153

153154
// Prepare LLMRequest (needed for both saturation detection and Scheduler)
154155
reqCtx.SchedulingRequest = &fwksched.LLMRequest{
155156
RequestId: reqCtx.Request.Headers[requtil.RequestIdHeaderKey],
156157
TargetModel: reqCtx.TargetModelName,
157158
Body: requestBody,
158159
Headers: reqCtx.Request.Headers,
160+
Objectives: requestObjectives,
159161
}
160162

161163
logger = logger.WithValues("objectiveKey", reqCtx.ObjectiveKey, "incomingModelName", reqCtx.IncomingModelName, "targetModelName", reqCtx.TargetModelName, "priority", infObjective.Spec.Priority)

pkg/epp/requestcontrol/request_control_config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ func (c *Config) AddPlugins(pluginObjects ...plugin.Plugin) {
103103
if prepareDataPlugin, ok := plugin.(fwk.PrepareDataPlugin); ok {
104104
c.prepareDataPlugins = append(c.prepareDataPlugins, prepareDataPlugin)
105105
}
106+
if admissionPlugin, ok := plugin.(fwk.AdmissionPlugin); ok {
107+
c.admissionPlugins = append(c.admissionPlugins, admissionPlugin)
108+
}
106109
}
107110
}
108111

0 commit comments

Comments
 (0)