Skip to content

Commit 7082f01

Browse files
authored
Merge branch 'kubernetes-sigs:main' into main
2 parents 96d09c9 + 4d39630 commit 7082f01

33 files changed

Lines changed: 759 additions & 247 deletions

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{{/*
2+
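Common InferencePool validations: when inferenceExtension.endpointsServer.createInferencePool is enabled, fail rendering unless .Values.inferencePool.modelServers.matchLabels is set.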
3+
*/}}
4+
{{- define "gateway-api-inference-extension.validations.inferencepool.common" }}
5+
{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.createInferencePool }}
6+
{{- if or (empty $.Values.inferencePool.modelServers) (not $.Values.inferencePool.modelServers.matchLabels) }}
7+
{{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
8+
{{- end }}
9+
{{- end }}
10+
{{- end -}}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.createInferencePool }}
2+
{{- /* Validate required values before choosing an API version, so that both the v1alpha2 and the v1 manifests fail with the explicit "matchLabels is required" message instead of a nil-pointer template error. */}}
{{- include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
3+
{{- if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}}
4+
apiVersion: {{ .Values.inferencePool.apiVersion }}
5+
kind: InferencePool
6+
metadata:
7+
name: {{ .Release.Name }}
8+
namespace: {{ .Release.Namespace }}
9+
labels:
10+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
11+
spec:
12+
targetPortNumber: {{ .Values.inferencePool.targetPortNumber | default 8000 }}
13+
selector:
14+
{{- if .Values.inferencePool.modelServers.matchLabels }}
15+
{{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
16+
{{ $key }}: {{ quote $value }}
17+
{{- end }}
18+
{{- end }}
19+
extensionRef:
20+
name: {{ include "gateway-api-inference-extension.name" . }}
21+
portNumber: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
22+
failureMode: {{ .Values.inferenceExtension.failureMode | default "FailClose" }}
23+
{{ else }}
24+
apiVersion: "inference.networking.k8s.io/v1"
25+
kind: InferencePool
26+
metadata:
27+
name: {{ .Release.Name }}
28+
namespace: {{ .Release.Namespace }}
29+
labels:
30+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
31+
spec:
32+
targetPorts:
33+
{{- range .Values.inferencePool.targetPorts }}
34+
- number: {{ .number }}
35+
{{- end }}
36+
selector:
37+
matchLabels:
38+
{{- if .Values.inferencePool.modelServers.matchLabels }}
39+
{{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
40+
{{ $key }}: {{ quote $value }}
41+
{{- end }}
42+
{{- end }}
43+
endpointPickerRef:
44+
name: {{ include "gateway-api-inference-extension.name" . }}
45+
port:
46+
number: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
47+
{{- end }}
48+
{{- end }}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.createInferencePool }}
2+
apiVersion: rbac.authorization.k8s.io/v1
3+
kind: Role
4+
metadata:
5+
name: {{ printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }}
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
9+
rules:
10+
- apiGroups: ["inference.networking.x-k8s.io"]
11+
resources: ["inferenceobjectives", "inferencemodelrewrites"]
12+
verbs: ["get", "watch", "list"]
13+
- apiGroups: ["{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"]
14+
resources: ["inferencepools"]
15+
verbs: ["get", "watch", "list"]
16+
---
17+
apiVersion: rbac.authorization.k8s.io/v1
18+
kind: RoleBinding
19+
metadata:
20+
name: {{ printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }}
21+
namespace: {{ .Release.Namespace }}
22+
subjects:
23+
- kind: ServiceAccount
24+
name: {{ include "gateway-api-inference-extension.name" . }}
25+
namespace: {{ .Release.Namespace }}
26+
roleRef:
27+
apiGroup: rbac.authorization.k8s.io
28+
kind: Role
29+
name: {{ printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }}
30+
{{- end }}

config/charts/epp-standalone/values.yaml

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@ inferenceExtension:
1515
pluginsConfigFile: "default-plugins.yaml"
1616

1717
endpointsServer:
18-
standalone: true
19-
# Required when standalone is true
20-
# endpointSelector: app=vllm-llama3-8b-instruct
18+
# set it to false when you want to deploy epp-standalone without an InferencePool (standalone mode)
19+
createInferencePool: true
20+
# Required when createInferencePool is false
21+
# endpointSelector: app=vllm-llama3-8b-instruct
22+
# unused when createInferencePool is true
2123
targetPorts: 8000
24+
# unused when createInferencePool is true
2225
modelServerType: vllm # vllm, triton-tensorrt-llm
2326

2427

@@ -308,3 +311,16 @@ provider:
308311
# Set to true if the cluster is an Autopilot cluster.
309312
autopilot: false
310313

314+
# This is not used when you deploy epp-standalone with inferenceExtension.endpointsServer.createInferencePool=false
315+
inferencePool:
316+
targetPorts:
317+
- number: 8000
318+
modelServerType: vllm # vllm, triton-tensorrt-llm
319+
apiVersion: inference.networking.k8s.io/v1
320+
# modelServers: # REQUIRED
321+
# matchLabels:
322+
# app: vllm-llama3-8b-instruct
323+
324+
# Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2.
325+
# This will soon be deprecated when upstream GW providers support v1, just doing something simple for now.
326+
targetPortNumber: 8000

config/charts/inference-extension/templates/_deployment.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ metadata:
66
namespace: {{ .Release.Namespace }}
77
labels:
88
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
9+
{{- include "gateway-api-inference-extension.modeLabels" . | nindent 4 }}
910
spec:
1011
replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
1112
strategy:
@@ -23,6 +24,7 @@ spec:
2324
metadata:
2425
labels:
2526
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
27+
{{- include "gateway-api-inference-extension.modeLabels" . | nindent 8 }}
2628
spec:
2729
serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
2830
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
@@ -73,13 +75,13 @@ spec:
7375
args:
7476
{{- /* 1. Determine Model Server Type Logic */ -}}
7577
{{- $modelServerType := "vllm" }}
76-
{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}}
78+
{{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) -}}
7779
{{- $modelServerType = .Values.inferenceExtension.endpointsServer.modelServerType | default "vllm" }}
7880
{{- else }}
7981
{{- $modelServerType = .Values.inferencePool.modelServerType | default "vllm" }}
8082
{{- end }}
8183
{{- /* 2. Mode Specific Flags */ -}}
82-
{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone }}
84+
{{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) }}
8385
- --endpoint-selector
8486
- {{ .Values.inferenceExtension.endpointsServer.endpointSelector | quote }}
8587
- --endpoint-target-ports
@@ -128,6 +130,10 @@ spec:
128130
{{- if not .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
129131
- --metrics-endpoint-auth=false
130132
{{- end }}
133+
{{- /* Optional container resources: emitted only when .Values.inferenceExtension.resources is set. The left trim on toYaml avoids a whitespace-only line after "resources:"; nindent supplies the newline and indentation. */}}
{{- with .Values.inferenceExtension.resources }}
134+
resources:
135+
{{- toYaml . | nindent 12 }}
136+
{{- end }}
131137
ports:
132138
- name: grpc
133139
containerPort: 9002

config/charts/inference-extension/templates/_helpers.tpl

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,23 @@ Cluster RBAC unique name
2929
Selector labels
3030
*/}}
3131
{{- define "gateway-api-inference-extension.selectorLabels" -}}
32-
{{- /* Check if endpointsServer exists AND if standalone is true */ -}}
33-
{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}}
32+
{{- /* Check if endpointsServer exists AND if createInferencePool is false */ -}}
33+
{{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) -}}
3434
{{- /* LOGIC FOR STANDALONE EPP MODE */ -}}
3535
epp: {{ include "gateway-api-inference-extension.name" . }}
3636
{{- else -}}
3737
{{- /* LOGIC FOR PARENT (INFERENCEPOOL) MODE */ -}}
3838
inferencepool: {{ include "gateway-api-inference-extension.name" . }}
3939
{{- end -}}
4040
{{- end -}}
41+
42+
{{/*
43+
Mode labels
44+
*/}}
45+
{{- define "gateway-api-inference-extension.modeLabels" -}}
46+
{{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) -}}
47+
inference.networking.k8s.io/igw-mode: standalone
48+
{{- else -}}
49+
inference.networking.k8s.io/igw-mode: inferencepool
50+
{{- end -}}
51+
{{- end -}}

config/charts/inferencepool/values.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ inferenceExtension:
4444

4545
tolerations: []
4646

47+
# resources:
48+
# requests:
49+
# cpu: 1000m
50+
# memory: 1Gi
51+
# limits:
52+
# cpu: 4000m
53+
# memory: 8Gi
54+
4755
# Sidecar configuration for EPP
4856
sidecar:
4957
enabled: false

config/manifests/vllm/gpu-deployment.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ spec:
2727
- "8000"
2828
- "--max-num-seq"
2929
- "1024"
30-
- "--compilation-config"
31-
- "3"
3230
- "--enable-lora"
3331
- "--max-loras"
3432
- "2"
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
GatewayAPIInferenceExtensionVersion: v1.0.1
2+
apiVersion: gateway.networking.k8s.io/v1
3+
date: "2026-01-29T22:31:59Z"
4+
gatewayAPIChannel: standard
5+
gatewayAPIVersion: v1.3.0
6+
implementation:
7+
contact:
8+
- '@istio/maintainers'
9+
organization: istio
10+
project: istio
11+
url: https://istio.io
12+
version: 1.28.3
13+
kind: ConformanceReport
14+
mode: default
15+
profiles:
16+
- core:
17+
result: success
18+
statistics:
19+
Failed: 0
20+
Passed: 9
21+
Skipped: 0
22+
name: Gateway
23+
summary: Core tests succeeded.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# istio (gateway Profile Conformance) - v1.0.1
2+
3+
## Test Results
4+
5+
This directory contains conformance test results for Gateway API Inference Extension v1.0.1 testing against istio implementations using the gateway profile.
6+
7+
| Extension Version Tested | Profile Tested | Implementation Version | Mode | Report | Status |
8+
|--------------------------|----------------|------------------------|---------|--------|--------|
9+
| v1.0.1 | Gateway | 1.28.3 | default | [./1.28.3-default-gateway-report.yaml](./1.28.3-default-gateway-report.yaml) | PASS |
10+
11+
## Running the Tests
12+
13+
For instructions on how to reproduce these test results and run the conformance tests yourself, see the [istio Conformance Testing README](../../../../scripts/istio/README.md).
14+
15+
## About This Version
16+
17+
- **Extension Version**: v1.0.1
18+
- **Profile**: gateway
19+
- **Implementation**: istio
20+
- **Test Mode**: Default
21+
22+
For detailed information about conformance testing, report generation, and requirements, see the [main conformance README](../../../../../README.md).

0 commit comments

Comments
 (0)