kubernetes-sigs
diff --git a/‎config/charts/epp-standalone/templates/_validations.tpl‎
Lines changed: 10 additions & 0 deletions b/‎config/charts/epp-standalone/templates/_validations.tpl‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎config/charts/epp-standalone/templates/inferencepool.yaml‎
Lines changed: 48 additions & 0 deletions b/‎config/charts/epp-standalone/templates/inferencepool.yaml‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎config/charts/epp-standalone/templates/rbac.yaml‎
Lines changed: 30 additions & 0 deletions b/‎config/charts/epp-standalone/templates/rbac.yaml‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎config/charts/epp-standalone/values.yaml‎
Lines changed: 19 additions & 3 deletions b/‎config/charts/epp-standalone/values.yaml‎
Lines changed: 19 additions & 3 deletions
diff --git a/‎config/charts/inference-extension/templates/_deployment.yaml‎
Lines changed: 8 additions & 2 deletions b/‎config/charts/inference-extension/templates/_deployment.yaml‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎config/charts/inference-extension/templates/_helpers.tpl‎
Lines changed: 13 additions & 2 deletions b/‎config/charts/inference-extension/templates/_helpers.tpl‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎config/charts/inferencepool/values.yaml‎
Lines changed: 8 additions & 0 deletions b/‎config/charts/inferencepool/values.yaml‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎config/manifests/vllm/gpu-deployment.yaml‎
Lines changed: 0 additions & 2 deletions b/‎config/manifests/vllm/gpu-deployment.yaml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎conformance/reports/v1.0.1/gateway/istio/1.28.3-default-gateway-report.yaml‎
Lines changed: 23 additions & 0 deletions b/‎conformance/reports/v1.0.1/gateway/istio/1.28.3-default-gateway-report.yaml‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎conformance/reports/v1.0.1/gateway/istio/README.md‎
Lines changed: 22 additions & 0 deletions b/‎conformance/reports/v1.0.1/gateway/istio/README.md‎
Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,10 @@
+{{/*
+Common InferencePool validations.
+
+When inferenceExtension.endpointsServer is set and its createInferencePool
+flag is truthy, rendering is aborted with an explicit error unless
+inferencePool.modelServers is non-empty and defines matchLabels.
+The template emits no text; it is included only for its `fail` side effect.
+*/}}
+{{- define "gateway-api-inference-extension.validations.inferencepool.common" }}
+{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.createInferencePool }}
+{{- if or (empty $.Values.inferencePool.modelServers) (not $.Values.inferencePool.modelServers.matchLabels) }}
+{{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
+{{- end }}
+{{- end }}
+{{- end -}}
@@ -0,0 +1,48 @@
+{{- /*
+InferencePool managed by this chart. Rendered only when
+inferenceExtension.endpointsServer.createInferencePool is enabled.
+
+inferencePool.apiVersion selects the schema that is emitted:
+  - inference.networking.x-k8s.io/v1alpha2: targetPortNumber, flat selector, extensionRef
+  - any other value: inference.networking.k8s.io/v1 with targetPorts,
+    selector.matchLabels and endpointPickerRef
+*/ -}}
+{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.createInferencePool }}
+{{- /* Validate before branching so that a missing modelServers.matchLabels fails with the explicit message for both API versions, not only for v1. */}}
+{{- include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
+{{- if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}}
+apiVersion: {{ .Values.inferencePool.apiVersion }}
+kind: InferencePool
+metadata:
+  name: {{ .Release.Name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  targetPortNumber: {{ .Values.inferencePool.targetPortNumber | default 8000 }}
+  selector:
+    {{- if .Values.inferencePool.modelServers.matchLabels }}
+    {{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
+    {{ $key }}: {{ quote $value }}
+    {{- end }}
+    {{- end }}
+  extensionRef:
+    name: {{ include "gateway-api-inference-extension.name" . }}
+    portNumber: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
+    failureMode: {{ .Values.inferenceExtension.failureMode | default "FailClose" }}
+{{- else }}
+apiVersion: "inference.networking.k8s.io/v1"
+kind: InferencePool
+metadata:
+  name: {{ .Release.Name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  targetPorts:
+    {{- range .Values.inferencePool.targetPorts }}
+    - number: {{ .number }}
+    {{- end }}
+  selector:
+    matchLabels:
+      {{- if .Values.inferencePool.modelServers.matchLabels }}
+      {{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
+      {{ $key }}: {{ quote $value }}
+      {{- end }}
+      {{- end }}
+  endpointPickerRef:
+    name: {{ include "gateway-api-inference-extension.name" . }}
+    port:
+      number: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
+{{- end }}
+{{- end }}
@@ -0,0 +1,30 @@
+{{- /*
+Namespaced read-only RBAC for the EPP ServiceAccount. Rendered only when
+inferenceExtension.endpointsServer.createInferencePool is enabled.
+
+Grants get/watch/list on inferenceobjectives and inferencemodelrewrites, and on
+inferencepools in the API group taken from inferencePool.apiVersion
+(the part before the "/").
+*/ -}}
+{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.createInferencePool }}
+{{- /* The Role, the RoleBinding and its roleRef must agree on this name, so compute it once. */}}
+{{- $roleName := printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ $roleName }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+rules:
+  - apiGroups: ["inference.networking.x-k8s.io"]
+    resources: ["inferenceobjectives", "inferencemodelrewrites"]
+    verbs: ["get", "watch", "list"]
+  - apiGroups: ["{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"]
+    resources: ["inferencepools"]
+    verbs: ["get", "watch", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ $roleName }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ include "gateway-api-inference-extension.name" . }}
+    namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ $roleName }}
+{{- end }}
@@ -15,10 +15,13 @@ inferenceExtension:
   pluginsConfigFile: "default-plugins.yaml"
 
   endpointsServer:
-    standalone: true
-    # Required when standalone is true
-    #    endpointSelector: app=vllm-llama3-8b-instruct
+    # Set to false to deploy epp-standalone without an InferencePool; the EPP then selects endpoints via endpointSelector.
+    createInferencePool: true
+    # Required when createInferencePool is false
+#    endpointSelector: app=vllm-llama3-8b-instruct
+    # unused when createInferencePool is true
     targetPorts: 8000
+    # unused when createInferencePool is true
     modelServerType: vllm # vllm, triton-tensorrt-llm
 
 
@@ -308,3 +311,16 @@ provider:
     # Set to true if the cluster is an Autopilot cluster.
     autopilot: false
 
+# This is not used when you deploy epp-standalone with inferenceExtension.endpointsServer.createInferencePool=false
+inferencePool:
+  targetPorts:
+    - number: 8000
+  modelServerType: vllm # vllm, triton-tensorrt-llm
+  apiVersion: inference.networking.k8s.io/v1
+  # modelServers: # REQUIRED
+  #   matchLabels:
+  #     app: vllm-llama3-8b-instruct
+
+  # Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2.
+  # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now.
+  targetPortNumber: 8000
@@ -6,6 +6,7 @@ metadata:
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+    {{- include "gateway-api-inference-extension.modeLabels" . | nindent 4 }}
 spec:
   replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
   strategy:
@@ -23,6 +24,7 @@ spec:
     metadata:
       labels:
         {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
+        {{- include "gateway-api-inference-extension.modeLabels" . | nindent 8 }}
     spec:
       serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
       # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
@@ -73,13 +75,13 @@ spec:
           args:
           {{- /* 1. Determine Model Server Type Logic */ -}}
           {{- $modelServerType := "vllm" }}
-          {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}}
+          {{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) -}}
             {{- $modelServerType = .Values.inferenceExtension.endpointsServer.modelServerType | default "vllm" }}
           {{- else }}
             {{- $modelServerType = .Values.inferencePool.modelServerType | default "vllm" }}
           {{- end }}
           {{- /* 2. Mode Specific Flags */ -}}
-          {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone }}
+          {{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) }}
               - --endpoint-selector
               - {{ .Values.inferenceExtension.endpointsServer.endpointSelector | quote }}
               - --endpoint-target-ports
@@ -128,6 +130,10 @@ spec:
           {{- if not .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
               - --metrics-endpoint-auth=false
           {{- end }}
+          {{- /* Optional container resources taken from inferenceExtension.resources; the key is omitted when the value is unset. */}}
+          {{- with .Values.inferenceExtension.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
           ports:
             - name: grpc
               containerPort: 9002
 
@@ -29,12 +29,23 @@ Cluster RBAC unique name
 Selector labels
 */}}
 {{- define "gateway-api-inference-extension.selectorLabels" -}}
-{{- /* Check if endpointsServer exists AND if standalone is true */ -}}
-{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}}
+{{- /* Check if endpointsServer exists AND if createInferencePool is false */ -}}
+{{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) -}}
 {{- /* LOGIC FOR STANDALONE EPP MODE */ -}}
 epp: {{ include "gateway-api-inference-extension.name" . }}
 {{- else -}}
 {{- /* LOGIC FOR PARENT (INFERENCEPOOL) MODE */ -}}
 inferencepool: {{ include "gateway-api-inference-extension.name" . }}
 {{- end -}}
 {{- end -}}
+
+{{/*
+Mode labels.
+
+Emits a single inference.networking.k8s.io/igw-mode label: "standalone" when
+inferenceExtension.endpointsServer is set and createInferencePool is falsy,
+"inferencepool" otherwise.
+*/}}
+{{- define "gateway-api-inference-extension.modeLabels" -}}
+{{- $mode := "inferencepool" -}}
+{{- if and .Values.inferenceExtension.endpointsServer (not .Values.inferenceExtension.endpointsServer.createInferencePool) -}}
+{{- $mode = "standalone" -}}
+{{- end -}}
+inference.networking.k8s.io/igw-mode: {{ $mode }}
+{{- end -}}
@@ -44,6 +44,14 @@ inferenceExtension:
 
   tolerations: []
 
+#  resources:
+#    requests:
+#      cpu: 1000m
+#      memory: 1Gi
+#    limits:
+#      cpu: 4000m
+#      memory: 8Gi
+
   # Sidecar configuration for EPP
   sidecar:
     enabled: false
 
@@ -27,8 +27,6 @@ spec:
           - "8000"
           - "--max-num-seq"
           - "1024"
-          - "--compilation-config"
-          - "3"
           - "--enable-lora"
           - "--max-loras"
           - "2"
 
@@ -0,0 +1,23 @@
+GatewayAPIInferenceExtensionVersion: v1.0.1
+apiVersion: gateway.networking.k8s.io/v1
+date: "2026-01-29T22:31:59Z"
+gatewayAPIChannel: standard
+gatewayAPIVersion: v1.3.0
+implementation:
+  contact:
+  - '@istio/maintainers'
+  organization: istio
+  project: istio
+  url: https://istio.io
+  version: 1.28.3
+kind: ConformanceReport
+mode: default
+profiles:
+- core:
+    result: success
+    statistics:
+      Failed: 0
+      Passed: 9
+      Skipped: 0
+  name: Gateway
+  summary: Core tests succeeded.
@@ -0,0 +1,22 @@
+# istio (gateway Profile Conformance) - v1.0.1
+
+## Test Results
+
+This directory contains conformance test results for Gateway API Inference Extension v1.0.1, tested against the Istio implementation using the Gateway profile.
+
+| Extension Version Tested | Profile Tested | Implementation Version | Mode    | Report | Status |
+|--------------------------|----------------|------------------------|---------|--------|--------|
+| v1.0.1 | Gateway | 1.28.3 | default | [./1.28.3-default-gateway-report.yaml](./1.28.3-default-gateway-report.yaml) | PASS |
+
+## Running the Tests
+
+For instructions on how to reproduce these test results and run the conformance tests yourself, see the [istio Conformance Testing README](../../../../scripts/istio/README.md).
+
+## About This Version
+
+- **Extension Version**: v1.0.1
+- **Profile**: gateway
+- **Implementation**: istio
+- **Test Mode**: Default
+
+For detailed information about conformance testing, report generation, and requirements, see the [main conformance README](../../../../../README.md).