Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions config/charts/epp-standalone/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ dependencies:
- name: inference-extension
version: 0.0.0
repository: "file://../inference-extension"
# This is needed to make use of the common values.yaml in ./config/charts/inference-extension/values.yaml
alias: inferenceExtension
12 changes: 12 additions & 0 deletions config/charts/epp-standalone/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -295,4 +295,16 @@ inferenceExtension:
enabled: false

latencyPredictor:
# common latencyPredictor settings exist in config/charts/inference-extension/values.yaml
enabled: false

# Options: ["gke"]
provider:
name: none

# GKE-specific configuration.
# This block is only used if name is "gke".
gke:
# Set to true if the cluster is an Autopilot cluster.
autopilot: false

2 changes: 1 addition & 1 deletion config/charts/inference-extension/templates/_gke.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{{- define "inference-extension.gke" -}}
{{- if eq (lower .Values.provider.name) "gke" }}
{{- if and .Values.provider (eq (lower .Values.provider.name) "gke") }}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was this a bug?

Copy link
Copy Markdown
Contributor Author

@capri-xiyue capri-xiyue Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, in the inferencepool helm chart, the values.yaml provided "none" as a default, so we didn't have issues. But in the epp-standalone helm chart, the default "none" was not provided, so it hit this error. In this PR I've made "none" the default and, to be more defensive, added a check that provider exists before accessing its value. I was testing the "gke" setup, so I didn't notice this case where no provider value was set.

{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
Expand Down
80 changes: 80 additions & 0 deletions config/charts/inference-extension/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
latencyPredictor:
enabled: false
# Training Server Configuration
trainingServer:
image:
hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
name: latencypredictor-training-server
tag: latest
pullPolicy: Always
port: 8000
resources:
requests:
cpu: "2000m"
memory: "4Gi"
limits:
cpu: "4000m"
memory: "8Gi"
livenessProbe:
httpGet:
path: /healthz
port: 8000
initialDelaySeconds: 30
periodSeconds: 20
readinessProbe:
httpGet:
path: /readyz
port: 8000
initialDelaySeconds: 45
periodSeconds: 10
volumeSize: "20Gi"
config:
LATENCY_RETRAINING_INTERVAL_SEC: "1"
LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"
LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
LATENCY_MODEL_TYPE: "xgboost"
LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "5000"
LATENCY_QUANTILE_ALPHA: "0.9"

# Prediction Server Configuration
predictionServers:
count: 10
startPort: 8001
image:
hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
name: latencypredictor-prediction-server
tag: latest
pullPolicy: Always
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"
livenessProbe:
httpGet:
path: /healthz
initialDelaySeconds: 15
periodSeconds: 15
readinessProbe:
httpGet:
path: /readyz
initialDelaySeconds: 10
periodSeconds: 5
failureThreshold: 10
volumeSize: "10Gi"
config:
LATENCY_MODEL_TYPE: "xgboost"
PREDICT_HOST: "0.0.0.0"
LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"
LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"

# EPP Environment Variables for Latency Predictor
eppEnv:
LATENCY_MAX_SAMPLE_SIZE: "10000"
2 changes: 2 additions & 0 deletions config/charts/inferencepool/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ dependencies:
- name: inference-extension
version: 0.0.0
repository: "file://../inference-extension"
# This is needed to make use of the common values.yaml in ./config/charts/inference-extension/values.yaml
alias: inferenceExtension
80 changes: 1 addition & 79 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,87 +69,9 @@ inferenceExtension:

# Latency Predictor Configuration
latencyPredictor:
# common latencyPredictor settings exist in config/charts/inference-extension/values.yaml
enabled: false

# Training Server Configuration
trainingServer:
image:
hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
name: latencypredictor-training-server
tag: latest
pullPolicy: Always
port: 8000
resources:
requests:
cpu: "2000m"
memory: "4Gi"
limits:
cpu: "4000m"
memory: "8Gi"
livenessProbe:
httpGet:
path: /healthz
port: 8000
initialDelaySeconds: 30
periodSeconds: 20
readinessProbe:
httpGet:
path: /readyz
port: 8000
initialDelaySeconds: 45
periodSeconds: 10
volumeSize: "20Gi"
config:
LATENCY_RETRAINING_INTERVAL_SEC: "1"
LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"
LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
LATENCY_MODEL_TYPE: "xgboost"
LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "5000"
LATENCY_QUANTILE_ALPHA: "0.9"

# Prediction Server Configuration
predictionServers:
count: 10
startPort: 8001
image:
hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
name: latencypredictor-prediction-server
tag: latest
pullPolicy: Always
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"
livenessProbe:
httpGet:
path: /healthz
initialDelaySeconds: 15
periodSeconds: 15
readinessProbe:
httpGet:
path: /readyz
initialDelaySeconds: 10
periodSeconds: 5
failureThreshold: 10
volumeSize: "10Gi"
config:
LATENCY_MODEL_TYPE: "xgboost"
PREDICT_HOST: "0.0.0.0"
LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"
LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"

# EPP Environment Variables for Latency Predictor
eppEnv:
LATENCY_MAX_SAMPLE_SIZE: "10000"

inferencePool:
targetPorts:
- number: 8000
Expand Down
26 changes: 26 additions & 0 deletions hack/verify-helm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ declare -A test_cases_inference_pool
test_cases_inference_pool["basic"]="--set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
test_cases_inference_pool["gke-provider"]="--set provider.name=gke --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
test_cases_inference_pool["multiple-replicas"]="--set inferencePool.replicas=3 --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
test_cases_inference_pool["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"

# Run the install command in case this script runs from a different bash
# source (such as in the verify-all script)
Expand All @@ -46,5 +47,30 @@ for key in "${!test_cases_inference_pool[@]}"; do
fi
done

declare -A test_cases_epp_standalone

# EPP Standalone Helm Chart test cases
test_cases_epp_standalone["basic"]="--set inferenceExtension.endpointsServer.endpointSelector='app=llm-instance-gateway'"
test_cases_epp_standalone["gke-provider"]="--set provider.name=gke --set inferenceExtension.endpointsServer.endpointSelector='app=llm-instance-gateway'"
test_cases_epp_standalone["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true --set inferenceExtension.endpointsServer.endpointSelector='app=llm-instance-gateway'"


echo "Building dependencies for epp-standalone chart..."
${SCRIPT_ROOT}/bin/helm dependency build ${SCRIPT_ROOT}/config/charts/epp-standalone
if [ $? -ne 0 ]; then
echo "Helm dependency build failed."
exit 1
fi

# Run the test cases
echo "Running helm template command for epp-standalone chart..."
# Loop through the keys of the associative array
for key in "${!test_cases_epp_standalone[@]}"; do
echo "Running test: $key"
${SCRIPT_ROOT}/bin/helm template ${SCRIPT_ROOT}/config/charts/epp-standalone ${test_cases_epp_standalone[$key]} --output-dir="${SCRIPT_ROOT}/bin"
if [ $? -ne 0 ]; then
echo "Helm template command failed for test: $key"
exit 1
fi
done