kubernetes-sigs · k8s-ci-robot · Jan 21, 2026 · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026
diff --git a/config/charts/epp-standalone/values.yaml b/config/charts/epp-standalone/values.yaml
@@ -296,3 +296,94 @@ inferenceExtension:
 
   latencyPredictor:
     enabled: false
+
+    # Training server sidecar settings: image, port, resources, probes, volume size, config
+    trainingServer:
+      image:
+        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
+        name: latencypredictor-training-server
+        tag: latest  # NOTE(review): mutable tag; consider pinning a released version
+        pullPolicy: Always
+      port: 8000
+      resources:
+        requests:
+          cpu: "2000m"
+          memory: "4Gi"
+        limits:
+          cpu: "4000m"
+          memory: "8Gi"
+      livenessProbe:
+        httpGet:
+          path: /healthz
+          port: 8000  # same value as trainingServer.port above
+        initialDelaySeconds: 30
+        periodSeconds: 20
+      readinessProbe:
+        httpGet:
+          path: /readyz
+          port: 8000  # same value as trainingServer.port above
+        initialDelaySeconds: 45
+        periodSeconds: 10
+      volumeSize: "20Gi"
+      config:  # every value below is quoted so it stays a string
+        LATENCY_RETRAINING_INTERVAL_SEC: "1"  # NOTE(review): presumably seconds between retraining runs; confirm 1 is intended
+        LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
+        LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"  # the four artifact paths all live under /models
+        LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
+        LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
+        LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
+        LATENCY_MODEL_TYPE: "xgboost"  # same value as predictionServers.config.LATENCY_MODEL_TYPE
+        LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "5000"
+        LATENCY_QUANTILE_ALPHA: "0.9"
+
+    # Prediction server sidecar settings: count, ports, image, resources, probes, volume size, config
+    predictionServers:
+      count: 10
+      startPort: 8001  # NOTE(review): presumably servers take consecutive ports from here; confirm against the template
+      image:
+        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
+        name: latencypredictor-prediction-server
+        tag: latest  # NOTE(review): mutable tag; consider pinning a released version
+        pullPolicy: Always
+      resources:
+        requests:
+          cpu: "500m"
+          memory: "1Gi"
+        limits:
+          cpu: "1000m"
+          memory: "2Gi"
+      livenessProbe:
+        httpGet:
+          path: /healthz  # no port here, unlike trainingServer; presumably set per server by the template - confirm
+        initialDelaySeconds: 15
+        periodSeconds: 15
+      readinessProbe:
+        httpGet:
+          path: /readyz  # no port here, unlike trainingServer; presumably set per server by the template - confirm
+        initialDelaySeconds: 10
+        periodSeconds: 5
+        failureThreshold: 10
+      volumeSize: "10Gi"
+      config:  # every value below is quoted so it stays a string
+        LATENCY_MODEL_TYPE: "xgboost"  # same value as trainingServer.config.LATENCY_MODEL_TYPE
+        PREDICT_HOST: "0.0.0.0"
+        LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"  # the four artifact paths all live under /server_models
+        LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
+        LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
+        LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"
+
+    # EPP Environment Variables for Latency Predictor
+    eppEnv:
+      LATENCY_MAX_SAMPLE_SIZE: "10000"  # quoted so the value stays a string rather than an integer
+
+
+# Options: ["gke"]
+provider:
+  name: none  # the inference-extension.gke helper lowercases this before comparing it to "gke"
+
+  # GKE-specific configuration.
+  # This block is only used if name is "gke".
+  gke:
+    # Set to true if the cluster is an Autopilot cluster.
+    autopilot: false
+
diff --git a/config/charts/inference-extension/templates/_gke.yaml b/config/charts/inference-extension/templates/_gke.yaml
@@ -1,5 +1,5 @@
 {{- define "inference-extension.gke" -}}
-{{- if eq (lower .Values.provider.name) "gke" }}
+{{- if and .Values.provider (eq (lower .Values.provider.name) "gke") }}
 {{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
 {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
 {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}

diff --git a/hack/verify-helm.sh b/hack/verify-helm.sh
@@ -22,6 +22,7 @@ declare -A test_cases_inference_pool
 test_cases_inference_pool["basic"]="--set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 test_cases_inference_pool["gke-provider"]="--set provider.name=gke --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 test_cases_inference_pool["multiple-replicas"]="--set inferencePool.replicas=3 --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
+test_cases_inference_pool["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 
 # Run the install command in case this script runs from a different bash
 # source (such as in the verify-all script)
@@ -46,5 +47,30 @@ for key in "${!test_cases_inference_pool[@]}"; do
   fi
 done
 
+declare -A test_cases_epp_standalone
 
+# EPP standalone Helm chart test cases. No shell quotes inside the values: the string is expanded unquoted, so embedded quotes would reach helm literally.
+test_cases_epp_standalone["basic"]="--set inferenceExtension.endpointsServer.endpointSelector=app=llm-instance-gateway"
+test_cases_epp_standalone["gke-provider"]="--set provider.name=gke --set inferenceExtension.endpointsServer.endpointSelector=app=llm-instance-gateway"
+test_cases_epp_standalone["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true --set inferenceExtension.endpointsServer.endpointSelector=app=llm-instance-gateway"
+
+
+echo "Building dependencies for epp-standalone chart..."
+# Test the command directly so the failure message still prints if errexit is enabled; paths are quoted against spaces.
+if ! "${SCRIPT_ROOT}/bin/helm" dependency build "${SCRIPT_ROOT}/config/charts/epp-standalone"; then
+  echo "Helm dependency build failed."
+  exit 1
+fi
+
+# Run the epp-standalone test cases
+echo "Running helm template command for epp-standalone chart..."
+# Loop through the keys of the associative array
+for key in "${!test_cases_epp_standalone[@]}"; do
+  echo "Running test: $key"
+  # The flag string is left unquoted on purpose so it word-splits into separate helm arguments; paths are quoted.
+  if ! "${SCRIPT_ROOT}/bin/helm" template "${SCRIPT_ROOT}/config/charts/epp-standalone" ${test_cases_epp_standalone[$key]} --output-dir="${SCRIPT_ROOT}/bin"; then
+    echo "Helm template command failed for test: $key"
+    exit 1
+  fi
+done