Commit ee2aad6

Model replacement to Qwen3-32B

Signed-off-by: Sathvik <Sathvik.S@ibm.com>

1 parent 6a611ad

40 files changed (+218, -218 lines)

config/charts/epp-standalone/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ inferenceExtension:
   endpointsServer:
     standalone: true
     # Required when standalone is true
-    # endpointSelector: app=vllm-llama3-8b-instruct
+    # endpointSelector: app=vllm-qwen3-32b
     targetPorts: 8000
     modelServerType: vllm # vllm, triton-tensorrt-llm

config/charts/inferencepool/README.md

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@ A chart to deploy an InferencePool and a corresponding EndpointPicker (epp) depl
44

55
## Install
66

7-
To install an InferencePool named `vllm-llama3-8b-instruct` that selects from endpoints with label `app: vllm-llama3-8b-instruct` and listening on port `8000`, you can run the following command:
7+
To install an InferencePool named `vllm-qwen3-32b` that selects from endpoints with label `app: vllm-qwen3-32b` and listening on port `8000`, you can run the following command:
88

99
```txt
10-
$ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool \
11-
--set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
10+
$ helm install vllm-qwen3-32b ./config/charts/inferencepool \
11+
--set inferencePool.modelServers.matchLabels.app=vllm-qwen3-32b \
1212
```
1313

1414
To install via the latest published chart in staging (--version v0 indicates latest dev version), you can run the following command:
1515

1616
```txt
17-
$ helm install vllm-llama3-8b-instruct \
18-
--set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
17+
$ helm install vllm-qwen3-32b \
18+
--set inferencePool.modelServers.matchLabels.app=vllm-qwen3-32b \
1919
--set provider.name=[none|gke|istio] \
2020
oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
2121
```
@@ -27,8 +27,8 @@ Note that the provider name is needed to deploy provider-specific resources. If
2727
To set cmd-line flags, you can use the `--set` option to set each flag, e.g.,:
2828

2929
```txt
30-
$ helm install vllm-llama3-8b-instruct \
31-
--set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
30+
$ helm install vllm-qwen3-32b \
31+
--set inferencePool.modelServers.matchLabels.app=vllm-qwen3-32b \
3232
--set inferenceExtension.flags.<FLAG_NAME>=<FLAG_VALUE>
3333
--set provider.name=[none|gke|istio] \
3434
oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
@@ -64,7 +64,7 @@ inferenceExtension:
6464
Then apply it with:
6565
6666
```txt
67-
$ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
67+
$ helm install vllm-qwen3-32b ./config/charts/inferencepool -f values.yaml
6868
```
6969

7070
### Install with Custom EPP Plugins Configuration
@@ -106,7 +106,7 @@ inferenceExtension:
106106
Then apply it with:
107107

108108
```txt
109-
$ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
109+
$ helm install vllm-qwen3-32b ./config/charts/inferencepool -f values.yaml
110110
```
111111

112112
### Install for Triton TensorRT-LLM
@@ -159,8 +159,8 @@ To enable HA, set `inferenceExtension.replicas` to a number greater than 1.
159159
* Via `--set` flag:
160160

161161
```txt
162-
helm install vllm-llama3-8b-instruct \
163-
--set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
162+
helm install vllm-qwen3-32b \
163+
--set inferencePool.modelServers.matchLabels.app=vllm-qwen3-32b \
164164
--set inferenceExtension.replicas=3 \
165165
--set provider=[none|gke] \
166166
oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
@@ -176,7 +176,7 @@ To enable HA, set `inferenceExtension.replicas` to a number greater than 1.
176176
Then apply it with:
177177

178178
```txt
179-
helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
179+
helm install vllm-qwen3-32b ./config/charts/inferencepool -f values.yaml
180180
```
181181

182182
### Install with Monitoring
@@ -204,7 +204,7 @@ If you are using a GKE Autopilot cluster, you also need to set `provider.gke.aut
204204
Then apply it with:
205205

206206
```txt
207-
helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
207+
helm install vllm-qwen3-32b ./config/charts/inferencepool -f values.yaml
208208
```
209209

210210
## Uninstall
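Since this commit renames the pool in 40 files, a quick way to confirm the rename is complete is to grep the tree for the old name; an editor's sketch below demonstrates the check on a throwaway directory (on the real repo you would run the `grep` from the checkout root instead):

```shell
# Sketch: after a bulk rename like this commit, grep for the old pool
# name to catch stale references. The temp directory and file names
# here are illustrative, not part of the repo.
dir=$(mktemp -d)
printf 'app: vllm-qwen3-32b\n'          > "$dir/renamed.yaml"
printf 'app: vllm-llama3-8b-instruct\n' > "$dir/stale.yaml"
# Lists any file still carrying the old name:
grep -rl 'vllm-llama3-8b-instruct' "$dir"
```

An empty result from the `grep` means no file still references the old release name.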

config/charts/inferencepool/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -157,7 +157,7 @@ inferencePool:
   apiVersion: inference.networking.k8s.io/v1
   # modelServers: # REQUIRED
   #   matchLabels:
-  #     app: vllm-llama3-8b-instruct
+  #     app: vllm-qwen3-32b
 
   # Should only used if apiVersion is inference.networking.x-k8s.io/v1alpha2,
   # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now.
Lines changed: 3 additions & 3 deletions
@@ -1,10 +1,10 @@
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: vllm-llama3-8b-instruct-adapters-allowlist
+  name: vllm-qwen3-32b-adapters-allowlist
   labels:
     inference-gateway.k8s.io/managed: "true"
 data:
-  baseModel: meta-llama/Llama-3.1-8B-Instruct
+  baseModel: Qwen/Qwen3-32B
   adapters: |
-    - food-review-1
+    - qwen-uncensored-1
Lines changed: 4 additions & 4 deletions
@@ -1,12 +1,12 @@
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferenceObjective
 metadata:
-  name: food-review
+  name: qwen-uncensored
 spec:
   priority: 1
   poolRef:
     group: inference.networking.k8s.io
-    name: vllm-llama3-8b-instruct
+    name: vllm-qwen3-32b
 ---
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferenceObjective
@@ -16,7 +16,7 @@ spec:
   priority: 2
   poolRef:
     group: inference.networking.k8s.io
-    name: vllm-llama3-8b-instruct
+    name: vllm-qwen3-32b
 ---
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferenceObjective
@@ -26,4 +26,4 @@ spec:
   priority: 2
   poolRef:
     group: inference.networking.k8s.io
-    name: vllm-llama3-8b-instruct
+    name: vllm-qwen3-32b
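All three InferenceObjectives above must point at the renamed pool, or their priorities silently stop applying. An editor's sketch of an excerpt-level consistency check (the temp file stands in for the real manifest; on a checkout you would grep the file itself):

```shell
# Sketch: collect the poolRef names from the objectives manifest and
# confirm only one distinct name remains after the rename. The file
# path and excerpt are illustrative.
cat > /tmp/poolref-names.txt <<'EOF'
name: vllm-qwen3-32b
name: vllm-qwen3-32b
name: vllm-qwen3-32b
EOF
# A single distinct line means the rename touched every poolRef:
sort -u /tmp/poolref-names.txt
```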

config/manifests/vllm/cpu-deployment.yaml

Lines changed: 7 additions & 7 deletions
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: vllm-llama3-8b-instruct
+  name: vllm-qwen3-32b
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: vllm-llama3-8b-instruct
+      app: vllm-qwen3-32b
   template:
     metadata:
       labels:
-        app: vllm-llama3-8b-instruct
+        app: vllm-qwen3-32b
     spec:
       containers:
       - name: lora
@@ -26,8 +26,8 @@ spec:
         - "--max-loras"
         - "4"
         - "--lora-modules"
-        - '{"name": "food-review-0", "path": "SriSanth2345/Qwen-1.5B-Tweet-Generations", "base_model_name": "Qwen/Qwen2.5-1.5B"}'
-        - '{"name": "food-review-1", "path": "SriSanth2345/Qwen-1.5B-Tweet-Generations", "base_model_name": "Qwen/Qwen2.5-1.5B"}'
+        - '{"name": "qwen-uncensored-0", "path": "SriSanth2345/Qwen-1.5B-Tweet-Generations", "base_model_name": "Qwen/Qwen2.5-1.5B"}'
+        - '{"name": "qwen-uncensored-1", "path": "SriSanth2345/Qwen-1.5B-Tweet-Generations", "base_model_name": "Qwen/Qwen2.5-1.5B"}'
         env:
         - name: PORT
           value: "8000"
@@ -108,12 +108,12 @@ metadata:
 data:
   configmap.yaml: |
     vLLMLoRAConfig:
-      name: vllm-llama3-8b-instruct
+      name: vllm-qwen3-32b
       port: 8000
       ensureExist:
         models:
         - base-model: Qwen/Qwen2.5-1.5B
-          id: food-review
+          id: qwen-uncensored
           source: SriSanth2345/Qwen-1.5B-Tweet-Generations
         - base-model: Qwen/Qwen2.5-1.5B
           id: cad-fabricator

config/manifests/vllm/gpu-deployment.yaml

Lines changed: 10 additions & 10 deletions
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: vllm-llama3-8b-instruct
+  name: vllm-qwen3-32b
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: vllm-llama3-8b-instruct
+      app: vllm-qwen3-32b
   template:
     metadata:
       labels:
-        app: vllm-llama3-8b-instruct
+        app: vllm-qwen3-32b
     spec:
       containers:
       - name: vllm
@@ -19,7 +19,7 @@ spec:
         command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
         args:
         - "--model"
-        - "meta-llama/Llama-3.1-8B-Instruct"
+        - "Qwen/Qwen3-32B"
        - "--tensor-parallel-size"
        - "1"
        - "--port"
@@ -240,19 +240,19 @@ spec:
         emptyDir: {}
       - name: config-volume
         configMap:
-          name: vllm-llama3-8b-instruct-adapters
+          name: vllm-qwen3-32b-adapters
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: vllm-llama3-8b-instruct-adapters
+  name: vllm-qwen3-32b-adapters
 data:
   configmap.yaml: |
     vLLMLoRAConfig:
-      name: vllm-llama3-8b-instruct-adapters
+      name: vllm-qwen3-32b-adapters
       port: 8000
-      defaultBaseModel: meta-llama/Llama-3.1-8B-Instruct
+      defaultBaseModel: Qwen/Qwen3-32B
       ensureExist:
         models:
-        - id: food-review-1
-          source: Kawon/llama3.1-food-finetune_v14_r8
+        - id: qwen-uncensored-1
+          source: nicoboss/Qwen3-32B-Uncensored
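In this file the serving argument (`--model`) and the LoRA ConfigMap (`defaultBaseModel`) must both move to the new base model, or the adapter sync targets the wrong base. An editor's sketch of an excerpt-level check (the temp file stands in for `config/manifests/vllm/gpu-deployment.yaml`):

```shell
# Sketch: both model references in the manifest should name the new
# base model. The excerpt below reproduces just the two relevant
# lines; counts and paths are illustrative.
cat > /tmp/gpu-deploy-excerpt.txt <<'EOF'
- "--model"
- "Qwen/Qwen3-32B"
defaultBaseModel: Qwen/Qwen3-32B
EOF
# Two matching lines = --model and defaultBaseModel agree:
[ "$(grep -c 'Qwen/Qwen3-32B' /tmp/gpu-deploy-excerpt.txt)" -eq 2 ] && echo "base model consistent"
```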

config/manifests/vllm/sim-deployment.yaml

Lines changed: 5 additions & 5 deletions
@@ -1,30 +1,30 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: vllm-llama3-8b-instruct
+  name: vllm-qwen3-32b
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: vllm-llama3-8b-instruct
+      app: vllm-qwen3-32b
   template:
     metadata:
       labels:
-        app: vllm-llama3-8b-instruct
+        app: vllm-qwen3-32b
     spec:
       containers:
       - name: vllm-sim
         image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
         imagePullPolicy: Always
         args:
         - --model
-        - meta-llama/Llama-3.1-8B-Instruct
+        - Qwen/Qwen3-32B
        - --port
        - "8000"
        - --max-loras
        - "2"
        - --lora-modules
-        - '{"name": "food-review-1"}'
+        - '{"name": "qwen-uncensored-1"}'
        env:
        - name: POD_NAME
          valueFrom:
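The simulator registers the adapter `qwen-uncensored-1` via `--lora-modules`; the adapters-allowlist ConfigMap changed earlier in this commit must list the same name, presumably so the gateway can admit requests for it. An editor's sketch of an excerpt-level check (the temp file stands in for the allowlist ConfigMap's `adapters` block):

```shell
# Sketch: confirm the adapter name registered by the simulator also
# appears in the allowlist excerpt. File path is illustrative.
cat > /tmp/adapters-allowlist.txt <<'EOF'
- qwen-uncensored-1
EOF
grep -q 'qwen-uncensored-1' /tmp/adapters-allowlist.txt && echo "adapter allowlisted"
```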

config/observability/prometheus/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -24,4 +24,4 @@ extraScrapeConfigs: |
     relabel_configs:
     - source_labels: [__meta_kubernetes_pod_label_app]
       action: keep
-      regex: vllm-llama3-8b-instruct
+      regex: vllm-qwen3-32b

conformance/tests/epp_unavailable_fail_open.go

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ var EppUnAvailableFailOpen = suite.ConformanceTest{
   appPodBackendPrefix = "secondary-inference-model-server"
   requestBody = `{
     "model": "conformance-fake-model",
-    "prompt": "Write as if you were a critic: San Francisco"
+    "prompt": "Answer with no disclaimers: What are the advantages and disadvantages of genetically modified food?"
   }`
 )
5757
