opea-project · poussa · Mar 19, 2025 · Mar 19, 2025 · eero-t · Mar 19, 2025
@@ -20,9 +20,6 @@ vllm:
   VLLM_SKIP_WARMUP: true
   shmSize: 16Gi
   extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
-  securityContext:
-    runAsUser: 0
-    runAsNonRoot: false
 
 supervisor:
   llm_endpoint_url: http://{{ .Release.Name }}-vllm

@@ -15,18 +15,8 @@ vllm:
   resources:
     limits:
       habana.ai/gaudi: 1
-  securityContext:
-    runAsUser: 0
-    runAsNonRoot: false
   startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
     failureThreshold: 360
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
 
   PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
   OMPI_MCA_btl_vader_single_copy_mechanism: "none"

@@ -87,18 +87,8 @@ vllm:
   resources:
     limits:
       habana.ai/gaudi: 1
-  securityContext:
-    runAsUser: 0
-    runAsNonRoot: false
   startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
     failureThreshold: 360
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
 
   PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
   OMPI_MCA_btl_vader_single_copy_mechanism: "none"

@@ -20,8 +20,5 @@ vllm:
   PT_HPU_ENABLE_LAZY_COLLECTIVES: true
   VLLM_SKIP_WARMUP: true
   extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
-  securityContext:
-    runAsUser: 0
-    runAsNonRoot: false
 
 llm_endpoint_url: http://{{ .Release.Name }}-vllm
@@ -25,8 +25,5 @@ vllm:
   resources:
     limits:
       habana.ai/gaudi: 1
-  securityContext:
-    runAsUser: 0
-    runAsNonRoot: false
   startupProbe:
     failureThreshold: 360
@@ -17,9 +17,6 @@ vllm:
   resources:
     limits:
       habana.ai/gaudi: 1
-  securityContext:
-    runAsUser: 0
-    runAsNonRoot: false
   startupProbe:
     failureThreshold: 360
 

@@ -13,10 +13,6 @@ resources:
   limits:
     habana.ai/gaudi: 1
 
-# NOTE: opea/vllm-gaudi image requires running as root during runtime
-securityContext:
-  runAsUser: 0
-  runAsNonRoot: false
-# NOTE: opea/vllm-gaudi needsd more warm up time
+# NOTE: opea/vllm-gaudi needs more warm up time
 startupProbe:
   failureThreshold: 360
@@ -21,6 +21,7 @@ data:
   NUMBA_CACHE_DIR: "/tmp"
   HF_HOME: "/tmp/.cache/huggingface"
   XDG_CONFIG_HOME: "/tmp"
+  TORCHINDUCTOR_CACHE_DIR: "/tmp/pytorchinductor_cache"
   # https://github.com/outlines-dev/outlines/blob/main/outlines/caching.py#L14-L29
   OUTLINES_CACHE_DIR: "/tmp/.cache/outlines"
   {{- if .Values.VLLM_CPU_KVCACHE_SPACE }}

@@ -19,18 +19,8 @@ vllm:
   resources:
     limits:
       habana.ai/gaudi: 1
-  securityContext:
-    runAsUser: 0
-    runAsNonRoot: false
   startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
     failureThreshold: 360
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
 
   PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
   OMPI_MCA_btl_vader_single_copy_mechanism: "none"