diff --git a/helm-charts/agentqna/gaudi-values.yaml b/helm-charts/agentqna/gaudi-values.yaml index 4dcc1e09c..fc040abc0 100644 --- a/helm-charts/agentqna/gaudi-values.yaml +++ b/helm-charts/agentqna/gaudi-values.yaml @@ -20,9 +20,6 @@ vllm: VLLM_SKIP_WARMUP: true shmSize: 16Gi extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"] - securityContext: - runAsUser: 0 - runAsNonRoot: false supervisor: llm_endpoint_url: http://{{ .Release.Name }}-vllm diff --git a/helm-charts/chatqna/gaudi-vllm-values.yaml b/helm-charts/chatqna/gaudi-vllm-values.yaml index 8b1ffb539..58b031c2d 100644 --- a/helm-charts/chatqna/gaudi-vllm-values.yaml +++ b/helm-charts/chatqna/gaudi-vllm-values.yaml @@ -15,18 +15,8 @@ vllm: resources: limits: habana.ai/gaudi: 1 - securityContext: - runAsUser: 0 - runAsNonRoot: false startupProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 failureThreshold: 360 - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" OMPI_MCA_btl_vader_single_copy_mechanism: "none" diff --git a/helm-charts/chatqna/guardrails-gaudi-values.yaml b/helm-charts/chatqna/guardrails-gaudi-values.yaml index 91f4e3fb0..b5d8fa677 100644 --- a/helm-charts/chatqna/guardrails-gaudi-values.yaml +++ b/helm-charts/chatqna/guardrails-gaudi-values.yaml @@ -87,18 +87,8 @@ vllm: resources: limits: habana.ai/gaudi: 1 - securityContext: - runAsUser: 0 - runAsNonRoot: false startupProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 failureThreshold: 360 - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" OMPI_MCA_btl_vader_single_copy_mechanism: "none" diff --git a/helm-charts/common/agent/gaudi-values.yaml b/helm-charts/common/agent/gaudi-values.yaml index be90b710d..89457d6ba 100644 --- a/helm-charts/common/agent/gaudi-values.yaml +++ b/helm-charts/common/agent/gaudi-values.yaml @@ -20,8 +20,5 @@ vllm: PT_HPU_ENABLE_LAZY_COLLECTIVES: true VLLM_SKIP_WARMUP: true extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"] - securityContext: - runAsUser: 0 - runAsNonRoot: false llm_endpoint_url: http://{{ .Release.Name }}-vllm diff --git a/helm-charts/common/llm-uservice/vllm-docsum-gaudi-values.yaml b/helm-charts/common/llm-uservice/vllm-docsum-gaudi-values.yaml index dba85f13f..84e2716ae 100644 --- a/helm-charts/common/llm-uservice/vllm-docsum-gaudi-values.yaml +++ b/helm-charts/common/llm-uservice/vllm-docsum-gaudi-values.yaml @@ -25,8 +25,5 @@ vllm: resources: limits: habana.ai/gaudi: 1 - securityContext: - runAsUser: 0 - runAsNonRoot: false startupProbe: failureThreshold: 360 diff --git a/helm-charts/common/llm-uservice/vllm-gaudi-values.yaml b/helm-charts/common/llm-uservice/vllm-gaudi-values.yaml index 3302fe245..cedcaa130 100644 --- a/helm-charts/common/llm-uservice/vllm-gaudi-values.yaml +++ b/helm-charts/common/llm-uservice/vllm-gaudi-values.yaml @@ -17,9 +17,6 @@ vllm: resources: limits: habana.ai/gaudi: 1 - securityContext: - runAsUser: 0 - runAsNonRoot: false startupProbe: failureThreshold: 360 diff --git a/helm-charts/common/vllm/gaudi-values.yaml b/helm-charts/common/vllm/gaudi-values.yaml index b1780bf93..0266c45b6 100644 --- a/helm-charts/common/vllm/gaudi-values.yaml +++ b/helm-charts/common/vllm/gaudi-values.yaml @@ -13,10 +13,6 @@ resources: limits: habana.ai/gaudi: 1 -# NOTE: opea/vllm-gaudi image requires running as root during runtime -securityContext: - runAsUser: 0 - runAsNonRoot: false -# NOTE: opea/vllm-gaudi needsd more warm up time +# NOTE: opea/vllm-gaudi needs more warm up time startupProbe: failureThreshold: 360 diff --git a/helm-charts/common/vllm/templates/configmap.yaml b/helm-charts/common/vllm/templates/configmap.yaml index 01a7e92e3..c64174f93 100644 --- a/helm-charts/common/vllm/templates/configmap.yaml +++ b/helm-charts/common/vllm/templates/configmap.yaml @@ -21,6 +21,7 @@ data: NUMBA_CACHE_DIR: "/tmp" HF_HOME: "/tmp/.cache/huggingface" XDG_CONFIG_HOME: "/tmp" + TORCHINDUCTOR_CACHE_DIR: "/tmp/pytorchinductor_cache" # https://github.com/outlines-dev/outlines/blob/main/outlines/caching.py#L14-L29 OUTLINES_CACHE_DIR: "/tmp/.cache/outlines" {{- if .Values.VLLM_CPU_KVCACHE_SPACE }} diff --git a/helm-charts/docsum/gaudi-vllm-values.yaml b/helm-charts/docsum/gaudi-vllm-values.yaml index ade2eb510..88125cdd9 100644 --- a/helm-charts/docsum/gaudi-vllm-values.yaml +++ b/helm-charts/docsum/gaudi-vllm-values.yaml @@ -19,18 +19,8 @@ vllm: resources: limits: habana.ai/gaudi: 1 - securityContext: - runAsUser: 0 - runAsNonRoot: false startupProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 failureThreshold: 360 - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" OMPI_MCA_btl_vader_single_copy_mechanism: "none"