Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ site

# MacOS generated files
**/.DS_Store

# Ignore all Chart.lock files anywhere under config/charts
config/charts/**/Chart.lock

# Ignore all .tgz files anywhere under config/charts
config/charts/**/*.tgz
23 changes: 23 additions & 0 deletions config/charts/epp-standalone/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
14 changes: 14 additions & 0 deletions config/charts/epp-standalone/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: v2
name: epp-standalone
description: A Helm chart for Endpoint Picker

type: application

version: 0.0.0

appVersion: "0.0.0"

dependencies:
- name: inference-extension
version: 0.0.0
repository: "file://../inference-extension"
1 change: 1 addition & 0 deletions config/charts/epp-standalone/templates/epp-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.config" . -}}
1 change: 1 addition & 0 deletions config/charts/epp-standalone/templates/epp-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.deployment" . -}}
1 change: 1 addition & 0 deletions config/charts/epp-standalone/templates/epp-gke.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.gke" . -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.lead-election-rbac" . -}}
1 change: 1 addition & 0 deletions config/charts/epp-standalone/templates/epp-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.rbac" . -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.sa-token-secret" . -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.service-monitor" . -}}
1 change: 1 addition & 0 deletions config/charts/epp-standalone/templates/epp-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{- include "inference-extension.service" . -}}
298 changes: 298 additions & 0 deletions config/charts/epp-standalone/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
inferenceExtension:
replicas: 1
image:
name: epp
hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension
tag: main
pullPolicy: Always
extProcPort: 9002
extraServicePorts:
- name: http
port: 8081
protocol: TCP
targetPort: 8081
env: []
pluginsConfigFile: "default-plugins.yaml"

endpointsServer:
standalone: true
# Required when standalone is true
# endpointSelector: app=vllm-llama3-8b-instruct
targetPorts: 8000
modelServerType: vllm # vllm, triton-tensorrt-llm


sidecar:
enabled: true
configMap:
name: envoy
# Because the template just dumps this section, the keys become filenames.
# The values MUST be strings (note the literal block scalar '|')
data:
envoy.yaml: |
admin:
address:
socket_address:
address: 127.0.0.1
port_value: 19000
access_log:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: /dev/null
static_resources:
listeners:
- name: envoy-proxy-ready-0.0.0.0-19001
address:
socket_address:
address: 0.0.0.0
port_value: 19001
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
stat_prefix: envoy-ready-http
route_config:
name: local_route
virtual_hosts:
- name: prometheus_stats
domains: ["*"]
routes:
- match:
prefix: "/stats/prometheus"
route:
cluster: "prometheus_stats"
http_filters:
- name: envoy.filters.http.health_check
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.health_check.v3.HealthCheck
pass_through_mode: false
headers:
- name: ":path"
string_match:
exact: "/ready"
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: vllm
address:
socket_address:
address: 0.0.0.0
port_value: 8081
per_connection_buffer_limit_bytes: 32768
access_log:
- name: envoy.access_loggers.file
filter:
response_flag_filter:
flags: ["NR"]
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: /dev/stdout
log_format:
text_format_source:
inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n"
filter_chains:
- name: vllm
filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
stat_prefix: http-8081
route_config:
name: vllm
virtual_hosts:
- name: vllm-default
domains: ["*"]
routes:
- match:
prefix: "/"
route:
cluster: original_destination_cluster
timeout: 86400s
idle_timeout: 86400s
upgrade_configs:
- upgrade_type: websocket
typed_per_filter_config:
envoy.filters.http.ext_proc:
"@type": type.googleapis.com/envoy.config.route.v3.FilterConfig
config: {}
http_filters:
- name: envoy.filters.http.ext_proc
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
grpc_service:
envoy_grpc:
cluster_name: ext_proc
authority: localhost:9002
timeout: 10s
processing_mode:
request_header_mode: SEND
response_header_mode: SEND
request_body_mode: FULL_DUPLEX_STREAMED
response_body_mode: FULL_DUPLEX_STREAMED
request_trailer_mode: SEND
response_trailer_mode: SEND
message_timeout: 1000s
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
suppress_envoy_headers: true
http2_protocol_options:
max_concurrent_streams: 100
initial_stream_window_size: 65536
initial_connection_window_size: 1048576
use_remote_address: true
normalize_path: true
merge_slashes: true
server_header_transformation: PASS_THROUGH
common_http_protocol_options:
headers_with_underscores_action: REJECT_REQUEST
path_with_escaped_slashes_action: UNESCAPE_AND_REDIRECT
access_log:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: /dev/stdout
log_format:
text_format_source:
inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n"
clusters:
- name: prometheus_stats
type: STATIC
connect_timeout: 0.250s
load_assignment:
cluster_name: prometheus_stats
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: 127.0.0.1
port_value: 19000
- name: original_destination_cluster
type: ORIGINAL_DST
connect_timeout: 1000s
lb_policy: CLUSTER_PROVIDED
circuit_breakers:
thresholds:
- max_connections: 40000
max_pending_requests: 40000
max_requests: 40000
original_dst_lb_config:
use_http_header: true
http_header_name: x-gateway-destination-endpoint
- name: ext_proc
type: STATIC
connect_timeout: 86400s
lb_policy: LEAST_REQUEST
circuit_breakers:
thresholds:
- max_connections: 40000
max_pending_requests: 40000
max_requests: 40000
max_retries: 1024
health_checks:
- timeout: 2s
interval: 10s
unhealthy_threshold: 3
healthy_threshold: 2
reuse_connection: true
grpc_health_check:
service_name: "envoy.service.ext_proc.v3.ExternalProcessor"
tls_options:
alpn_protocols: ["h2"]
transport_socket:
name: "envoy.transport_sockets.tls"
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
common_tls_context:
validation_context:
typed_extension_protocol_options:
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
explicit_http_config:
http2_protocol_options:
initial_stream_window_size: 65536
initial_connection_window_size: 1048576
load_assignment:
cluster_name: ext_proc
endpoints:
- locality:
region: ext_proc/e2e/0
lb_endpoints:
- endpoint:
address:
socket_address:
address: 127.0.0.1
port_value: 9002
load_balancing_weight: 1
name: envoy-sidecar
image: docker.io/envoyproxy/envoy:distroless-v1.33.2
command: "envoy"
args:
- "--service-node"
- "envoy-sidecar"
- "--log-level"
- "trace"
- "--cpuset-threads"
- "--drain-strategy"
- "immediate"
- "--drain-time-s"
- "60"
- "-c"
- "/etc/envoy/envoy.yaml"
env:
- name: NS_NAME
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
ports:
- containerPort: 8081
name: http-8081
- containerPort: 19001
name: metrics-19001
resources:
requests:
cpu: 100m
memory: 512Mi

readinessProbe:
failureThreshold: 1
httpGet:
path: /ready
port: 19001
scheme: HTTP
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 1

volumeMounts:
- name: config
mountPath: /etc/envoy
readOnly: true
volumes:
- name: config
configMap:
name: envoy
items:
- key: envoy.yaml
path: envoy.yaml
monitoring:
interval: "10s"
# Prometheus ServiceMonitor will be created when enabled for EPP metrics collection
prometheus:
enabled: false
auth:
# To allow unauthenticated /metrics access (e.g., for debugging with curl), set to false
enabled: true

tracing:
enabled: false

latencyPredictor:
enabled: false
23 changes: 23 additions & 0 deletions config/charts/inference-extension/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
10 changes: 10 additions & 0 deletions config/charts/inference-extension/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
apiVersion: v2
name: inference-extension
description: A library Helm chart for Endpoint Picker

type: library

version: 0.0.0

appVersion: "0.0.0"

Loading