-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy path04-aura-2-setup.values.yaml
More file actions
177 lines (156 loc) · 5.21 KB
/
04-aura-2-setup.values.yaml
File metadata and controls
177 lines (156 loc) · 5.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# Sample Helm values for Aura-2 deployment with English and Spanish language support
# This configuration supports both English and Spanish Aura-2 models
#
# Usage:
#   helm install deepgram ./charts/deepgram-self-hosted -f samples/04-aura-2-setup.values.yaml

global:
  # pullSecretRef should refer to a K8s secret that
  # must be created prior to installing this Chart.
  # Consult the [official Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/) for best practices on configuring Secrets for use in your cluster.
  #
  # You can create a secret for your image pull credentials
  # with the following commands:
  # ```bash
  # docker login quay.io
  # kubectl create secret docker-registry dg-regcred \
  #     --docker-server=quay.io \
  #     --docker-username='QUAY_DG_USER' \
  #     --docker-password='QUAY_DG_PASSWORD'
  # ```
  pullSecretRef: "dg-regcred"

  # deepgramSecretRef should refer to a K8s secret that
  # must be created prior to installing this Chart.
  # Consult the [official Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/) for best practices on configuring Secrets for use in your cluster.
  #
  # You can create a secret for your Deepgram self-hosted API key
  # with the following command:
  # ```bash
  # kubectl create secret generic dg-self-hosted-api-key --from-literal=DEEPGRAM_API_KEY='<id>'
  # ```
  deepgramSecretRef: "dg-self-hosted-api-key"

# Configure scaling for multi-language deployment.
# NOTE(review): the original file declared `scaling:` twice at the top level
# (once for `replicas`, once for `auto`). YAML last-key-wins semantics would
# silently discard the replica counts, so both sections are merged here.
scaling:
  replicas:
    # Deploy separate API instances for each language
    api: 2
    # Deploy separate Engine instances for each language
    engine: 2
  # Auto-scaling configuration for Aura-2 workloads
  auto:
    # Can toggle to true to enable autoscaling. Make sure to set a value for one of the available metrics
    enabled: false
    engine:
      minReplicas: 1
      maxReplicas: 4
      metrics:
        requestCapacityRatio: 0.8
        speechToText:
          batch:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative
          streaming:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative
        textToSpeech:
          batch:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative

# API configuration for English Aura-2
api:
  image:
    tag: release-260430
  # Configure driver pool to connect to both language engines
  driverPool:
    standard:
      timeoutBackoff: 1.2
      retrySleep: "2s"
      retryBackoff: 1.6
      maxResponseSize: "1073741824"

# Engine configuration for Aura-2
engine:
  image:
    tag: release-260430
  # Aura-2 requires more resources than standard models
  resources:
    requests:
      memory: "32Gi"
      cpu: "4000m"
      gpu: 2
    limits:
      memory: "40Gi"
      cpu: "8000m"
      gpu: 2
  # Enable automatic model management for Aura-2 models
  modelManager:
    models:
      # Add your Aura-2 model links here
      # Replace with actual model links provided by Deepgram
      add:
        # Example (replace with your actual model links):
        # - https://path.to/aura-2-en-model
        # - https://path.to/aura-2-es-model
        # - ...
      remove:
        # - https://link-to-old-model-1.dg  # Replace these with identifiers for any models already present
        # - https://link-to-old-model-2.dg  # in the EFS that you'd like removed. For a new installation,
        # - name-of-old-model-3.dg          # this will likely be empty.
        # - ...
    # Configure volume storage for models
    volumes:
      # For AWS EKS deployments
      aws:
        efs:
          enabled: false
          # fileSystemId: "fs-xxxxxxxxx"
          forceDownload: false
      # For GCP GKE deployments
      gcp:
        gpd:
          enabled: false
          # storageCapacity: "100G"
          # volumeHandle: "projects/PROJECT/zones/ZONE/disks/DISK"
      # Or use custom PVC
      customVolumeClaim:
        enabled: false
        # name: "deepgram-models-pvc"
        # NOTE(review): the flattened original left the owner of modelsDirectory
        # ambiguous; it is nested under customVolumeClaim here — confirm against
        # the chart's values schema before relying on it.
        modelsDirectory: "/"

# Enable License Proxy for production Aura-2 deployments
licenseProxy:
  enabled: true
  deploySecondReplica: false
  keepUpstreamServerAsBackup: true
  image:
    tag: release-260430

# Monitoring configuration for Aura-2
# Enable Prometheus stack for metrics collection
kube-prometheus-stack:
  enabled: true
  fullnameOverride: "dg-prometheus-stack"

prometheus-adapter:
  enabled: true

# GPU Operator for NVIDIA GPU support (required for Aura-2)
gpu-operator:
  enabled: true
  driver:
    enabled: true
    version: "570.172.08"
  toolkit:
    enabled: true
    # Quoted to keep the tag a plain string under all YAML loaders.
    version: "v1.15.0-ubi8"

# Additional environment variables for Aura-2 Engine containers
# These will be added via configmap in the templates
aura2:
  # Aura-2 specific configuration
  enabled: true
  # English language configuration
  english:
    enabled: true
    maxBatchSize: 8
    t2cUuid: "0ec06c9b-0aa0-44d0-a001-3ec57d32229e"
    c2aUuid: "2e5096c7-7bf1-435e-bbdd-f673f88d0ebd"
    # CUDA device list is a string, not a number — keep quoted.
    cudaVisibleDevices: "0,1"
  # Spanish language configuration
  spanish:
    enabled: true
    maxBatchSize: 8
    t2cUuid: "c053c7a8-7317-4de8-8a50-7e01c54e7ba9"
    c2aUuid: "04355c1e-8148-478d-9f6c-6a6c54ec3591"
    cudaVisibleDevices: "2,3"