-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy path04-aura-2-setup.values.yaml
More file actions
177 lines (156 loc) · 5.21 KB
/
04-aura-2-setup.values.yaml
File metadata and controls
177 lines (156 loc) · 5.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# Sample Helm values for Aura-2 deployment with English and Spanish language support
# This configuration supports both English and Spanish Aura-2 models
#
# Usage:
#   helm install deepgram ./charts/deepgram-self-hosted -f samples/04-aura-2-setup.values.yaml

global:
  # pullSecretRef should refer to a K8s secret that
  # must be created prior to installing this Chart.
  # Consult the [official Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/) for best practices on configuring Secrets for use in your cluster.
  #
  # You can create a secret for your image pull credentials
  # with the following commands:
  # ```bash
  # docker login quay.io
  # kubectl create secret docker-registry dg-regcred \
  #     --docker-server=quay.io \
  #     --docker-username='QUAY_DG_USER' \
  #     --docker-password='QUAY_DG_PASSWORD'
  # ```
  pullSecretRef: "dg-regcred"

  # deepgramSecretRef should refer to a K8s secret that
  # must be created prior to installing this Chart.
  # Consult the [official Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/) for best practices on configuring Secrets for use in your cluster.
  #
  # You can create a secret for your Deepgram self-hosted API key
  # with the following command:
  # ```bash
  # kubectl create secret generic dg-self-hosted-api-key --from-literal=DEEPGRAM_API_KEY='<id>'
  # ```
  deepgramSecretRef: "dg-self-hosted-api-key"

# Configure scaling for multi-language deployment.
# NOTE(review): the original file declared `scaling:` twice at the top level
# (once for `replicas`, once for `auto`). YAML last-key-wins semantics would
# silently discard the replica counts, so both sections are merged here.
scaling:
  replicas:
    # Deploy separate API instances for each language
    api: 2
    # Deploy separate Engine instances for each language
    engine: 2
  # Auto-scaling configuration for Aura-2 workloads
  auto:
    # Can toggle to true to enable autoscaling. Make sure to set a value for one of the available metrics
    enabled: false
    engine:
      minReplicas: 1
      maxReplicas: 4
      metrics:
        requestCapacityRatio: 0.8
        speechToText:
          batch:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative
          streaming:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative
        textToSpeech:
          batch:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative

# API configuration for English Aura-2
api:
  image:
    tag: release-260430
  # Configure driver pool to connect to both language engines
  driverPool:
    standard:
      timeoutBackoff: 1.2
      retrySleep: "2s"
      retryBackoff: 1.6
      maxResponseSize: "1073741824"

# Engine configuration for Aura-2
engine:
  image:
    tag: release-260430
  # Aura-2 requires more resources than standard models
  resources:
    requests:
      memory: "32Gi"
      cpu: "4000m"
      gpu: 2
    limits:
      memory: "40Gi"
      cpu: "8000m"
      gpu: 2
  # Enable automatic model management for Aura-2 models
  modelManager:
    models:
      # Add your Aura-2 model links here
      # Replace with actual model links provided by Deepgram
      add:
        # Example (replace with your actual model links):
        # - https://path.to/aura-2-en-model
        # - https://path.to/aura-2-es-model
        # - ...
      remove:
        # - https://link-to-old-model-1.dg  # Replace these with identifiers for any models already present
        # - https://link-to-old-model-2.dg  # in the EFS that you'd like removed. For a new installation,
        # - name-of-old-model-3.dg          # this will likely be empty.
        # - ...
    # Configure volume storage for models
    volumes:
      # For AWS EKS deployments
      aws:
        efs:
          enabled: false
          # fileSystemId: "fs-xxxxxxxxx"
          forceDownload: false
      # For GCP GKE deployments
      gcp:
        gpd:
          enabled: false
          # storageCapacity: "100G"
          # volumeHandle: "projects/PROJECT/zones/ZONE/disks/DISK"
      # Or use custom PVC
      customVolumeClaim:
        enabled: false
        # name: "deepgram-models-pvc"
        # NOTE(review): the flattened original left the owner of modelsDirectory
        # ambiguous; it is nested under customVolumeClaim here — confirm against
        # the chart's values schema before relying on it.
        modelsDirectory: "/"

# Enable License Proxy for production Aura-2 deployments
licenseProxy:
  enabled: true
  deploySecondReplica: false
  keepUpstreamServerAsBackup: true
  image:
    tag: release-260430

# Monitoring configuration for Aura-2
# Enable Prometheus stack for metrics collection
kube-prometheus-stack:
  enabled: true
  fullnameOverride: "dg-prometheus-stack"

prometheus-adapter:
  enabled: true

# GPU Operator for NVIDIA GPU support (required for Aura-2)
gpu-operator:
  enabled: true
  driver:
    enabled: true
    version: "570.172.08"
  toolkit:
    enabled: true
    # Quoted to keep the tag a plain string under all YAML loaders.
    version: "v1.15.0-ubi8"

# Additional environment variables for Aura-2 Engine containers
# These will be added via configmap in the templates
aura2:
  # Aura-2 specific configuration
  enabled: true
  # English language configuration
  english:
    enabled: true
    maxBatchSize: 8
    t2cUuid: "0ec06c9b-0aa0-44d0-a001-3ec57d32229e"
    c2aUuid: "2e5096c7-7bf1-435e-bbdd-f673f88d0ebd"
    # CUDA device list is a string, not a number — keep quoted.
    cudaVisibleDevices: "0,1"
  # Spanish language configuration
  spanish:
    enabled: true
    maxBatchSize: 8
    t2cUuid: "c053c7a8-7317-4de8-8a50-7e01c54e7ba9"
    c2aUuid: "04355c1e-8148-478d-9f6c-6a6c54ec3591"
    cudaVisibleDevices: "2,3"