self-hosted-resources/charts/deepgram-self-hosted/samples/02-basic-setup-gcp.yaml at main · deepgram/self-hosted-resources · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# See the Chart [README](https://github.com/deepgram/self-hosted-resources/blob/main/charts/deepgram-self-hosted#values)
#   for documentation on all available options.

global:
  # pullSecretRef should refer to a K8s secret that
  # must be created prior to installing this Chart.
  # Consult the [official Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/) for best practices on configuring Secrets for use in your cluster.
  #
  # You can create a secret for your image pull credentials
  # with the following commands:
  # ```bash
  # docker login quay.io
  # kubectl create secret docker-registry dg-regcred \
  #   --docker-server=quay.io \
  #   --docker-username='QUAY_DG_USER' \
  #   --docker-password='QUAY_DG_PASSWORD'
  # ```
  pullSecretRef: "dg-regcred"

  # deepgramSecretRef should refer to a K8s secret that
  # must be created prior to installing this Chart.
  # Consult the [official Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/secret/) for best practices on configuring Secrets for use in your cluster.
  #
  # You can create a secret for your Deepgram self-hosted API key
  # with the following command:
  # ```bash
  # kubectl create secret generic dg-self-hosted-api-key --from-literal=DEEPGRAM_API_KEY='<id>'
  # ```
  deepgramSecretRef: "dg-self-hosted-api-key"

scaling:
  replicas:
    api: 1
    engine: 1
  auto:
    # Can toggle to true to enable autoscaling. Make sure to set a value for one of the available metrics
    enabled: false
    engine:
      metrics:
        speechToText:
          batch:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative
          streaming:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative
        textToSpeech:
          batch:
            requestsPerPod: # Discuss a reasonable value with your Deepgram Account Representative
        # Discuss a reasonable value with your Deepgram Account Representative
        # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling
        requestCapacityRatio:

agent:
  enabled: false

api:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: k8s.deepgram.com/node-type
                operator: In
                values:
                  - api
  resources:
    requests:
      memory: "4Gi"
      cpu: "2000m"
    limits:
      memory: "12Gi"
      cpu: "4000m"

engine:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: k8s.deepgram.com/node-type
                operator: In
                values:
                  - engine
  resources:
    requests:
      memory: "24Gi"
      cpu: "5000m"
      gpu: 1
    limits:
      memory: "40Gi"
      cpu: "8000m"
      gpu: 1
  # Discuss a reasonable value with your Deepgram Account Representative
  # If not using autoscaling, can be left empty, but must be set if using
  # autoscaling with scaling.auto.engine.metrics.requestCapacityRatio
  concurrencyLimit:
    activeRequests:
  modelManager:
    volumes:
      gcp:
        gpd:
          enabled: true
          # Replace with your Google disk handle
          volumeHandle: "projects/<your-project-id>/zones/<your-zone>/disks/<your-disk>"

licenseProxy:
  enabled: true

  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: k8s.deepgram.com/node-type
                operator: In
                values:
                  - license-proxy
  resources:
    requests:
      memory: "4Gi"
      cpu: "1000m"
    limits:
      memory: "8Gi"
      cpu: "2000m"

gpu-operator:
  # GKE will manage the driver and toolkit installation for us by default.
  enabled: false