InftyAI · InftyAI-Agent · Sep 12, 2024 · Sep 12, 2024
diff --git a/.github/workflows/publish-helm-chart.yaml b/.github/workflows/publish-helm-chart.yaml
diff --git a/Makefile b/Makefile
@@ -295,15 +295,14 @@ $(HELMIFY): $(LOCALBIN)
 
 .PHONY: helm
 helm: manifests kustomize helmify
-	$(KUBECTL) create namespace llmaz-system --dry-run=client -o yaml | $(KUBECTL) apply -f -
 	$(KUSTOMIZE) build config/default | $(HELMIFY) -crd-dir
 
 .PHONY: helm-install
 helm-install: helm
 	helm upgrade --install llmaz ./chart --namespace llmaz-system --create-namespace -f ./chart/values.global.yaml
 
 .PHONY: helm-package
-helm-package:
+helm-package: helm
 	# Make sure the appended values always start on a new line.
 	printf "\n" >> ./chart/values.yaml
 	cat ./chart/values.global.yaml >> ./chart/values.yaml

diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go
@@ -131,10 +131,14 @@ const (
 	DraftRole ModelRole = "draft"
 )
 
-type ModelRepresentative struct {
+// ModelRefer refers to a created Model together with its role.
+type ModelRefer struct {
 	// Name represents the model name.
 	Name ModelName `json:"name"`
 	// Role represents the model role once more than one model is required.
+	// For example, the draft role means running with SpeculativeDecoding,
+	// and the default backend arguments will be looked up in the backendRuntime
+	// under the name speculative-decoding.
 	// +kubebuilder:validation:Enum={main,draft}
 	// +kubebuilder:default=main
 	// +optional
@@ -148,7 +152,7 @@ type ModelClaims struct {
 	// speculative decoding, then one model is main(target) model, another one
 	// is draft model.
 	// +kubebuilder:validation:MinItems=1
-	Models []ModelRepresentative `json:"models,omitempty"`
+	Models []ModelRefer `json:"models,omitempty"`
 	// InferenceFlavors represents a list of flavors with fungibility supported
 	// to serve the model.
 	// - If not set, always apply with the 0-index model by default.

diff --git a/api/core/v1alpha1/zz_generated.deepcopy.go b/api/core/v1alpha1/zz_generated.deepcopy.go
diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go
@@ -21,16 +21,15 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-type InferenceMode string
-
-const (
-	DefaultInferenceMode             InferenceMode = "Default"
-	SpeculativeDecodingInferenceMode InferenceMode = "SpeculativeDecoding"
-)
-
+// BackendRuntimeArg is a set of preset arguments provided for ease of use.
+// Do not edit the preset names unless the argument name is set explicitly
+// in the Playground backendRuntimeConfig.
 type BackendRuntimeArg struct {
-	Mode  InferenceMode `json:"mode"`
-	Flags []string      `json:"flags,omitempty"`
+	// Name represents the identifier of the backendRuntime argument.
+	Name string `json:"name"`
+	// Flags represents all the preset configurations.
+	// A flag wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
+	Flags []string `json:"flags,omitempty"`
 }
 
 // BackendRuntimeSpec defines the desired state of BackendRuntime
@@ -43,11 +42,8 @@ type BackendRuntimeSpec struct {
 	// Version represents the default version of the backendRuntime.
 	// It will be appended to the image as a tag.
 	Version string `json:"version"`
-	// Args represents the args of the backendRuntime.
-	// They can be appended or overwritten by the Playground args.
-	// The key is the inference option, like default one or advanced
-	// speculativeDecoding, the values are the corresponding args.
-	// Flag around with {{ .XXX }} is a flag waiting for render.
+	// Args represents the preset arguments of the backendRuntime.
+	// They can be appended or overwritten by the Playground backendRuntimeConfig.
 	Args []BackendRuntimeArg `json:"args,omitempty"`
 	// Envs represents the environments set to the container.
 	// +optional

diff --git a/chart/Chart.yaml b/chart/Chart.yaml
@@ -13,9 +13,9 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.0.2
+version: 0.0.3
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "0.0.6"
+appVersion: 0.0.7
diff --git a/chart/crds/backendruntime-crd.yaml b/chart/crds/backendruntime-crd.yaml
@@ -42,21 +42,27 @@ spec:
             properties:
               args:
                 description: |-
-                  Args represents the args of the backendRuntime.
-                  They can be appended or overwritten by the Playground args.
-                  The key is the inference option, like default one or advanced
-                  speculativeDecoding, the values are the corresponding args.
-                  Flag around with {{ .XXX }} is a flag waiting for render.
+                  Args represents the preset arguments of the backendRuntime.
+                  They can be appended or overwritten by the Playground backendRuntimeConfig.
                 items:
+                  description: |-
+                    BackendRuntimeArg is a set of preset arguments provided for ease of use.
+                    Do not edit the preset names unless the argument name is set explicitly
+                    in the Playground backendRuntimeConfig.
                   properties:
                     flags:
+                      description: |-
+                        Flags represents all the preset configurations.
+                        A flag wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
                       items:
                         type: string
                       type: array
-                    mode:
+                    name:
+                      description: Name represents the identifier of the backendRuntime
+                        argument.
                       type: string
                   required:
-                  - mode
+                  - name
                   type: object
                 type: array
               commands:

diff --git a/chart/crds/playground-crd.yaml b/chart/crds/playground-crd.yaml
@@ -259,14 +259,19 @@ spec:
                       speculative decoding, then one model is main(target) model, another one
                       is draft model.
                     items:
+                      description: ModelRefer refers to a created Model with its
+                        role.
                       properties:
                         name:
                           description: Name represents the model name.
                           type: string
                         role:
                           default: main
-                          description: Role represents the model role once more than
-                            one model is required.
+                          description: |-
+                            Role represents the model role once more than one model is required.
+                            For example, the draft role means running with SpeculativeDecoding,
+                            and the default backend arguments will be looked up in the backendRuntime
+                            under the name speculative-decoding.
                           enum:
                           - main
                           - draft

diff --git a/chart/crds/service-crd.yaml b/chart/crds/service-crd.yaml
@@ -84,14 +84,19 @@ spec:
                       speculative decoding, then one model is main(target) model, another one
                       is draft model.
                     items:
+                      description: ModelRefer refers to a created Model with its
+                        role.
                       properties:
                         name:
                           description: Name represents the model name.
                           type: string
                         role:
                           default: main
-                          description: Role represents the model role once more than
-                            one model is required.
+                          description: |-
+                            Role represents the model role once more than one model is required.
+                            For example, the draft role means running with SpeculativeDecoding,
+                            and the default backend arguments will be looked up in the backendRuntime
+                            under the name speculative-decoding.
                           enum:
                           - main
                           - draft

diff --git a/chart/templates/backends/llamacpp.yaml b/chart/templates/backends/llamacpp.yaml
@@ -12,16 +12,18 @@ spec:
     - ./llama-server
   image: ghcr.io/ggerganov/llama.cpp
   version: server
+  # Do not edit the preset argument name unless you know what you're doing.
+  # Feel free to add more arguments to suit your requirements.
   args:
-    - mode: Default
+    - name: default
       flags:
         - -m
         - "{{`{{ .ModelPath }}`}}"
         - --host
         - "0.0.0.0"
         - --port
         - "8080"
-    - mode: SpeculativeDecoding
+    - name: speculative-decoding
       flags:
         - -m
         - "{{`{{ .ModelPath }}`}}"

diff --git a/chart/templates/backends/sglang.yaml b/chart/templates/backends/sglang.yaml
@@ -14,8 +14,10 @@ spec:
     - sglang.launch_server
   image: lmsysorg/sglang
   version: v0.2.10-cu121
+  # Do not edit the preset argument name unless you know what you're doing.
+  # Feel free to add more arguments to suit your requirements.
   args:
-    - mode: Default
+    - name: default
       flags:
         - --model-path
         - "{{`{{ .ModelPath }}`}}"

diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
@@ -14,8 +14,10 @@ spec:
     - vllm.entrypoints.openai.api_server
   image: vllm/vllm-openai
   version: v0.6.0
+  # Do not edit the preset argument name unless you know what you're doing.
+  # Feel free to add more arguments to suit your requirements.
   args:
-    - mode: Default
+    - name: default
       flags:
         - --model
         - "{{`{{ .ModelPath }}`}}"
@@ -25,7 +27,7 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
-    - mode: SpeculativeDecoding
+    - name: speculative-decoding
       flags:
         - --model
         - "{{`{{ .ModelPath }}`}}"

diff --git a/client-go/applyconfiguration/core/v1alpha1/modelclaims.go b/client-go/applyconfiguration/core/v1alpha1/modelclaims.go
diff --git a/...tion/core/v1alpha1/modelrepresentative.go → ...configuration/core/v1alpha1/modelrefer.go b/...tion/core/v1alpha1/modelrepresentative.go → ...configuration/core/v1alpha1/modelrefer.go
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go
diff --git a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
@@ -43,21 +43,27 @@ spec:
             properties:
               args:
                 description: |-
-                  Args represents the args of the backendRuntime.
-                  They can be appended or overwritten by the Playground args.
-                  The key is the inference option, like default one or advanced
-                  speculativeDecoding, the values are the corresponding args.
-                  Flag around with {{ .XXX }} is a flag waiting for render.
+                  Args represents the preset arguments of the backendRuntime.
+                  They can be appended or overwritten by the Playground backendRuntimeConfig.
                 items:
+                  description: |-
+                    BackendRuntimeArg is a set of preset arguments provided for ease of use.
+                    Do not edit the preset names unless the argument name is set explicitly
+                    in the Playground backendRuntimeConfig.
                   properties:
                     flags:
+                      description: |-
+                        Flags represents all the preset configurations.
+                        A flag wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
                       items:
                         type: string
                       type: array
-                    mode:
+                    name:
+                      description: Name represents the identifier of the backendRuntime
+                        argument.
                       type: string
                   required:
-                  - mode
+                  - name
                   type: object
                 type: array
               commands:

diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
@@ -260,14 +260,19 @@ spec:
                       speculative decoding, then one model is main(target) model, another one
                       is draft model.
                     items:
+                      description: ModelRefer refers to a created Model with its
+                        role.
                       properties:
                         name:
                           description: Name represents the model name.
                           type: string
                         role:
                           default: main
-                          description: Role represents the model role once more than
-                            one model is required.
+                          description: |-
+                            Role represents the model role once more than one model is required.
+                            For example, the draft role means running with SpeculativeDecoding,
+                            and the default backend arguments will be looked up in the backendRuntime
+                            under the name speculative-decoding.
                           enum:
                           - main
                           - draft