diff --git a/README.md b/README.md index eb7e846e..8944eb37 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,11 @@ spec: source: modelHub: modelID: facebook/opt-125m - inferenceFlavors: - - name: t4 # GPU type - requests: - nvidia.com/gpu: 1 + inferenceConfig: + flavors: + - name: default # Configure GPU type + requests: + nvidia.com/gpu: 1 ``` #### Inference Playground diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go index 3ba757b1..017edbfb 100644 --- a/api/core/v1alpha1/model_types.go +++ b/api/core/v1alpha1/model_types.go @@ -122,6 +122,15 @@ type Flavor struct { Params map[string]string `json:"params,omitempty"` } +// InferenceConfig represents the inference configurations for the model. +type InferenceConfig struct { + // Flavors represents the accelerator requirements to serve the model. + // Flavors are fungible following the priority represented by the slice order. + // +kubebuilder:validation:MaxItems=8 + // +optional + Flavors []Flavor `json:"flavors,omitempty"` +} + type ModelName string // ModelClaim represents claiming for one model, it's the standard claimMode @@ -188,11 +197,8 @@ type ModelSpec struct { // Source represents the source of the model, there're several ways to load // the model such as loading from huggingface, OCI registry, s3, host path and so on. Source ModelSource `json:"source"` - // InferenceFlavors represents the accelerator requirements to serve the model. - // Flavors are fungible following the priority represented by the slice order. - // +kubebuilder:validation:MaxItems=8 - // +optional - InferenceFlavors []Flavor `json:"inferenceFlavors,omitempty"` + // InferenceConfig represents the inference configurations for the model. + InferenceConfig *InferenceConfig `json:"inferenceConfig,omitempty"` } const ( diff --git a/api/core/v1alpha1/zz_generated.deepcopy.go b/api/core/v1alpha1/zz_generated.deepcopy.go index 7c94dbca..9534b3c2 100644 --- a/api/core/v1alpha1/zz_generated.deepcopy.go +++ b/api/core/v1alpha1/zz_generated.deepcopy.go @@ -62,6 +62,28 @@ func (in *Flavor) DeepCopy() *Flavor { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceConfig) DeepCopyInto(out *InferenceConfig) { + *out = *in + if in.Flavors != nil { + in, out := &in.Flavors, &out.Flavors + *out = make([]Flavor, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceConfig. +func (in *InferenceConfig) DeepCopy() *InferenceConfig { + if in == nil { + return nil + } + out := new(InferenceConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
 func (in *ModelClaim) DeepCopyInto(out *ModelClaim) {
 	*out = *in
@@ -198,12 +220,10 @@ func (in *ModelSource) DeepCopy() *ModelSource {
 func (in *ModelSpec) DeepCopyInto(out *ModelSpec) {
 	*out = *in
 	in.Source.DeepCopyInto(&out.Source)
-	if in.InferenceFlavors != nil {
-		in, out := &in.InferenceFlavors, &out.InferenceFlavors
-		*out = make([]Flavor, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
+	if in.InferenceConfig != nil {
+		in, out := &in.InferenceConfig, &out.InferenceConfig
+		*out = new(InferenceConfig)
+		(*in).DeepCopyInto(*out)
 	}
 }
 
diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go
index f766f7e6..838b16d3 100644
--- a/api/inference/v1alpha1/backendruntime_types.go
+++ b/api/inference/v1alpha1/backendruntime_types.go
@@ -26,6 +26,7 @@ import (
 // do not change the name.
 type BackendRuntimeArg struct {
 	// Name represents the identifier of the backendRuntime argument.
+	// +kubebuilder:default=default
 	Name string `json:"name"`
 	// Flags represents all the preset configurations.
 	// Flag around with {{ .CONFIG }} is a configuration waiting for render.
diff --git a/api/inference/v1alpha1/config_types.go b/api/inference/v1alpha1/config_types.go
index 1aa56889..ea937791 100644
--- a/api/inference/v1alpha1/config_types.go
+++ b/api/inference/v1alpha1/config_types.go
@@ -33,17 +33,9 @@ type BackendRuntimeConfig struct {
 	// from the default version.
 	// +optional
 	Version *string `json:"version,omitempty"`
-	// ArgName represents the argument name set in the backendRuntimeArg.
-	// If not set, will be derived by the model role, e.g. if one model's role
-	// is , the argName will be set to . Better to
-	// set the argName explicitly.
-	// By default, the argName will be treated as in runtime.
-	// +optional
-	ArgName *string `json:"argName,omitempty"`
-	// ArgFlags represents the argument flags appended to the backend.
-	// You can add new flags or overwrite the default flags.
-	// +optional
-	ArgFlags []string `json:"argFlags,omitempty"`
+	// Args represents the specified arguments of the backendRuntime,
+	// which will be appended to the backendRuntime.spec.Args.
+	Args *BackendRuntimeArg `json:"args,omitempty"`
 	// Envs represents the environments set to the container.
// +optional Envs []corev1.EnvVar `json:"envs,omitempty"` diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go index bf5bf07e..731f4490 100644 --- a/api/inference/v1alpha1/zz_generated.deepcopy.go +++ b/api/inference/v1alpha1/zz_generated.deepcopy.go @@ -87,15 +87,10 @@ func (in *BackendRuntimeConfig) DeepCopyInto(out *BackendRuntimeConfig) { *out = new(string) **out = **in } - if in.ArgName != nil { - in, out := &in.ArgName, &out.ArgName - *out = new(string) - **out = **in - } - if in.ArgFlags != nil { - in, out := &in.ArgFlags, &out.ArgFlags - *out = make([]string, len(*in)) - copy(*out, *in) + if in.Args != nil { + in, out := &in.Args, &out.Args + *out = new(BackendRuntimeArg) + (*in).DeepCopyInto(*out) } if in.Envs != nil { in, out := &in.Envs, &out.Envs @@ -181,6 +176,21 @@ func (in *BackendRuntimeSpec) DeepCopyInto(out *BackendRuntimeSpec) { } } in.Resources.DeepCopyInto(&out.Resources) + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(v1.Probe) + (*in).DeepCopyInto(*out) + } + if in.ReadinessProbe != nil { + in, out := &in.ReadinessProbe, &out.ReadinessProbe + *out = new(v1.Probe) + (*in).DeepCopyInto(*out) + } + if in.StartupProbe != nil { + in, out := &in.StartupProbe, &out.StartupProbe + *out = new(v1.Probe) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackendRuntimeSpec. diff --git a/client-go/applyconfiguration/core/v1alpha1/flavor.go b/client-go/applyconfiguration/core/v1alpha1/flavor.go index 6a8a7d94..b1f609ff 100644 --- a/client-go/applyconfiguration/core/v1alpha1/flavor.go +++ b/client-go/applyconfiguration/core/v1alpha1/flavor.go @@ -18,17 +18,17 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" v1 "k8s.io/api/core/v1" ) // FlavorApplyConfiguration represents a declarative configuration of the Flavor type for use // with apply. type FlavorApplyConfiguration struct { - Name *v1alpha1.FlavorName `json:"name,omitempty"` - Requests *v1.ResourceList `json:"requests,omitempty"` - NodeSelector map[string]string `json:"nodeSelector,omitempty"` - Params map[string]string `json:"params,omitempty"` + Name *corev1alpha1.FlavorName `json:"name,omitempty"` + Requests *v1.ResourceList `json:"requests,omitempty"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + Params map[string]string `json:"params,omitempty"` } // FlavorApplyConfiguration constructs a declarative configuration of the Flavor type for use with @@ -40,7 +40,7 @@ func Flavor() *FlavorApplyConfiguration { // WithName sets the Name field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Name field is set to the value of the last call. 
-func (b *FlavorApplyConfiguration) WithName(value v1alpha1.FlavorName) *FlavorApplyConfiguration { +func (b *FlavorApplyConfiguration) WithName(value corev1alpha1.FlavorName) *FlavorApplyConfiguration { b.Name = &value return b } diff --git a/client-go/applyconfiguration/core/v1alpha1/inferenceconfig.go b/client-go/applyconfiguration/core/v1alpha1/inferenceconfig.go new file mode 100644 index 00000000..bece2699 --- /dev/null +++ b/client-go/applyconfiguration/core/v1alpha1/inferenceconfig.go @@ -0,0 +1,43 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// InferenceConfigApplyConfiguration represents a declarative configuration of the InferenceConfig type for use +// with apply. +type InferenceConfigApplyConfiguration struct { + Flavors []FlavorApplyConfiguration `json:"flavors,omitempty"` +} + +// InferenceConfigApplyConfiguration constructs a declarative configuration of the InferenceConfig type for use with +// apply. +func InferenceConfig() *InferenceConfigApplyConfiguration { + return &InferenceConfigApplyConfiguration{} +} + +// WithFlavors adds the given value to the Flavors field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Flavors field. +func (b *InferenceConfigApplyConfiguration) WithFlavors(values ...*FlavorApplyConfiguration) *InferenceConfigApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithFlavors") + } + b.Flavors = append(b.Flavors, *values[i]) + } + return b +} diff --git a/client-go/applyconfiguration/core/v1alpha1/modelclaim.go b/client-go/applyconfiguration/core/v1alpha1/modelclaim.go index 659c7b53..02584826 100644 --- a/client-go/applyconfiguration/core/v1alpha1/modelclaim.go +++ b/client-go/applyconfiguration/core/v1alpha1/modelclaim.go @@ -18,14 +18,14 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" ) // ModelClaimApplyConfiguration represents a declarative configuration of the ModelClaim type for use // with apply. 
type ModelClaimApplyConfiguration struct { - ModelName *v1alpha1.ModelName `json:"modelName,omitempty"` - InferenceFlavors []v1alpha1.FlavorName `json:"inferenceFlavors,omitempty"` + ModelName *corev1alpha1.ModelName `json:"modelName,omitempty"` + InferenceFlavors []corev1alpha1.FlavorName `json:"inferenceFlavors,omitempty"` } // ModelClaimApplyConfiguration constructs a declarative configuration of the ModelClaim type for use with @@ -37,7 +37,7 @@ func ModelClaim() *ModelClaimApplyConfiguration { // WithModelName sets the ModelName field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ModelName field is set to the value of the last call. -func (b *ModelClaimApplyConfiguration) WithModelName(value v1alpha1.ModelName) *ModelClaimApplyConfiguration { +func (b *ModelClaimApplyConfiguration) WithModelName(value corev1alpha1.ModelName) *ModelClaimApplyConfiguration { b.ModelName = &value return b } @@ -45,7 +45,7 @@ func (b *ModelClaimApplyConfiguration) WithModelName(value v1alpha1.ModelName) * // WithInferenceFlavors adds the given value to the InferenceFlavors field in the declarative configuration // and returns the receiver, so that objects can be build by chaining "With" function invocations. // If called multiple times, values provided by each call will be appended to the InferenceFlavors field. -func (b *ModelClaimApplyConfiguration) WithInferenceFlavors(values ...v1alpha1.FlavorName) *ModelClaimApplyConfiguration { +func (b *ModelClaimApplyConfiguration) WithInferenceFlavors(values ...corev1alpha1.FlavorName) *ModelClaimApplyConfiguration { for i := range values { b.InferenceFlavors = append(b.InferenceFlavors, values[i]) } diff --git a/client-go/applyconfiguration/core/v1alpha1/modelrefer.go b/client-go/applyconfiguration/core/v1alpha1/modelrefer.go index 85f24cb2..29ffe4a3 100644 --- a/client-go/applyconfiguration/core/v1alpha1/modelrefer.go +++ b/client-go/applyconfiguration/core/v1alpha1/modelrefer.go @@ -18,14 +18,14 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" ) // ModelReferApplyConfiguration represents a declarative configuration of the ModelRefer type for use // with apply. type ModelReferApplyConfiguration struct { - Name *v1alpha1.ModelName `json:"name,omitempty"` - Role *v1alpha1.ModelRole `json:"role,omitempty"` + Name *corev1alpha1.ModelName `json:"name,omitempty"` + Role *corev1alpha1.ModelRole `json:"role,omitempty"` } // ModelReferApplyConfiguration constructs a declarative configuration of the ModelRefer type for use with @@ -37,7 +37,7 @@ func ModelRefer() *ModelReferApplyConfiguration { // WithName sets the Name field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Name field is set to the value of the last call. 
-func (b *ModelReferApplyConfiguration) WithName(value v1alpha1.ModelName) *ModelReferApplyConfiguration { +func (b *ModelReferApplyConfiguration) WithName(value corev1alpha1.ModelName) *ModelReferApplyConfiguration { b.Name = &value return b } @@ -45,7 +45,7 @@ func (b *ModelReferApplyConfiguration) WithName(value v1alpha1.ModelName) *Model // WithRole sets the Role field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Role field is set to the value of the last call. -func (b *ModelReferApplyConfiguration) WithRole(value v1alpha1.ModelRole) *ModelReferApplyConfiguration { +func (b *ModelReferApplyConfiguration) WithRole(value corev1alpha1.ModelRole) *ModelReferApplyConfiguration { b.Role = &value return b } diff --git a/client-go/applyconfiguration/core/v1alpha1/modelspec.go b/client-go/applyconfiguration/core/v1alpha1/modelspec.go index c01e505c..7d2440a1 100644 --- a/client-go/applyconfiguration/core/v1alpha1/modelspec.go +++ b/client-go/applyconfiguration/core/v1alpha1/modelspec.go @@ -18,15 +18,15 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" ) // ModelSpecApplyConfiguration represents a declarative configuration of the ModelSpec type for use // with apply. type ModelSpecApplyConfiguration struct { - FamilyName *v1alpha1.ModelName `json:"familyName,omitempty"` - Source *ModelSourceApplyConfiguration `json:"source,omitempty"` - InferenceFlavors []FlavorApplyConfiguration `json:"inferenceFlavors,omitempty"` + FamilyName *corev1alpha1.ModelName `json:"familyName,omitempty"` + Source *ModelSourceApplyConfiguration `json:"source,omitempty"` + InferenceConfig *InferenceConfigApplyConfiguration `json:"inferenceConfig,omitempty"` } // ModelSpecApplyConfiguration constructs a declarative configuration of the ModelSpec type for use with @@ -38,7 +38,7 @@ func ModelSpec() *ModelSpecApplyConfiguration { // WithFamilyName sets the FamilyName field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the FamilyName field is set to the value of the last call. -func (b *ModelSpecApplyConfiguration) WithFamilyName(value v1alpha1.ModelName) *ModelSpecApplyConfiguration { +func (b *ModelSpecApplyConfiguration) WithFamilyName(value corev1alpha1.ModelName) *ModelSpecApplyConfiguration { b.FamilyName = &value return b } @@ -51,15 +51,10 @@ func (b *ModelSpecApplyConfiguration) WithSource(value *ModelSourceApplyConfigur return b } -// WithInferenceFlavors adds the given value to the InferenceFlavors field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the InferenceFlavors field. 
-func (b *ModelSpecApplyConfiguration) WithInferenceFlavors(values ...*FlavorApplyConfiguration) *ModelSpecApplyConfiguration { - for i := range values { - if values[i] == nil { - panic("nil value passed to WithInferenceFlavors") - } - b.InferenceFlavors = append(b.InferenceFlavors, *values[i]) - } +// WithInferenceConfig sets the InferenceConfig field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the InferenceConfig field is set to the value of the last call. +func (b *ModelSpecApplyConfiguration) WithInferenceConfig(value *InferenceConfigApplyConfiguration) *ModelSpecApplyConfiguration { + b.InferenceConfig = value return b } diff --git a/client-go/applyconfiguration/core/v1alpha1/openmodel.go b/client-go/applyconfiguration/core/v1alpha1/openmodel.go index 431c56c7..b5e2e9d0 100644 --- a/client-go/applyconfiguration/core/v1alpha1/openmodel.go +++ b/client-go/applyconfiguration/core/v1alpha1/openmodel.go @@ -47,7 +47,7 @@ func OpenModel(name, namespace string) *OpenModelApplyConfiguration { // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Kind field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithKind(value string) *OpenModelApplyConfiguration { - b.Kind = &value + b.TypeMetaApplyConfiguration.Kind = &value return b } @@ -55,7 +55,7 @@ func (b *OpenModelApplyConfiguration) WithKind(value string) *OpenModelApplyConf // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the APIVersion field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithAPIVersion(value string) *OpenModelApplyConfiguration { - b.APIVersion = &value + b.TypeMetaApplyConfiguration.APIVersion = &value return b } @@ -64,7 +64,7 @@ func (b *OpenModelApplyConfiguration) WithAPIVersion(value string) *OpenModelApp // If called multiple times, the Name field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithName(value string) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value + b.ObjectMetaApplyConfiguration.Name = &value return b } @@ -73,7 +73,7 @@ func (b *OpenModelApplyConfiguration) WithName(value string) *OpenModelApplyConf // If called multiple times, the GenerateName field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithGenerateName(value string) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value + b.ObjectMetaApplyConfiguration.GenerateName = &value return b } @@ -82,7 +82,7 @@ func (b *OpenModelApplyConfiguration) WithGenerateName(value string) *OpenModelA // If called multiple times, the Namespace field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithNamespace(value string) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value + b.ObjectMetaApplyConfiguration.Namespace = &value return b } @@ -91,7 +91,7 @@ func (b *OpenModelApplyConfiguration) WithNamespace(value string) *OpenModelAppl // If called multiple times, the UID field is set to the value of the last call. 
func (b *OpenModelApplyConfiguration) WithUID(value types.UID) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value + b.ObjectMetaApplyConfiguration.UID = &value return b } @@ -100,7 +100,7 @@ func (b *OpenModelApplyConfiguration) WithUID(value types.UID) *OpenModelApplyCo // If called multiple times, the ResourceVersion field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithResourceVersion(value string) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value + b.ObjectMetaApplyConfiguration.ResourceVersion = &value return b } @@ -109,7 +109,7 @@ func (b *OpenModelApplyConfiguration) WithResourceVersion(value string) *OpenMod // If called multiple times, the Generation field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithGeneration(value int64) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value + b.ObjectMetaApplyConfiguration.Generation = &value return b } @@ -118,7 +118,7 @@ func (b *OpenModelApplyConfiguration) WithGeneration(value int64) *OpenModelAppl // If called multiple times, the CreationTimestamp field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithCreationTimestamp(value metav1.Time) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value + b.ObjectMetaApplyConfiguration.CreationTimestamp = &value return b } @@ -127,7 +127,7 @@ func (b *OpenModelApplyConfiguration) WithCreationTimestamp(value metav1.Time) * // If called multiple times, the DeletionTimestamp field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value + b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value return b } @@ -136,7 +136,7 @@ func (b *OpenModelApplyConfiguration) WithDeletionTimestamp(value metav1.Time) * // If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. func (b *OpenModelApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value + b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value return b } @@ -146,11 +146,11 @@ func (b *OpenModelApplyConfiguration) WithDeletionGracePeriodSeconds(value int64 // overwriting an existing map entries in Labels field with the same key. func (b *OpenModelApplyConfiguration) WithLabels(entries map[string]string) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) + if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries)) } for k, v := range entries { - b.Labels[k] = v + b.ObjectMetaApplyConfiguration.Labels[k] = v } return b } @@ -161,11 +161,11 @@ func (b *OpenModelApplyConfiguration) WithLabels(entries map[string]string) *Ope // overwriting an existing map entries in Annotations field with the same key. 
func (b *OpenModelApplyConfiguration) WithAnnotations(entries map[string]string) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) + if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries)) } for k, v := range entries { - b.Annotations[k] = v + b.ObjectMetaApplyConfiguration.Annotations[k] = v } return b } @@ -179,7 +179,7 @@ func (b *OpenModelApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerRef if values[i] == nil { panic("nil value passed to WithOwnerReferences") } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) + b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i]) } return b } @@ -190,7 +190,7 @@ func (b *OpenModelApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerRef func (b *OpenModelApplyConfiguration) WithFinalizers(values ...string) *OpenModelApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) + b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i]) } return b } @@ -220,5 +220,5 @@ func (b *OpenModelApplyConfiguration) WithStatus(value *ModelStatusApplyConfigur // GetName retrieves the value of the Name field in the declarative configuration. func (b *OpenModelApplyConfiguration) GetName() *string { b.ensureObjectMetaApplyConfigurationExists() - return b.Name + return b.ObjectMetaApplyConfiguration.Name } diff --git a/client-go/applyconfiguration/inference/v1alpha1/backendruntimearg.go b/client-go/applyconfiguration/inference/v1alpha1/backendruntimearg.go new file mode 100644 index 00000000..231aa87a --- /dev/null +++ b/client-go/applyconfiguration/inference/v1alpha1/backendruntimearg.go @@ -0,0 +1,49 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// BackendRuntimeArgApplyConfiguration represents a declarative configuration of the BackendRuntimeArg type for use +// with apply. +type BackendRuntimeArgApplyConfiguration struct { + Name *string `json:"name,omitempty"` + Flags []string `json:"flags,omitempty"` +} + +// BackendRuntimeArgApplyConfiguration constructs a declarative configuration of the BackendRuntimeArg type for use with +// apply. +func BackendRuntimeArg() *BackendRuntimeArgApplyConfiguration { + return &BackendRuntimeArgApplyConfiguration{} +} + +// WithName sets the Name field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Name field is set to the value of the last call. 
+func (b *BackendRuntimeArgApplyConfiguration) WithName(value string) *BackendRuntimeArgApplyConfiguration { + b.Name = &value + return b +} + +// WithFlags adds the given value to the Flags field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Flags field. +func (b *BackendRuntimeArgApplyConfiguration) WithFlags(values ...string) *BackendRuntimeArgApplyConfiguration { + for i := range values { + b.Flags = append(b.Flags, values[i]) + } + return b +} diff --git a/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go b/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go index 9624854c..17cb2d2c 100644 --- a/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go +++ b/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go @@ -18,17 +18,16 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" v1 "k8s.io/api/core/v1" ) // BackendRuntimeConfigApplyConfiguration represents a declarative configuration of the BackendRuntimeConfig type for use // with apply. type BackendRuntimeConfigApplyConfiguration struct { - Name *v1alpha1.BackendName `json:"name,omitempty"` + Name *inferencev1alpha1.BackendName `json:"name,omitempty"` Version *string `json:"version,omitempty"` - ArgName *string `json:"argName,omitempty"` - ArgFlags []string `json:"argFlags,omitempty"` + Args *BackendRuntimeArgApplyConfiguration `json:"args,omitempty"` Envs []v1.EnvVar `json:"envs,omitempty"` Resources *ResourceRequirementsApplyConfiguration `json:"resources,omitempty"` } @@ -42,7 +41,7 @@ func BackendRuntimeConfig() *BackendRuntimeConfigApplyConfiguration { // WithName sets the Name field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Name field is set to the value of the last call. -func (b *BackendRuntimeConfigApplyConfiguration) WithName(value v1alpha1.BackendName) *BackendRuntimeConfigApplyConfiguration { +func (b *BackendRuntimeConfigApplyConfiguration) WithName(value inferencev1alpha1.BackendName) *BackendRuntimeConfigApplyConfiguration { b.Name = &value return b } @@ -55,21 +54,11 @@ func (b *BackendRuntimeConfigApplyConfiguration) WithVersion(value string) *Back return b } -// WithArgName sets the ArgName field in the declarative configuration to the given value +// WithArgs sets the Args field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ArgName field is set to the value of the last call. -func (b *BackendRuntimeConfigApplyConfiguration) WithArgName(value string) *BackendRuntimeConfigApplyConfiguration { - b.ArgName = &value - return b -} - -// WithArgFlags adds the given value to the ArgFlags field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the ArgFlags field. 
-func (b *BackendRuntimeConfigApplyConfiguration) WithArgFlags(values ...string) *BackendRuntimeConfigApplyConfiguration { - for i := range values { - b.ArgFlags = append(b.ArgFlags, values[i]) - } +// If called multiple times, the Args field is set to the value of the last call. +func (b *BackendRuntimeConfigApplyConfiguration) WithArgs(value *BackendRuntimeArgApplyConfiguration) *BackendRuntimeConfigApplyConfiguration { + b.Args = value return b } diff --git a/client-go/applyconfiguration/inference/v1alpha1/playground.go b/client-go/applyconfiguration/inference/v1alpha1/playground.go index c58253d7..76e8dc38 100644 --- a/client-go/applyconfiguration/inference/v1alpha1/playground.go +++ b/client-go/applyconfiguration/inference/v1alpha1/playground.go @@ -47,7 +47,7 @@ func Playground(name, namespace string) *PlaygroundApplyConfiguration { // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Kind field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithKind(value string) *PlaygroundApplyConfiguration { - b.Kind = &value + b.TypeMetaApplyConfiguration.Kind = &value return b } @@ -55,7 +55,7 @@ func (b *PlaygroundApplyConfiguration) WithKind(value string) *PlaygroundApplyCo // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the APIVersion field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithAPIVersion(value string) *PlaygroundApplyConfiguration { - b.APIVersion = &value + b.TypeMetaApplyConfiguration.APIVersion = &value return b } @@ -64,7 +64,7 @@ func (b *PlaygroundApplyConfiguration) WithAPIVersion(value string) *PlaygroundA // If called multiple times, the Name field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithName(value string) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value + b.ObjectMetaApplyConfiguration.Name = &value return b } @@ -73,7 +73,7 @@ func (b *PlaygroundApplyConfiguration) WithName(value string) *PlaygroundApplyCo // If called multiple times, the GenerateName field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithGenerateName(value string) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value + b.ObjectMetaApplyConfiguration.GenerateName = &value return b } @@ -82,7 +82,7 @@ func (b *PlaygroundApplyConfiguration) WithGenerateName(value string) *Playgroun // If called multiple times, the Namespace field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithNamespace(value string) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value + b.ObjectMetaApplyConfiguration.Namespace = &value return b } @@ -91,7 +91,7 @@ func (b *PlaygroundApplyConfiguration) WithNamespace(value string) *PlaygroundAp // If called multiple times, the UID field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithUID(value types.UID) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value + b.ObjectMetaApplyConfiguration.UID = &value return b } @@ -100,7 +100,7 @@ func (b *PlaygroundApplyConfiguration) WithUID(value types.UID) *PlaygroundApply // If called multiple times, the ResourceVersion field is set to the value of the last call. 
func (b *PlaygroundApplyConfiguration) WithResourceVersion(value string) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value + b.ObjectMetaApplyConfiguration.ResourceVersion = &value return b } @@ -109,7 +109,7 @@ func (b *PlaygroundApplyConfiguration) WithResourceVersion(value string) *Playgr // If called multiple times, the Generation field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithGeneration(value int64) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value + b.ObjectMetaApplyConfiguration.Generation = &value return b } @@ -118,7 +118,7 @@ func (b *PlaygroundApplyConfiguration) WithGeneration(value int64) *PlaygroundAp // If called multiple times, the CreationTimestamp field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithCreationTimestamp(value metav1.Time) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value + b.ObjectMetaApplyConfiguration.CreationTimestamp = &value return b } @@ -127,7 +127,7 @@ func (b *PlaygroundApplyConfiguration) WithCreationTimestamp(value metav1.Time) // If called multiple times, the DeletionTimestamp field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value + b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value return b } @@ -136,7 +136,7 @@ func (b *PlaygroundApplyConfiguration) WithDeletionTimestamp(value metav1.Time) // If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. func (b *PlaygroundApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value + b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value return b } @@ -146,11 +146,11 @@ func (b *PlaygroundApplyConfiguration) WithDeletionGracePeriodSeconds(value int6 // overwriting an existing map entries in Labels field with the same key. func (b *PlaygroundApplyConfiguration) WithLabels(entries map[string]string) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) + if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries)) } for k, v := range entries { - b.Labels[k] = v + b.ObjectMetaApplyConfiguration.Labels[k] = v } return b } @@ -161,11 +161,11 @@ func (b *PlaygroundApplyConfiguration) WithLabels(entries map[string]string) *Pl // overwriting an existing map entries in Annotations field with the same key. 
func (b *PlaygroundApplyConfiguration) WithAnnotations(entries map[string]string) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) + if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries)) } for k, v := range entries { - b.Annotations[k] = v + b.ObjectMetaApplyConfiguration.Annotations[k] = v } return b } @@ -179,7 +179,7 @@ func (b *PlaygroundApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerRe if values[i] == nil { panic("nil value passed to WithOwnerReferences") } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) + b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i]) } return b } @@ -190,7 +190,7 @@ func (b *PlaygroundApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerRe func (b *PlaygroundApplyConfiguration) WithFinalizers(values ...string) *PlaygroundApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) + b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i]) } return b } @@ -220,5 +220,5 @@ func (b *PlaygroundApplyConfiguration) WithStatus(value *PlaygroundStatusApplyCo // GetName retrieves the value of the Name field in the declarative configuration. func (b *PlaygroundApplyConfiguration) GetName() *string { b.ensureObjectMetaApplyConfigurationExists() - return b.Name + return b.ObjectMetaApplyConfiguration.Name } diff --git a/client-go/applyconfiguration/inference/v1alpha1/playgroundspec.go b/client-go/applyconfiguration/inference/v1alpha1/playgroundspec.go index f4c6ea4a..10c30346 100644 --- a/client-go/applyconfiguration/inference/v1alpha1/playgroundspec.go +++ b/client-go/applyconfiguration/inference/v1alpha1/playgroundspec.go @@ -18,17 +18,17 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1" ) // PlaygroundSpecApplyConfiguration represents a declarative configuration of the PlaygroundSpec type for use // with apply. 
type PlaygroundSpecApplyConfiguration struct { - Replicas *int32 `json:"replicas,omitempty"` - ModelClaim *v1alpha1.ModelClaimApplyConfiguration `json:"modelClaim,omitempty"` - ModelClaims *v1alpha1.ModelClaimsApplyConfiguration `json:"modelClaims,omitempty"` - BackendRuntimeConfig *BackendRuntimeConfigApplyConfiguration `json:"backendRuntimeConfig,omitempty"` - ElasticConfig *ElasticConfigApplyConfiguration `json:"elasticConfig,omitempty"` + Replicas *int32 `json:"replicas,omitempty"` + ModelClaim *corev1alpha1.ModelClaimApplyConfiguration `json:"modelClaim,omitempty"` + ModelClaims *corev1alpha1.ModelClaimsApplyConfiguration `json:"modelClaims,omitempty"` + BackendRuntimeConfig *BackendRuntimeConfigApplyConfiguration `json:"backendRuntimeConfig,omitempty"` + ElasticConfig *ElasticConfigApplyConfiguration `json:"elasticConfig,omitempty"` } // PlaygroundSpecApplyConfiguration constructs a declarative configuration of the PlaygroundSpec type for use with @@ -48,7 +48,7 @@ func (b *PlaygroundSpecApplyConfiguration) WithReplicas(value int32) *Playground // WithModelClaim sets the ModelClaim field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ModelClaim field is set to the value of the last call. -func (b *PlaygroundSpecApplyConfiguration) WithModelClaim(value *v1alpha1.ModelClaimApplyConfiguration) *PlaygroundSpecApplyConfiguration { +func (b *PlaygroundSpecApplyConfiguration) WithModelClaim(value *corev1alpha1.ModelClaimApplyConfiguration) *PlaygroundSpecApplyConfiguration { b.ModelClaim = value return b } @@ -56,7 +56,7 @@ func (b *PlaygroundSpecApplyConfiguration) WithModelClaim(value *v1alpha1.ModelC // WithModelClaims sets the ModelClaims field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ModelClaims field is set to the value of the last call. -func (b *PlaygroundSpecApplyConfiguration) WithModelClaims(value *v1alpha1.ModelClaimsApplyConfiguration) *PlaygroundSpecApplyConfiguration { +func (b *PlaygroundSpecApplyConfiguration) WithModelClaims(value *corev1alpha1.ModelClaimsApplyConfiguration) *PlaygroundSpecApplyConfiguration { b.ModelClaims = value return b } diff --git a/client-go/applyconfiguration/inference/v1alpha1/service.go b/client-go/applyconfiguration/inference/v1alpha1/service.go index a18f3416..423d82ba 100644 --- a/client-go/applyconfiguration/inference/v1alpha1/service.go +++ b/client-go/applyconfiguration/inference/v1alpha1/service.go @@ -47,7 +47,7 @@ func Service(name, namespace string) *ServiceApplyConfiguration { // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Kind field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithKind(value string) *ServiceApplyConfiguration { - b.Kind = &value + b.TypeMetaApplyConfiguration.Kind = &value return b } @@ -55,7 +55,7 @@ func (b *ServiceApplyConfiguration) WithKind(value string) *ServiceApplyConfigur // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the APIVersion field is set to the value of the last call. 
func (b *ServiceApplyConfiguration) WithAPIVersion(value string) *ServiceApplyConfiguration { - b.APIVersion = &value + b.TypeMetaApplyConfiguration.APIVersion = &value return b } @@ -64,7 +64,7 @@ func (b *ServiceApplyConfiguration) WithAPIVersion(value string) *ServiceApplyCo // If called multiple times, the Name field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithName(value string) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Name = &value + b.ObjectMetaApplyConfiguration.Name = &value return b } @@ -73,7 +73,7 @@ func (b *ServiceApplyConfiguration) WithName(value string) *ServiceApplyConfigur // If called multiple times, the GenerateName field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithGenerateName(value string) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.GenerateName = &value + b.ObjectMetaApplyConfiguration.GenerateName = &value return b } @@ -82,7 +82,7 @@ func (b *ServiceApplyConfiguration) WithGenerateName(value string) *ServiceApply // If called multiple times, the Namespace field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithNamespace(value string) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Namespace = &value + b.ObjectMetaApplyConfiguration.Namespace = &value return b } @@ -91,7 +91,7 @@ func (b *ServiceApplyConfiguration) WithNamespace(value string) *ServiceApplyCon // If called multiple times, the UID field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithUID(value types.UID) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.UID = &value + b.ObjectMetaApplyConfiguration.UID = &value return b } @@ -100,7 +100,7 @@ func (b *ServiceApplyConfiguration) WithUID(value types.UID) *ServiceApplyConfig // If called multiple times, the ResourceVersion field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithResourceVersion(value string) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.ResourceVersion = &value + b.ObjectMetaApplyConfiguration.ResourceVersion = &value return b } @@ -109,7 +109,7 @@ func (b *ServiceApplyConfiguration) WithResourceVersion(value string) *ServiceAp // If called multiple times, the Generation field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithGeneration(value int64) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.Generation = &value + b.ObjectMetaApplyConfiguration.Generation = &value return b } @@ -118,7 +118,7 @@ func (b *ServiceApplyConfiguration) WithGeneration(value int64) *ServiceApplyCon // If called multiple times, the CreationTimestamp field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithCreationTimestamp(value metav1.Time) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.CreationTimestamp = &value + b.ObjectMetaApplyConfiguration.CreationTimestamp = &value return b } @@ -127,7 +127,7 @@ func (b *ServiceApplyConfiguration) WithCreationTimestamp(value metav1.Time) *Se // If called multiple times, the DeletionTimestamp field is set to the value of the last call. 
func (b *ServiceApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.DeletionTimestamp = &value + b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value return b } @@ -136,7 +136,7 @@ func (b *ServiceApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *Se // If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. func (b *ServiceApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - b.DeletionGracePeriodSeconds = &value + b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value return b } @@ -146,11 +146,11 @@ func (b *ServiceApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) // overwriting an existing map entries in Labels field with the same key. func (b *ServiceApplyConfiguration) WithLabels(entries map[string]string) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - if b.Labels == nil && len(entries) > 0 { - b.Labels = make(map[string]string, len(entries)) + if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries)) } for k, v := range entries { - b.Labels[k] = v + b.ObjectMetaApplyConfiguration.Labels[k] = v } return b } @@ -161,11 +161,11 @@ func (b *ServiceApplyConfiguration) WithLabels(entries map[string]string) *Servi // overwriting an existing map entries in Annotations field with the same key. func (b *ServiceApplyConfiguration) WithAnnotations(entries map[string]string) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() - if b.Annotations == nil && len(entries) > 0 { - b.Annotations = make(map[string]string, len(entries)) + if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries)) } for k, v := range entries { - b.Annotations[k] = v + b.ObjectMetaApplyConfiguration.Annotations[k] = v } return b } @@ -179,7 +179,7 @@ func (b *ServiceApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerRefer if values[i] == nil { panic("nil value passed to WithOwnerReferences") } - b.OwnerReferences = append(b.OwnerReferences, *values[i]) + b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i]) } return b } @@ -190,7 +190,7 @@ func (b *ServiceApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerRefer func (b *ServiceApplyConfiguration) WithFinalizers(values ...string) *ServiceApplyConfiguration { b.ensureObjectMetaApplyConfigurationExists() for i := range values { - b.Finalizers = append(b.Finalizers, values[i]) + b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i]) } return b } @@ -220,5 +220,5 @@ func (b *ServiceApplyConfiguration) WithStatus(value *ServiceStatusApplyConfigur // GetName retrieves the value of the Name field in the declarative configuration. 
func (b *ServiceApplyConfiguration) GetName() *string { b.ensureObjectMetaApplyConfigurationExists() - return b.Name + return b.ObjectMetaApplyConfiguration.Name } diff --git a/client-go/applyconfiguration/inference/v1alpha1/servicespec.go b/client-go/applyconfiguration/inference/v1alpha1/servicespec.go index 2666c01b..dbec9ba0 100644 --- a/client-go/applyconfiguration/inference/v1alpha1/servicespec.go +++ b/client-go/applyconfiguration/inference/v1alpha1/servicespec.go @@ -18,15 +18,15 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1" v1 "sigs.k8s.io/lws/api/leaderworkerset/v1" ) // ServiceSpecApplyConfiguration represents a declarative configuration of the ServiceSpec type for use // with apply. type ServiceSpecApplyConfiguration struct { - ModelClaims *v1alpha1.ModelClaimsApplyConfiguration `json:"modelClaims,omitempty"` - WorkloadTemplate *v1.LeaderWorkerSetSpec `json:"workloadTemplate,omitempty"` + ModelClaims *corev1alpha1.ModelClaimsApplyConfiguration `json:"modelClaims,omitempty"` + WorkloadTemplate *v1.LeaderWorkerSetSpec `json:"workloadTemplate,omitempty"` } // ServiceSpecApplyConfiguration constructs a declarative configuration of the ServiceSpec type for use with @@ -38,7 +38,7 @@ func ServiceSpec() *ServiceSpecApplyConfiguration { // WithModelClaims sets the ModelClaims field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ModelClaims field is set to the value of the last call. -func (b *ServiceSpecApplyConfiguration) WithModelClaims(value *v1alpha1.ModelClaimsApplyConfiguration) *ServiceSpecApplyConfiguration { +func (b *ServiceSpecApplyConfiguration) WithModelClaims(value *corev1alpha1.ModelClaimsApplyConfiguration) *ServiceSpecApplyConfiguration { b.ModelClaims = value return b } diff --git a/client-go/applyconfiguration/internal/internal.go b/client-go/applyconfiguration/internal/internal.go index 69b66345..8f24c8ba 100644 --- a/client-go/applyconfiguration/internal/internal.go +++ b/client-go/applyconfiguration/internal/internal.go @@ -18,8 +18,8 @@ limitations under the License. 
package internal import ( - "fmt" - "sync" + fmt "fmt" + sync "sync" typed "sigs.k8s.io/structured-merge-diff/v4/typed" ) diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index d765df69..f2ad5af7 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -33,6 +33,8 @@ import ( func ForKind(kind schema.GroupVersionKind) interface{} { switch kind { // Group=inference.llmaz.io, Version=v1alpha1 + case v1alpha1.SchemeGroupVersion.WithKind("BackendRuntimeArg"): + return &inferencev1alpha1.BackendRuntimeArgApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("BackendRuntimeConfig"): return &inferencev1alpha1.BackendRuntimeConfigApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("ElasticConfig"): @@ -55,6 +57,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { // Group=llmaz.io, Version=v1alpha1 case corev1alpha1.SchemeGroupVersion.WithKind("Flavor"): return &applyconfigurationcorev1alpha1.FlavorApplyConfiguration{} + case corev1alpha1.SchemeGroupVersion.WithKind("InferenceConfig"): + return &applyconfigurationcorev1alpha1.InferenceConfigApplyConfiguration{} case corev1alpha1.SchemeGroupVersion.WithKind("ModelClaim"): return &applyconfigurationcorev1alpha1.ModelClaimApplyConfiguration{} case corev1alpha1.SchemeGroupVersion.WithKind("ModelClaims"): diff --git a/client-go/clientset/versioned/clientset.go b/client-go/clientset/versioned/clientset.go index 4f533e60..e4a3a1ce 100644 --- a/client-go/clientset/versioned/clientset.go +++ b/client-go/clientset/versioned/clientset.go @@ -18,8 +18,8 @@ limitations under the License. package versioned import ( - "fmt" - "net/http" + fmt "fmt" + http "net/http" llmazv1alpha1 "github.com/inftyai/llmaz/client-go/clientset/versioned/typed/core/v1alpha1" inferencev1alpha1 "github.com/inftyai/llmaz/client-go/clientset/versioned/typed/inference/v1alpha1" diff --git a/client-go/clientset/versioned/typed/core/v1alpha1/core_client.go b/client-go/clientset/versioned/typed/core/v1alpha1/core_client.go index c4ee664b..81a2f166 100644 --- a/client-go/clientset/versioned/typed/core/v1alpha1/core_client.go +++ b/client-go/clientset/versioned/typed/core/v1alpha1/core_client.go @@ -18,10 +18,10 @@ limitations under the License. 
package v1alpha1 import ( - "net/http" + http "net/http" - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" - "github.com/inftyai/llmaz/client-go/clientset/versioned/scheme" + corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + scheme "github.com/inftyai/llmaz/client-go/clientset/versioned/scheme" rest "k8s.io/client-go/rest" ) @@ -84,10 +84,10 @@ func New(c rest.Interface) *LlmazV1alpha1Client { } func setConfigDefaults(config *rest.Config) error { - gv := v1alpha1.SchemeGroupVersion + gv := corev1alpha1.SchemeGroupVersion config.GroupVersion = &gv config.APIPath = "/apis" - config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() + config.NegotiatedSerializer = rest.CodecFactoryForGeneratedClient(scheme.Scheme, scheme.Codecs).WithoutConversion() if config.UserAgent == "" { config.UserAgent = rest.DefaultKubernetesUserAgent() diff --git a/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_core_client.go b/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_core_client.go index 973c674a..5dcbd4fd 100644 --- a/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_core_client.go +++ b/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_core_client.go @@ -28,7 +28,7 @@ type FakeLlmazV1alpha1 struct { } func (c *FakeLlmazV1alpha1) OpenModels(namespace string) v1alpha1.OpenModelInterface { - return &FakeOpenModels{c, namespace} + return newFakeOpenModels(c, namespace) } // RESTClient returns a RESTClient that is used to communicate diff --git a/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_openmodel.go b/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_openmodel.go index b8384e7a..b1327e75 100644 --- a/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_openmodel.go +++ b/client-go/clientset/versioned/typed/core/v1alpha1/fake/fake_openmodel.go @@ -18,179 +18,33 @@ limitations under the License. package fake import ( - "context" - json "encoding/json" - "fmt" - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" corev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" + typedcorev1alpha1 "github.com/inftyai/llmaz/client-go/clientset/versioned/typed/core/v1alpha1" + gentype "k8s.io/client-go/gentype" ) -// FakeOpenModels implements OpenModelInterface -type FakeOpenModels struct { +// fakeOpenModels implements OpenModelInterface +type fakeOpenModels struct { + *gentype.FakeClientWithListAndApply[*v1alpha1.OpenModel, *v1alpha1.OpenModelList, *corev1alpha1.OpenModelApplyConfiguration] Fake *FakeLlmazV1alpha1 - ns string -} - -var openmodelsResource = v1alpha1.SchemeGroupVersion.WithResource("openmodels") - -var openmodelsKind = v1alpha1.SchemeGroupVersion.WithKind("OpenModel") - -// Get takes name of the openModel, and returns the corresponding openModel object, and an error if there is any. -func (c *FakeOpenModels) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.OpenModel, err error) { - emptyResult := &v1alpha1.OpenModel{} - obj, err := c.Fake. 
- Invokes(testing.NewGetActionWithOptions(openmodelsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.OpenModel), err -} - -// List takes label and field selectors, and returns the list of OpenModels that match those selectors. -func (c *FakeOpenModels) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.OpenModelList, err error) { - emptyResult := &v1alpha1.OpenModelList{} - obj, err := c.Fake. - Invokes(testing.NewListActionWithOptions(openmodelsResource, openmodelsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1alpha1.OpenModelList{ListMeta: obj.(*v1alpha1.OpenModelList).ListMeta} - for _, item := range obj.(*v1alpha1.OpenModelList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested openModels. -func (c *FakeOpenModels) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(openmodelsResource, c.ns, opts)) - -} - -// Create takes the representation of a openModel and creates it. Returns the server's representation of the openModel, and an error, if there is any. -func (c *FakeOpenModels) Create(ctx context.Context, openModel *v1alpha1.OpenModel, opts v1.CreateOptions) (result *v1alpha1.OpenModel, err error) { - emptyResult := &v1alpha1.OpenModel{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(openmodelsResource, c.ns, openModel, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.OpenModel), err -} - -// Update takes the representation of a openModel and updates it. Returns the server's representation of the openModel, and an error, if there is any. -func (c *FakeOpenModels) Update(ctx context.Context, openModel *v1alpha1.OpenModel, opts v1.UpdateOptions) (result *v1alpha1.OpenModel, err error) { - emptyResult := &v1alpha1.OpenModel{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(openmodelsResource, c.ns, openModel, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.OpenModel), err } -// UpdateStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakeOpenModels) UpdateStatus(ctx context.Context, openModel *v1alpha1.OpenModel, opts v1.UpdateOptions) (result *v1alpha1.OpenModel, err error) { - emptyResult := &v1alpha1.OpenModel{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(openmodelsResource, "status", c.ns, openModel, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.OpenModel), err -} - -// Delete takes name of the openModel and deletes it. Returns an error if one occurs. -func (c *FakeOpenModels) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(openmodelsResource, c.ns, name, opts), &v1alpha1.OpenModel{}) - - return err -} - -// DeleteCollection deletes a collection of objects. 
-func (c *FakeOpenModels) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(openmodelsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1alpha1.OpenModelList{}) - return err -} - -// Patch applies the patch and returns the patched openModel. -func (c *FakeOpenModels) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.OpenModel, err error) { - emptyResult := &v1alpha1.OpenModel{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(openmodelsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.OpenModel), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied openModel. -func (c *FakeOpenModels) Apply(ctx context.Context, openModel *corev1alpha1.OpenModelApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.OpenModel, err error) { - if openModel == nil { - return nil, fmt.Errorf("openModel provided to Apply must not be nil") - } - data, err := json.Marshal(openModel) - if err != nil { - return nil, err - } - name := openModel.Name - if name == nil { - return nil, fmt.Errorf("openModel.Name must be provided to Apply") - } - emptyResult := &v1alpha1.OpenModel{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(openmodelsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.OpenModel), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). -func (c *FakeOpenModels) ApplyStatus(ctx context.Context, openModel *corev1alpha1.OpenModelApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.OpenModel, err error) { - if openModel == nil { - return nil, fmt.Errorf("openModel provided to Apply must not be nil") - } - data, err := json.Marshal(openModel) - if err != nil { - return nil, err - } - name := openModel.Name - if name == nil { - return nil, fmt.Errorf("openModel.Name must be provided to Apply") - } - emptyResult := &v1alpha1.OpenModel{} - obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceActionWithOptions(openmodelsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err +func newFakeOpenModels(fake *FakeLlmazV1alpha1, namespace string) typedcorev1alpha1.OpenModelInterface { + return &fakeOpenModels{ + gentype.NewFakeClientWithListAndApply[*v1alpha1.OpenModel, *v1alpha1.OpenModelList, *corev1alpha1.OpenModelApplyConfiguration]( + fake.Fake, + namespace, + v1alpha1.SchemeGroupVersion.WithResource("openmodels"), + v1alpha1.SchemeGroupVersion.WithKind("OpenModel"), + func() *v1alpha1.OpenModel { return &v1alpha1.OpenModel{} }, + func() *v1alpha1.OpenModelList { return &v1alpha1.OpenModelList{} }, + func(dst, src *v1alpha1.OpenModelList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha1.OpenModelList) []*v1alpha1.OpenModel { return gentype.ToPointerSlice(list.Items) }, + func(list *v1alpha1.OpenModelList, items []*v1alpha1.OpenModel) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, } - return obj.(*v1alpha1.OpenModel), err } diff --git a/client-go/clientset/versioned/typed/core/v1alpha1/openmodel.go b/client-go/clientset/versioned/typed/core/v1alpha1/openmodel.go index de45f5ec..6340bb65 100644 --- a/client-go/clientset/versioned/typed/core/v1alpha1/openmodel.go +++ b/client-go/clientset/versioned/typed/core/v1alpha1/openmodel.go @@ -18,10 +18,10 @@ limitations under the License. package v1alpha1 import ( - "context" + context "context" - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" - corev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + applyconfigurationcorev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1" scheme "github.com/inftyai/llmaz/client-go/clientset/versioned/scheme" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" @@ -37,36 +37,37 @@ type OpenModelsGetter interface { // OpenModelInterface has methods to work with OpenModel resources. type OpenModelInterface interface { - Create(ctx context.Context, openModel *v1alpha1.OpenModel, opts v1.CreateOptions) (*v1alpha1.OpenModel, error) - Update(ctx context.Context, openModel *v1alpha1.OpenModel, opts v1.UpdateOptions) (*v1alpha1.OpenModel, error) + Create(ctx context.Context, openModel *corev1alpha1.OpenModel, opts v1.CreateOptions) (*corev1alpha1.OpenModel, error) + Update(ctx context.Context, openModel *corev1alpha1.OpenModel, opts v1.UpdateOptions) (*corev1alpha1.OpenModel, error) // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
- UpdateStatus(ctx context.Context, openModel *v1alpha1.OpenModel, opts v1.UpdateOptions) (*v1alpha1.OpenModel, error) + UpdateStatus(ctx context.Context, openModel *corev1alpha1.OpenModel, opts v1.UpdateOptions) (*corev1alpha1.OpenModel, error) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error - Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.OpenModel, error) - List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.OpenModelList, error) + Get(ctx context.Context, name string, opts v1.GetOptions) (*corev1alpha1.OpenModel, error) + List(ctx context.Context, opts v1.ListOptions) (*corev1alpha1.OpenModelList, error) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.OpenModel, err error) - Apply(ctx context.Context, openModel *corev1alpha1.OpenModelApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.OpenModel, err error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *corev1alpha1.OpenModel, err error) + Apply(ctx context.Context, openModel *applyconfigurationcorev1alpha1.OpenModelApplyConfiguration, opts v1.ApplyOptions) (result *corev1alpha1.OpenModel, err error) // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). - ApplyStatus(ctx context.Context, openModel *corev1alpha1.OpenModelApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.OpenModel, err error) + ApplyStatus(ctx context.Context, openModel *applyconfigurationcorev1alpha1.OpenModelApplyConfiguration, opts v1.ApplyOptions) (result *corev1alpha1.OpenModel, err error) OpenModelExpansion } // openModels implements OpenModelInterface type openModels struct { - *gentype.ClientWithListAndApply[*v1alpha1.OpenModel, *v1alpha1.OpenModelList, *corev1alpha1.OpenModelApplyConfiguration] + *gentype.ClientWithListAndApply[*corev1alpha1.OpenModel, *corev1alpha1.OpenModelList, *applyconfigurationcorev1alpha1.OpenModelApplyConfiguration] } // newOpenModels returns a OpenModels func newOpenModels(c *LlmazV1alpha1Client, namespace string) *openModels { return &openModels{ - gentype.NewClientWithListAndApply[*v1alpha1.OpenModel, *v1alpha1.OpenModelList, *corev1alpha1.OpenModelApplyConfiguration]( + gentype.NewClientWithListAndApply[*corev1alpha1.OpenModel, *corev1alpha1.OpenModelList, *applyconfigurationcorev1alpha1.OpenModelApplyConfiguration]( "openmodels", c.RESTClient(), scheme.ParameterCodec, namespace, - func() *v1alpha1.OpenModel { return &v1alpha1.OpenModel{} }, - func() *v1alpha1.OpenModelList { return &v1alpha1.OpenModelList{} }), + func() *corev1alpha1.OpenModel { return &corev1alpha1.OpenModel{} }, + func() *corev1alpha1.OpenModelList { return &corev1alpha1.OpenModelList{} }, + ), } } diff --git a/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_inference_client.go b/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_inference_client.go index 4bfbaea1..26fd5b51 100644 --- a/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_inference_client.go +++ b/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_inference_client.go @@ -28,11 +28,11 @@ type FakeInferenceV1alpha1 struct { } func (c *FakeInferenceV1alpha1) Playgrounds(namespace string) 
v1alpha1.PlaygroundInterface { - return &FakePlaygrounds{c, namespace} + return newFakePlaygrounds(c, namespace) } func (c *FakeInferenceV1alpha1) Services(namespace string) v1alpha1.ServiceInterface { - return &FakeServices{c, namespace} + return newFakeServices(c, namespace) } // RESTClient returns a RESTClient that is used to communicate diff --git a/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_playground.go b/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_playground.go index 3e955af7..e51282d8 100644 --- a/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_playground.go +++ b/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_playground.go @@ -18,179 +18,33 @@ limitations under the License. package fake import ( - "context" - json "encoding/json" - "fmt" - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" inferencev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/inference/v1alpha1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" + typedinferencev1alpha1 "github.com/inftyai/llmaz/client-go/clientset/versioned/typed/inference/v1alpha1" + gentype "k8s.io/client-go/gentype" ) -// FakePlaygrounds implements PlaygroundInterface -type FakePlaygrounds struct { +// fakePlaygrounds implements PlaygroundInterface +type fakePlaygrounds struct { + *gentype.FakeClientWithListAndApply[*v1alpha1.Playground, *v1alpha1.PlaygroundList, *inferencev1alpha1.PlaygroundApplyConfiguration] Fake *FakeInferenceV1alpha1 - ns string -} - -var playgroundsResource = v1alpha1.SchemeGroupVersion.WithResource("playgrounds") - -var playgroundsKind = v1alpha1.SchemeGroupVersion.WithKind("Playground") - -// Get takes name of the playground, and returns the corresponding playground object, and an error if there is any. -func (c *FakePlaygrounds) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.Playground, err error) { - emptyResult := &v1alpha1.Playground{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(playgroundsResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Playground), err -} - -// List takes label and field selectors, and returns the list of Playgrounds that match those selectors. -func (c *FakePlaygrounds) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.PlaygroundList, err error) { - emptyResult := &v1alpha1.PlaygroundList{} - obj, err := c.Fake. - Invokes(testing.NewListActionWithOptions(playgroundsResource, playgroundsKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1alpha1.PlaygroundList{ListMeta: obj.(*v1alpha1.PlaygroundList).ListMeta} - for _, item := range obj.(*v1alpha1.PlaygroundList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested playgrounds. -func (c *FakePlaygrounds) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { - return c.Fake. 
- InvokesWatch(testing.NewWatchActionWithOptions(playgroundsResource, c.ns, opts)) - -} - -// Create takes the representation of a playground and creates it. Returns the server's representation of the playground, and an error, if there is any. -func (c *FakePlaygrounds) Create(ctx context.Context, playground *v1alpha1.Playground, opts v1.CreateOptions) (result *v1alpha1.Playground, err error) { - emptyResult := &v1alpha1.Playground{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(playgroundsResource, c.ns, playground, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Playground), err -} - -// Update takes the representation of a playground and updates it. Returns the server's representation of the playground, and an error, if there is any. -func (c *FakePlaygrounds) Update(ctx context.Context, playground *v1alpha1.Playground, opts v1.UpdateOptions) (result *v1alpha1.Playground, err error) { - emptyResult := &v1alpha1.Playground{} - obj, err := c.Fake. - Invokes(testing.NewUpdateActionWithOptions(playgroundsResource, c.ns, playground, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Playground), err } -// UpdateStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakePlaygrounds) UpdateStatus(ctx context.Context, playground *v1alpha1.Playground, opts v1.UpdateOptions) (result *v1alpha1.Playground, err error) { - emptyResult := &v1alpha1.Playground{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(playgroundsResource, "status", c.ns, playground, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Playground), err -} - -// Delete takes name of the playground and deletes it. Returns an error if one occurs. -func (c *FakePlaygrounds) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(playgroundsResource, c.ns, name, opts), &v1alpha1.Playground{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakePlaygrounds) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(playgroundsResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1alpha1.PlaygroundList{}) - return err -} - -// Patch applies the patch and returns the patched playground. -func (c *FakePlaygrounds) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Playground, err error) { - emptyResult := &v1alpha1.Playground{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(playgroundsResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Playground), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied playground. 
-func (c *FakePlaygrounds) Apply(ctx context.Context, playground *inferencev1alpha1.PlaygroundApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Playground, err error) { - if playground == nil { - return nil, fmt.Errorf("playground provided to Apply must not be nil") - } - data, err := json.Marshal(playground) - if err != nil { - return nil, err - } - name := playground.Name - if name == nil { - return nil, fmt.Errorf("playground.Name must be provided to Apply") - } - emptyResult := &v1alpha1.Playground{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(playgroundsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Playground), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). -func (c *FakePlaygrounds) ApplyStatus(ctx context.Context, playground *inferencev1alpha1.PlaygroundApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Playground, err error) { - if playground == nil { - return nil, fmt.Errorf("playground provided to Apply must not be nil") - } - data, err := json.Marshal(playground) - if err != nil { - return nil, err - } - name := playground.Name - if name == nil { - return nil, fmt.Errorf("playground.Name must be provided to Apply") - } - emptyResult := &v1alpha1.Playground{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(playgroundsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err +func newFakePlaygrounds(fake *FakeInferenceV1alpha1, namespace string) typedinferencev1alpha1.PlaygroundInterface { + return &fakePlaygrounds{ + gentype.NewFakeClientWithListAndApply[*v1alpha1.Playground, *v1alpha1.PlaygroundList, *inferencev1alpha1.PlaygroundApplyConfiguration]( + fake.Fake, + namespace, + v1alpha1.SchemeGroupVersion.WithResource("playgrounds"), + v1alpha1.SchemeGroupVersion.WithKind("Playground"), + func() *v1alpha1.Playground { return &v1alpha1.Playground{} }, + func() *v1alpha1.PlaygroundList { return &v1alpha1.PlaygroundList{} }, + func(dst, src *v1alpha1.PlaygroundList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha1.PlaygroundList) []*v1alpha1.Playground { return gentype.ToPointerSlice(list.Items) }, + func(list *v1alpha1.PlaygroundList, items []*v1alpha1.Playground) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, } - return obj.(*v1alpha1.Playground), err } diff --git a/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_service.go b/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_service.go index 89f55d42..5d2697e5 100644 --- a/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_service.go +++ b/client-go/clientset/versioned/typed/inference/v1alpha1/fake/fake_service.go @@ -18,179 +18,33 @@ limitations under the License. 
package fake import ( - "context" - json "encoding/json" - "fmt" - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" inferencev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/inference/v1alpha1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" + typedinferencev1alpha1 "github.com/inftyai/llmaz/client-go/clientset/versioned/typed/inference/v1alpha1" + gentype "k8s.io/client-go/gentype" ) -// FakeServices implements ServiceInterface -type FakeServices struct { +// fakeServices implements ServiceInterface +type fakeServices struct { + *gentype.FakeClientWithListAndApply[*v1alpha1.Service, *v1alpha1.ServiceList, *inferencev1alpha1.ServiceApplyConfiguration] Fake *FakeInferenceV1alpha1 - ns string -} - -var servicesResource = v1alpha1.SchemeGroupVersion.WithResource("services") - -var servicesKind = v1alpha1.SchemeGroupVersion.WithKind("Service") - -// Get takes name of the service, and returns the corresponding service object, and an error if there is any. -func (c *FakeServices) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.Service, err error) { - emptyResult := &v1alpha1.Service{} - obj, err := c.Fake. - Invokes(testing.NewGetActionWithOptions(servicesResource, c.ns, name, options), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Service), err -} - -// List takes label and field selectors, and returns the list of Services that match those selectors. -func (c *FakeServices) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.ServiceList, err error) { - emptyResult := &v1alpha1.ServiceList{} - obj, err := c.Fake. - Invokes(testing.NewListActionWithOptions(servicesResource, servicesKind, c.ns, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1alpha1.ServiceList{ListMeta: obj.(*v1alpha1.ServiceList).ListMeta} - for _, item := range obj.(*v1alpha1.ServiceList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested services. -func (c *FakeServices) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchActionWithOptions(servicesResource, c.ns, opts)) - -} - -// Create takes the representation of a service and creates it. Returns the server's representation of the service, and an error, if there is any. -func (c *FakeServices) Create(ctx context.Context, service *v1alpha1.Service, opts v1.CreateOptions) (result *v1alpha1.Service, err error) { - emptyResult := &v1alpha1.Service{} - obj, err := c.Fake. - Invokes(testing.NewCreateActionWithOptions(servicesResource, c.ns, service, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Service), err -} - -// Update takes the representation of a service and updates it. Returns the server's representation of the service, and an error, if there is any. -func (c *FakeServices) Update(ctx context.Context, service *v1alpha1.Service, opts v1.UpdateOptions) (result *v1alpha1.Service, err error) { - emptyResult := &v1alpha1.Service{} - obj, err := c.Fake. 
- Invokes(testing.NewUpdateActionWithOptions(servicesResource, c.ns, service, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Service), err } -// UpdateStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). -func (c *FakeServices) UpdateStatus(ctx context.Context, service *v1alpha1.Service, opts v1.UpdateOptions) (result *v1alpha1.Service, err error) { - emptyResult := &v1alpha1.Service{} - obj, err := c.Fake. - Invokes(testing.NewUpdateSubresourceActionWithOptions(servicesResource, "status", c.ns, service, opts), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Service), err -} - -// Delete takes name of the service and deletes it. Returns an error if one occurs. -func (c *FakeServices) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteActionWithOptions(servicesResource, c.ns, name, opts), &v1alpha1.Service{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakeServices) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { - action := testing.NewDeleteCollectionActionWithOptions(servicesResource, c.ns, opts, listOpts) - - _, err := c.Fake.Invokes(action, &v1alpha1.ServiceList{}) - return err -} - -// Patch applies the patch and returns the patched service. -func (c *FakeServices) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Service, err error) { - emptyResult := &v1alpha1.Service{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(servicesResource, c.ns, name, pt, data, opts, subresources...), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Service), err -} - -// Apply takes the given apply declarative configuration, applies it and returns the applied service. -func (c *FakeServices) Apply(ctx context.Context, service *inferencev1alpha1.ServiceApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Service, err error) { - if service == nil { - return nil, fmt.Errorf("service provided to Apply must not be nil") - } - data, err := json.Marshal(service) - if err != nil { - return nil, err - } - name := service.Name - if name == nil { - return nil, fmt.Errorf("service.Name must be provided to Apply") - } - emptyResult := &v1alpha1.Service{} - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceActionWithOptions(servicesResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult) - - if obj == nil { - return emptyResult, err - } - return obj.(*v1alpha1.Service), err -} - -// ApplyStatus was generated because the type contains a Status member. -// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). -func (c *FakeServices) ApplyStatus(ctx context.Context, service *inferencev1alpha1.ServiceApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Service, err error) { - if service == nil { - return nil, fmt.Errorf("service provided to Apply must not be nil") - } - data, err := json.Marshal(service) - if err != nil { - return nil, err - } - name := service.Name - if name == nil { - return nil, fmt.Errorf("service.Name must be provided to Apply") - } - emptyResult := &v1alpha1.Service{} - obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceActionWithOptions(servicesResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult) - - if obj == nil { - return emptyResult, err +func newFakeServices(fake *FakeInferenceV1alpha1, namespace string) typedinferencev1alpha1.ServiceInterface { + return &fakeServices{ + gentype.NewFakeClientWithListAndApply[*v1alpha1.Service, *v1alpha1.ServiceList, *inferencev1alpha1.ServiceApplyConfiguration]( + fake.Fake, + namespace, + v1alpha1.SchemeGroupVersion.WithResource("services"), + v1alpha1.SchemeGroupVersion.WithKind("Service"), + func() *v1alpha1.Service { return &v1alpha1.Service{} }, + func() *v1alpha1.ServiceList { return &v1alpha1.ServiceList{} }, + func(dst, src *v1alpha1.ServiceList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha1.ServiceList) []*v1alpha1.Service { return gentype.ToPointerSlice(list.Items) }, + func(list *v1alpha1.ServiceList, items []*v1alpha1.Service) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, } - return obj.(*v1alpha1.Service), err } diff --git a/client-go/clientset/versioned/typed/inference/v1alpha1/inference_client.go b/client-go/clientset/versioned/typed/inference/v1alpha1/inference_client.go index a73a6333..1396071f 100644 --- a/client-go/clientset/versioned/typed/inference/v1alpha1/inference_client.go +++ b/client-go/clientset/versioned/typed/inference/v1alpha1/inference_client.go @@ -18,10 +18,10 @@ limitations under the License. package v1alpha1 import ( - "net/http" + http "net/http" - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" - "github.com/inftyai/llmaz/client-go/clientset/versioned/scheme" + inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + scheme "github.com/inftyai/llmaz/client-go/clientset/versioned/scheme" rest "k8s.io/client-go/rest" ) @@ -89,10 +89,10 @@ func New(c rest.Interface) *InferenceV1alpha1Client { } func setConfigDefaults(config *rest.Config) error { - gv := v1alpha1.SchemeGroupVersion + gv := inferencev1alpha1.SchemeGroupVersion config.GroupVersion = &gv config.APIPath = "/apis" - config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() + config.NegotiatedSerializer = rest.CodecFactoryForGeneratedClient(scheme.Scheme, scheme.Codecs).WithoutConversion() if config.UserAgent == "" { config.UserAgent = rest.DefaultKubernetesUserAgent() diff --git a/client-go/clientset/versioned/typed/inference/v1alpha1/playground.go b/client-go/clientset/versioned/typed/inference/v1alpha1/playground.go index 33af2213..d67423d0 100644 --- a/client-go/clientset/versioned/typed/inference/v1alpha1/playground.go +++ b/client-go/clientset/versioned/typed/inference/v1alpha1/playground.go @@ -18,10 +18,10 @@ limitations under the License. 
package v1alpha1 import ( - "context" + context "context" - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" - inferencev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/inference/v1alpha1" + inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + applyconfigurationinferencev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/inference/v1alpha1" scheme "github.com/inftyai/llmaz/client-go/clientset/versioned/scheme" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" @@ -37,36 +37,37 @@ type PlaygroundsGetter interface { // PlaygroundInterface has methods to work with Playground resources. type PlaygroundInterface interface { - Create(ctx context.Context, playground *v1alpha1.Playground, opts v1.CreateOptions) (*v1alpha1.Playground, error) - Update(ctx context.Context, playground *v1alpha1.Playground, opts v1.UpdateOptions) (*v1alpha1.Playground, error) + Create(ctx context.Context, playground *inferencev1alpha1.Playground, opts v1.CreateOptions) (*inferencev1alpha1.Playground, error) + Update(ctx context.Context, playground *inferencev1alpha1.Playground, opts v1.UpdateOptions) (*inferencev1alpha1.Playground, error) // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). - UpdateStatus(ctx context.Context, playground *v1alpha1.Playground, opts v1.UpdateOptions) (*v1alpha1.Playground, error) + UpdateStatus(ctx context.Context, playground *inferencev1alpha1.Playground, opts v1.UpdateOptions) (*inferencev1alpha1.Playground, error) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error - Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.Playground, error) - List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.PlaygroundList, error) + Get(ctx context.Context, name string, opts v1.GetOptions) (*inferencev1alpha1.Playground, error) + List(ctx context.Context, opts v1.ListOptions) (*inferencev1alpha1.PlaygroundList, error) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Playground, err error) - Apply(ctx context.Context, playground *inferencev1alpha1.PlaygroundApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Playground, err error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *inferencev1alpha1.Playground, err error) + Apply(ctx context.Context, playground *applyconfigurationinferencev1alpha1.PlaygroundApplyConfiguration, opts v1.ApplyOptions) (result *inferencev1alpha1.Playground, err error) // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). 
- ApplyStatus(ctx context.Context, playground *inferencev1alpha1.PlaygroundApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Playground, err error) + ApplyStatus(ctx context.Context, playground *applyconfigurationinferencev1alpha1.PlaygroundApplyConfiguration, opts v1.ApplyOptions) (result *inferencev1alpha1.Playground, err error) PlaygroundExpansion } // playgrounds implements PlaygroundInterface type playgrounds struct { - *gentype.ClientWithListAndApply[*v1alpha1.Playground, *v1alpha1.PlaygroundList, *inferencev1alpha1.PlaygroundApplyConfiguration] + *gentype.ClientWithListAndApply[*inferencev1alpha1.Playground, *inferencev1alpha1.PlaygroundList, *applyconfigurationinferencev1alpha1.PlaygroundApplyConfiguration] } // newPlaygrounds returns a Playgrounds func newPlaygrounds(c *InferenceV1alpha1Client, namespace string) *playgrounds { return &playgrounds{ - gentype.NewClientWithListAndApply[*v1alpha1.Playground, *v1alpha1.PlaygroundList, *inferencev1alpha1.PlaygroundApplyConfiguration]( + gentype.NewClientWithListAndApply[*inferencev1alpha1.Playground, *inferencev1alpha1.PlaygroundList, *applyconfigurationinferencev1alpha1.PlaygroundApplyConfiguration]( "playgrounds", c.RESTClient(), scheme.ParameterCodec, namespace, - func() *v1alpha1.Playground { return &v1alpha1.Playground{} }, - func() *v1alpha1.PlaygroundList { return &v1alpha1.PlaygroundList{} }), + func() *inferencev1alpha1.Playground { return &inferencev1alpha1.Playground{} }, + func() *inferencev1alpha1.PlaygroundList { return &inferencev1alpha1.PlaygroundList{} }, + ), } } diff --git a/client-go/clientset/versioned/typed/inference/v1alpha1/service.go b/client-go/clientset/versioned/typed/inference/v1alpha1/service.go index b8c7ded8..ff80bc12 100644 --- a/client-go/clientset/versioned/typed/inference/v1alpha1/service.go +++ b/client-go/clientset/versioned/typed/inference/v1alpha1/service.go @@ -18,10 +18,10 @@ limitations under the License. package v1alpha1 import ( - "context" + context "context" - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" - inferencev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/inference/v1alpha1" + inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + applyconfigurationinferencev1alpha1 "github.com/inftyai/llmaz/client-go/applyconfiguration/inference/v1alpha1" scheme "github.com/inftyai/llmaz/client-go/clientset/versioned/scheme" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" @@ -37,36 +37,37 @@ type ServicesGetter interface { // ServiceInterface has methods to work with Service resources. type ServiceInterface interface { - Create(ctx context.Context, service *v1alpha1.Service, opts v1.CreateOptions) (*v1alpha1.Service, error) - Update(ctx context.Context, service *v1alpha1.Service, opts v1.UpdateOptions) (*v1alpha1.Service, error) + Create(ctx context.Context, service *inferencev1alpha1.Service, opts v1.CreateOptions) (*inferencev1alpha1.Service, error) + Update(ctx context.Context, service *inferencev1alpha1.Service, opts v1.UpdateOptions) (*inferencev1alpha1.Service, error) // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
- UpdateStatus(ctx context.Context, service *v1alpha1.Service, opts v1.UpdateOptions) (*v1alpha1.Service, error) + UpdateStatus(ctx context.Context, service *inferencev1alpha1.Service, opts v1.UpdateOptions) (*inferencev1alpha1.Service, error) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error - Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.Service, error) - List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.ServiceList, error) + Get(ctx context.Context, name string, opts v1.GetOptions) (*inferencev1alpha1.Service, error) + List(ctx context.Context, opts v1.ListOptions) (*inferencev1alpha1.ServiceList, error) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) - Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Service, err error) - Apply(ctx context.Context, service *inferencev1alpha1.ServiceApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Service, err error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *inferencev1alpha1.Service, err error) + Apply(ctx context.Context, service *applyconfigurationinferencev1alpha1.ServiceApplyConfiguration, opts v1.ApplyOptions) (result *inferencev1alpha1.Service, err error) // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). - ApplyStatus(ctx context.Context, service *inferencev1alpha1.ServiceApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.Service, err error) + ApplyStatus(ctx context.Context, service *applyconfigurationinferencev1alpha1.ServiceApplyConfiguration, opts v1.ApplyOptions) (result *inferencev1alpha1.Service, err error) ServiceExpansion } // services implements ServiceInterface type services struct { - *gentype.ClientWithListAndApply[*v1alpha1.Service, *v1alpha1.ServiceList, *inferencev1alpha1.ServiceApplyConfiguration] + *gentype.ClientWithListAndApply[*inferencev1alpha1.Service, *inferencev1alpha1.ServiceList, *applyconfigurationinferencev1alpha1.ServiceApplyConfiguration] } // newServices returns a Services func newServices(c *InferenceV1alpha1Client, namespace string) *services { return &services{ - gentype.NewClientWithListAndApply[*v1alpha1.Service, *v1alpha1.ServiceList, *inferencev1alpha1.ServiceApplyConfiguration]( + gentype.NewClientWithListAndApply[*inferencev1alpha1.Service, *inferencev1alpha1.ServiceList, *applyconfigurationinferencev1alpha1.ServiceApplyConfiguration]( "services", c.RESTClient(), scheme.ParameterCodec, namespace, - func() *v1alpha1.Service { return &v1alpha1.Service{} }, - func() *v1alpha1.ServiceList { return &v1alpha1.ServiceList{} }), + func() *inferencev1alpha1.Service { return &inferencev1alpha1.Service{} }, + func() *inferencev1alpha1.ServiceList { return &inferencev1alpha1.ServiceList{} }, + ), } } diff --git a/client-go/informers/externalversions/core/v1alpha1/openmodel.go b/client-go/informers/externalversions/core/v1alpha1/openmodel.go index 72a49c82..371e8b77 100644 --- a/client-go/informers/externalversions/core/v1alpha1/openmodel.go +++ b/client-go/informers/externalversions/core/v1alpha1/openmodel.go @@ -18,13 +18,13 @@ limitations under the License. 
package v1alpha1 import ( - "context" + context "context" time "time" - corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + apicorev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" versioned "github.com/inftyai/llmaz/client-go/clientset/versioned" internalinterfaces "github.com/inftyai/llmaz/client-go/informers/externalversions/internalinterfaces" - v1alpha1 "github.com/inftyai/llmaz/client-go/listers/core/v1alpha1" + corev1alpha1 "github.com/inftyai/llmaz/client-go/listers/core/v1alpha1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" watch "k8s.io/apimachinery/pkg/watch" @@ -35,7 +35,7 @@ import ( // OpenModels. type OpenModelInformer interface { Informer() cache.SharedIndexInformer - Lister() v1alpha1.OpenModelLister + Lister() corev1alpha1.OpenModelLister } type openModelInformer struct { @@ -70,7 +70,7 @@ func NewFilteredOpenModelInformer(client versioned.Interface, namespace string, return client.LlmazV1alpha1().OpenModels(namespace).Watch(context.TODO(), options) }, }, - &corev1alpha1.OpenModel{}, + &apicorev1alpha1.OpenModel{}, resyncPeriod, indexers, ) @@ -81,9 +81,9 @@ func (f *openModelInformer) defaultInformer(client versioned.Interface, resyncPe } func (f *openModelInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&corev1alpha1.OpenModel{}, f.defaultInformer) + return f.factory.InformerFor(&apicorev1alpha1.OpenModel{}, f.defaultInformer) } -func (f *openModelInformer) Lister() v1alpha1.OpenModelLister { - return v1alpha1.NewOpenModelLister(f.Informer().GetIndexer()) +func (f *openModelInformer) Lister() corev1alpha1.OpenModelLister { + return corev1alpha1.NewOpenModelLister(f.Informer().GetIndexer()) } diff --git a/client-go/informers/externalversions/generic.go b/client-go/informers/externalversions/generic.go index 0d8240c0..70f9161b 100644 --- a/client-go/informers/externalversions/generic.go +++ b/client-go/informers/externalversions/generic.go @@ -18,7 +18,7 @@ limitations under the License. package externalversions import ( - "fmt" + fmt "fmt" corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" diff --git a/client-go/informers/externalversions/inference/v1alpha1/playground.go b/client-go/informers/externalversions/inference/v1alpha1/playground.go index a67cc57b..43aaf588 100644 --- a/client-go/informers/externalversions/inference/v1alpha1/playground.go +++ b/client-go/informers/externalversions/inference/v1alpha1/playground.go @@ -18,13 +18,13 @@ limitations under the License. package v1alpha1 import ( - "context" + context "context" time "time" - inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + apiinferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" versioned "github.com/inftyai/llmaz/client-go/clientset/versioned" internalinterfaces "github.com/inftyai/llmaz/client-go/informers/externalversions/internalinterfaces" - v1alpha1 "github.com/inftyai/llmaz/client-go/listers/inference/v1alpha1" + inferencev1alpha1 "github.com/inftyai/llmaz/client-go/listers/inference/v1alpha1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" watch "k8s.io/apimachinery/pkg/watch" @@ -35,7 +35,7 @@ import ( // Playgrounds. 
type PlaygroundInformer interface { Informer() cache.SharedIndexInformer - Lister() v1alpha1.PlaygroundLister + Lister() inferencev1alpha1.PlaygroundLister } type playgroundInformer struct { @@ -70,7 +70,7 @@ func NewFilteredPlaygroundInformer(client versioned.Interface, namespace string, return client.InferenceV1alpha1().Playgrounds(namespace).Watch(context.TODO(), options) }, }, - &inferencev1alpha1.Playground{}, + &apiinferencev1alpha1.Playground{}, resyncPeriod, indexers, ) @@ -81,9 +81,9 @@ func (f *playgroundInformer) defaultInformer(client versioned.Interface, resyncP } func (f *playgroundInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&inferencev1alpha1.Playground{}, f.defaultInformer) + return f.factory.InformerFor(&apiinferencev1alpha1.Playground{}, f.defaultInformer) } -func (f *playgroundInformer) Lister() v1alpha1.PlaygroundLister { - return v1alpha1.NewPlaygroundLister(f.Informer().GetIndexer()) +func (f *playgroundInformer) Lister() inferencev1alpha1.PlaygroundLister { + return inferencev1alpha1.NewPlaygroundLister(f.Informer().GetIndexer()) } diff --git a/client-go/informers/externalversions/inference/v1alpha1/service.go b/client-go/informers/externalversions/inference/v1alpha1/service.go index ce514cef..8665bca7 100644 --- a/client-go/informers/externalversions/inference/v1alpha1/service.go +++ b/client-go/informers/externalversions/inference/v1alpha1/service.go @@ -18,13 +18,13 @@ limitations under the License. package v1alpha1 import ( - "context" + context "context" time "time" - inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + apiinferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" versioned "github.com/inftyai/llmaz/client-go/clientset/versioned" internalinterfaces "github.com/inftyai/llmaz/client-go/informers/externalversions/internalinterfaces" - v1alpha1 "github.com/inftyai/llmaz/client-go/listers/inference/v1alpha1" + inferencev1alpha1 "github.com/inftyai/llmaz/client-go/listers/inference/v1alpha1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" watch "k8s.io/apimachinery/pkg/watch" @@ -35,7 +35,7 @@ import ( // Services. 
type ServiceInformer interface { Informer() cache.SharedIndexInformer - Lister() v1alpha1.ServiceLister + Lister() inferencev1alpha1.ServiceLister } type serviceInformer struct { @@ -70,7 +70,7 @@ func NewFilteredServiceInformer(client versioned.Interface, namespace string, re return client.InferenceV1alpha1().Services(namespace).Watch(context.TODO(), options) }, }, - &inferencev1alpha1.Service{}, + &apiinferencev1alpha1.Service{}, resyncPeriod, indexers, ) @@ -81,9 +81,9 @@ func (f *serviceInformer) defaultInformer(client versioned.Interface, resyncPeri } func (f *serviceInformer) Informer() cache.SharedIndexInformer { - return f.factory.InformerFor(&inferencev1alpha1.Service{}, f.defaultInformer) + return f.factory.InformerFor(&apiinferencev1alpha1.Service{}, f.defaultInformer) } -func (f *serviceInformer) Lister() v1alpha1.ServiceLister { - return v1alpha1.NewServiceLister(f.Informer().GetIndexer()) +func (f *serviceInformer) Lister() inferencev1alpha1.ServiceLister { + return inferencev1alpha1.NewServiceLister(f.Informer().GetIndexer()) } diff --git a/client-go/listers/core/v1alpha1/openmodel.go b/client-go/listers/core/v1alpha1/openmodel.go index b1521a14..fb746dbf 100644 --- a/client-go/listers/core/v1alpha1/openmodel.go +++ b/client-go/listers/core/v1alpha1/openmodel.go @@ -18,10 +18,10 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" + corev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" ) // OpenModelLister helps list OpenModels. @@ -29,7 +29,7 @@ import ( type OpenModelLister interface { // List lists all OpenModels in the indexer. // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1alpha1.OpenModel, err error) + List(selector labels.Selector) (ret []*corev1alpha1.OpenModel, err error) // OpenModels returns an object that can list and get OpenModels. OpenModels(namespace string) OpenModelNamespaceLister OpenModelListerExpansion @@ -37,17 +37,17 @@ type OpenModelLister interface { // openModelLister implements the OpenModelLister interface. type openModelLister struct { - listers.ResourceIndexer[*v1alpha1.OpenModel] + listers.ResourceIndexer[*corev1alpha1.OpenModel] } // NewOpenModelLister returns a new OpenModelLister. func NewOpenModelLister(indexer cache.Indexer) OpenModelLister { - return &openModelLister{listers.New[*v1alpha1.OpenModel](indexer, v1alpha1.Resource("openmodel"))} + return &openModelLister{listers.New[*corev1alpha1.OpenModel](indexer, corev1alpha1.Resource("openmodel"))} } // OpenModels returns an object that can list and get OpenModels. func (s *openModelLister) OpenModels(namespace string) OpenModelNamespaceLister { - return openModelNamespaceLister{listers.NewNamespaced[*v1alpha1.OpenModel](s.ResourceIndexer, namespace)} + return openModelNamespaceLister{listers.NewNamespaced[*corev1alpha1.OpenModel](s.ResourceIndexer, namespace)} } // OpenModelNamespaceLister helps list and get OpenModels. @@ -55,15 +55,15 @@ func (s *openModelLister) OpenModels(namespace string) OpenModelNamespaceLister type OpenModelNamespaceLister interface { // List lists all OpenModels in the indexer for a given namespace. // Objects returned here must be treated as read-only. 
- List(selector labels.Selector) (ret []*v1alpha1.OpenModel, err error) + List(selector labels.Selector) (ret []*corev1alpha1.OpenModel, err error) // Get retrieves the OpenModel from the indexer for a given namespace and name. // Objects returned here must be treated as read-only. - Get(name string) (*v1alpha1.OpenModel, error) + Get(name string) (*corev1alpha1.OpenModel, error) OpenModelNamespaceListerExpansion } // openModelNamespaceLister implements the OpenModelNamespaceLister // interface. type openModelNamespaceLister struct { - listers.ResourceIndexer[*v1alpha1.OpenModel] + listers.ResourceIndexer[*corev1alpha1.OpenModel] } diff --git a/client-go/listers/inference/v1alpha1/playground.go b/client-go/listers/inference/v1alpha1/playground.go index 6dd5d139..94ad068c 100644 --- a/client-go/listers/inference/v1alpha1/playground.go +++ b/client-go/listers/inference/v1alpha1/playground.go @@ -18,10 +18,10 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" + inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" ) // PlaygroundLister helps list Playgrounds. @@ -29,7 +29,7 @@ import ( type PlaygroundLister interface { // List lists all Playgrounds in the indexer. // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1alpha1.Playground, err error) + List(selector labels.Selector) (ret []*inferencev1alpha1.Playground, err error) // Playgrounds returns an object that can list and get Playgrounds. Playgrounds(namespace string) PlaygroundNamespaceLister PlaygroundListerExpansion @@ -37,17 +37,17 @@ type PlaygroundLister interface { // playgroundLister implements the PlaygroundLister interface. type playgroundLister struct { - listers.ResourceIndexer[*v1alpha1.Playground] + listers.ResourceIndexer[*inferencev1alpha1.Playground] } // NewPlaygroundLister returns a new PlaygroundLister. func NewPlaygroundLister(indexer cache.Indexer) PlaygroundLister { - return &playgroundLister{listers.New[*v1alpha1.Playground](indexer, v1alpha1.Resource("playground"))} + return &playgroundLister{listers.New[*inferencev1alpha1.Playground](indexer, inferencev1alpha1.Resource("playground"))} } // Playgrounds returns an object that can list and get Playgrounds. func (s *playgroundLister) Playgrounds(namespace string) PlaygroundNamespaceLister { - return playgroundNamespaceLister{listers.NewNamespaced[*v1alpha1.Playground](s.ResourceIndexer, namespace)} + return playgroundNamespaceLister{listers.NewNamespaced[*inferencev1alpha1.Playground](s.ResourceIndexer, namespace)} } // PlaygroundNamespaceLister helps list and get Playgrounds. @@ -55,15 +55,15 @@ func (s *playgroundLister) Playgrounds(namespace string) PlaygroundNamespaceList type PlaygroundNamespaceLister interface { // List lists all Playgrounds in the indexer for a given namespace. // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1alpha1.Playground, err error) + List(selector labels.Selector) (ret []*inferencev1alpha1.Playground, err error) // Get retrieves the Playground from the indexer for a given namespace and name. // Objects returned here must be treated as read-only. 
- Get(name string) (*v1alpha1.Playground, error) + Get(name string) (*inferencev1alpha1.Playground, error) PlaygroundNamespaceListerExpansion } // playgroundNamespaceLister implements the PlaygroundNamespaceLister // interface. type playgroundNamespaceLister struct { - listers.ResourceIndexer[*v1alpha1.Playground] + listers.ResourceIndexer[*inferencev1alpha1.Playground] } diff --git a/client-go/listers/inference/v1alpha1/service.go b/client-go/listers/inference/v1alpha1/service.go index fa3f09ae..cb72cc56 100644 --- a/client-go/listers/inference/v1alpha1/service.go +++ b/client-go/listers/inference/v1alpha1/service.go @@ -18,10 +18,10 @@ limitations under the License. package v1alpha1 import ( - v1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/listers" - "k8s.io/client-go/tools/cache" + inferencev1alpha1 "github.com/inftyai/llmaz/api/inference/v1alpha1" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" ) // ServiceLister helps list Services. @@ -29,7 +29,7 @@ import ( type ServiceLister interface { // List lists all Services in the indexer. // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1alpha1.Service, err error) + List(selector labels.Selector) (ret []*inferencev1alpha1.Service, err error) // Services returns an object that can list and get Services. Services(namespace string) ServiceNamespaceLister ServiceListerExpansion @@ -37,17 +37,17 @@ type ServiceLister interface { // serviceLister implements the ServiceLister interface. type serviceLister struct { - listers.ResourceIndexer[*v1alpha1.Service] + listers.ResourceIndexer[*inferencev1alpha1.Service] } // NewServiceLister returns a new ServiceLister. func NewServiceLister(indexer cache.Indexer) ServiceLister { - return &serviceLister{listers.New[*v1alpha1.Service](indexer, v1alpha1.Resource("service"))} + return &serviceLister{listers.New[*inferencev1alpha1.Service](indexer, inferencev1alpha1.Resource("service"))} } // Services returns an object that can list and get Services. func (s *serviceLister) Services(namespace string) ServiceNamespaceLister { - return serviceNamespaceLister{listers.NewNamespaced[*v1alpha1.Service](s.ResourceIndexer, namespace)} + return serviceNamespaceLister{listers.NewNamespaced[*inferencev1alpha1.Service](s.ResourceIndexer, namespace)} } // ServiceNamespaceLister helps list and get Services. @@ -55,15 +55,15 @@ func (s *serviceLister) Services(namespace string) ServiceNamespaceLister { type ServiceNamespaceLister interface { // List lists all Services in the indexer for a given namespace. // Objects returned here must be treated as read-only. - List(selector labels.Selector) (ret []*v1alpha1.Service, err error) + List(selector labels.Selector) (ret []*inferencev1alpha1.Service, err error) // Get retrieves the Service from the indexer for a given namespace and name. // Objects returned here must be treated as read-only. - Get(name string) (*v1alpha1.Service, error) + Get(name string) (*inferencev1alpha1.Service, error) ServiceNamespaceListerExpansion } // serviceNamespaceLister implements the ServiceNamespaceLister // interface. 
type serviceNamespaceLister struct { - listers.ResourceIndexer[*v1alpha1.Service] + listers.ResourceIndexer[*inferencev1alpha1.Service] } diff --git a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml index 5be31aa7..80d79689 100644 --- a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml +++ b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml @@ -59,6 +59,7 @@ spec: type: string type: array name: + default: default description: Name represents the identifier of the backendRuntime argument. type: string diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml index ba52db66..d6a16ce9 100644 --- a/config/crd/bases/inference.llmaz.io_playgrounds.yaml +++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml @@ -46,21 +46,26 @@ spec: BackendRuntimeConfig represents the inference backendRuntime configuration under the hood, e.g. vLLM, which is the default backendRuntime. properties: - argFlags: + args: description: |- - ArgFlags represents the argument flags appended to the backend. - You can add new flags or overwrite the default flags. - items: - type: string - type: array - argName: - description: |- - ArgName represents the argument name set in the backendRuntimeArg. - If not set, will be derived by the model role, e.g. if one model's role - is , the argName will be set to . Better to - set the argName explicitly. - By default, the argName will be treated as in runtime. - type: string + Args represents the specified arguments of the backendRuntime, + will be append to the backendRuntime.spec.Args. + properties: + flags: + description: |- + Flags represents all the preset configurations. + Flag around with {{ .CONFIG }} is a configuration waiting for render. + items: + type: string + type: array + name: + default: default + description: Name represents the identifier of the backendRuntime + argument. + type: string + required: + - name + type: object envs: description: Envs represents the environments set to the container. items: diff --git a/config/crd/bases/llmaz.io_openmodels.yaml b/config/crd/bases/llmaz.io_openmodels.yaml index 28cda1e4..6dac63c9 100644 --- a/config/crd/bases/llmaz.io_openmodels.yaml +++ b/config/crd/bases/llmaz.io_openmodels.yaml @@ -46,61 +46,66 @@ spec: FamilyName represents the model type, like llama2, which will be auto injected to the labels with the key of `llmaz.io/model-family-name`. type: string - inferenceFlavors: - description: |- - InferenceFlavors represents the accelerator requirements to serve the model. - Flavors are fungible following the priority represented by the slice order. - items: - description: |- - Flavor defines the accelerator requirements for a model and the necessary parameters - in autoscaling. Right now, it will be used in two places: - - Pod scheduling with node selectors specified. - - Cluster autoscaling with essential parameters provided. - properties: - name: - description: Name represents the flavor name, which will be - used in model claim. - type: string - nodeSelector: - additionalProperties: - type: string - description: |- - NodeSelector represents the node candidates for Pod placements, if a node doesn't - meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin. - If nodeSelector is empty, it means every node is a candidate. 
- type: object - params: - additionalProperties: - type: string - description: |- - Params stores other useful parameters and will be consumed by cluster-autoscaler / Karpenter - for autoscaling or be defined as model parallelism parameters like TP or PP size. - E.g. with autoscaling, when scaling up nodes with 8x Nvidia A00, the parameter can be injected - with for AWS. - Preset parameters: TP, PP, INSTANCE-TYPE. - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true + inferenceConfig: + description: InferenceConfig represents the inference configurations + for the model. + properties: + flavors: + description: |- + Flavors represents the accelerator requirements to serve the model. + Flavors are fungible following the priority represented by the slice order. + items: description: |- - Requests defines the required accelerators to serve the model for each replica, - like . For multi-hosts cases, the requests here indicates - the resource requirements for each replica, usually equals to the TP size. - Not recommended to set the cpu and memory usage here: - - if using playground, you can define the cpu/mem usage at backendConfig. - - if using inference service, you can define the cpu/mem at the container resources. - However, if you define the same accelerator requests at playground/service as well, - the requests will be overwritten by the flavor requests. + Flavor defines the accelerator requirements for a model and the necessary parameters + in autoscaling. Right now, it will be used in two places: + - Pod scheduling with node selectors specified. + - Cluster autoscaling with essential parameters provided. + properties: + name: + description: Name represents the flavor name, which will + be used in model claim. + type: string + nodeSelector: + additionalProperties: + type: string + description: |- + NodeSelector represents the node candidates for Pod placements, if a node doesn't + meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin. + If nodeSelector is empty, it means every node is a candidate. + type: object + params: + additionalProperties: + type: string + description: |- + Params stores other useful parameters and will be consumed by cluster-autoscaler / Karpenter + for autoscaling or be defined as model parallelism parameters like TP or PP size. + E.g. with autoscaling, when scaling up nodes with 8x Nvidia A00, the parameter can be injected + with for AWS. + Preset parameters: TP, PP, INSTANCE-TYPE. + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests defines the required accelerators to serve the model for each replica, + like . For multi-hosts cases, the requests here indicates + the resource requirements for each replica, usually equals to the TP size. + Not recommended to set the cpu and memory usage here: + - if using playground, you can define the cpu/mem usage at backendConfig. + - if using inference service, you can define the cpu/mem at the container resources. + However, if you define the same accelerator requests at playground/service as well, + the requests will be overwritten by the flavor requests. 
+ type: object + required: + - name type: object - required: - - name - type: object - maxItems: 8 - type: array + maxItems: 8 + type: array + type: object source: description: |- Source represents the source of the model, there're several ways to load diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index aab3a173..58f2e438 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,8 +1,8 @@ resources: -- manager.yaml + - manager.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: -- name: controller - newName: inftyai/test - newTag: llmaz-011701 + - name: controller + newName: inftyai/llmaz + newTag: v0.0.9 diff --git a/docs/examples/huggingface/model.yaml b/docs/examples/huggingface/model.yaml index 2dd40692..2d7749f7 100644 --- a/docs/examples/huggingface/model.yaml +++ b/docs/examples/huggingface/model.yaml @@ -7,7 +7,8 @@ spec: source: modelHub: modelID: facebook/opt-125m - inferenceFlavors: - - name: t4 # GPU type - requests: - nvidia.com/gpu: 1 + inferenceConfig: + flavors: + - name: t4 # GPU type + requests: + nvidia.com/gpu: 1 diff --git a/docs/examples/llamacpp/playground.yaml b/docs/examples/llamacpp/playground.yaml index c2b94901..de621667 100644 --- a/docs/examples/llamacpp/playground.yaml +++ b/docs/examples/llamacpp/playground.yaml @@ -8,5 +8,7 @@ spec: modelName: qwen2-0--5b-gguf backendRuntimeConfig: name: llamacpp - argFlags: - - -fa # use flash attention + args: + name: "default" + flags: + - -fa # use flash attention diff --git a/docs/examples/modelscope/model.yaml b/docs/examples/modelscope/model.yaml index 86593fba..288b1dd8 100644 --- a/docs/examples/modelscope/model.yaml +++ b/docs/examples/modelscope/model.yaml @@ -8,7 +8,8 @@ spec: modelHub: name: ModelScope modelID: AI-ModelScope/opt-125 - inferenceFlavors: - - name: t4 # GPU type - requests: - nvidia.com/gpu: 1 + inferenceConfig: + flavors: + - name: t4 # GPU type + requests: + nvidia.com/gpu: 1 diff --git a/docs/examples/multi-nodes/model.yaml b/docs/examples/multi-nodes/model.yaml index 513939fb..f3f072ff 100644 --- a/docs/examples/multi-nodes/model.yaml +++ b/docs/examples/multi-nodes/model.yaml @@ -6,15 +6,15 @@ spec: familyName: llama3 source: modelHub: - # TODO: modelID: meta-llama/Llama-3.1-405B - inferenceFlavors: - - name: a100-80gb - requests: - nvidia.com/gpu: 1 # single node request - params: - TP: "8" - PP: "2" + inferenceConfig: + flavors: + - name: a100-80gb + requests: + nvidia.com/gpu: 1 # single node request + params: + TP: "8" + PP: "2" # - name: h100 # requests: # nvidia.com/gpu: 8 # single node request diff --git a/docs/examples/objstore-oss/model.yaml b/docs/examples/objstore-oss/model.yaml index bf120d4a..1422cee9 100644 --- a/docs/examples/objstore-oss/model.yaml +++ b/docs/examples/objstore-oss/model.yaml @@ -8,7 +8,8 @@ spec: # You should replace this with your own oss address following the protocol: # oss://./ uri: oss://llmaz.oss-ap-southeast-1-internal.aliyuncs.com/models/Qwen2-7B - inferenceFlavors: - - name: t4 # GPU type - requests: - nvidia.com/gpu: 1 + inferenceConfig: + flavors: + - name: t4 # GPU type + requests: + nvidia.com/gpu: 1 diff --git a/docs/examples/sglang/model.yaml b/docs/examples/sglang/model.yaml index fe0ef7c1..8da61042 100644 --- a/docs/examples/sglang/model.yaml +++ b/docs/examples/sglang/model.yaml @@ -7,7 +7,8 @@ spec: source: modelHub: modelID: Qwen/Qwen2-0.5B-Instruct - inferenceFlavors: - - name: t4 # GPU type - requests: - nvidia.com/gpu: 1 + inferenceConfig: + 
flavors: + - name: t4 # GPU type + requests: + nvidia.com/gpu: 1 diff --git a/docs/examples/speculative-decoding/llamacpp/playground.yaml b/docs/examples/speculative-decoding/llamacpp/playground.yaml index 0c119b92..4d797263 100644 --- a/docs/examples/speculative-decoding/llamacpp/playground.yaml +++ b/docs/examples/speculative-decoding/llamacpp/playground.yaml @@ -9,14 +9,16 @@ spec: replicas: 1 modelClaims: models: - - name: llama2-7b-q8-gguf # the target model - role: main - - name: llama2-7b-q2-k-gguf # the draft model - role: draft + - name: llama2-7b-q8-gguf # the target model + role: main + - name: llama2-7b-q2-k-gguf # the draft model + role: draft backendRuntimeConfig: name: llamacpp - argFlags: - - -fa # use flash attention + args: + name: "speculative-decoding" + flags: + - -fa # use flash attention resources: requests: cpu: 4 diff --git a/docs/examples/speculative-decoding/vllm/model.yaml b/docs/examples/speculative-decoding/vllm/model.yaml index 35b1e757..7468fea7 100644 --- a/docs/examples/speculative-decoding/vllm/model.yaml +++ b/docs/examples/speculative-decoding/vllm/model.yaml @@ -7,10 +7,11 @@ spec: source: modelHub: modelID: facebook/opt-6.7b - inferenceFlavors: - - name: a10 # gpu type - requests: - nvidia.com/gpu: 1 + inferenceConfig: + flavors: + - name: a10 # gpu type + requests: + nvidia.com/gpu: 1 --- apiVersion: llmaz.io/v1alpha1 kind: OpenModel diff --git a/docs/examples/tgi/model.yaml b/docs/examples/tgi/model.yaml index fe0ef7c1..8da61042 100644 --- a/docs/examples/tgi/model.yaml +++ b/docs/examples/tgi/model.yaml @@ -7,7 +7,8 @@ spec: source: modelHub: modelID: Qwen/Qwen2-0.5B-Instruct - inferenceFlavors: - - name: t4 # GPU type - requests: - nvidia.com/gpu: 1 + inferenceConfig: + flavors: + - name: t4 # GPU type + requests: + nvidia.com/gpu: 1 diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go index c6174747..10df367b 100644 --- a/pkg/controller/inference/playground_controller.go +++ b/pkg/controller/inference/playground_controller.go @@ -275,8 +275,10 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro envs := parser.Envs() if playground.Spec.BackendRuntimeConfig != nil { - args = append(args, playground.Spec.BackendRuntimeConfig.ArgFlags...) envs = append(envs, playground.Spec.BackendRuntimeConfig.Envs...) + if playground.Spec.BackendRuntimeConfig.Args != nil { + args = append(args, playground.Spec.BackendRuntimeConfig.Args.Flags...) + } } resources := corev1.ResourceRequirements{ diff --git a/pkg/controller/inference/service_controller.go b/pkg/controller/inference/service_controller.go index e7439425..8d7308ff 100644 --- a/pkg/controller/inference/service_controller.go +++ b/pkg/controller/inference/service_controller.go @@ -175,7 +175,7 @@ func injectModelProperties(template *applyconfigurationv1.LeaderWorkerTemplateAp } func injectModelFlavor(template *corev1.PodTemplateSpec, model *coreapi.OpenModel, service *inferenceapi.Service) { - if len(model.Spec.InferenceFlavors) == 0 { + if model.Spec.InferenceConfig == nil || len(model.Spec.InferenceConfig.Flavors) == 0 { return } @@ -186,15 +186,15 @@ func injectModelFlavor(template *corev1.PodTemplateSpec, model *coreapi.OpenMode } } - flavorName := model.Spec.InferenceFlavors[0].Name + flavorName := model.Spec.InferenceConfig.Flavors[0].Name if len(service.Spec.ModelClaims.InferenceFlavors) > 0 { // We only support the same resource request right now, so 0-index flavor is enough. 
flavorName = service.Spec.ModelClaims.InferenceFlavors[0] } - for i, flavor := range model.Spec.InferenceFlavors { + for i, flavor := range model.Spec.InferenceConfig.Flavors { if flavor.Name == flavorName { - requests := model.Spec.InferenceFlavors[i].Requests + requests := model.Spec.InferenceConfig.Flavors[i].Requests for k, v := range requests { if container.Resources.Requests == nil { container.Resources.Requests = map[corev1.ResourceName]resource.Quantity{} diff --git a/pkg/controller_helper/backendruntime.go b/pkg/controller_helper/backendruntime.go index 64434c59..d1ba2f1b 100644 --- a/pkg/controller_helper/backendruntime.go +++ b/pkg/controller_helper/backendruntime.go @@ -61,8 +61,8 @@ func (p *BackendRuntimeParser) Envs() []corev1.EnvVar { func (p *BackendRuntimeParser) Args(playground *inferenceapi.Playground, models []*coreapi.OpenModel, multiNodes bool) ([]string, error) { var argName string - if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.ArgName != nil { - argName = *playground.Spec.BackendRuntimeConfig.ArgName + if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Args != nil { + argName = playground.Spec.BackendRuntimeConfig.Args.Name } else { // Auto detect the args from model roles. argName = DetectArgFrom(playground, multiNodes) diff --git a/pkg/controller_helper/helper.go b/pkg/controller_helper/helper.go index b823f9ba..8f689643 100644 --- a/pkg/controller_helper/helper.go +++ b/pkg/controller_helper/helper.go @@ -100,16 +100,15 @@ func FirstAssignedFlavor(model *coreapi.OpenModel, playground *inferenceapi.Play flavors = playground.Spec.ModelClaims.InferenceFlavors } - // This should not happen. - if len(flavors) == 0 && len(model.Spec.InferenceFlavors) == 0 { + if len(flavors) == 0 && (model.Spec.InferenceConfig == nil || len(model.Spec.InferenceConfig.Flavors) == 0) { return nil } if len(flavors) == 0 { - return []coreapi.Flavor{model.Spec.InferenceFlavors[0]} + return []coreapi.Flavor{model.Spec.InferenceConfig.Flavors[0]} } - for _, flavor := range model.Spec.InferenceFlavors { + for _, flavor := range model.Spec.InferenceConfig.Flavors { if flavor.Name == flavors[0] { return []coreapi.Flavor{flavor} } diff --git a/test/integration/controller/inference/playground_test.go b/test/integration/controller/inference/playground_test.go index 7c588ef5..5b3400df 100644 --- a/test/integration/controller/inference/playground_test.go +++ b/test/integration/controller/inference/playground_test.go @@ -194,7 +194,7 @@ var _ = ginkgo.Describe("playground controller test", func() { ginkgo.Entry("advance configured Playground with sglang", &testValidatingCase{ makePlayground: func() *inferenceapi.Playground { return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name). - BackendRuntime("sglang").BackendRuntimeVersion("main").BackendRuntimeArgFlags([]string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR"). + BackendRuntime("sglang").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR"). BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10"). 
Obj() }, @@ -222,7 +222,7 @@ var _ = ginkgo.Describe("playground controller test", func() { ginkgo.Entry("advance configured Playground with llamacpp", &testValidatingCase{ makePlayground: func() *inferenceapi.Playground { return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name). - BackendRuntime("llamacpp").BackendRuntimeVersion("main").BackendRuntimeArgFlags([]string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR"). + BackendRuntime("llamacpp").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR"). BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10"). Obj() }, @@ -250,7 +250,7 @@ var _ = ginkgo.Describe("playground controller test", func() { ginkgo.Entry("advance configured Playground with tgi", &testValidatingCase{ makePlayground: func() *inferenceapi.Playground { return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name). - BackendRuntime("tgi").BackendRuntimeVersion("main").BackendRuntimeArgFlags([]string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR"). + BackendRuntime("tgi").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR"). BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10"). Obj() }, @@ -278,7 +278,7 @@ var _ = ginkgo.Describe("playground controller test", func() { ginkgo.Entry("advance configured Playground with ollama", &testValidatingCase{ makePlayground: func() *inferenceapi.Playground { return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name). - BackendRuntime("ollama").BackendRuntimeVersion("main").BackendRuntimeArgFlags([]string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR"). + BackendRuntime("ollama").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR"). BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10"). Obj() }, @@ -306,7 +306,7 @@ var _ = ginkgo.Describe("playground controller test", func() { ginkgo.Entry("advance configured Playground with argName set", &testValidatingCase{ makePlayground: func() *inferenceapi.Playground { return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name). - BackendRuntime("fake-backend").BackendRuntimeVersion("main").BackendRuntimeArgName("fuz").BackendRuntimeArgFlags([]string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR"). + BackendRuntime("fake-backend").BackendRuntimeVersion("main").BackendRuntimeArgs("fuz", []string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR"). BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10"). Obj() }, diff --git a/test/util/validation/validate_playground.go b/test/util/validation/validate_playground.go index 0ef25a89..e3c3f562 100644 --- a/test/util/validation/validate_playground.go +++ b/test/util/validation/validate_playground.go @@ -232,8 +232,8 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground if err != nil { return err } - if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.ArgFlags != nil { - args = append(args, playground.Spec.BackendRuntimeConfig.ArgFlags...) 
+ if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Args != nil { + args = append(args, playground.Spec.BackendRuntimeConfig.Args.Flags...) } for _, arg := range args { diff --git a/test/util/validation/validate_service.go b/test/util/validation/validate_service.go index 6dc787ab..18dd7157 100644 --- a/test/util/validation/validate_service.go +++ b/test/util/validation/validate_service.go @@ -79,7 +79,7 @@ func ValidateService(ctx context.Context, k8sClient client.Client, service *infe } // Validate injecting flavors. - if len(mainModel.Spec.InferenceFlavors) != 0 { + if mainModel.Spec.InferenceConfig != nil && len(mainModel.Spec.InferenceConfig.Flavors) != 0 { if err := ValidateModelFlavor(service, mainModel, &workload); err != nil { return err } @@ -163,12 +163,12 @@ func ValidateModelLoader(model *coreapi.OpenModel, index int, template corev1.Po } func ValidateModelFlavor(service *inferenceapi.Service, model *coreapi.OpenModel, workload *lws.LeaderWorkerSet) error { - flavorName := model.Spec.InferenceFlavors[0].Name + flavorName := model.Spec.InferenceConfig.Flavors[0].Name if len(service.Spec.ModelClaims.InferenceFlavors) > 0 { flavorName = service.Spec.ModelClaims.InferenceFlavors[0] } - for _, flavor := range model.Spec.InferenceFlavors { + for _, flavor := range model.Spec.InferenceConfig.Flavors { if flavor.Name == flavorName { requests := flavor.Requests container := workload.Spec.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0] diff --git a/test/util/wrapper/model.go b/test/util/wrapper/model.go index b8835d79..481c6859 100644 --- a/test/util/wrapper/model.go +++ b/test/util/wrapper/model.go @@ -92,7 +92,10 @@ func (w *ModelWrapper) ModelSourceWithURI(uri string) *ModelWrapper { } func (w *ModelWrapper) InferenceFlavors(flavors ...coreapi.Flavor) *ModelWrapper { - w.Spec.InferenceFlavors = flavors + if w.Spec.InferenceConfig == nil { + w.Spec.InferenceConfig = &coreapi.InferenceConfig{} + } + w.Spec.InferenceConfig.Flavors = flavors return w } diff --git a/test/util/wrapper/playground.go b/test/util/wrapper/playground.go index fc4f2627..816897ca 100644 --- a/test/util/wrapper/playground.go +++ b/test/util/wrapper/playground.go @@ -109,19 +109,15 @@ func (w *PlaygroundWrapper) BackendRuntimeVersion(version string) *PlaygroundWra return w } -func (w *PlaygroundWrapper) BackendRuntimeArgName(name string) *PlaygroundWrapper { +func (w *PlaygroundWrapper) BackendRuntimeArgs(name string, args []string) *PlaygroundWrapper { if w.Spec.BackendRuntimeConfig == nil { w = w.BackendRuntime("vllm") } - w.Spec.BackendRuntimeConfig.ArgName = &name - return w -} - -func (w *PlaygroundWrapper) BackendRuntimeArgFlags(args []string) *PlaygroundWrapper { - if w.Spec.BackendRuntimeConfig == nil { - w = w.BackendRuntime("vllm") + if w.Spec.BackendRuntimeConfig.Args == nil { + w.Spec.BackendRuntimeConfig.Args = &inferenceapi.BackendRuntimeArg{} } - w.Spec.BackendRuntimeConfig.ArgFlags = args + w.Spec.BackendRuntimeConfig.Args.Name = name + w.Spec.BackendRuntimeConfig.Args.Flags = args return w }
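
For reference, a minimal sketch (not part of the patch above) of how caller code might consume the relocated fields once this change lands. The package name and helper names are illustrative only, and the field types are assumed from the generated deepcopy and CRD updates shown earlier; it is not a definitive part of the llmaz API surface.

package example

import (
	coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
)

// preferredFlavorName returns the highest-priority flavor declared on the model.
// Flavors are fungible in slice order, so index 0 wins; "" means the model
// declares no inferenceConfig at all (the field is now an optional pointer).
func preferredFlavorName(model *coreapi.OpenModel) string {
	if model.Spec.InferenceConfig == nil || len(model.Spec.InferenceConfig.Flavors) == 0 {
		return ""
	}
	return string(model.Spec.InferenceConfig.Flavors[0].Name)
}

// extraBackendFlags collects the user-supplied flags from the playground's
// backendRuntimeConfig.args block; both pointers must be guarded since the
// nested args struct replaces the old flat argName/argFlags pair.
func extraBackendFlags(playground *inferenceapi.Playground) []string {
	cfg := playground.Spec.BackendRuntimeConfig
	if cfg == nil || cfg.Args == nil {
		return nil
	}
	return cfg.Args.Flags
}

The same nil-guard pattern appears in playground_controller.go, service_controller.go, and helper.go above; the hypothetical helpers here only factor it out to show what downstream consumers of the new optional inferenceConfig and args fields need to check.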