Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ spec:

#### Expose the service

By default, llmaz creates a ClusterIP service named `<service>-lb` for load balancing.

```cmd
kubectl port-forward pod/opt-125m-0 8080:8080
kubectl port-forward svc/opt-125m-lb 8080:8080
```

#### Get registered models
Expand Down
66 changes: 30 additions & 36 deletions chart/templates/lws/leaderworkerset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16139,6 +16139,9 @@ spec:
description: |-
SubdomainPolicy determines the policy that will be used when creating
the headless service, defaults to shared
enum:
- Shared
- UniquePerReplica
type: string
required:
- subdomainPolicy
Expand Down Expand Up @@ -16410,6 +16413,12 @@ rules:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- pods/finalizers
verbs:
- update
- apiGroups:
- ""
resources:
Expand All @@ -16432,6 +16441,7 @@ rules:
- apiGroups:
- apps
resources:
- controllerrevisions
- statefulsets
verbs:
- create
Expand All @@ -16444,12 +16454,14 @@ rules:
- apiGroups:
- apps
resources:
- controllerrevisions/finalizers
- statefulsets/finalizers
verbs:
- update
- apiGroups:
- apps
resources:
- controllerrevisions/status
- statefulsets/status
verbs:
- get
Expand Down Expand Up @@ -16486,7 +16498,6 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: kube-rbac-proxy
app.kubernetes.io/created-by: lws
app.kubernetes.io/instance: metrics-reader
app.kubernetes.io/managed-by: kustomize
Expand All @@ -16503,7 +16514,6 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: kube-rbac-proxy
app.kubernetes.io/created-by: lws
app.kubernetes.io/instance: proxy-role
app.kubernetes.io/managed-by: kustomize
Expand Down Expand Up @@ -16567,9 +16577,21 @@ subjects:
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: lws-metrics-reader-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: lws-metrics-reader
subjects:
- kind: ServiceAccount
name: lws-controller-manager
namespace: lws-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: kube-rbac-proxy
app.kubernetes.io/created-by: lws
app.kubernetes.io/instance: proxy-rolebinding
app.kubernetes.io/managed-by: kustomize
Expand All @@ -16595,12 +16617,8 @@ apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: kube-rbac-proxy
app.kubernetes.io/created-by: lws
app.kubernetes.io/instance: controller-manager-metrics-service
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/name: service
app.kubernetes.io/part-of: lws
app.kubernetes.io/name: lws
control-plane: controller-manager
name: lws-controller-manager-metrics-service
namespace: lws-system
Expand All @@ -16609,7 +16627,7 @@ spec:
- name: https
port: 8443
protocol: TCP
targetPort: https
targetPort: 8443
selector:
control-plane: controller-manager
---
Expand Down Expand Up @@ -16647,7 +16665,7 @@ metadata:
name: lws-controller-manager
namespace: lws-system
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
control-plane: controller-manager
Expand All @@ -16660,12 +16678,10 @@ spec:
spec:
containers:
- args:
- --health-probe-bind-address=:8081
- --metrics-bind-address=127.0.0.1:8080
- --leader-elect
- --zap-log-level=2
command:
- /manager
image: registry.k8s.io/lws/lws:v0.4.2
image: registry.k8s.io/lws/lws:v0.5.0
livenessProbe:
httpGet:
path: /healthz
Expand Down Expand Up @@ -16696,28 +16712,6 @@ spec:
- mountPath: /tmp/k8s-webhook-server/serving-certs
name: cert
readOnly: true
- args:
- --secure-listen-address=0.0.0.0:8443
- --upstream=http://127.0.0.1:8080/
- --logtostderr=true
- --v=0
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0
name: kube-rbac-proxy
ports:
- containerPort: 8443
name: https
protocol: TCP
resources:
limits:
memory: 1Gi
requests:
cpu: 5m
memory: 64Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
securityContext:
runAsNonRoot: true
serviceAccountName: lws-controller-manager
Expand Down
12 changes: 12 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ rules:
- list
- update
- watch
- apiGroups:
- ""
resources:
- services
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/ollama/model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ metadata:
spec:
familyName: qwen2
source:
uri: ollama://qwen2:0.5b
uri: ollama://qwen2:0.5b
58 changes: 55 additions & 3 deletions pkg/controller/inference/service_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
Expand Down Expand Up @@ -66,6 +67,7 @@ func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, record r
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update
//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
Expand All @@ -87,7 +89,7 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
}

workloadApplyConfiguration := buildWorkloadApplyConfiguration(service, models)
if err := setControllerReferenceForLWS(service, workloadApplyConfiguration, r.Scheme); err != nil {
if err := setControllerReferenceForWorkload(service, workloadApplyConfiguration, r.Scheme); err != nil {
return ctrl.Result{}, err
}

Expand All @@ -97,6 +99,11 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, err
}

// Create a Service that load-balances across the leader pods of the LeaderWorkerSet.
if err := CreateServiceIfNotExists(ctx, r.Client, r.Scheme, service); err != nil {
return ctrl.Result{}, err
}

// Handle status.

workload := &lws.LeaderWorkerSet{}
Expand Down Expand Up @@ -280,8 +287,8 @@ func setServiceCondition(service *inferenceapi.Service, workload *lws.LeaderWork
}
}

// setControllerReferenceForLWS set service as the owner reference for lws.
func setControllerReferenceForLWS(owner metav1.Object, lws *applyconfigurationv1.LeaderWorkerSetApplyConfiguration, scheme *runtime.Scheme) error {
// setControllerReferenceForWorkload sets the service as the controller owner reference of the workload.
func setControllerReferenceForWorkload(owner metav1.Object, lws *applyconfigurationv1.LeaderWorkerSetApplyConfiguration, scheme *runtime.Scheme) error {
ro, ok := owner.(runtime.Object)
if !ok {
return fmt.Errorf("%T is not a runtime.Object, cannot call SetOwnerReference", owner)
Expand All @@ -299,3 +306,48 @@ func setControllerReferenceForLWS(owner metav1.Object, lws *applyconfigurationv1
WithController(true))
return nil
}

// CreateServiceIfNotExists ensures that the load-balancing Service for the
// given inference Service exists, creating it when it is missing.
//
// The created Service is named "<service.Name>-lb", lives in
// service.Namespace and exposes modelSource.DEFAULT_BACKEND_PORT over TCP
// under the port name "http". Its selector matches pods labelled with
// lws.SetNameLabelKey=<service.Name> and lws.WorkerIndexLabelKey="0", i.e.
// the leader pods. The inference Service is set as the controller owner of
// the created Service so that it is garbage collected together with its owner.
//
// An already existing Service is left untouched: it is neither compared
// against nor updated to the spec described above.
//
// It returns nil when the Service exists after the call, and a wrapped error
// when looking up, owning, or creating the Service fails.
func CreateServiceIfNotExists(ctx context.Context, k8sClient client.Client, Scheme *runtime.Scheme, service *inferenceapi.Service) error {
	log := ctrl.LoggerFrom(ctx)

	// The load balancing service name.
	key := types.NamespacedName{Name: service.Name + "-lb", Namespace: service.Namespace}

	var existing corev1.Service
	err := k8sClient.Get(ctx, key, &existing)
	if err == nil {
		// Nothing to do, the Service is already there.
		return nil
	}
	if client.IgnoreNotFound(err) != nil {
		return fmt.Errorf("getting service %s: %w", key, err)
	}

	svc := corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      key.Name,
			Namespace: key.Namespace,
		},
		Spec: corev1.ServiceSpec{
			Ports: []corev1.ServicePort{
				{
					Name:       "http",
					Protocol:   corev1.ProtocolTCP,
					Port:       modelSource.DEFAULT_BACKEND_PORT,
					TargetPort: intstr.FromInt(modelSource.DEFAULT_BACKEND_PORT),
				},
			},
			Selector: map[string]string{
				lws.SetNameLabelKey: service.Name,
				// the leader pod.
				lws.WorkerIndexLabelKey: "0",
			},
		},
	}

	// Set the controller owner reference for garbage collection and reconciliation.
	if err := ctrl.SetControllerReference(service, &svc, Scheme); err != nil {
		return fmt.Errorf("setting controller reference on service %s: %w", key, err)
	}

	log.V(2).Info("Creating service.")
	// The Get above may have been answered from a stale cache, or another
	// reconcile may have created the Service in the meantime. AlreadyExists
	// means the desired state is reached, so it is not reported as a failure.
	if err := client.IgnoreAlreadyExists(k8sClient.Create(ctx, &svc)); err != nil {
		return fmt.Errorf("creating service %s: %w", key, err)
	}
	return nil
}
Loading
Loading