Skip to content

Commit 80f72ce

Browse files
committed
add HPA feature/unit/fvt/docs/script
Signed-off-by: jooho <[email protected]>
1 parent f794ffc commit 80f72ce

37 files changed

+1852
-74
lines changed

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ test:
4747

4848
# Run fvt tests. This requires an etcd, kubernetes connection, and model serving installation. Ginkgo CLI is used to run them in parallel
4949
fvt:
50-
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero --timeout=40m
50+
ginkgo -v -p -progress --fail-fast fvt/predictor fvt/scaleToZero fvt/hpa --timeout=50m
5151

5252
# Command to regenerate the grpc go files from the proto files
5353
fvt-protoc:
@@ -87,6 +87,18 @@ deploy-release:
8787
deploy-release-dev-mode:
8888
./scripts/install.sh --namespace ${NAMESPACE} --install-config-path config --dev-mode-logging
8989

90+
# Install the release in dev-logging mode for FVT runs. Each optional installer
# flag is appended to extra_options only when its controlling variable
# (MODELMESH_SERVING_IMAGE, ENABLE_SELF_SIGNED_CA, NAMESPACE_SCOPE_MODE) is set
# to a non-empty value.
deploy-release-dev-mode-fvt:
ifdef MODELMESH_SERVING_IMAGE
	$(eval extra_options += --modelmesh-serving-image $(MODELMESH_SERVING_IMAGE))
endif
ifdef ENABLE_SELF_SIGNED_CA
	$(eval extra_options += --enable-self-signed-ca)
endif
ifdef NAMESPACE_SCOPE_MODE
	$(eval extra_options += --namespace-scope-mode)
endif
	./scripts/install.sh --namespace $(NAMESPACE) --install-config-path config --dev-mode-logging --fvt $(extra_options)
101+
90102
delete: oc-login
91103
./scripts/delete.sh --namespace ${NAMESPACE} --local-config-path config
92104

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
/*
2+
Copyright 2021 IBM Corporation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
package v1alpha1
17+
18+
import (
19+
"context"
20+
"fmt"
21+
"net/http"
22+
"strconv"
23+
24+
kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
25+
"github.com/kserve/kserve/pkg/constants"
26+
"github.com/kserve/modelmesh-serving/controllers/autoscaler"
27+
"sigs.k8s.io/controller-runtime/pkg/client"
28+
logf "sigs.k8s.io/controller-runtime/pkg/log"
29+
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
30+
)
31+
32+
// servingruntimeWebhookLog is the logger used by the ServingRuntime validating webhook.
33+
var servingruntimeWebhookLog = logf.Log.WithName("servingruntime-webhook")
34+
35+
//+kubebuilder:webhook:path=/validate-serving-modelmesh-io-v1alpha1-servingruntime,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=servingruntimes;clusterservingruntimes,verbs=create;update,versions=v1alpha1,name=servingruntime.modelmesh-webhook-server.default,admissionReviewVersions=v1
36+
type ServingRuntimeWebhook struct {
37+
Client client.Client
38+
decoder *admission.Decoder
39+
}
40+
41+
func (s *ServingRuntimeWebhook) Handle(ctx context.Context, req admission.Request) admission.Response {
42+
var srAnnotations map[string]string
43+
44+
if req.Kind.Kind == "ServingRuntime"{
45+
servingRuntime := &kservev1alpha.ServingRuntime{}
46+
err := s.decoder.Decode(req, servingRuntime)
47+
if err != nil {
48+
return admission.Errored(http.StatusBadRequest, err)
49+
}
50+
srAnnotations = servingRuntime.ObjectMeta.Annotations
51+
}else{
52+
clusterServingRuntime := &kservev1alpha.ClusterServingRuntime{}
53+
err := s.decoder.Decode(req, clusterServingRuntime)
54+
if err != nil {
55+
return admission.Errored(http.StatusBadRequest, err)
56+
}
57+
srAnnotations = clusterServingRuntime.ObjectMeta.Annotations
58+
}
59+
60+
if err := validateServingRuntimeAutoscaler(srAnnotations); err != nil {
61+
return admission.Denied(err.Error())
62+
}
63+
64+
if err := validateAutoscalerTargetUtilizationPercentage(srAnnotations); err != nil {
65+
return admission.Denied(err.Error())
66+
}
67+
68+
if err := validateAutoScalingReplicas(srAnnotations); err != nil {
69+
return admission.Denied(err.Error())
70+
}
71+
72+
return admission.Allowed("Passed all validation checks for ServingRuntime")
73+
}
74+
75+
// InjectDecoder injects the decoder.
76+
func (s *ServingRuntimeWebhook) InjectDecoder(d *admission.Decoder) error {
77+
s.decoder = d
78+
return nil
79+
}
80+
81+
// Validation of servingruntime autoscaler class
82+
func validateServingRuntimeAutoscaler(annotations map[string]string) error {
83+
value, ok := annotations[constants.AutoscalerClass]
84+
class := constants.AutoscalerClassType(value)
85+
if ok {
86+
for _, item := range constants.AutoscalerAllowedClassList {
87+
if class == item {
88+
switch class {
89+
case constants.AutoscalerClassHPA:
90+
if metric, ok := annotations[constants.AutoscalerMetrics]; ok {
91+
return validateHPAMetrics(constants.AutoscalerMetricsType(metric))
92+
} else {
93+
return nil
94+
}
95+
default:
96+
return fmt.Errorf("unknown autoscaler class [%s]", class)
97+
}
98+
}
99+
}
100+
return fmt.Errorf("[%s] is not a supported autoscaler class type.\n", value)
101+
}
102+
103+
return nil
104+
}
105+
106+
// Validate of autoscaler targetUtilizationPercentage
107+
func validateAutoscalerTargetUtilizationPercentage(annotations map[string]string) error {
108+
if value, ok := annotations[constants.TargetUtilizationPercentage]; ok {
109+
t, err := strconv.Atoi(value)
110+
if err != nil {
111+
return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
112+
} else {
113+
if t < 1 || t > 100 {
114+
return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
115+
}
116+
}
117+
}
118+
119+
return nil
120+
}
121+
122+
// Validate scaling options
123+
func validateAutoScalingReplicas(annotations map[string]string) error {
124+
autoscalerClassType := autoscaler.AutoscalerClassNone
125+
if value, ok := annotations[constants.AutoscalerClass]; ok {
126+
autoscalerClassType = value
127+
}
128+
129+
switch autoscalerClassType {
130+
case string(constants.AutoscalerClassHPA):
131+
return validateScalingHPA(annotations)
132+
default:
133+
return nil
134+
}
135+
}
136+
137+
func validateScalingHPA(annotations map[string]string) error {
138+
metric := constants.AutoScalerMetricsCPU
139+
if value, ok := annotations[constants.AutoscalerMetrics]; ok {
140+
metric = constants.AutoscalerMetricsType(value)
141+
}
142+
143+
minReplicas := 1
144+
if value, ok := annotations[constants.MinScaleAnnotationKey]; ok {
145+
if valueInt, err := strconv.Atoi(value); err != nil {
146+
return fmt.Errorf("The min replicas should be a integer.")
147+
} else if valueInt < 0 {
148+
return fmt.Errorf("The min replicas should be more than -1")
149+
} else {
150+
minReplicas = valueInt
151+
}
152+
}
153+
154+
maxReplicas := 1
155+
if value, ok := annotations[constants.MaxScaleAnnotationKey]; ok {
156+
if valueInt, err := strconv.Atoi(value); err != nil {
157+
return fmt.Errorf("The max replicas should be a integer.")
158+
} else {
159+
maxReplicas = valueInt
160+
}
161+
}
162+
163+
if minReplicas > maxReplicas {
164+
return fmt.Errorf("The max replicas should be bigger than min replicas.")
165+
}
166+
167+
err := validateHPAMetrics(metric)
168+
if err != nil {
169+
return err
170+
}
171+
172+
if value, ok := annotations[constants.TargetUtilizationPercentage]; ok {
173+
t, err := strconv.Atoi(value)
174+
if err != nil {
175+
return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
176+
} else if metric == constants.AutoScalerMetricsMemory && t < 1 {
177+
return fmt.Errorf("The target memory should be greater than 1 MiB")
178+
}
179+
}
180+
181+
return nil
182+
}
183+
184+
// Validate of autoscaler HPA metrics
185+
func validateHPAMetrics(metric constants.AutoscalerMetricsType) error {
186+
for _, item := range constants.AutoscalerAllowedMetricsList {
187+
if item == metric {
188+
return nil
189+
}
190+
}
191+
return fmt.Errorf("[%s] is not a supported metric.\n", metric)
192+
193+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
Copyright 2021 IBM Corporation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
package v1alpha1
17+
18+
import (
19+
// "fmt"
20+
"testing"
21+
22+
"github.com/onsi/gomega"
23+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+
25+
kservev1alpha "github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
26+
"github.com/kserve/kserve/pkg/constants"
27+
)
28+
29+
func makeTestRawServingRuntime() kservev1alpha.ServingRuntime {
30+
servingRuntime := kservev1alpha.ServingRuntime{
31+
ObjectMeta: metav1.ObjectMeta{
32+
Name: "foo",
33+
Namespace: "default",
34+
Annotations: map[string]string{
35+
"serving.kserve.io/autoscalerClass": "hpa",
36+
"serving.kserve.io/metrics": "cpu",
37+
"serving.kserve.io/targetUtilizationPercentage": "75",
38+
"autoscaling.knative.dev/min-scale": "2",
39+
"autoscaling.knative.dev/max-scale": "3",
40+
},
41+
},
42+
}
43+
44+
return servingRuntime
45+
}
46+
47+
func TestValidAutoscalerTypeAndHPAMetrics(t *testing.T) {
48+
g := gomega.NewGomegaWithT(t)
49+
sr := makeTestRawServingRuntime()
50+
g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).Should(gomega.Succeed())
51+
}
52+
func TestInvalidAutoscalerClassType(t *testing.T) {
53+
g := gomega.NewGomegaWithT(t)
54+
sr := makeTestRawServingRuntime()
55+
sr.ObjectMeta.Annotations[constants.AutoscalerClass] = "test"
56+
g.Expect(validateServingRuntimeAutoscaler(sr.Annotations)).ShouldNot(gomega.Succeed())
57+
}
58+
59+
func TestInvalidAutoscalerTargetUtilizationPercentageLowValue(t *testing.T) {
60+
g := gomega.NewGomegaWithT(t)
61+
sr := makeTestRawServingRuntime()
62+
sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "-1"
63+
g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed())
64+
}
65+
66+
func TestInvalidAutoscalerTargetUtilizationPercentageHighValue(t *testing.T) {
67+
g := gomega.NewGomegaWithT(t)
68+
sr := makeTestRawServingRuntime()
69+
sr.ObjectMeta.Annotations[constants.TargetUtilizationPercentage] = "101"
70+
g.Expect(validateAutoscalerTargetUtilizationPercentage(sr.Annotations)).ShouldNot(gomega.Succeed())
71+
}
72+
73+
func TestInvalidAutoscalerLowMinReplicas(t *testing.T) {
74+
g := gomega.NewGomegaWithT(t)
75+
sr := makeTestRawServingRuntime()
76+
sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "-1"
77+
g.Expect(validateScalingHPA(sr.Annotations)).ShouldNot(gomega.Succeed())
78+
}
79+
80+
func TestInvalidAutoscalerMaxReplicasMustBiggerThanMixReplicas(t *testing.T) {
81+
g := gomega.NewGomegaWithT(t)
82+
sr := makeTestRawServingRuntime()
83+
sr.ObjectMeta.Annotations[constants.MinScaleAnnotationKey] = "4"
84+
sr.ObjectMeta.Annotations[constants.MaxScaleAnnotationKey] = "3"
85+
g.Expect(validateAutoScalingReplicas(sr.Annotations)).ShouldNot(gomega.Succeed())
86+
}
87+
88+
func TestValidAutoscalerMetricsType(t *testing.T) {
89+
g := gomega.NewGomegaWithT(t)
90+
sr := makeTestRawServingRuntime()
91+
sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "memory"
92+
g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("memory"))).Should(gomega.Succeed())
93+
}
94+
95+
func TestInvalidAutoscalerMetricsType(t *testing.T) {
96+
g := gomega.NewGomegaWithT(t)
97+
sr := makeTestRawServingRuntime()
98+
sr.ObjectMeta.Annotations[constants.AutoscalerMetrics] = "conccurrency"
99+
g.Expect(validateHPAMetrics(constants.AutoscalerMetricsType("conccurrency"))).ShouldNot(gomega.Succeed())
100+
}
101+

config/certmanager/certificate.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,4 @@ spec:
3636
issuerRef:
3737
kind: Issuer
3838
name: selfsigned-issuer
39-
secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
39+
secretName: modelmesh-webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize

0 commit comments

Comments
 (0)