Skip to content

Commit 2beab49

Browse files
chore(api): Update go dependencies in merlin api server (#551)
# Description This PR bumps several Go package dependencies used by the Merlin API server, which in turn is imported by the Turing API server and will have its own dependencies updated in a separate PR (the updates in the PR will allow the dependencies of the Turing API server to also be updated without having to be pinned to the versions used in the Merlin API server). The version of Go used has also been bumped up from 1.20 to 1.22. Main packages updated: ``` - github.com/GoogleCloudPlatform/spark-on-k8s-operator - github.com/google/go-containerregistry - github.com/kserve/kserve - k8s.io/api - k8s.io/apimachinery - k8s.io/client-go - knative.dev/networking - knative.dev/pkg - knative.dev/serving - sigs.k8s.io/controller-runtime ``` Some minor refactoring was performed on structs whose fields have changed and on calls to functions whose expected arguments have changed - these are annotated directly on the diffs. # Modifications - `api/cluster/controller.go` - Updated the KServe controller functions as they now require a context - `api/cluster/resource/templater.go` - Updated the `Handler` field of a `Probe` struct to a `ProbeHandler` field - `api/go.mod` - Updated various packages here # Tests <!-- Besides the existing / updated automated tests, what specific scenarios should be tested? Consider the backward compatibility of the changes, whether corner cases are covered, etc. Please describe the tests and check the ones that have been completed. Eg: - [x] Deploying new and existing standard models - [ ] Deploying PyFunc models --> # Checklist - [x] Added PR label - [ ] Added unit test, integration, and/or e2e tests - [x] Tested locally - [ ] Updated documentation - [ ] Update Swagger spec if the PR introduces API changes - [ ] Regenerated Golang and Python client if the PR introduces API changes # Release Notes <!-- Does this PR introduce a user-facing change? If no, just write "NONE" in the release-note block below. 
If yes, a release note is required. Enter your extended release note in the block below. If the PR requires additional action from users switching to the new release, include the string "action required". For more information about release notes, see kubernetes' guide here: http://git.k8s.io/community/contributors/guide/release-notes.md --> ```release-note NONE ```
1 parent 1b8e3f5 commit 2beab49

File tree

28 files changed

+505
-1387
lines changed

28 files changed

+505
-1387
lines changed

.github/workflows/merlin.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ env:
1111
ARTIFACT_RETENTION_DAYS: 7
1212
DOCKER_BUILDKIT: 1
1313
DOCKER_REGISTRY: ghcr.io
14-
GO_VERSION: "1.20"
14+
GO_VERSION: "1.22"
1515

1616
jobs:
1717
create-version:
@@ -147,7 +147,7 @@ jobs:
147147
uses: golangci/golangci-lint-action@v3
148148
with:
149149
# Ensure the same version as the one defined in Makefile
150-
version: v1.51.2
150+
version: v1.56.2
151151
working-directory: api
152152

153153
test-api:

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# ============================================================
1616
# Build stage 1: Build API
1717
# ============================================================
18-
FROM golang:1.20-alpine as go-builder
18+
FROM golang:1.22-alpine as go-builder
1919

2020
RUN apk update && apk add --no-cache git ca-certificates bash
2121
RUN mkdir -p src/api

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ VERSION := $(or ${VERSION}, $(shell git describe --tags --always --first-parent)
99
LOG_URL?=localhost:8002
1010
TEST_TAGS?=
1111

12-
GOLANGCI_LINT_VERSION="v1.51.2"
12+
GOLANGCI_LINT_VERSION="v1.56.2"
1313
PROTOC_GEN_GO_JSON_VERSION="v1.1.0"
1414
PROTOC_GEN_GO_VERSION="v1.26"
1515
PYTHON_VERSION ?= "39" #set as 38 39 310 for 3.8-3.10 respectively

api/batch/controller.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ func NewController(
111111
manifestManager ManifestManager,
112112
envMetaData cluster.Metadata,
113113
batchJobTemplater *BatchJobTemplater,
114-
) Controller {
114+
) (Controller, error) {
115115
informerFactory := externalversions.NewSharedInformerFactory(sparkClient, resyncPeriod)
116116
informer := informerFactory.Sparkoperator().V1beta2().SparkApplications().Informer()
117117
queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
@@ -130,10 +130,13 @@ func NewController(
130130
ContainerFetcher: cluster.NewContainerFetcher(kubeClient.CoreV1(), envMetaData),
131131
}
132132

133-
informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
133+
_, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
134134
UpdateFunc: controller.onUpdate,
135135
})
136-
return controller
136+
if err != nil {
137+
return nil, err
138+
}
139+
return controller, nil
137140
}
138141

139142
func (c *controller) Submit(ctx context.Context, predictionJob *models.PredictionJob, namespace string) error {

api/batch/controller_test.go

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -328,8 +328,10 @@ func TestSubmit(t *testing.T) {
328328
mockManifestManager := &batchMock.ManifestManager{}
329329
clusterMetadata := cluster.Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"}
330330
batchJobTemplater := NewBatchJobTemplater(defaultBatchConfig)
331-
ctl := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient, mockManifestManager,
331+
ctl, err := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient,
332+
mockManifestManager,
332333
clusterMetadata, batchJobTemplater)
334+
assert.NoError(t, err)
333335

334336
mockKubeClient.PrependReactor("get", "namespaces", func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
335337
return true, nil, kerrors.NewNotFound(schema.GroupResource{}, action.(ktesting.GetAction).GetName())
@@ -358,7 +360,7 @@ func TestSubmit(t *testing.T) {
358360
mockManifestManager.On("DeleteJobSpec", context.Background(), jobName, defaultNamespace).Return(nil)
359361
}
360362

361-
err := ctl.Submit(context.Background(), predictionJob, test.namespace)
363+
err = ctl.Submit(context.Background(), predictionJob, test.namespace)
362364
if test.wantError {
363365
assert.Error(t, err)
364366
assert.Equal(t, test.wantErrorMsg, err.Error())
@@ -396,13 +398,14 @@ func TestCleanupAfterSubmitFailed(t *testing.T) {
396398
mockManifestManager := &batchMock.ManifestManager{}
397399
clusterMetadata := cluster.Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"}
398400
batchJobTemplater := NewBatchJobTemplater(defaultBatchConfig)
399-
ctl := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient, mockManifestManager,
401+
ctl, err := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient, mockManifestManager,
400402
clusterMetadata, batchJobTemplater)
403+
assert.NoError(t, err)
401404

402405
mockManifestManager.On("DeleteSecret", context.Background(), jobName, defaultNamespace).Return(nil)
403406
mockManifestManager.On("DeleteJobSpec", context.Background(), jobName, defaultNamespace).Return(nil)
404407

405-
err := ctl.Submit(context.Background(), predictionJob, defaultNamespace)
408+
err = ctl.Submit(context.Background(), predictionJob, defaultNamespace)
406409
assert.Error(t, err)
407410
mockManifestManager.AssertExpectations(t)
408411
}
@@ -422,8 +425,12 @@ func TestOnUpdate(t *testing.T) {
422425
mockManifestManager := &batchMock.ManifestManager{}
423426
clusterMetadata := cluster.Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"}
424427
batchJobTemplater := NewBatchJobTemplater(defaultBatchConfig)
425-
ctl := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient, mockManifestManager,
426-
clusterMetadata, batchJobTemplater).(*controller)
428+
newController, err := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient,
429+
mockManifestManager,
430+
clusterMetadata, batchJobTemplater)
431+
assert.NoError(t, err)
432+
433+
ctl := newController.(*controller)
427434
stopCh := make(chan struct{})
428435
defer close(stopCh)
429436
go ctl.Run(stopCh)
@@ -499,8 +506,12 @@ func TestUpdateStatus(t *testing.T) {
499506
mockManifestManager := &batchMock.ManifestManager{}
500507
clusterMetadata := cluster.Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"}
501508
batchJobTemplater := NewBatchJobTemplater(defaultBatchConfig)
502-
ctl := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient, mockManifestManager,
503-
clusterMetadata, batchJobTemplater).(*controller)
509+
newController, err := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient,
510+
mockManifestManager,
511+
clusterMetadata, batchJobTemplater)
512+
assert.NoError(t, err)
513+
514+
ctl := newController.(*controller)
504515
stopCh := make(chan struct{})
505516
defer close(stopCh)
506517
go ctl.Run(stopCh)
@@ -597,9 +608,12 @@ func TestStop(t *testing.T) {
597608
mockManifestManager := &batchMock.ManifestManager{}
598609
clusterMetadata := cluster.Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"}
599610
batchJobTemplater := NewBatchJobTemplater(defaultBatchConfig)
600-
ctl := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient, mockManifestManager,
611+
newController, err := NewController(mockStorage, mockMlpAPIClient, mockSparkClient, mockKubeClient,
612+
mockManifestManager,
601613
clusterMetadata, batchJobTemplater)
614+
assert.NoError(t, err)
602615

616+
ctl := newController.(*controller)
603617
mockKubeClient.PrependReactor("get", "namespaces", func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
604618
return true, nil, kerrors.NewNotFound(schema.GroupResource{}, action.(ktesting.GetAction).GetName())
605619
})
@@ -616,7 +630,7 @@ func TestStop(t *testing.T) {
616630
})
617631
mockStorage.On("Delete", predictionJob).Return(nil)
618632

619-
err := ctl.Stop(context.Background(), predictionJob, namespace.Name)
633+
err = ctl.Stop(context.Background(), predictionJob, namespace.Name)
620634
if test.wantError {
621635
assert.Error(t, err)
622636
assert.Equal(t, test.wantErrorMsg, err.Error())

api/cluster/controller.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,8 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
217217
if modelService.CurrentIsvcName != "" {
218218
if modelService.DeploymentMode == deployment.ServerlessDeploymentMode ||
219219
modelService.DeploymentMode == deployment.EmptyDeploymentMode {
220-
currentIsvc, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(modelService.CurrentIsvcName, metav1.GetOptions{})
220+
currentIsvc, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(ctx,
221+
modelService.CurrentIsvcName, metav1.GetOptions{})
221222
if err != nil && !kerrors.IsNotFound(err) {
222223
return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToGetInferenceServiceStatus, isvcName))
223224
}
@@ -234,10 +235,10 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
234235
}
235236

236237
// check the cluster to see if the inference service has already been deployed
237-
s, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(modelService.Name, metav1.GetOptions{})
238+
s, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(ctx, modelService.Name, metav1.GetOptions{})
238239
if err != nil {
239240
if kerrors.IsNotFound(err) {
240-
s, err = c.kserveClient.InferenceServices(modelService.Namespace).Create(spec)
241+
s, err = c.kserveClient.InferenceServices(modelService.Namespace).Create(ctx, spec, metav1.CreateOptions{})
241242
if err != nil {
242243
log.Errorf("unable to create inference service %s: %v", isvcName, err)
243244
return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateInferenceService, isvcName))
@@ -260,7 +261,7 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
260261
s, err = c.waitInferenceServiceReady(s)
261262
if err != nil {
262263
// remove created inferenceservice when got error
263-
if err := c.deleteInferenceService(isvcName, modelService.Namespace); err != nil {
264+
if err := c.deleteInferenceService(ctx, isvcName, modelService.Namespace); err != nil {
264265
log.Errorf("unable to delete inference service %s with error %v", isvcName, err)
265266
}
266267

@@ -288,7 +289,7 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
288289

289290
// Delete previous inference service
290291
if modelService.CurrentIsvcName != "" {
291-
if err := c.deleteInferenceService(modelService.CurrentIsvcName, modelService.Namespace); err != nil {
292+
if err := c.deleteInferenceService(ctx, modelService.CurrentIsvcName, modelService.Namespace); err != nil {
292293
log.Errorf("unable to delete prevision revision %s with error %v", modelService.CurrentIsvcName, err)
293294
return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToDeletePreviousInferenceService, modelService.CurrentIsvcName))
294295
}
@@ -305,15 +306,16 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
305306
}
306307

307308
func (c *controller) Delete(ctx context.Context, modelService *models.Service) (*models.Service, error) {
308-
infSvc, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(modelService.Name, metav1.GetOptions{})
309+
infSvc, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(ctx, modelService.Name,
310+
metav1.GetOptions{})
309311
if err != nil {
310312
if !kerrors.IsNotFound(err) {
311313
return nil, errors.Wrapf(err, "unable to check status of inference service: %s", infSvc.Name)
312314
}
313315
return modelService, nil
314316
}
315317

316-
if err := c.deleteInferenceService(modelService.Name, modelService.Namespace); err != nil {
318+
if err := c.deleteInferenceService(ctx, modelService.Name, modelService.Namespace); err != nil {
317319
return nil, err
318320
}
319321

@@ -336,9 +338,10 @@ func (c *controller) Delete(ctx context.Context, modelService *models.Service) (
336338
return modelService, nil
337339
}
338340

339-
func (c *controller) deleteInferenceService(serviceName string, namespace string) error {
341+
func (c *controller) deleteInferenceService(ctx context.Context, serviceName string, namespace string) error {
340342
gracePeriod := int64(deletionGracePeriodSecond)
341-
err := c.kserveClient.InferenceServices(namespace).Delete(serviceName, &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
343+
err := c.kserveClient.InferenceServices(namespace).Delete(ctx, serviceName,
344+
metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
342345
if client.IgnoreNotFound(err) != nil {
343346
return errors.Wrapf(err, "unable to delete inference service: %s %v", serviceName, err)
344347
}
@@ -373,7 +376,7 @@ func (c *controller) waitInferenceServiceReady(service *kservev1beta1.InferenceS
373376
}
374377
}()
375378

376-
isvcWatcher, err := c.kserveClient.InferenceServices(service.Namespace).Watch(metav1.ListOptions{
379+
isvcWatcher, err := c.kserveClient.InferenceServices(service.Namespace).Watch(ctx, metav1.ListOptions{
377380
FieldSelector: fmt.Sprintf("metadata.name=%s", service.Name),
378381
})
379382
if err != nil {

api/cluster/resource/templater.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ func (t *InferenceServiceTemplater) enrichStandardTransformerEnvVars(modelServic
556556

557557
func createHTTPGetLivenessProbe(httpPath string, port int) *corev1.Probe {
558558
return &corev1.Probe{
559-
Handler: corev1.Handler{
559+
ProbeHandler: corev1.ProbeHandler{
560560
HTTPGet: &corev1.HTTPGetAction{
561561
Path: httpPath,
562562
Scheme: "HTTP",
@@ -575,7 +575,7 @@ func createHTTPGetLivenessProbe(httpPath string, port int) *corev1.Probe {
575575

576576
func createGRPCLivenessProbe(port int) *corev1.Probe {
577577
return &corev1.Probe{
578-
Handler: corev1.Handler{
578+
ProbeHandler: corev1.ProbeHandler{
579579
Exec: &corev1.ExecAction{
580580
Command: []string{grpcHealthProbeCommand, fmt.Sprintf("-addr=:%d", port)},
581581
},

api/cmd/api/setup.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ import (
1515
"google.golang.org/grpc"
1616
"google.golang.org/grpc/credentials/insecure"
1717
"gorm.io/gorm"
18-
"k8s.io/apimachinery/pkg/util/clock"
1918
"k8s.io/client-go/kubernetes"
2019
"k8s.io/client-go/rest"
20+
"k8s.io/utils/clock"
2121

2222
"github.com/caraml-dev/merlin/api"
2323
"github.com/caraml-dev/merlin/batch"
@@ -403,8 +403,11 @@ func initBatchControllers(cfg *config.Config, db *gorm.DB, mlpAPIClient mlp.APIC
403403

404404
batchJobTemplator := batch.NewBatchJobTemplater(cfg.BatchConfig)
405405

406-
ctl := batch.NewController(predictionJobStorage, mlpAPIClient, sparkClient, kubeClient, manifestManager,
406+
ctl, err := batch.NewController(predictionJobStorage, mlpAPIClient, sparkClient, kubeClient, manifestManager,
407407
envMetadata, batchJobTemplator)
408+
if err != nil {
409+
log.Panicf("unable to create batch controller: %v", err)
410+
}
408411
stopCh := make(chan struct{})
409412
go ctl.Run(stopCh)
410413

api/cmd/inference-logger/main.go

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,22 @@ import (
1313
"strings"
1414
"time"
1515

16+
"github.com/caraml-dev/merlin/pkg/inference-logger/liveness"
17+
merlinlogger "github.com/caraml-dev/merlin/pkg/inference-logger/logger"
1618
"github.com/confluentinc/confluent-kafka-go/v2/kafka"
1719
"github.com/kelseyhightower/envconfig"
1820
nrconfig "github.com/newrelic/newrelic-client-go/v2/pkg/config"
1921
nrlog "github.com/newrelic/newrelic-client-go/v2/pkg/logs"
2022
"github.com/pkg/errors"
2123
"go.uber.org/zap"
22-
network "knative.dev/networking/pkg"
24+
"knative.dev/networking/pkg/http/header"
25+
"knative.dev/networking/pkg/http/proxy"
2326
pkgnet "knative.dev/pkg/network"
2427
pkghandler "knative.dev/pkg/network/handlers"
2528
"knative.dev/pkg/signals"
2629
"knative.dev/serving/pkg/queue"
2730
"knative.dev/serving/pkg/queue/health"
2831
"knative.dev/serving/pkg/queue/readiness"
29-
30-
"github.com/caraml-dev/merlin/pkg/inference-logger/liveness"
31-
merlinlogger "github.com/caraml-dev/merlin/pkg/inference-logger/logger"
3232
)
3333

3434
var (
@@ -216,10 +216,9 @@ func buildServer(target *url.URL, dispatcher *merlinlogger.Dispatcher, loggingMo
216216

217217
httpProxy := httputil.NewSingleHostReverseProxy(target)
218218
httpProxy.Transport = pkgnet.NewAutoTransport(maxIdleConns /* max-idle */, maxIdleConns /* max-idle-per-host */)
219-
// nolint:staticcheck
220-
httpProxy.ErrorHandler = pkgnet.ErrorHandler(log)
221-
httpProxy.BufferPool = network.NewBufferPool()
222-
httpProxy.FlushInterval = network.FlushInterval
219+
httpProxy.ErrorHandler = pkghandler.Error(log)
220+
httpProxy.BufferPool = proxy.NewBufferPool()
221+
httpProxy.FlushInterval = proxy.FlushInterval
223222

224223
var composedHandler http.Handler = httpProxy
225224
composedHandler = merlinlogger.NewLoggerHandler(dispatcher, loggingMode, composedHandler, log)
@@ -230,7 +229,7 @@ func buildServer(target *url.URL, dispatcher *merlinlogger.Dispatcher, loggingMo
230229
drainer := &pkghandler.Drainer{
231230
QuietPeriod: drainSleepDuration,
232231
// Add Activator probe header to the drainer so it can handle probes directly from activator
233-
HealthCheckUAPrefixes: []string{network.ActivatorUserAgent},
232+
HealthCheckUAPrefixes: []string{header.ActivatorUserAgent},
234233
Inner: composedHandler,
235234
HealthCheck: health.ProbeHandler(probe, false),
236235
}

api/cmd/transformer/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"github.com/kelseyhightower/envconfig"
1111
"github.com/pkg/errors"
1212
"github.com/prometheus/client_golang/prometheus"
13-
"github.com/prometheus/common/version"
13+
"github.com/prometheus/client_golang/prometheus/collectors/version"
1414
"go.opentelemetry.io/contrib/propagators/b3"
1515
"go.opentelemetry.io/otel"
1616
"go.opentelemetry.io/otel/exporters/jaeger"

0 commit comments

Comments
 (0)