Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ coverage.html
*.test
*.cpuprofile
*.heapprofile
*.swp
go.work
go.work.sum

Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,5 @@ require (
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)

replace sigs.k8s.io/karpenter v0.33.1-0.20240202175636-0e77b7842c28 => github.com/smartnews/karpenter v0.34.0-sn-1
9 changes: 7 additions & 2 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,15 @@ spec:
memory leak protection, and disruption testing.
pattern: ^(([0-9]+(s|m|h))+)|(Never)$
type: string
utilizationThreshold:
description: |-
UtilizationThreshold is defined as sum of requested resources divided by capacity
below which a node can be considered for disruption.
maximum: 100
minimum: 1
type: integer
type: object
x-kubernetes-validations:
- message: consolidateAfter cannot be combined with consolidationPolicy=WhenUnderutilized
rule: 'has(self.consolidateAfter) ? self.consolidationPolicy != ''WhenUnderutilized'' || self.consolidateAfter == ''Never'' : true'
- message: consolidateAfter must be specified with consolidationPolicy=WhenEmpty
rule: 'self.consolidationPolicy == ''WhenEmpty'' ? has(self.consolidateAfter) : true'
limits:
Expand Down
2 changes: 1 addition & 1 deletion pkg/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const (
// resources. Cache hits enable faster provisioning and reduced API load on
// AWS APIs, which can have a serious impact on performance and scalability.
// DO NOT CHANGE THIS VALUE WITHOUT DUE CONSIDERATION
DefaultTTL = time.Minute
DefaultTTL = 5 * time.Minute
// UnavailableOfferingsTTL is the time before offerings that were marked as unavailable
// are removed from the cache and are available for launch again
UnavailableOfferingsTTL = 3 * time.Minute
Expand Down
19 changes: 19 additions & 0 deletions pkg/controllers/interruption/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/samber/lo"
"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
"k8s.io/utils/clock"
"knative.dev/pkg/logging"
Expand Down Expand Up @@ -197,13 +198,31 @@ func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message,
if zone != "" && instanceType != "" {
c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), instanceType, zone, v1beta1.CapacityTypeSpot)
}
// try to create a new nodeclaim immediately but ignore error if it fails
if err := c.createNodeClaim(ctx, nodeClaim); err != nil {
logging.FromContext(ctx).Errorf("[interruption handling]failed to create a new nodeclaim, %v", err)
}
}
if action != NoAction {
return c.deleteNodeClaim(ctx, nodeClaim, node)
}
return nil
}

// createNodeClaim creates a replacement NodeClaim that carries over the
// GenerateName, annotations, labels, owner references and spec of the
// interrupted one. Name, UID, resource version and status are intentionally
// not copied, so the API server assigns a fresh identity from GenerateName.
//
// The fields are taken from a deep copy of oldNodeClaim so that the object
// handed to Create shares no maps, slices or pointers with oldNodeClaim; the
// caller keeps using oldNodeClaim afterwards (it is passed on for deletion).
// NOTE(review): oldNodeClaim presumably originates from a shared client
// cache, in which case it must not be mutated — confirm against the caller.
//
// It returns whatever error the Kubernetes client reports from Create.
func (c *Controller) createNodeClaim(ctx context.Context, oldNodeClaim *v1beta1.NodeClaim) error {
	// Detach from the original before building the new object.
	src := oldNodeClaim.DeepCopy()
	newNodeClaim := &v1beta1.NodeClaim{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName:    src.GenerateName,
			Annotations:     src.Annotations,
			Labels:          src.Labels,
			OwnerReferences: src.OwnerReferences,
		},
		Spec: src.Spec,
	}
	return c.kubeClient.Create(ctx, newNodeClaim)
}

// deleteNodeClaim removes the NodeClaim from the api-server
func (c *Controller) deleteNodeClaim(ctx context.Context, nodeClaim *v1beta1.NodeClaim, node *v1.Node) error {
if !nodeClaim.DeletionTimestamp.IsZero() {
Expand Down
1 change: 1 addition & 0 deletions pkg/providers/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ func getTags(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, nodeClaim *co
corev1beta1.NodePoolLabelKey: nodeClaim.Labels[corev1beta1.NodePoolLabelKey],
corev1beta1.ManagedByAnnotationKey: options.FromContext(ctx).ClusterName,
v1beta1.LabelNodeClass: nodeClass.Name,
"Componenet": nodeClaim.Labels[corev1beta1.NodePoolLabelKey], // used for aws explore
}
return lo.Assign(nodeClass.Spec.Tags, staticTags)
}
Expand Down