Skip to content

Commit 6c2ae22

Browse files
committed
feat: warmpool selection strategy
updated based on copilot added test to sandbox controller lint fixed fixed e2e tests
1 parent 2feb713 commit 6c2ae22

14 files changed

Lines changed: 491 additions & 8 deletions

api/v1beta1/sandbox_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,10 @@ type SandboxStatus struct {
230230
// A pod may have multiple IPs in dual-stack clusters.
231231
// +optional
232232
PodIPs []string `json:"podIPs,omitempty"`
233+
234+
// nodeName is the name of the node where the underlying pod is scheduled.
235+
// +optional
236+
NodeName string `json:"nodeName,omitempty"`
233237
}
234238

235239
// +genclient

controllers/sandbox_controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,11 @@ func (r *SandboxReconciler) reconcileChildResources(ctx context.Context, sandbox
237237
allErrors = errors.Join(allErrors, err)
238238
if pod == nil {
239239
sandbox.Status.PodIPs = nil
240+
sandbox.Status.NodeName = ""
240241
} else {
241242
sandbox.Status.LabelSelector = fmt.Sprintf("%s=%s", sandboxLabel, NameHash(sandbox.Name))
242243
sandbox.Status.PodIPs = podIPsFromStatus(pod.Status.PodIPs)
244+
sandbox.Status.NodeName = pod.Spec.NodeName
243245
}
244246

245247
// Reconcile Service

controllers/sandbox_controller_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,7 @@ func TestReconcile(t *testing.T) {
565565
},
566566
Spec: corev1.PodSpec{
567567
Containers: []corev1.Container{{Name: "test-container"}},
568+
NodeName: "node-1",
568569
},
569570
Status: corev1.PodStatus{
570571
PodIPs: []corev1.PodIP{{IP: "10.244.0.5"}, {IP: "fd00::5"}},
@@ -588,6 +589,7 @@ func TestReconcile(t *testing.T) {
588589
ServiceFQDN: "sandbox-name.sandbox-ns.svc.cluster.local",
589590
LabelSelector: "agents.x-k8s.io/sandbox-name-hash=ab179450",
590591
PodIPs: []string{"10.244.0.5", "fd00::5"},
592+
NodeName: "node-1",
591593
Conditions: []metav1.Condition{
592594
{
593595
Type: "Ready",
@@ -633,6 +635,7 @@ func TestReconcile(t *testing.T) {
633635
},
634636
Spec: corev1.PodSpec{
635637
Containers: []corev1.Container{{Name: "test-container"}},
638+
NodeName: "node-2",
636639
},
637640
Status: corev1.PodStatus{
638641
PodIPs: []corev1.PodIP{{IP: "10.244.0.5"}},
@@ -653,6 +656,7 @@ func TestReconcile(t *testing.T) {
653656
wantStatus: sandboxv1beta1.SandboxStatus{
654657
LabelSelector: "agents.x-k8s.io/sandbox-name-hash=ab179450",
655658
PodIPs: []string{"10.244.0.5"},
659+
NodeName: "node-2",
656660
Conditions: []metav1.Condition{
657661
{
658662
Type: "Ready",

docs/api.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ _Appears in:_
185185
| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#condition-v1-meta) array_ | conditions defines the status conditions array | | Optional: \{\} <br /> |
186186
| `selector` _string_ | selector is the label selector for pods. | | Optional: \{\} <br /> |
187187
| `podIPs` _string array_ | podIPs are the IP addresses of the underlying pod.<br />A pod may have multiple IPs in dual-stack clusters. | | Optional: \{\} <br /> |
188+
| `nodeName` _string_ | nodeName is the name of the node where the underlying pod is scheduled. | | Optional: \{\} <br /> |
188189

189190

190191
#### ShutdownPolicy
@@ -220,6 +221,23 @@ Package v1beta1 contains API Schema definitions for the agents v1beta1 API group
220221

221222

222223

224+
#### AdoptionStrategyType
225+
226+
_Underlying type:_ _string_
227+
228+
AdoptionStrategyType defines the strategy used to pick a Sandbox from a Warm Pool.
229+
230+
231+
232+
_Appears in:_
233+
- [SandboxTemplateSpec](#sandboxtemplatespec)
234+
235+
| Field | Description |
236+
| --- | --- |
237+
| `OldestReady` | AdoptionStrategyOldestReady selects the oldest ready Sandbox first.<br />If no ready sandboxes are available, it falls back to oldest matching sandboxes.<br /> |
238+
| `NodeSpread` | AdoptionStrategyNodeSpread spreads adopted Sandboxes across different nodes to prevent resource contention.<br />It prioritizes ready sandboxes first, then selects from the node with the fewest active sandboxes,<br />and uses creation timestamp as a tie-breaker among candidates with the same readiness and occupancy.<br /> |
239+
240+
223241
#### EnvVar
224242

225243

@@ -438,6 +456,7 @@ _Appears in:_
438456
| `networkPolicyManagement` _[NetworkPolicyManagement](#networkpolicymanagement)_ | networkPolicyManagement defines whether the controller manages the NetworkPolicy.<br />Valid values are "Managed" (default) or "Unmanaged". | Managed | Enum: [Managed Unmanaged] <br />Optional: \{\} <br /> |
439457
| `envVarsInjectionPolicy` _[EnvVarsInjectionPolicy](#envvarsinjectionpolicy)_ | envVarsInjectionPolicy allows a SandboxClaim to inject or override environment variables defined in the template.<br />If set to Disallowed, the SandboxClaim will be rejected if it specifies any environment variables. | Disallowed | Enum: [Allowed Overrides Disallowed] <br />Optional: \{\} <br /> |
440458
| `service` _boolean_ | service controls whether the controller should automatically create a<br />headless Service for Sandboxes created from this template.<br />When unset, the controller preserves existing Services for backward<br />compatibility but does not create new ones. Set to true to enable or false<br />to explicitly disable and remove the Service. | | Optional: \{\} <br /> |
459+
| `adoptionStrategy` _[AdoptionStrategyType](#adoptionstrategytype)_ | adoptionStrategy defines the strategy used to pick a Sandbox from a Warm Pool during adoption.<br />"OldestReady" selects the oldest ready Sandbox first.<br />"NodeSpread" selects a Sandbox to maximize spread across nodes (preferring ready sandboxes first, then fewest active workloads, then creation-time tie breakers). | OldestReady | Enum: [OldestReady NodeSpread] <br />Optional: \{\} <br /> |
441460

442461

443462
#### SandboxWarmPool

extensions/api/v1beta1/sandboxtemplate_types.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,30 @@ type SandboxTemplateSpec struct {
136136
//nolint:nobools // Enum not used to avoid duplicating the Service API; field is not expected to extend (issue #746).
137137
// +optional
138138
Service *bool `json:"service,omitempty"`
139+
140+
// adoptionStrategy defines the strategy used to pick a Sandbox from a Warm Pool during adoption.
141+
// "OldestReady" selects the oldest ready Sandbox first.
142+
// "NodeSpread" selects a Sandbox to maximize spread across nodes (preferring ready sandboxes first, then fewest active workloads, then creation-time tie breakers).
143+
// +kubebuilder:validation:Enum=OldestReady;NodeSpread
144+
// +kubebuilder:default=OldestReady
145+
// +optional
146+
AdoptionStrategy AdoptionStrategyType `json:"adoptionStrategy,omitempty"`
139147
}
140148

149+
// AdoptionStrategyType defines the strategy used to pick a Sandbox from a Warm Pool.
150+
type AdoptionStrategyType string
151+
152+
const (
153+
// AdoptionStrategyOldestReady selects the oldest ready Sandbox first.
154+
// If no ready sandboxes are available, it falls back to oldest matching sandboxes.
155+
AdoptionStrategyOldestReady AdoptionStrategyType = "OldestReady"
156+
157+
// AdoptionStrategyNodeSpread spreads adopted Sandboxes across different nodes to prevent resource contention.
158+
// It prioritizes ready sandboxes first, then selects from the node with the fewest active sandboxes,
159+
// and uses creation timestamp as a tie-breaker among candidates with the same readiness and occupancy.
160+
AdoptionStrategyNodeSpread AdoptionStrategyType = "NodeSpread"
161+
)
162+
141163
// +genclient
142164
// +kubebuilder:object:root=true
143165
// +kubebuilder:resource:scope=Namespaced,shortName=sandboxtemplate

extensions/controllers/queue/simple_sandbox_queue.go

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ type SandboxKey types.NamespacedName
2828
type SandboxQueue interface {
2929
Add(warmPoolName string, item SandboxKey)
3030
Get(warmPoolName string) (SandboxKey, bool)
31+
GetWithStrategy(warmPoolName string, pick func([]SandboxKey) (SandboxKey, bool)) (SandboxKey, bool)
3132
RemoveQueue(warmPoolName string)
3233
RemoveItem(warmPoolName string, item SandboxKey)
3334
}
@@ -58,6 +59,15 @@ func (s *SimpleSandboxQueue) Get(warmPoolName string) (SandboxKey, bool) {
5859
return q.(*synchronizedQueue).Pop()
5960
}
6061

62+
// GetWithStrategy pops an item from the specific warm pool's queue using a custom strategy.
63+
func (s *SimpleSandboxQueue) GetWithStrategy(warmPoolName string, pick func([]SandboxKey) (SandboxKey, bool)) (SandboxKey, bool) {
64+
q, ok := s.queues.Load(warmPoolName)
65+
if !ok {
66+
return SandboxKey{}, false
67+
}
68+
return q.(*synchronizedQueue).PopWithStrategy(pick)
69+
}
70+
6171
// RemoveItem deletes a specific sandbox from a warm pool's queue.
6272
func (s *SimpleSandboxQueue) RemoveItem(warmPoolName string, item SandboxKey) {
6373
if q, ok := s.queues.Load(warmPoolName); ok {
@@ -139,8 +149,51 @@ func (q *synchronizedQueue) Pop() (SandboxKey, bool) {
139149
return item, true
140150
}
141151

152+
// PopWithStrategy applies the strategy function to pick an item from the queue,
153+
// removes it thread-safely, and returns it.
154+
func (q *synchronizedQueue) PopWithStrategy(pick func([]SandboxKey) (SandboxKey, bool)) (SandboxKey, bool) {
155+
q.mu.Lock()
156+
if len(q.items) == 0 {
157+
q.mu.Unlock()
158+
return SandboxKey{}, false
159+
}
160+
161+
// Snapshot the queue items
162+
snapshot := make([]SandboxKey, len(q.items))
163+
copy(snapshot, q.items)
164+
q.mu.Unlock()
165+
166+
key, ok := pick(snapshot)
167+
if !ok {
168+
return SandboxKey{}, false
169+
}
170+
171+
q.mu.Lock()
172+
defer q.mu.Unlock()
173+
174+
// Verify the key is still present in the queue
175+
if _, exists := q.set[key]; !exists {
176+
return SandboxKey{}, false
177+
}
178+
179+
// Find the picked key in q.items and remove it
180+
for i, k := range q.items {
181+
if k == key {
182+
// Shift left and clear the tail slot
183+
last := len(q.items) - 1
184+
copy(q.items[i:], q.items[i+1:])
185+
q.items[last] = SandboxKey{}
186+
q.items = q.items[:last]
187+
break
188+
}
189+
}
190+
delete(q.set, key)
191+
192+
return key, true
193+
}
194+
142195
// RemoveQueue completely deletes a warm pool's queue from the sync.Map
143-
// to prevent memory leaks when WarmPools are deleted.
196+
// to prevent memory leaks when SandboxTemplates or WarmPools are deleted.
144197
func (s *SimpleSandboxQueue) RemoveQueue(warmPoolName string) {
145198
s.queues.Delete(warmPoolName)
146199
}

extensions/controllers/queue/simple_sandbox_queue_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,50 @@ func TestSimpleSandboxQueue_RemoveQueue_MemoryLeakFix(t *testing.T) {
131131
t.Errorf("Expected queue to be completely removed, but it still existed")
132132
}
133133
}
134+
135+
func TestSimpleSandboxQueue_GetWithStrategy(t *testing.T) {
136+
q := NewSimpleSandboxQueue()
137+
hash := "template-hash-1"
138+
139+
key1 := SandboxKey{Namespace: "default", Name: "sb-1"}
140+
key2 := SandboxKey{Namespace: "default", Name: "sb-2"}
141+
key3 := SandboxKey{Namespace: "default", Name: "sb-3"}
142+
143+
q.Add(hash, key1)
144+
q.Add(hash, key2)
145+
q.Add(hash, key3)
146+
147+
// Custom strategy to pick key2 specifically
148+
pickKey2 := func(items []SandboxKey) (SandboxKey, bool) {
149+
for _, item := range items {
150+
if item.Name == "sb-2" {
151+
return item, true
152+
}
153+
}
154+
return SandboxKey{}, false
155+
}
156+
157+
// Pop with strategy
158+
got, ok := q.GetWithStrategy(hash, pickKey2)
159+
if !ok || got != key2 {
160+
t.Errorf("Expected to pick %v, got %v (ok: %v)", key2, got, ok)
161+
}
162+
163+
// First standard pop should be key1 (since key2 was removed)
164+
got1, _ := q.Get(hash)
165+
if got1 != key1 {
166+
t.Errorf("Expected first remaining item to be %v, got %v", key1, got1)
167+
}
168+
169+
// Second standard pop should be key3
170+
got3, _ := q.Get(hash)
171+
if got3 != key3 {
172+
t.Errorf("Expected second remaining item to be %v, got %v", key3, got3)
173+
}
174+
175+
// Queue should now be empty
176+
_, ok3 := q.Get(hash)
177+
if ok3 {
178+
t.Errorf("Expected queue to be empty, but got an item")
179+
}
180+
}

0 commit comments

Comments
 (0)