Skip to content

Commit 5041797

Browse files
committed
refactor(kubeClientSandbox): keep a cache of evicted pods and allow to reset it at the end of each descheduling cycle
1 parent 3292cb0 commit 5041797

2 files changed

Lines changed: 482 additions & 19 deletions

File tree

pkg/descheduler/descheduler.go

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"math"
2323
"net/http"
2424
"strconv"
25+
"sync"
2526
"time"
2627

2728
promapi "github.com/prometheus/client_golang/api"
@@ -84,6 +85,50 @@ type profileRunner struct {
8485
descheduleEPs, balanceEPs eprunner
8586
}
8687

88+
type evictedPodInfo struct {
89+
Namespace string
90+
Name string
91+
UID string
92+
}
93+
94+
type evictedPodsCache struct {
95+
sync.RWMutex
96+
pods map[string]*evictedPodInfo
97+
}
98+
99+
func newEvictedPodsCache() *evictedPodsCache {
100+
return &evictedPodsCache{
101+
pods: make(map[string]*evictedPodInfo),
102+
}
103+
}
104+
105+
func (c *evictedPodsCache) add(pod *v1.Pod) {
106+
c.Lock()
107+
defer c.Unlock()
108+
c.pods[string(pod.UID)] = &evictedPodInfo{
109+
Namespace: pod.Namespace,
110+
Name: pod.Name,
111+
UID: string(pod.UID),
112+
}
113+
}
114+
115+
func (c *evictedPodsCache) list() []*evictedPodInfo {
116+
c.RLock()
117+
defer c.RUnlock()
118+
pods := make([]*evictedPodInfo, 0, len(c.pods))
119+
for _, pod := range c.pods {
120+
podCopy := *pod
121+
pods = append(pods, &podCopy)
122+
}
123+
return pods
124+
}
125+
126+
func (c *evictedPodsCache) clear() {
127+
c.Lock()
128+
defer c.Unlock()
129+
c.pods = make(map[string]*evictedPodInfo)
130+
}
131+
87132
type descheduler struct {
88133
rs *options.DeschedulerServer
89134
kubeClientSandbox *kubeClientSandbox
@@ -109,14 +154,16 @@ type kubeClientSandbox struct {
109154
fakeCli *fakeclientset.Clientset
110155
fakeFactory informers.SharedInformerFactory
111156
resourceToInformer map[schema.GroupVersionResource]informers.GenericInformer
112-
podEvictionReactionFnc func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error)
157+
evictedPodsCache *evictedPodsCache
158+
podEvictionReactionFnc func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error)
113159
}
114160

115161
func newKubeClientSandbox(client clientset.Interface, sharedInformerFactory informers.SharedInformerFactory, resources ...schema.GroupVersionResource) (*kubeClientSandbox, error) {
116162
sandbox := &kubeClientSandbox{
117163
client: client,
118164
sharedInformerFactory: sharedInformerFactory,
119165
resourceToInformer: make(map[schema.GroupVersionResource]informers.GenericInformer),
166+
evictedPodsCache: newEvictedPodsCache(),
120167
podEvictionReactionFnc: podEvictionReactionFnc,
121168
}
122169

@@ -134,7 +181,7 @@ func newKubeClientSandbox(client clientset.Interface, sharedInformerFactory info
134181
func (sandbox *kubeClientSandbox) buildSandbox() error {
135182
sandbox.fakeCli = fakeclientset.NewSimpleClientset()
136183
// simulate a pod eviction by deleting a pod
137-
sandbox.fakeCli.PrependReactor("create", "pods", sandbox.podEvictionReactionFnc(sandbox.fakeCli))
184+
sandbox.fakeCli.PrependReactor("create", "pods", sandbox.podEvictionReactionFnc(sandbox.fakeCli, sandbox.evictedPodsCache))
138185
sandbox.fakeFactory = informers.NewSharedInformerFactory(sandbox.fakeCli, 0)
139186

140187
for resource, informer := range sandbox.resourceToInformer {
@@ -164,6 +211,10 @@ func (sandbox *kubeClientSandbox) fakeSharedInformerFactory() informers.SharedIn
164211
return sandbox.fakeFactory
165212
}
166213

214+
func (sandbox *kubeClientSandbox) reset() {
215+
sandbox.evictedPodsCache.clear()
216+
}
217+
167218
func nodeSelectorFromPolicy(deschedulerPolicy *api.DeschedulerPolicy) (labels.Selector, error) {
168219
nodeSelector := labels.Everything()
169220
if deschedulerPolicy.NodeSelector != nil {
@@ -454,6 +505,10 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context) error {
454505

455506
d.runProfiles(ctx, client)
456507

508+
if d.rs.DryRun {
509+
d.kubeClientSandbox.reset()
510+
}
511+
457512
klog.V(1).InfoS("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())
458513

459514
return nil
@@ -628,7 +683,7 @@ func validateVersionCompatibility(discovery discovery.DiscoveryInterface, desche
628683
return nil
629684
}
630685

631-
func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
686+
func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset, evictedCache *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
632687
return func(action core.Action) (bool, runtime.Object, error) {
633688
if action.GetSubresource() == "eviction" {
634689
createAct, matched := action.(core.CreateActionImpl)
@@ -639,6 +694,16 @@ func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action cor
639694
if !matched {
640695
return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
641696
}
697+
podObj, err := fakeClient.Tracker().Get(action.GetResource(), eviction.GetNamespace(), eviction.GetName())
698+
if err == nil {
699+
if pod, ok := podObj.(*v1.Pod); ok {
700+
evictedCache.add(pod)
701+
} else {
702+
return false, nil, fmt.Errorf("unable to convert object to *v1.Pod for %v/%v", eviction.GetNamespace(), eviction.GetName())
703+
}
704+
} else if !apierrors.IsNotFound(err) {
705+
return false, nil, fmt.Errorf("unable to get pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
706+
}
642707
if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
643708
return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
644709
}

0 commit comments

Comments
 (0)