Skip to content

Commit a89cd98

Browse files
authored
koord-descheduler: implement PodMigrationJob controller (#404)
FYI: docs/proposals/scheduling/20220701-pod-migration-job.md Signed-off-by: Joseph <joseph.t.lee@outlook.com>
1 parent 9e8fc01 commit a89cd98

31 files changed

+3092
-43
lines changed

cmd/koord-descheduler/app/server.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ import (
4747
"k8s.io/component-base/version"
4848
"k8s.io/component-base/version/verflag"
4949
"k8s.io/klog/v2"
50+
"k8s.io/klog/v2/klogr"
51+
ctrl "sigs.k8s.io/controller-runtime"
5052
"sigs.k8s.io/controller-runtime/pkg/client"
5153

5254
deschedulerappconfig "github.com/koordinator-sh/koordinator/cmd/koord-descheduler/app/config"
@@ -267,6 +269,7 @@ func Setup(ctx context.Context, opts *options.Options, outOfTreeRegistryOptions
267269
cc := c.Complete()
268270

269271
deschedulercontrollersoptions.Manager = cc.Manager
272+
ctrl.SetLogger(klogr.New())
270273

271274
if err = fieldindex.RegisterFieldIndexes(cc.Manager.GetCache()); err != nil {
272275
return nil, nil, fmt.Errorf("failed to register field index, err: %w", err)

config/rbac/role.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,38 @@ rules:
5353
- get
5454
- list
5555
- watch
56+
- apiGroups:
57+
- scheduling.koordinator.sh
58+
resources:
59+
- podmigrationjobs
60+
verbs:
61+
- create
62+
- delete
63+
- get
64+
- list
65+
- patch
66+
- update
67+
- watch
68+
- apiGroups:
69+
- scheduling.koordinator.sh
70+
resources:
71+
- podmigrationjobs/status
72+
verbs:
73+
- get
74+
- patch
75+
- update
76+
- apiGroups:
77+
- scheduling.koordinator.sh
78+
resources:
79+
- reservations
80+
verbs:
81+
- create
82+
- delete
83+
- get
84+
- list
85+
- patch
86+
- update
87+
- watch
5688
- apiGroups:
5789
- slo.koordinator.sh
5890
resources:

docs/proposals/scheduling/20220701-pod-migration-job.md

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -367,41 +367,59 @@ See the Configuration section for more details
367367

368368
#### Controller Configuration
369369

370-
User can configure the `PodMigrationJobControllerConfiguration` through Koordinator Descheduler ConfigMap.
370+
Users can configure the `MigrationControllerArgs` through the Koordinator Descheduler ConfigMap.
371371

372372
```go
373-
type PodMigrationJobControllerConfiguration struct {
374-
// Paused indicates whether the PodMigrationJob Controller should work or not.
375-
Paused bool `json:"paused,omitempty"`
376-
// DryRun means only execute the entire migration logic except create Reservation or Delete Pod
377-
// Default is false
378-
DryRun bool `json:"dryRun,omitempty"`
379-
380-
// FlowControlQPS controls the number of arbitrations per second
381-
FlowControlQPS string `json:"flowControlQPS,omitempty"`
382-
// FlowControlBurst is the maximum number of tokens
383-
FlowControlBurst int32 `json:"flowControlBurst,omitempty"`
384-
385-
// MaxMigratingPerNode represents the maximum number of pods that can be migrating during migration per node.
386-
MaxMigratingPerNode *int32 `json:"maxMigratingPerNode,omitempty"`
387-
388-
// MaxMigratingPerNamespace represents the maximum number of pods that can be migrating during migration per namespace.
389-
MaxMigratingPerNamespace *int32 `json:"maxMigratingPerNamespace,omitempty"`
390-
391-
// MaxMigratingPerWorkload represents the maximum number of pods that can be migrating during migration per workload.
392-
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
393-
MaxMigratingPerWorkload *intstr.IntOrString `json:"maxMigratingPerWorkload,omitempty"`
394-
395-
// MaxUnavailablePerWorkload represents the maximum number of pods that can be unavailable during migration per workload.
396-
// The unavailable state includes NotRunning/NotReady/Migrating/Evicting
397-
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
398-
MaxUnavailablePerWorkload *intstr.IntOrString `json:"maxUnavailablePerWorkload,omitempty"`
399-
400-
// EvictionPolicy represents how to delete Pod, support "Delete" and "Eviction", default value is "Eviction"
401-
EvictionPolicy string `evictionPolicy,omitempty`
402-
// DefaultDeleteOptions defines options when deleting migrated pods and preempted pods through the method specified by EvictionPolicy
403-
DefaultDeleteOptions *metav1.DeleteOptions `json:"defaultDeleteOptions,omitempty"`
373+
// MigrationControllerArgs holds arguments used to configure the MigrationController
374+
type MigrationControllerArgs struct {
375+
metav1.TypeMeta
376+
377+
// DryRun means only execute the entire migration logic except create Reservation or Delete Pod
378+
// Default is false
379+
DryRun bool `json:"dryRun,omitempty"`
380+
381+
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
382+
EvictFailedBarePods bool `json:"evictFailedBarePods"`
383+
384+
// EvictLocalStoragePods allows pods using local storage to be evicted.
385+
EvictLocalStoragePods bool `json:"evictLocalStoragePods"`
386+
387+
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
388+
EvictSystemCriticalPods bool `json:"evictSystemCriticalPods"`
389+
390+
// IgnorePvcPods prevents pods with PVCs from being evicted.
391+
IgnorePvcPods bool `json:"ignorePvcPods"`
392+
393+
// LabelSelector sets whether to apply label filtering when evicting.
394+
// Any pod matching the label selector is considered evictable.
395+
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`
396+
397+
// FlowControlQPS controls the number of arbitrations per second
398+
FlowControlQPS string `json:"flowControlQPS,omitempty"`
399+
// FlowControlBurst is the maximum number of tokens
400+
FlowControlBurst int32 `json:"flowControlBurst,omitempty"`
401+
402+
// MaxMigratingPerNode represents the maximum number of pods that can be migrating during migration per node.
403+
MaxMigratingPerNode *int32 `json:"maxMigratingPerNode,omitempty"`
404+
405+
// MaxMigratingPerNamespace represents the maximum number of pods that can be migrating during migration per namespace.
406+
MaxMigratingPerNamespace *int32 `json:"maxMigratingPerNamespace,omitempty"`
407+
408+
// MaxMigratingPerWorkload represents the maximum number of pods that can be migrating during migration per workload.
409+
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
410+
MaxMigratingPerWorkload *intstr.IntOrString `json:"maxMigratingPerWorkload,omitempty"`
411+
412+
// MaxUnavailablePerWorkload represents the maximum number of pods that can be unavailable during migration per workload.
413+
// The unavailable state includes NotRunning/NotReady/Migrating/Evicting
414+
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
415+
MaxUnavailablePerWorkload *intstr.IntOrString `json:"maxUnavailablePerWorkload,omitempty"`
416+
417+
// EvictionPolicy represents how to delete Pod, support "Delete" and "Eviction", default value is "Eviction"
418+
EvictionPolicy string `json:"evictionPolicy,omitempty"`
419+
// DefaultDeleteOptions defines options when deleting migrated pods and preempted pods through the method specified by EvictionPolicy
420+
DefaultDeleteOptions *metav1.DeleteOptions `json:"defaultDeleteOptions,omitempty"`
404421
}
422+
405423
```
406424

407425
## Alternatives
@@ -412,3 +430,4 @@ type PodMigrationJobControllerConfiguration struct {
412430
- 2022-07-11: Refactor proposal for review
413431
- 2022-07-13: Update proposal based on review comments
414432
- 2022-07-22: Update Spec
433+
- 2022-08-02: Update MigrationJob configuration

pkg/descheduler/apis/config/register.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ func addKnownTypes(scheme *runtime.Scheme) error {
4848
&DeschedulerConfiguration{},
4949
&DefaultEvictorArgs{},
5050
&RemovePodsViolatingNodeAffinityArgs{},
51+
&MigrationControllerArgs{},
5152
)
5253
return nil
5354
}

pkg/descheduler/apis/config/types_pluginargs.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package config
1818

1919
import (
2020
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
"k8s.io/apimachinery/pkg/util/intstr"
2122
)
2223

2324
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -79,3 +80,55 @@ type Namespaces struct {
7980
Include []string
8081
Exclude []string
8182
}
83+
84+
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
85+
86+
// MigrationControllerArgs holds arguments used to configure the MigrationController.
87+
type MigrationControllerArgs struct {
88+
metav1.TypeMeta
89+
90+
// DryRun means executing the entire migration logic except for creating the Reservation or deleting the Pod.
91+
// Default is false.
92+
DryRun bool
93+
94+
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
95+
EvictFailedBarePods bool
96+
97+
// EvictLocalStoragePods allows pods using local storage to be evicted.
98+
EvictLocalStoragePods bool
99+
100+
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods).
101+
EvictSystemCriticalPods bool
102+
103+
// IgnorePvcPods prevents pods with PVCs from being evicted.
104+
IgnorePvcPods bool
105+
106+
// LabelSelector sets whether to apply label filtering when evicting.
107+
// Any pod matching the label selector is considered evictable.
108+
LabelSelector *metav1.LabelSelector
109+
110+
// FlowControlQPS controls the number of arbitrations per second.
111+
FlowControlQPS string
112+
// FlowControlBurst is the maximum number of tokens.
113+
FlowControlBurst int32
114+
115+
// MaxMigratingPerNode represents the maximum number of pods that can be migrating during migration per node.
116+
MaxMigratingPerNode *int32
117+
118+
// MaxMigratingPerNamespace represents the maximum number of pods that can be migrating during migration per namespace.
119+
MaxMigratingPerNamespace *int32
120+
121+
// MaxMigratingPerWorkload represents the maximum number of pods that can be migrating during migration per workload.
122+
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
123+
MaxMigratingPerWorkload *intstr.IntOrString
124+
125+
// MaxUnavailablePerWorkload represents the maximum number of pods that can be unavailable during migration per workload.
126+
// The unavailable state includes NotRunning/NotReady/Migrating/Evicting.
127+
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
128+
MaxUnavailablePerWorkload *intstr.IntOrString
129+
130+
// EvictionPolicy represents how to delete a Pod. Supported values are "Delete" and "Eviction"; the default value is "Eviction".
131+
EvictionPolicy string
132+
// DefaultDeleteOptions defines options when deleting migrated pods and preempted pods through the method specified by EvictionPolicy.
133+
DefaultDeleteOptions *metav1.DeleteOptions
134+
}

pkg/descheduler/apis/config/v1alpha2/default_plugins.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,21 @@ import (
2020
"k8s.io/apimachinery/pkg/util/sets"
2121
"k8s.io/klog/v2"
2222

23-
"github.com/koordinator-sh/koordinator/pkg/descheduler/framework/plugins/defaultevictor"
24-
"github.com/koordinator-sh/koordinator/pkg/descheduler/framework/plugins/removepodsviolatingnodeaffinity"
23+
"github.com/koordinator-sh/koordinator/pkg/descheduler/controllers/names"
2524
)
2625

2726
// getDefaultPlugins returns the default set of plugins.
2827
func getDefaultPlugins() *Plugins {
2928
plugins := &Plugins{
3029
Deschedule: PluginSet{
3130
Enabled: []Plugin{
32-
{Name: removepodsviolatingnodeaffinity.PluginName},
31+
// NOTE: add default deschedule plugins here.
32+
{},
3333
},
3434
},
3535
Evictor: PluginSet{
3636
Enabled: []Plugin{
37-
{Name: defaultevictor.PluginName},
37+
{Name: names.MigrationController},
3838
},
3939
},
4040
}

pkg/descheduler/apis/config/v1alpha2/defaults.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"k8s.io/utils/pointer"
2727

2828
"github.com/koordinator-sh/koordinator/pkg/descheduler/apis/config"
29+
migrationevictor "github.com/koordinator-sh/koordinator/pkg/descheduler/controllers/migration/evictor"
2930
)
3031

3132
func addDefaultingFuncs(scheme *runtime.Scheme) error {
@@ -189,3 +190,9 @@ func SetDefaults_RemovePodsViolatingNodeAffinityArgs(obj *RemovePodsViolatingNod
189190
obj.NodeAffinityType = append(obj.NodeAffinityType, "requiredDuringSchedulingIgnoredDuringExecution")
190191
}
191192
}
193+
194+
// SetDefaults_MigrationControllerArgs fills in defaults for MigrationControllerArgs:
// an empty EvictionPolicy is replaced with migrationevictor.NativeEvictorName.
// All other fields are left as provided.
func SetDefaults_MigrationControllerArgs(obj *MigrationControllerArgs) {
195+
if obj.EvictionPolicy == "" {
196+
obj.EvictionPolicy = migrationevictor.NativeEvictorName
197+
}
198+
}

pkg/descheduler/apis/config/v1alpha2/register.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ func addKnownTypes(scheme *runtime.Scheme) error {
5656
&DeschedulerConfiguration{},
5757
&DefaultEvictorArgs{},
5858
&RemovePodsViolatingNodeAffinityArgs{},
59+
&MigrationControllerArgs{},
5960
)
6061

6162
return nil

pkg/descheduler/apis/config/v1alpha2/types_pluginargs.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package v1alpha2
1818

1919
import (
2020
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
"k8s.io/apimachinery/pkg/util/intstr"
2122
)
2223

2324
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -79,3 +80,55 @@ type Namespaces struct {
7980
Include []string `json:"include,omitempty"`
8081
Exclude []string `json:"exclude,omitempty"`
8182
}
83+
84+
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
85+
86+
// MigrationControllerArgs holds arguments used to configure the MigrationController.
87+
type MigrationControllerArgs struct {
88+
metav1.TypeMeta
89+
90+
// DryRun means executing the entire migration logic except for creating the Reservation or deleting the Pod.
91+
// Default is false.
92+
DryRun bool `json:"dryRun,omitempty"`
93+
94+
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
95+
EvictFailedBarePods bool `json:"evictFailedBarePods"`
96+
97+
// EvictLocalStoragePods allows pods using local storage to be evicted.
98+
EvictLocalStoragePods bool `json:"evictLocalStoragePods"`
99+
100+
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods).
101+
EvictSystemCriticalPods bool `json:"evictSystemCriticalPods"`
102+
103+
// IgnorePvcPods prevents pods with PVCs from being evicted.
104+
IgnorePvcPods bool `json:"ignorePvcPods"`
105+
106+
// LabelSelector sets whether to apply label filtering when evicting.
107+
// Any pod matching the label selector is considered evictable.
108+
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`
109+
110+
// FlowControlQPS controls the number of arbitrations per second.
111+
FlowControlQPS string `json:"flowControlQPS,omitempty"`
112+
// FlowControlBurst is the maximum number of tokens.
113+
FlowControlBurst int32 `json:"flowControlBurst,omitempty"`
114+
115+
// MaxMigratingPerNode represents the maximum number of pods that can be migrating during migration per node.
116+
MaxMigratingPerNode *int32 `json:"maxMigratingPerNode,omitempty"`
117+
118+
// MaxMigratingPerNamespace represents the maximum number of pods that can be migrating during migration per namespace.
119+
MaxMigratingPerNamespace *int32 `json:"maxMigratingPerNamespace,omitempty"`
120+
121+
// MaxMigratingPerWorkload represents the maximum number of pods that can be migrating during migration per workload.
122+
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
123+
MaxMigratingPerWorkload *intstr.IntOrString `json:"maxMigratingPerWorkload,omitempty"`
124+
125+
// MaxUnavailablePerWorkload represents the maximum number of pods that can be unavailable during migration per workload.
126+
// The unavailable state includes NotRunning/NotReady/Migrating/Evicting.
127+
// Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%).
128+
MaxUnavailablePerWorkload *intstr.IntOrString `json:"maxUnavailablePerWorkload,omitempty"`
129+
130+
// EvictionPolicy represents how to delete a Pod. Supported values are "Delete" and "Eviction"; the default value is "Eviction".
131+
EvictionPolicy string `json:"evictionPolicy,omitempty"`
132+
// DefaultDeleteOptions defines options when deleting migrated pods and preempted pods through the method specified by EvictionPolicy.
133+
DefaultDeleteOptions *metav1.DeleteOptions `json:"defaultDeleteOptions,omitempty"`
134+
}

0 commit comments

Comments
 (0)