Skip to content

Commit d1b4856

Browse files
committed
koord-manager: improve batch resource calculation with system metrics
Signed-off-by: saintube <saintube@foxmail.com>
1 parent bb9907d commit d1b4856

File tree

4 files changed

+178
-50
lines changed

4 files changed

+178
-50
lines changed

pkg/slo-controller/noderesource/plugins/batchresource/plugin.go

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ func (p *Plugin) Reset(node *corev1.Node, message string) []framework.ResourceIt
8181

8282
// Calculate calculates Batch resources using the formula below:
8383
// Node.Total - Node.Reserved - System.Used - Pod(High-Priority).Used, System.Used = Node.Used - Pod(All).Used.
84+
// As node and podList are the nearly latest state at time T1, the resourceMetrics are the node metric and pod
85+
// metrics collected and snapshot at time T0 (T0 < T1). There can be gaps between the states of T0 and T1.
86+
// We firstly calculate an infimum of the batch allocatable at time T0.
87+
// `BatchAllocatable0 = NodeAllocatable * ratio - SystemUsed0 - Pod(HP and in Pods1).Used0 - Pod(not in Pods1).Used0`.
88+
// Then we subtract the sum of requests of the pods that are newly scheduled but have not yet reported metrics, to give a safe result.
8489
func (p *Plugin) Calculate(strategy *configuration.ColocationStrategy, node *corev1.Node, podList *corev1.PodList,
8590
resourceMetrics *framework.ResourceMetrics) ([]framework.ResourceItem, error) {
8691
if strategy == nil || node == nil || podList == nil || resourceMetrics == nil || resourceMetrics.NodeMetric == nil {
@@ -96,15 +101,18 @@ func (p *Plugin) Calculate(strategy *configuration.ColocationStrategy, node *cor
96101

97102
// compute the requests and usages according to the pods' priority classes.
98103
// HP means High-Priority (i.e. not Batch or Free) pods
99-
// pod(HP).Used = pod(All).Used - pod(Batch/Free).Used
100-
podAllUsed := util.NewZeroResourceList()
101104
podHPRequest := util.NewZeroResourceList()
102105
podHPUsed := util.NewZeroResourceList()
106+
// podAllUsed is the sum usage of all pods reported in NodeMetric.
107+
// podKnownUsed is the sum usage of pods which are both reported in NodeMetric and shown in current pod list.
108+
podAllUsed := util.NewZeroResourceList()
109+
podKnownUsed := util.NewZeroResourceList()
103110

104111
nodeMetric := resourceMetrics.NodeMetric
105112
podMetricMap := make(map[string]*slov1alpha1.PodMetricInfo)
106113
for _, podMetric := range nodeMetric.Status.PodsMetric {
107114
podMetricMap[util.GetPodMetricKey(podMetric)] = podMetric
115+
podAllUsed = quotav1.Add(podAllUsed, getPodMetricUsage(podMetric))
108116
}
109117

110118
for i := range podList.Items {
@@ -113,36 +121,39 @@ func (p *Plugin) Calculate(strategy *configuration.ColocationStrategy, node *cor
113121
continue
114122
}
115123

124+
// check if the pod has metrics
125+
podKey := util.GetPodKey(pod)
126+
podMetric, hasMetric := podMetricMap[podKey]
127+
if hasMetric {
128+
podKnownUsed = quotav1.Add(podKnownUsed, getPodMetricUsage(podMetric))
129+
}
130+
131+
// count the high-priority usage
116132
priorityClass := extension.GetPodPriorityClassWithDefault(pod)
117133
podRequest := util.GetPodRequest(pod, corev1.ResourceCPU, corev1.ResourceMemory)
118134
isPodHighPriority := priorityClass != extension.PriorityBatch && priorityClass != extension.PriorityFree
119-
if isPodHighPriority {
120-
podHPRequest = quotav1.Add(podHPRequest, podRequest)
121-
}
122-
podKey := util.GetPodKey(pod)
123-
podMetric, ok := podMetricMap[podKey]
124-
if !ok {
125-
if isPodHighPriority {
126-
podHPUsed = quotav1.Add(podHPUsed, podRequest)
127-
}
128-
podAllUsed = quotav1.Add(podAllUsed, podRequest)
135+
if !isPodHighPriority {
129136
continue
130137
}
131-
132-
if isPodHighPriority {
138+
podHPRequest = quotav1.Add(podHPRequest, podRequest)
139+
if hasMetric {
133140
podHPUsed = quotav1.Add(podHPUsed, getPodMetricUsage(podMetric))
141+
} else {
142+
podHPUsed = quotav1.Add(podHPUsed, podRequest)
134143
}
135-
podAllUsed = quotav1.Add(podAllUsed, getPodMetricUsage(podMetric))
136144
}
137145

146+
// For the pods reported with metrics but not shown in the current list, count them into the HP used.
147+
podUnknownPriorityUsed := quotav1.Subtract(podAllUsed, podKnownUsed)
148+
podHPUsed = quotav1.Add(podHPUsed, podUnknownPriorityUsed)
149+
klog.V(6).InfoS("batch resource got unknown priority pods used", "node", node.Name,
150+
"cpu", podUnknownPriorityUsed.Cpu().String(), "memory", podUnknownPriorityUsed.Memory().String())
151+
138152
nodeAllocatable := getNodeAllocatable(node)
139153
nodeReservation := getNodeReservation(strategy, node)
140154

141-
// System.Used = Node.Used - Pod(All).Used
142-
nodeUsage := getNodeMetricUsage(nodeMetric.Status.NodeMetric)
143-
systemUsed := quotav1.Max(quotav1.Subtract(nodeUsage, podAllUsed), util.NewZeroResourceList())
144-
145-
// System.Used = max(System.Used, Node.Anno.Reserved)
155+
// System.Used = max(Node.Used - Pod(All).Used, Node.Anno.Reserved)
156+
systemUsed := getResourceListForCPUAndMemory(nodeMetric.Status.NodeMetric.SystemUsage.ResourceList)
146157
nodeAnnoReserved := util.GetNodeReservationFromAnnotation(node.Annotations)
147158
systemUsed = quotav1.Max(systemUsed, nodeAnnoReserved)
148159

@@ -240,27 +251,12 @@ func prepareNodeForResource(node *corev1.Node, nr *framework.NodeResource, name
240251

241252
// getPodMetricUsage gets pod usage from the PodMetricInfo
242253
func getPodMetricUsage(info *slov1alpha1.PodMetricInfo) corev1.ResourceList {
243-
cpuQuant := info.PodUsage.ResourceList[corev1.ResourceCPU]
244-
cpuUsageQuant := resource.NewMilliQuantity(cpuQuant.MilliValue(), cpuQuant.Format)
245-
memQuant := info.PodUsage.ResourceList[corev1.ResourceMemory]
246-
memUsageQuant := resource.NewQuantity(memQuant.Value(), memQuant.Format)
247-
return corev1.ResourceList{corev1.ResourceCPU: *cpuUsageQuant, corev1.ResourceMemory: *memUsageQuant}
248-
}
249-
250-
// getNodeMetricUsage gets node usage from the NodeMetricInfo
251-
func getNodeMetricUsage(info *slov1alpha1.NodeMetricInfo) corev1.ResourceList {
252-
cpuQ := info.NodeUsage.ResourceList[corev1.ResourceCPU]
253-
cpuUsageQ := resource.NewMilliQuantity(cpuQ.MilliValue(), cpuQ.Format)
254-
memQ := info.NodeUsage.ResourceList[corev1.ResourceMemory]
255-
memUsageQ := resource.NewQuantity(memQ.Value(), memQ.Format)
256-
return corev1.ResourceList{corev1.ResourceCPU: *cpuUsageQ, corev1.ResourceMemory: *memUsageQ}
254+
return getResourceListForCPUAndMemory(info.PodUsage.ResourceList)
257255
}
258256

259257
// getNodeAllocatable gets node allocatable and filters out non-CPU and non-Mem resources
260258
func getNodeAllocatable(node *corev1.Node) corev1.ResourceList {
261-
result := node.Status.Allocatable.DeepCopy()
262-
result = quotav1.Mask(result, []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory})
263-
return result
259+
return getResourceListForCPUAndMemory(node.Status.Allocatable)
264260
}
265261

266262
// getNodeReservation gets node-level safe-guarding reservation with the node's allocatable
@@ -275,6 +271,10 @@ func getNodeReservation(strategy *configuration.ColocationStrategy, node *corev1
275271
}
276272
}
277273

274+
func getResourceListForCPUAndMemory(rl corev1.ResourceList) corev1.ResourceList {
275+
return quotav1.Mask(rl, []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory})
276+
}
277+
278278
// getReserveRatio returns resource reserved ratio
279279
func getReserveRatio(reclaimThreshold int64) float64 {
280280
return float64(100-reclaimThreshold) / 100.0

pkg/slo-controller/noderesource/plugins/batchresource/plugin_test.go

Lines changed: 115 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ func getTestResourceMetrics() *framework.ResourceMetrics {
146146
NodeUsage: slov1alpha1.ResourceMap{
147147
ResourceList: makeResourceList("50", "55G"),
148148
},
149+
SystemUsage: slov1alpha1.ResourceMap{
150+
ResourceList: makeResourceList("7", "12G"),
151+
},
149152
},
150153
PodsMetric: []*slov1alpha1.PodMetricInfo{
151154
genPodMetric("test", "podA", "11", "11G"),
@@ -847,6 +850,12 @@ func TestPluginCalculate(t *testing.T) {
847850
NodeUsage: slov1alpha1.ResourceMap{
848851
ResourceList: makeResourceList("50", "55G"),
849852
},
853+
SystemUsage: slov1alpha1.ResourceMap{
854+
ResourceList: corev1.ResourceList{
855+
corev1.ResourceCPU: resource.MustParse("7"),
856+
corev1.ResourceMemory: resource.MustParse("12G"),
857+
},
858+
},
850859
},
851860
PodsMetric: []*slov1alpha1.PodMetricInfo{
852861
genPodMetric("test", "podProd", "5", "5G"),
@@ -872,6 +881,105 @@ func TestPluginCalculate(t *testing.T) {
872881
},
873882
wantErr: false,
874883
},
884+
{
885+
name: "calculate with pods terminated",
886+
args: args{
887+
strategy: &configuration.ColocationStrategy{
888+
Enable: pointer.Bool(true),
889+
CPUReclaimThresholdPercent: pointer.Int64(65),
890+
MemoryReclaimThresholdPercent: pointer.Int64(65),
891+
DegradeTimeMinutes: pointer.Int64(15),
892+
UpdateTimeThresholdSeconds: pointer.Int64(300),
893+
ResourceDiffThreshold: pointer.Float64(0.1),
894+
},
895+
node: &corev1.Node{
896+
ObjectMeta: metav1.ObjectMeta{
897+
Name: "test-node1",
898+
},
899+
Status: makeNodeStat("100", "120G"),
900+
},
901+
podList: &corev1.PodList{
902+
Items: []corev1.Pod{
903+
{
904+
ObjectMeta: metav1.ObjectMeta{
905+
Name: "podProd",
906+
Namespace: "test",
907+
Labels: map[string]string{
908+
extension.LabelPodQoS: string(extension.QoSLS),
909+
},
910+
},
911+
Spec: corev1.PodSpec{
912+
NodeName: "test-node1",
913+
Containers: []corev1.Container{
914+
{
915+
Resources: makeResourceReq("10", "10G"),
916+
},
917+
},
918+
// regarded as Prod by default
919+
},
920+
Status: corev1.PodStatus{
921+
Phase: corev1.PodRunning,
922+
},
923+
},
924+
{
925+
ObjectMeta: metav1.ObjectMeta{
926+
Name: "podProd1",
927+
Namespace: "test",
928+
// missing qos label
929+
},
930+
Spec: corev1.PodSpec{
931+
NodeName: "test-node1",
932+
Containers: []corev1.Container{
933+
{
934+
Resources: makeResourceReq("10", "10G"),
935+
},
936+
},
937+
PriorityClassName: string(extension.PriorityProd),
938+
Priority: pointer.Int32(extension.PriorityProdValueMax),
939+
},
940+
Status: corev1.PodStatus{
941+
Phase: corev1.PodRunning,
942+
},
943+
},
944+
},
945+
},
946+
resourceMetrics: &framework.ResourceMetrics{
947+
NodeMetric: &slov1alpha1.NodeMetric{
948+
Status: slov1alpha1.NodeMetricStatus{
949+
UpdateTime: &metav1.Time{Time: time.Now()},
950+
NodeMetric: &slov1alpha1.NodeMetricInfo{
951+
NodeUsage: slov1alpha1.ResourceMap{
952+
ResourceList: makeResourceList("25", "30G"),
953+
},
954+
SystemUsage: slov1alpha1.ResourceMap{
955+
ResourceList: corev1.ResourceList{
956+
corev1.ResourceCPU: resource.MustParse("5"),
957+
corev1.ResourceMemory: resource.MustParse("10G"),
958+
},
959+
},
960+
},
961+
PodsMetric: []*slov1alpha1.PodMetricInfo{
962+
genPodMetric("test", "podProd", "5", "5G"),
963+
genPodMetric("test", "podProd2", "10", "10G"),
964+
},
965+
},
966+
},
967+
},
968+
},
969+
want: []framework.ResourceItem{
970+
{
971+
Name: extension.BatchCPU,
972+
Quantity: resource.NewQuantity(35000, resource.DecimalSI),
973+
Message: "batchAllocatable[CPU(Milli-Core)]:35000 = nodeAllocatable:100000 - nodeReservation:35000 - systemUsage:5000 - podHPUsed:25000",
974+
},
975+
{
976+
Name: extension.BatchMemory,
977+
Quantity: resource.NewScaledQuantity(43, 9),
978+
Message: "batchAllocatable[Mem(GB)]:43 = nodeAllocatable:120 - nodeReservation:42 - systemUsage:10 - podHPUsed:25",
979+
},
980+
},
981+
wantErr: false,
982+
},
875983
}
876984

877985
for _, tt := range tests {
@@ -1046,9 +1154,9 @@ func Test_getPodMetricUsage(t *testing.T) {
10461154
}
10471155
}
10481156

1049-
func Test_getNodeMetricUsage(t *testing.T) {
1157+
func Test_getResourceListForCPUAndMemory(t *testing.T) {
10501158
type args struct {
1051-
info *slov1alpha1.NodeMetricInfo
1159+
rl corev1.ResourceList
10521160
}
10531161
tests := []struct {
10541162
name string
@@ -1058,22 +1166,18 @@ func Test_getNodeMetricUsage(t *testing.T) {
10581166
{
10591167
name: "get correct scaled resource quantity",
10601168
args: args{
1061-
info: &slov1alpha1.NodeMetricInfo{
1062-
NodeUsage: slov1alpha1.ResourceMap{
1063-
ResourceList: corev1.ResourceList{
1064-
corev1.ResourceCPU: resource.MustParse("40"),
1065-
corev1.ResourceMemory: resource.MustParse("80Gi"),
1066-
"unknown_resource": resource.MustParse("10"),
1067-
},
1068-
},
1169+
rl: corev1.ResourceList{
1170+
corev1.ResourceCPU: resource.MustParse("40"),
1171+
corev1.ResourceMemory: resource.MustParse("80Gi"),
1172+
"unknown_resource": resource.MustParse("10"),
10691173
},
10701174
},
10711175
want: makeResourceList("40", "80Gi"),
10721176
},
10731177
}
10741178
for _, tt := range tests {
10751179
t.Run(tt.name, func(t *testing.T) {
1076-
got := getNodeMetricUsage(tt.args.info)
1180+
got := getResourceListForCPUAndMemory(tt.args.rl)
10771181
testingCorrectResourceList(t, &tt.want, &got)
10781182
})
10791183
}

pkg/slo-controller/noderesource/resource_calculator.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ func (r *NodeResourceReconciler) resetNodeResource(node *corev1.Node, message st
5959
func (r *NodeResourceReconciler) calculateNodeResource(node *corev1.Node,
6060
nodeMetric *slov1alpha1.NodeMetric, podList *corev1.PodList) *framework.NodeResource {
6161
nr := framework.NewNodeResource()
62-
metrics := &framework.ResourceMetrics{
62+
resourceMetrics := &framework.ResourceMetrics{
6363
NodeMetric: nodeMetric,
6464
}
6565

6666
strategy := sloconfig.GetNodeColocationStrategy(r.cfgCache.GetCfgCopy(), node)
67-
framework.RunResourceCalculateExtenders(nr, strategy, node, podList, metrics)
67+
framework.RunResourceCalculateExtenders(nr, strategy, node, podList, resourceMetrics)
6868

6969
return nr
7070
}

pkg/slo-controller/noderesource/resource_calculator_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,12 @@ func Test_calculateNodeResource(t *testing.T) {
338338
corev1.ResourceMemory: resource.MustParse("55G"),
339339
},
340340
},
341+
SystemUsage: slov1alpha1.ResourceMap{
342+
ResourceList: corev1.ResourceList{
343+
corev1.ResourceCPU: resource.MustParse("7"),
344+
corev1.ResourceMemory: resource.MustParse("12G"),
345+
},
346+
},
341347
},
342348
PodsMetric: []*slov1alpha1.PodMetricInfo{
343349
{
@@ -542,6 +548,12 @@ func Test_calculateNodeResource(t *testing.T) {
542548
corev1.ResourceMemory: resource.MustParse("55G"),
543549
},
544550
},
551+
SystemUsage: slov1alpha1.ResourceMap{
552+
ResourceList: corev1.ResourceList{
553+
corev1.ResourceCPU: resource.MustParse("7"),
554+
corev1.ResourceMemory: resource.MustParse("12G"),
555+
},
556+
},
545557
},
546558
PodsMetric: []*slov1alpha1.PodMetricInfo{
547559
{
@@ -746,6 +758,12 @@ func Test_calculateNodeResource(t *testing.T) {
746758
corev1.ResourceMemory: resource.MustParse("55G"),
747759
},
748760
},
761+
SystemUsage: slov1alpha1.ResourceMap{
762+
ResourceList: corev1.ResourceList{
763+
corev1.ResourceCPU: resource.MustParse("7"),
764+
corev1.ResourceMemory: resource.MustParse("12G"),
765+
},
766+
},
749767
},
750768
PodsMetric: []*slov1alpha1.PodMetricInfo{
751769
{
@@ -947,6 +965,12 @@ func Test_calculateNodeResource(t *testing.T) {
947965
corev1.ResourceMemory: resource.MustParse("55G"),
948966
},
949967
},
968+
SystemUsage: slov1alpha1.ResourceMap{
969+
ResourceList: corev1.ResourceList{
970+
corev1.ResourceCPU: resource.MustParse("7"),
971+
corev1.ResourceMemory: resource.MustParse("12G"),
972+
},
973+
},
950974
},
951975
PodsMetric: []*slov1alpha1.PodMetricInfo{
952976
{

0 commit comments

Comments
 (0)