@@ -81,6 +81,11 @@ func (p *Plugin) Reset(node *corev1.Node, message string) []framework.ResourceIt
8181
8282// Calculate calculates Batch resources using the formula below:
8383// Node.Total - Node.Reserved - System.Used - Pod(High-Priority).Used, System.Used = Node.Used - Pod(All).Used.
84+ // The node and podList reflect nearly the latest state at time T1, while the resourceMetrics hold the node metric and pod
85+ // metrics collected and snapshotted at time T0 (T0 < T1). There can be gaps between the states at T0 and T1.
86+ // We first calculate an infimum of the batch allocatable at time T0:
87+ // `BatchAllocatable0 = NodeAllocatable * ratio - SystemUsed0 - Pod(HP and in Pods1).Used0 - Pod(not in Pods1).Used0`.
88+ // Then we subtract the sum of the requests of the pods that are newly scheduled but have not yet reported metrics, to give a safe result.
8489func (p * Plugin ) Calculate (strategy * configuration.ColocationStrategy , node * corev1.Node , podList * corev1.PodList ,
8590 resourceMetrics * framework.ResourceMetrics ) ([]framework.ResourceItem , error ) {
8691 if strategy == nil || node == nil || podList == nil || resourceMetrics == nil || resourceMetrics .NodeMetric == nil {
@@ -96,15 +101,18 @@ func (p *Plugin) Calculate(strategy *configuration.ColocationStrategy, node *cor
96101
97102 // compute the requests and usages according to the pods' priority classes.
98103 // HP means High-Priority (i.e. not Batch or Free) pods
99- // pod(HP).Used = pod(All).Used - pod(Batch/Free).Used
100- podAllUsed := util .NewZeroResourceList ()
101104 podHPRequest := util .NewZeroResourceList ()
102105 podHPUsed := util .NewZeroResourceList ()
106+ // podAllUsed is the sum usage of all pods reported in NodeMetric.
107+ // podKnownUsed is the sum usage of pods which are both reported in NodeMetric and shown in current pod list.
108+ podAllUsed := util .NewZeroResourceList ()
109+ podKnownUsed := util .NewZeroResourceList ()
103110
104111 nodeMetric := resourceMetrics .NodeMetric
105112 podMetricMap := make (map [string ]* slov1alpha1.PodMetricInfo )
106113 for _ , podMetric := range nodeMetric .Status .PodsMetric {
107114 podMetricMap [util .GetPodMetricKey (podMetric )] = podMetric
115+ podAllUsed = quotav1 .Add (podAllUsed , getPodMetricUsage (podMetric ))
108116 }
109117
110118 for i := range podList .Items {
@@ -113,36 +121,39 @@ func (p *Plugin) Calculate(strategy *configuration.ColocationStrategy, node *cor
113121 continue
114122 }
115123
124+ // check if the pod has metrics
125+ podKey := util .GetPodKey (pod )
126+ podMetric , hasMetric := podMetricMap [podKey ]
127+ if hasMetric {
128+ podKnownUsed = quotav1 .Add (podKnownUsed , getPodMetricUsage (podMetric ))
129+ }
130+
131+ // count the high-priority usage
116132 priorityClass := extension .GetPodPriorityClassWithDefault (pod )
117133 podRequest := util .GetPodRequest (pod , corev1 .ResourceCPU , corev1 .ResourceMemory )
118134 isPodHighPriority := priorityClass != extension .PriorityBatch && priorityClass != extension .PriorityFree
119- if isPodHighPriority {
120- podHPRequest = quotav1 .Add (podHPRequest , podRequest )
121- }
122- podKey := util .GetPodKey (pod )
123- podMetric , ok := podMetricMap [podKey ]
124- if ! ok {
125- if isPodHighPriority {
126- podHPUsed = quotav1 .Add (podHPUsed , podRequest )
127- }
128- podAllUsed = quotav1 .Add (podAllUsed , podRequest )
135+ if ! isPodHighPriority {
129136 continue
130137 }
131-
132- if isPodHighPriority {
138+ podHPRequest = quotav1 . Add ( podHPRequest , podRequest )
139+ if hasMetric {
133140 podHPUsed = quotav1 .Add (podHPUsed , getPodMetricUsage (podMetric ))
141+ } else {
142+ podHPUsed = quotav1 .Add (podHPUsed , podRequest )
134143 }
135- podAllUsed = quotav1 .Add (podAllUsed , getPodMetricUsage (podMetric ))
136144 }
137145
146+ // For the pods reported with metrics but not shown in the current list, count them into the HP used.
147+ podUnknownPriorityUsed := quotav1 .Subtract (podAllUsed , podKnownUsed )
148+ podHPUsed = quotav1 .Add (podHPUsed , podUnknownPriorityUsed )
149+ klog .V (6 ).InfoS ("batch resource got unknown priority pods used" , "node" , node .Name ,
150+ "cpu" , podUnknownPriorityUsed .Cpu ().String (), "memory" , podUnknownPriorityUsed .Memory ().String ())
151+
138152 nodeAllocatable := getNodeAllocatable (node )
139153 nodeReservation := getNodeReservation (strategy , node )
140154
141- // System.Used = Node.Used - Pod(All).Used
142- nodeUsage := getNodeMetricUsage (nodeMetric .Status .NodeMetric )
143- systemUsed := quotav1 .Max (quotav1 .Subtract (nodeUsage , podAllUsed ), util .NewZeroResourceList ())
144-
145- // System.Used = max(System.Used, Node.Anno.Reserved)
155+ // System.Used = max(Node.Used - Pod(All).Used, Node.Anno.Reserved)
156+ systemUsed := getResourceListForCPUAndMemory (nodeMetric .Status .NodeMetric .SystemUsage .ResourceList )
146157 nodeAnnoReserved := util .GetNodeReservationFromAnnotation (node .Annotations )
147158 systemUsed = quotav1 .Max (systemUsed , nodeAnnoReserved )
148159
@@ -240,27 +251,12 @@ func prepareNodeForResource(node *corev1.Node, nr *framework.NodeResource, name
240251
241252// getPodMetricUsage returns the CPU and memory usage recorded in the pod's PodMetricInfo.
242253func getPodMetricUsage (info * slov1alpha1.PodMetricInfo ) corev1.ResourceList {
243- cpuQuant := info .PodUsage .ResourceList [corev1 .ResourceCPU ]
244- cpuUsageQuant := resource .NewMilliQuantity (cpuQuant .MilliValue (), cpuQuant .Format )
245- memQuant := info .PodUsage .ResourceList [corev1 .ResourceMemory ]
246- memUsageQuant := resource .NewQuantity (memQuant .Value (), memQuant .Format )
247- return corev1.ResourceList {corev1 .ResourceCPU : * cpuUsageQuant , corev1 .ResourceMemory : * memUsageQuant }
248- }
249-
250- // getNodeMetricUsage gets node usage from the NodeMetricInfo
251- func getNodeMetricUsage (info * slov1alpha1.NodeMetricInfo ) corev1.ResourceList {
252- cpuQ := info .NodeUsage .ResourceList [corev1 .ResourceCPU ]
253- cpuUsageQ := resource .NewMilliQuantity (cpuQ .MilliValue (), cpuQ .Format )
254- memQ := info .NodeUsage .ResourceList [corev1 .ResourceMemory ]
255- memUsageQ := resource .NewQuantity (memQ .Value (), memQ .Format )
256- return corev1.ResourceList {corev1 .ResourceCPU : * cpuUsageQ , corev1 .ResourceMemory : * memUsageQ }
254+ return getResourceListForCPUAndMemory (info .PodUsage .ResourceList )
257255}
258256
259257// getNodeAllocatable returns the node's allocatable resources, keeping only the CPU and memory entries.
260258func getNodeAllocatable (node * corev1.Node ) corev1.ResourceList {
261- result := node .Status .Allocatable .DeepCopy ()
262- result = quotav1 .Mask (result , []corev1.ResourceName {corev1 .ResourceCPU , corev1 .ResourceMemory })
263- return result
259+ return getResourceListForCPUAndMemory (node .Status .Allocatable )
264260}
265261
266262// getNodeReservation gets node-level safe-guarding reservation with the node's allocatable
@@ -275,6 +271,10 @@ func getNodeReservation(strategy *configuration.ColocationStrategy, node *corev1
275271 }
276272}
277273
// getResourceListForCPUAndMemory masks rl down to its CPU and memory entries via quotav1.Mask.
274+ func getResourceListForCPUAndMemory (rl corev1.ResourceList ) corev1.ResourceList {
275+ return quotav1 .Mask (rl , []corev1.ResourceName {corev1 .ResourceCPU , corev1 .ResourceMemory })
276+ }
277+
278278// getReserveRatio returns resource reserved ratio
279279func getReserveRatio (reclaimThreshold int64 ) float64 {
280280 return float64 (100 - reclaimThreshold ) / 100.0
0 commit comments