Skip to content

Commit c38db5b

Browse files
committed
koord-descheduler: fix, use raw-allocatable for threshold calculation in descheduler loadaware plugin.
Signed-off-by: tan90github <wangy9834@163.com>
1 parent 1399086 commit c38db5b

File tree

2 files changed

+112
-6
lines changed

2 files changed

+112
-6
lines changed

pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
coretesting "k8s.io/client-go/testing"
3636
"k8s.io/client-go/tools/events"
3737

38+
"github.com/koordinator-sh/koordinator/apis/extension"
3839
slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
3940
koordinatorclientset "github.com/koordinator-sh/koordinator/pkg/client/clientset/versioned"
4041
koordfake "github.com/koordinator-sh/koordinator/pkg/client/clientset/versioned/fake"
@@ -1643,6 +1644,9 @@ func TestOverUtilizedEvictionReason(t *testing.T) {
16431644
node: &corev1.Node{
16441645
ObjectMeta: metav1.ObjectMeta{
16451646
Name: "test-node",
1647+
Annotations: map[string]string{
1648+
extension.AnnotationNodeRawAllocatable: `{"cpu":96,"memory":"512Gi"}`,
1649+
},
16461650
},
16471651
Status: corev1.NodeStatus{
16481652
Allocatable: corev1.ResourceList{
@@ -1666,6 +1670,9 @@ func TestOverUtilizedEvictionReason(t *testing.T) {
16661670
node: &corev1.Node{
16671671
ObjectMeta: metav1.ObjectMeta{
16681672
Name: "test-node",
1673+
Annotations: map[string]string{
1674+
extension.AnnotationNodeRawAllocatable: `{"cpu":96,"memory":"512Gi"}`,
1675+
},
16691676
},
16701677
Status: corev1.NodeStatus{
16711678
Allocatable: corev1.ResourceList{
@@ -1694,6 +1701,9 @@ func TestOverUtilizedEvictionReason(t *testing.T) {
16941701
node: &corev1.Node{
16951702
ObjectMeta: metav1.ObjectMeta{
16961703
Name: "test-node",
1704+
Annotations: map[string]string{
1705+
extension.AnnotationNodeRawAllocatable: `{"cpu":96,"memory":"512Gi"}`,
1706+
},
16971707
},
16981708
Status: corev1.NodeStatus{
16991709
Allocatable: corev1.ResourceList{
@@ -1726,6 +1736,9 @@ func TestOverUtilizedEvictionReason(t *testing.T) {
17261736
node: &corev1.Node{
17271737
ObjectMeta: metav1.ObjectMeta{
17281738
Name: "test-node",
1739+
Annotations: map[string]string{
1740+
extension.AnnotationNodeRawAllocatable: `{"cpu":96,"memory":"512Gi"}`,
1741+
},
17291742
},
17301743
Status: corev1.NodeStatus{
17311744
Allocatable: corev1.ResourceList{
@@ -1998,3 +2011,78 @@ func Test_filterRealAbnormalNodes(t *testing.T) {
19982011
})
19992012
}
20002013
}
2014+
2015+
func Test_GetNodeRawAllocatableForDescheduler(t *testing.T) {
2016+
tests := []struct {
2017+
name string
2018+
node *corev1.Node
2019+
want corev1.ResourceList
2020+
}{
2021+
{
2022+
name: "node has no annotation - use allocatable",
2023+
node: &corev1.Node{
2024+
ObjectMeta: metav1.ObjectMeta{
2025+
Name: "test-node",
2026+
},
2027+
Status: corev1.NodeStatus{
2028+
Allocatable: corev1.ResourceList{
2029+
corev1.ResourceCPU: resource.MustParse("16"),
2030+
corev1.ResourceMemory: resource.MustParse("32Gi"),
2031+
},
2032+
},
2033+
},
2034+
want: corev1.ResourceList{
2035+
corev1.ResourceCPU: resource.MustParse("16"),
2036+
corev1.ResourceMemory: resource.MustParse("32Gi"),
2037+
},
2038+
},
2039+
{
2040+
name: "node has valid raw allocatable annotation - use raw allocatable",
2041+
node: &corev1.Node{
2042+
ObjectMeta: metav1.ObjectMeta{
2043+
Name: "test-node",
2044+
Annotations: map[string]string{
2045+
extension.AnnotationNodeRawAllocatable: `{"cpu":"8","memory":"16Gi"}`,
2046+
},
2047+
},
2048+
Status: corev1.NodeStatus{
2049+
Allocatable: corev1.ResourceList{
2050+
corev1.ResourceCPU: resource.MustParse("32"),
2051+
corev1.ResourceMemory: resource.MustParse("64Gi"),
2052+
},
2053+
},
2054+
},
2055+
want: corev1.ResourceList{
2056+
corev1.ResourceCPU: resource.MustParse("8"),
2057+
corev1.ResourceMemory: resource.MustParse("16Gi"),
2058+
},
2059+
},
2060+
{
2061+
name: "node has invalid raw allocatable annotation - fallback to allocatable",
2062+
node: &corev1.Node{
2063+
ObjectMeta: metav1.ObjectMeta{
2064+
Name: "test-node",
2065+
Annotations: map[string]string{
2066+
extension.AnnotationNodeRawAllocatable: "invalid",
2067+
},
2068+
},
2069+
Status: corev1.NodeStatus{
2070+
Allocatable: corev1.ResourceList{
2071+
corev1.ResourceCPU: resource.MustParse("16"),
2072+
corev1.ResourceMemory: resource.MustParse("32Gi"),
2073+
},
2074+
},
2075+
},
2076+
want: corev1.ResourceList{
2077+
corev1.ResourceCPU: resource.MustParse("16"),
2078+
corev1.ResourceMemory: resource.MustParse("32Gi"),
2079+
},
2080+
},
2081+
}
2082+
for _, tt := range tests {
2083+
t.Run(tt.name, func(t *testing.T) {
2084+
got := GetNodeRawAllocatableFromNode(tt.node)
2085+
assert.Equal(t, tt.want, got)
2086+
})
2087+
}
2088+
}

pkg/descheduler/framework/plugins/loadaware/utilization_util.go

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ func getNodeThresholds(
102102
prodLowResourceThreshold: map[corev1.ResourceName]*resource.Quantity{},
103103
prodHighResourceThreshold: map[corev1.ResourceName]*resource.Quantity{},
104104
}
105-
allocatable := nodeUsage.node.Status.Allocatable
105+
allocatable := GetNodeRawAllocatableFromNode(nodeUsage.node)
106106
for _, resourceName := range resourceNames {
107107
if useDeviationThresholds {
108108
resourceCapacity := allocatable[resourceName]
@@ -287,7 +287,7 @@ func classifyNodes(
287287
}
288288

289289
func resourceUsagePercentages(nodeUsage *NodeUsage, prod bool) map[corev1.ResourceName]float64 {
290-
allocatable := nodeUsage.node.Status.Allocatable
290+
allocatable := GetNodeRawAllocatableFromNode(nodeUsage.node)
291291
resourceUsagePercentage := map[corev1.ResourceName]float64{}
292292
var usage map[corev1.ResourceName]*resource.Quantity
293293
if prod {
@@ -585,8 +585,8 @@ func sortNodesByUsage(nodes []NodeInfo, resourceToWeightMap map[corev1.ResourceN
585585
jNodeUsage = usageToResourceList(nodes[j].usage)
586586
}
587587

588-
iScore := scorer(iNodeUsage, nodes[i].node.Status.Allocatable)
589-
jScore := scorer(jNodeUsage, nodes[j].node.Status.Allocatable)
588+
iScore := scorer(iNodeUsage, GetNodeRawAllocatableFromNode(nodes[i].node))
589+
jScore := scorer(jNodeUsage, GetNodeRawAllocatableFromNode(nodes[j].node))
590590
if ascending {
591591
return iScore < jScore
592592
}
@@ -661,7 +661,7 @@ func calcAverageResourceUsagePercent(nodeUsages map[string]*NodeUsage) (Resource
661661
for _, nodeUsage := range nodeUsages {
662662
usage := nodeUsage.usage
663663
prodUsage := nodeUsage.prodUsage
664-
allocatable := nodeUsage.node.Status.Allocatable
664+
allocatable := GetNodeRawAllocatableFromNode(nodeUsage.node)
665665
for resourceName, used := range usage {
666666
total := allocatable[resourceName]
667667
if total.IsZero() {
@@ -721,7 +721,7 @@ func sortPodsOnOneOverloadedNode(srcNode NodeInfo, removablePods []*corev1.Pod,
721721
resourcesThatExceedThresholds,
722722
removablePods,
723723
srcNode.podMetrics,
724-
map[string]corev1.ResourceList{srcNode.node.Name: srcNode.node.Status.Allocatable},
724+
map[string]corev1.ResourceList{srcNode.node.Name: GetNodeRawAllocatableFromNode(srcNode.node)},
725725
weights,
726726
)
727727
}
@@ -777,3 +777,21 @@ func podFitsAnyNodeWithThreshold(nodeIndexer podutil.GetPodsAssignedToNodeFunc,
777777
}
778778
return false
779779
}
780+
781+
// GetNodeRawAllocatableFromNode gets the raw allocatable from node annotation.
782+
// In the cpu-normalization or amplification scenario, node Allocatable will be amplified,
783+
// so raw-allocatable needs to be obtained during descheduling to accurately calculate node usage percent.
784+
// If raw-allocatable is not set or fails to parse, returns the amplified Allocatable as fallback.
785+
func GetNodeRawAllocatableFromNode(node *corev1.Node) corev1.ResourceList {
786+
allocatable := node.Status.Allocatable
787+
rawAllocatable, err := extension.GetNodeRawAllocatable(node.Annotations)
788+
if err != nil {
789+
klog.Errorf("Failed to parse %s raw allocatable, using amplified allocatable as fallback, err: %v", node.Name, err)
790+
return allocatable
791+
}
792+
if rawAllocatable == nil {
793+
klog.V(3).Infof("Node %s has no raw-allocatable annotation, using node status allocatable", node.Name)
794+
return allocatable
795+
}
796+
return rawAllocatable
797+
}

0 commit comments

Comments
 (0)