Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions apis/extension/numa_aware.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package extension
import (
"encoding/json"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)
Expand Down Expand Up @@ -65,8 +66,13 @@ type ResourceStatus struct {
// CPUSet represents the allocated CPUs. It is Linux CPU list formatted string.
// When LSE/LSR Pod requested, koord-scheduler will update the field.
CPUSet string `json:"cpuset,omitempty"`
// CPUSharedPools represents the desired CPU Shared Pools used by LS Pods.
CPUSharedPools []CPUSharedPool `json:"cpuSharedPools,omitempty"`
// NUMANodeResources indicates that the Pod is constrained to run on the specified NUMA Node.
NUMANodeResources []NUMANodeResource `json:"numaNodeResources,omitempty"`
}

// NUMANodeResource indicates that the Pod is constrained to run on the
// specified NUMA Node, together with the resources allocated from it.
// It is recorded by koord-scheduler in the Pod's ResourceStatus
// (NUMANodeResources field) for koordlet to act on.
type NUMANodeResource struct {
	// Node is the NUMA node ID the Pod is bound to.
	Node int32 `json:"node"`
	// Resources is the amount of each resource allocated to the Pod
	// from this NUMA node. Omitted when empty.
	Resources corev1.ResourceList `json:"resources,omitempty"`
}

// CPUBindPolicy defines the CPU binding policy
Expand Down Expand Up @@ -94,14 +100,6 @@ const (
CPUExclusivePolicyNUMANodeLevel CPUExclusivePolicy = "NUMANodeLevel"
)

type NUMACPUSharedPools []CPUSharedPool

type CPUSharedPool struct {
Socket int32 `json:"socket"`
Node int32 `json:"node"`
CPUSet string `json:"cpuset,omitempty"`
}

type NodeCPUBindPolicy string

const (
Expand Down Expand Up @@ -171,6 +169,12 @@ type PodCPUAlloc struct {

type PodCPUAllocs []PodCPUAlloc

type CPUSharedPool struct {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This share pool is not referenced?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

koordlet only needs to report pools to help diagnose, and koordlet will bind cpus per NUMANode of NUMANodeResources. This is also the current koordlet implementation.

Socket int32 `json:"socket"`
Node int32 `json:"node"`
CPUSet string `json:"cpuset,omitempty"`
}

type KubeletCPUManagerPolicy struct {
Policy string `json:"policy,omitempty"`
Options map[string]string `json:"options,omitempty"`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ reviewers:
- "@stormgbs"
- "@zwzhang0107"
creation-date: 2022-05-30
last-updated: 2022-12-13
last-updated: 2023-08-28
status: provisional

---
Expand Down Expand Up @@ -235,13 +235,18 @@ The scheme corresponding to the annotation value is defined as follows:

```go
type ResourceStatus struct {
CPUSet string `json:"cpuset,omitempty"`
CPUSharedPools []CPUSharedPool `json:"cpuSharedPools,omitempty"`
CPUSet string `json:"cpuset,omitempty"`
NUMANodeResources []NUMANodeResource `json:"numaNodeResources,omitempty"`
}

type NUMANodeResource struct {
Node int32 `json:"node"`
Resources corev1.ResourceList `json:"resources,omitempty"`
}
```

- `CPUSet` represents the allocated CPUs. When LSE/LSR Pod requested, koord-scheduler will update the field. It is Linux CPU list formatted string. For more details, please refer to [doc](http://man7.org/linux/man-pages/man7/cpuset.7.html#FORMATS).
- `CPUSharedPools` represents the desired CPU Shared Pools used by LS Pods. If the Node has the label `node.koordinator.sh/numa-topology-alignment-policy` with `Restricted/SingleNUMANode`, koord-scheduler will find the best-fit NUMA Node for the LS Pod, and update the field that requires koordlet uses the specified CPU Shared Pool. It should be noted that the scheduler does not update the `CPUSet` field in the `CPUSharedPool`, koordlet binds the CPU Shared Pool of the corresponding NUMA Node according to the `SocketID` and `NodeID` fields in the `CPUSharedPool`.
- `NUMANodeResources` indicates that the Pod is constrained to run on the specified NUMA Node. If the Node has the label `node.koordinator.sh/numa-topology-alignment-policy` with `BestEffort/Restricted/SingleNUMANode`, koord-scheduler will find the best-fit NUMA Node for the Pod, and update the field. The koordlet binds the CPU Shared Pool of the corresponding NUMA Node according to the `Node` field in the `NUMANodeResource`.

##### Example

Expand Down Expand Up @@ -400,8 +405,6 @@ type PodCPUAllocs []PodCPUAlloc
- The annotation `node.koordinator.sh/cpu-shared-pools` describes the CPU Shared Pool defined by Koordinator. The shared pool is mainly used by Koordinator LS Pods or K8s Burstable Pods. The scheme is defined as follows:

```go
type NUMACPUSharedPools []CPUSharedPool

type CPUSharedPool struct {
Socket int32 `json:"socket"`
Node int32 `json:"node"`
Expand Down Expand Up @@ -708,4 +711,5 @@ type ScoringStrategy struct {
- 2022-08-02: Update PodCPUAllocs definition
- 2022-09-08: Add ReservedCPUs in KubeletCPUManagerPolicy
- 2022-12-02: Clarify the mistakes in the original text and add QoS CPU orchestration picture
- 2022-12-12: NodeCPUBindPolicy support SpreadByPCPUs
- 2022-12-12: NodeCPUBindPolicy support SpreadByPCPUs
- 2023-08-28: Update the ResourceStatus
28 changes: 12 additions & 16 deletions pkg/koordlet/runtimehooks/hooks/cpuset/cpuset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,9 @@ func Test_cpusetPlugin_SetContainerCPUSet(t *testing.T) {
},
args: args{
podAlloc: &ext.ResourceStatus{
CPUSharedPools: []ext.CPUSharedPool{
NUMANodeResources: []ext.NUMANodeResource{
{
Socket: 0,
Node: 0,
Node: 0,
},
},
},
Expand All @@ -213,7 +212,7 @@ func Test_cpusetPlugin_SetContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand Down Expand Up @@ -245,7 +244,7 @@ func Test_cpusetPlugin_SetContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand Down Expand Up @@ -277,18 +276,17 @@ func Test_cpusetPlugin_SetContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
},
},
args: args{
podAlloc: &ext.ResourceStatus{
CPUSharedPools: []ext.CPUSharedPool{
NUMANodeResources: []ext.NUMANodeResource{
{
Socket: 0,
Node: 0,
Node: 0,
},
},
},
Expand All @@ -313,7 +311,7 @@ func Test_cpusetPlugin_SetContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand Down Expand Up @@ -436,10 +434,9 @@ func TestUnsetPodCPUQuota(t *testing.T) {
name: "not change cfs quota by pod allocated share pool",
args: args{
podAlloc: &ext.ResourceStatus{
CPUSharedPools: []ext.CPUSharedPool{
NUMANodeResources: []ext.NUMANodeResource{
{
Socket: 0,
Node: 0,
Node: 0,
},
},
},
Expand Down Expand Up @@ -566,10 +563,9 @@ func TestUnsetContainerCPUQuota(t *testing.T) {
name: "not change cfs quota by pod allocated share pool",
args: args{
podAlloc: &ext.ResourceStatus{
CPUSharedPools: []ext.CPUSharedPool{
NUMANodeResources: []ext.NUMANodeResource{
{
Socket: 0,
Node: 0,
Node: 0,
},
},
},
Expand Down
8 changes: 4 additions & 4 deletions pkg/koordlet/runtimehooks/hooks/cpuset/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ func (r *cpusetRule) getContainerCPUSet(containerReq *protocol.ContainerRequest)
return nil, err
}

if len(podAlloc.CPUSharedPools) != 0 {
if len(podAlloc.NUMANodeResources) != 0 {
// LS pods which have specified cpu share pool
cpusetList := make([]string, 0, len(podAlloc.CPUSharedPools))
for _, specifiedSharePool := range podAlloc.CPUSharedPools {
cpusetList := make([]string, 0, len(podAlloc.NUMANodeResources))
for _, numaNode := range podAlloc.NUMANodeResources {
for _, nodeSharePool := range r.sharePools {
if specifiedSharePool.Socket == nodeSharePool.Socket && specifiedSharePool.Node == nodeSharePool.Node {
if numaNode.Node == nodeSharePool.Node {
cpusetList = append(cpusetList, nodeSharePool.CPUSet)
}
}
Expand Down
15 changes: 7 additions & 8 deletions pkg/koordlet/runtimehooks/hooks/cpuset/rule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func Test_cpusetRule_getContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand All @@ -100,10 +100,9 @@ func Test_cpusetRule_getContainerCPUSet(t *testing.T) {
CgroupParent: "burstable/test-pod/test-container",
},
podAlloc: &ext.ResourceStatus{
CPUSharedPools: []ext.CPUSharedPool{
NUMANodeResources: []ext.NUMANodeResource{
{
Socket: 0,
Node: 0,
Node: 0,
},
},
},
Expand All @@ -122,7 +121,7 @@ func Test_cpusetRule_getContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand Down Expand Up @@ -153,7 +152,7 @@ func Test_cpusetRule_getContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand Down Expand Up @@ -182,7 +181,7 @@ func Test_cpusetRule_getContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand Down Expand Up @@ -210,7 +209,7 @@ func Test_cpusetRule_getContainerCPUSet(t *testing.T) {
},
{
Socket: 1,
Node: 0,
Node: 1,
CPUSet: "8-15",
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func buildCPUTopologyForTest(numSockets, nodesPerSocket, coresPerNode, cpusPerCo
for p := 0; p < cpusPerCore; p++ {
topo.CPUDetails[cpuID] = CPUInfo{
SocketID: s,
NodeID: s<<16 | nodeID,
NodeID: nodeID,
CoreID: coreID,
CPUID: cpuID,
}
Expand Down
1 change: 0 additions & 1 deletion pkg/scheduler/plugins/nodenumaresource/cpu_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ func NewCPUTopologyBuilder() *CPUTopologyBuilder {

func (b *CPUTopologyBuilder) AddCPUInfo(socketID, nodeID, coreID, cpuID int) *CPUTopologyBuilder {
coreID = socketID<<16 | coreID
nodeID = socketID<<16 | nodeID
cpuInfo := &CPUInfo{
CPUID: cpuID,
CoreID: coreID,
Expand Down