Skip to content

Commit e547931

Browse files
authored
koordlet: support init container for CPUSetAllocator (#2349)
Signed-off-by: 佑祎 <zzw261520@alibaba-inc.com>
1 parent 7a9e18c commit e547931

File tree

7 files changed

+200
-17
lines changed

7 files changed

+200
-17
lines changed

pkg/koordlet/runtimehooks/hooks/cpuset/rule.go

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,11 @@ func (p *cpusetPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error
208208
return nil
209209
}
210210
for _, podMeta := range target.Pods {
211-
for _, containerStat := range podMeta.Pod.Status.ContainerStatuses {
211+
allContainerStatus := make([]corev1.ContainerStatus, 0, len(podMeta.Pod.Status.ContainerStatuses)+len(podMeta.Pod.Status.InitContainerStatuses))
212+
allContainerStatus = append(allContainerStatus, podMeta.Pod.Status.ContainerStatuses...)
213+
allContainerStatus = append(allContainerStatus, podMeta.Pod.Status.InitContainerStatuses...)
214+
for _, containerStat := range allContainerStatus {
215+
// TODO exclude some init containers, e.g. restartPolicy != Always
212216
containerCtx := &protocol.ContainerContext{}
213217
containerCtx.FromReconciler(podMeta, containerStat.Name, false)
214218
if err := p.SetContainerCPUSet(containerCtx); err != nil {

pkg/koordlet/runtimehooks/hooks/cpuset/rule_test.go

Lines changed: 64 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -794,6 +794,7 @@ func Test_cpusetPlugin_ruleUpdateCbForPods(t *testing.T) {
794794
sandboxID string
795795
}
796796
type args struct {
797+
rule *cpusetRule
797798
pods []*testPod
798799
podAllocs map[string]ext.ResourceStatus
799800
}
@@ -810,6 +811,15 @@ func Test_cpusetPlugin_ruleUpdateCbForPods(t *testing.T) {
810811
{
811812
name: "set container cpuset",
812813
args: args{
814+
rule: &cpusetRule{
815+
sharePools: []ext.CPUSharedPool{
816+
{
817+
Socket: 0,
818+
Node: 0,
819+
CPUSet: "0-1,5-7",
820+
},
821+
},
822+
},
813823
pods: []*testPod{
814824
{
815825
pod: &corev1.Pod{
@@ -834,6 +844,43 @@ func Test_cpusetPlugin_ruleUpdateCbForPods(t *testing.T) {
834844
},
835845
sandboxID: "containerd://pod-with-cpuset-alloc-sandbox-id",
836846
},
847+
{
848+
pod: &corev1.Pod{
849+
ObjectMeta: metav1.ObjectMeta{
850+
UID: "pod-cpu-share-uid",
851+
Labels: map[string]string{
852+
ext.LabelPodQoS: string(ext.QoSLS),
853+
},
854+
},
855+
Spec: corev1.PodSpec{
856+
InitContainers: []corev1.Container{
857+
{
858+
Name: "init-container-with-cpu-share-name",
859+
},
860+
},
861+
Containers: []corev1.Container{
862+
{
863+
Name: "container-with-cpu-share-name",
864+
},
865+
},
866+
},
867+
Status: corev1.PodStatus{
868+
InitContainerStatuses: []corev1.ContainerStatus{
869+
{
870+
Name: "init-container-with-cpu-share-name",
871+
ContainerID: "containerd://init-container-with-cpu-share-uid",
872+
},
873+
},
874+
ContainerStatuses: []corev1.ContainerStatus{
875+
{
876+
Name: "container-with-cpu-share-name",
877+
ContainerID: "containerd://container-with-cpu-share-uid",
878+
},
879+
},
880+
},
881+
},
882+
sandboxID: "containerd://pod-cpu-share-sandbox-id",
883+
},
837884
{
838885
pod: &corev1.Pod{
839886
ObjectMeta: metav1.ObjectMeta{
@@ -870,10 +917,13 @@ func Test_cpusetPlugin_ruleUpdateCbForPods(t *testing.T) {
870917
wants: wants{
871918
containersCPUSet: map[string]string{
872919
"container-with-cpuset-alloc-name": "2-4",
920+
"init-container-with-cpu-share-name": "0-1,5-7",
921+
"container-with-cpu-share-name": "0-1,5-7",
873922
"container-with-bad-cpuset-alloc-name": "",
874923
},
875924
sandboxCPUSet: map[string]string{
876925
"pod-with-cpuset-alloc-uid": "2-4",
926+
"pod-cpu-share-uid": "0-1,5-7",
877927
"pod-with-bad-cpuset-alloc-uid": "",
878928
},
879929
},
@@ -897,6 +947,11 @@ func Test_cpusetPlugin_ruleUpdateCbForPods(t *testing.T) {
897947
// init cgroups cpuset file
898948
for _, testPod := range tt.args.pods {
899949
podMeta := podUIDMetas[string(testPod.pod.UID)]
950+
for _, initContainerStat := range podMeta.Pod.Status.InitContainerStatuses {
951+
containerPath, err := koordletutil.GetContainerCgroupParentDirByID(podMeta.CgroupDir, initContainerStat.ContainerID)
952+
assert.NoError(t, err, "get init container cgroup path during init container cpuset")
953+
initCPUSet(containerPath, "", testHelper)
954+
}
900955
for _, containerStat := range podMeta.Pod.Status.ContainerStatuses {
901956
containerPath, err := koordletutil.GetContainerCgroupParentDirByID(podMeta.CgroupDir, containerStat.ContainerID)
902957
assert.NoError(t, err, "get container cgroup path during init container cpuset")
@@ -921,7 +976,7 @@ func Test_cpusetPlugin_ruleUpdateCbForPods(t *testing.T) {
921976
}
922977
}
923978

924-
p := &cpusetPlugin{executor: resourceexecutor.NewResourceUpdateExecutor()}
979+
p := &cpusetPlugin{executor: resourceexecutor.NewResourceUpdateExecutor(), rule: tt.args.rule}
925980
stop := make(chan struct{})
926981
defer func() { close(stop) }()
927982
p.executor.Run(stop)
@@ -940,6 +995,14 @@ func Test_cpusetPlugin_ruleUpdateCbForPods(t *testing.T) {
940995

941996
for _, testPod := range tt.args.pods {
942997
podMeta := podUIDMetas[string(testPod.pod.UID)]
998+
for _, initContainerStat := range podMeta.Pod.Status.InitContainerStatuses {
999+
containerPath, err := koordletutil.GetContainerCgroupParentDirByID(podMeta.CgroupDir, initContainerStat.ContainerID)
1000+
assert.NoError(t, err, "get init contaienr cgorup path during check container cpuset")
1001+
gotCPUSet := getCPUSet(containerPath, testHelper)
1002+
assert.Equal(t, tt.wants.containersCPUSet[initContainerStat.Name], gotCPUSet,
1003+
"container cpuset after callback should be equal")
1004+
}
1005+
9431006
for _, containerStat := range podMeta.Pod.Status.ContainerStatuses {
9441007
containerPath, err := koordletutil.GetContainerCgroupParentDirByID(podMeta.CgroupDir, containerStat.ContainerID)
9451008
assert.NoError(t, err, "get contaienr cgorup path during check container cpuset")

pkg/koordlet/runtimehooks/protocol/container_context.go

Lines changed: 37 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -128,27 +128,55 @@ func (c *ContainerRequest) FromReconciler(podMeta *statesinformer.PodMeta, conta
128128
c.PodMeta.FromReconciler(podMeta.Pod.ObjectMeta)
129129
c.ContainerMeta.Name = containerName
130130
c.ContainerMeta.Sandbox = sandbox
131+
c.PodLabels = podMeta.Pod.Labels
132+
c.PodAnnotations = podMeta.Pod.Annotations
133+
131134
if sandbox {
132135
var err error
133136
if c.ContainerMeta.ID, err = koordletutil.GetPodSandboxContainerID(podMeta.Pod); err != nil {
134137
klog.V(4).Infof("failed to get sandbox container ID for pod %s, err: %s",
135138
podMeta.Key(), err)
136-
return
137139
} else if c.ContainerMeta.ID == "" {
138140
klog.V(4).Infof("container ID is empty for pod %s, pod may not start, skip", podMeta.Key())
139-
return
141+
} else {
142+
c.CgroupParent, _ = koordletutil.GetContainerCgroupParentDirByID(podMeta.CgroupDir, c.ContainerMeta.ID)
143+
klog.V(5).Infof("got sandbox %s cgroup parent %s for pod %s", c.ContainerMeta.ID, c.CgroupParent, podMeta.Key())
140144
}
141-
} else {
142-
for _, containerStat := range podMeta.Pod.Status.ContainerStatuses {
143-
if containerStat.Name == containerName {
144-
c.ContainerMeta.ID = containerStat.ContainerID
145+
return
146+
}
147+
148+
foundInContainer := false
149+
for _, containerStat := range podMeta.Pod.Status.ContainerStatuses {
150+
if containerStat.Name == containerName {
151+
c.ContainerMeta.ID = containerStat.ContainerID
152+
foundInContainer = true
153+
break
154+
}
155+
}
156+
if !foundInContainer {
157+
for _, initContainerStat := range podMeta.Pod.Status.InitContainerStatuses {
158+
if initContainerStat.Name == containerName {
159+
c.ContainerMeta.ID = initContainerStat.ContainerID
145160
break
146161
}
147162
}
148163
}
164+
if c.ContainerMeta.ID == "" {
165+
klog.V(4).Infof("container ID not found in container and init container status for pod %s, pod may not start, skip", podMeta.Key())
166+
return
167+
}
168+
c.CgroupParent, _ = koordletutil.GetContainerCgroupParentDirByID(podMeta.CgroupDir, c.ContainerMeta.ID)
169+
170+
var containerCandidates []corev1.Container
171+
if foundInContainer {
172+
containerCandidates = podMeta.Pod.Spec.Containers
173+
} else {
174+
containerCandidates = podMeta.Pod.Spec.InitContainers
175+
}
176+
149177
var specFromContainer *apiext.ExtendedResourceContainerSpec
150-
for i := range podMeta.Pod.Spec.Containers {
151-
containerSpec := podMeta.Pod.Spec.Containers[i]
178+
for i := range containerCandidates {
179+
containerSpec := containerCandidates[i]
152180
if containerSpec.Name == containerName {
153181
if c.ContainerEnvs == nil {
154182
c.ContainerEnvs = map[string]string{}
@@ -162,9 +190,7 @@ func (c *ContainerRequest) FromReconciler(podMeta *statesinformer.PodMeta, conta
162190
break
163191
}
164192
}
165-
c.PodLabels = podMeta.Pod.Labels
166-
c.PodAnnotations = podMeta.Pod.Annotations
167-
c.CgroupParent, _ = koordletutil.GetContainerCgroupParentDirByID(podMeta.CgroupDir, c.ContainerMeta.ID)
193+
168194
// retrieve ExtendedResources from container spec and pod annotations (prefer container spec)
169195
specFromAnnotations, err := apiext.GetExtendedResourceSpec(podMeta.Pod.Annotations)
170196
if err != nil {

pkg/koordlet/runtimehooks/reconciler/reconciler.go

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -484,6 +484,32 @@ func (c *reconciler) reconcilePodCgroup(stopCh <-chan struct{}) {
484484
}
485485
}
486486

487+
for _, initContainerStat := range podMeta.Pod.Status.InitContainerStatuses {
488+
// TODO exclude some init containers, e.g. restartPolicy != Always
489+
for resourceType, r := range globalCgroupReconcilers.containerLevel {
490+
condition := r.filter.Filter(podMeta)
491+
reconcileFn, ok := r.fn[condition]
492+
if !ok {
493+
klog.V(5).Infof("calling reconcile function %v aborted for init container %v/%v, condition %s not registered",
494+
r.description[condition], podMeta.Key(), initContainerStat.Name, condition)
495+
continue
496+
}
497+
498+
containerCtx := protocol.HooksProtocolBuilder.Container(podMeta, initContainerStat.Name)
499+
start := time.Now()
500+
if err := reconcileFn(containerCtx); err != nil {
501+
metrics.RecordRuntimeHookReconcilerInvokedDurationMilliSeconds(string(ContainerLevel), resourceType, err, metrics.SinceInSeconds(start))
502+
klog.Warningf("calling reconcile function %v for init container %v/%v failed, error %v",
503+
r.description[condition], podMeta.Key(), initContainerStat.Name, err)
504+
} else {
505+
containerCtx.ReconcilerDone(c.executor)
506+
metrics.RecordRuntimeHookReconcilerInvokedDurationMilliSeconds(string(ContainerLevel), resourceType, nil, metrics.SinceInSeconds(start))
507+
klog.V(5).Infof("calling reconcile function %v for init container %v/%v finish",
508+
r.description[condition], podMeta.Key(), initContainerStat.Name)
509+
}
510+
}
511+
}
512+
487513
for _, containerStat := range podMeta.Pod.Status.ContainerStatuses {
488514
for resourceType, r := range globalCgroupReconcilers.containerLevel {
489515
condition := r.filter.Filter(podMeta)

pkg/koordlet/runtimehooks/reconciler/reconciler_test.go

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,24 @@ func Test_reconciler_reconcilePodCgroup(t *testing.T) {
195195
UID: "test1-pod-uid",
196196
},
197197
Spec: corev1.PodSpec{
198+
InitContainers: []corev1.Container{
199+
{
200+
Name: "test1-init-container-name",
201+
},
202+
},
198203
Containers: []corev1.Container{
199204
{
200205
Name: "test1-container-name",
201206
},
202207
},
203208
},
204209
Status: corev1.PodStatus{
210+
InitContainerStatuses: []corev1.ContainerStatus{
211+
{
212+
Name: "test1-init-container-name",
213+
ContainerID: "test1-init-container-id",
214+
},
215+
},
205216
ContainerStatuses: []corev1.ContainerStatus{
206217
{
207218
Name: "test1-container-name",
@@ -219,13 +230,24 @@ func Test_reconciler_reconcilePodCgroup(t *testing.T) {
219230
UID: "test2-pod-uid",
220231
},
221232
Spec: corev1.PodSpec{
233+
InitContainers: []corev1.Container{
234+
{
235+
Name: "test2-init-container-name",
236+
},
237+
},
222238
Containers: []corev1.Container{
223239
{
224240
Name: "test2-container-name",
225241
},
226242
},
227243
},
228244
Status: corev1.PodStatus{
245+
InitContainerStatuses: []corev1.ContainerStatus{
246+
{
247+
Name: "test2-init-container-name",
248+
ContainerID: "test2-init-container-id",
249+
},
250+
},
229251
ContainerStatuses: []corev1.ContainerStatus{
230252
{
231253
Name: "test2-container-name",
@@ -243,13 +265,24 @@ func Test_reconciler_reconcilePodCgroup(t *testing.T) {
243265
UID: "test3-pod-uid",
244266
},
245267
Spec: corev1.PodSpec{
268+
InitContainers: []corev1.Container{
269+
{
270+
Name: "test3-init-container-name",
271+
},
272+
},
246273
Containers: []corev1.Container{
247274
{
248275
Name: "test3-container-name",
249276
},
250277
},
251278
},
252279
Status: corev1.PodStatus{
280+
InitContainerStatuses: []corev1.ContainerStatus{
281+
{
282+
Name: "test3-init-container-name",
283+
ContainerID: "test3-init-container-id",
284+
},
285+
},
253286
ContainerStatuses: []corev1.ContainerStatus{
254287
{
255288
Name: "test3-container-name",
@@ -268,9 +301,12 @@ func Test_reconciler_reconcilePodCgroup(t *testing.T) {
268301
genPodKey("test-ns", "test3-pod-name"): "test3-pod-uid",
269302
},
270303
wantContainers: map[string]string{
271-
genContainerKey("test-ns", "test1-pod-name", "test1-container-name"): "test1-container-id",
272-
genContainerKey("test-ns", "test2-pod-name", "test2-container-name"): "test2-container-id",
273-
genContainerKey("test-ns", "test3-pod-name", "test3-container-name"): "test3-container-id",
304+
genContainerKey("test-ns", "test1-pod-name", "test1-container-name"): "test1-container-id",
305+
genContainerKey("test-ns", "test1-pod-name", "test1-init-container-name"): "test1-init-container-id",
306+
genContainerKey("test-ns", "test2-pod-name", "test2-container-name"): "test2-container-id",
307+
genContainerKey("test-ns", "test2-pod-name", "test2-init-container-name"): "test2-init-container-id",
308+
genContainerKey("test-ns", "test3-pod-name", "test3-container-name"): "test3-container-id",
309+
genContainerKey("test-ns", "test3-pod-name", "test3-init-container-name"): "test3-init-container-id",
274310
},
275311
wantPods4AllPods: map[string]string{
276312
genPodKey("test-ns", "test1-pod-name"): "test1-pod-uid",

pkg/koordlet/util/pod.go

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,13 +61,25 @@ func GetPodSandboxContainerID(pod *corev1.Pod) (string, error) {
6161
// get runtime type and dir names of known containers
6262
containerSubDirNames := make(map[string]struct{}, len(pod.Status.ContainerStatuses))
6363
containerRuntime := system.RuntimeTypeUnknown
64+
for _, containerStat := range pod.Status.InitContainerStatuses {
65+
runtimeType, containerDirName, err := system.CgroupPathFormatter.ContainerDirFn(containerStat.ContainerID)
66+
if err != nil {
67+
return "", err
68+
}
69+
containerSubDirNames[containerDirName] = struct{}{}
70+
if containerRuntime == system.RuntimeTypeUnknown {
71+
containerRuntime = runtimeType
72+
}
73+
}
6474
for _, containerStat := range pod.Status.ContainerStatuses {
6575
runtimeType, containerDirName, err := system.CgroupPathFormatter.ContainerDirFn(containerStat.ContainerID)
6676
if err != nil {
6777
return "", err
6878
}
6979
containerSubDirNames[containerDirName] = struct{}{}
70-
containerRuntime = runtimeType
80+
if containerRuntime == system.RuntimeTypeUnknown {
81+
containerRuntime = runtimeType
82+
}
7183
}
7284

7385
sandboxCandidates := make([]string, 0)

0 commit comments

Comments
 (0)