Skip to content

Commit 9391cd3

Browse files
chown cgroup to process uid in container namespace
Delegating cgroups to the container enables more complex workloads, including systemd-based workloads. The OCI runtime-spec was recently updated to explicitly admit such delegation, through specification of cgroup ownership semantics: opencontainers/runtime-spec#1123.

Pursuant to the updated OCI runtime-spec, change the ownership of the container's cgroup directory and particular files therein, when using cgroups v2 and when the cgroupfs is to be mounted read/write.

As a result of this change, systemd workloads can run in isolated user namespaces on OpenShift when the sandbox's cgroupfs is mounted read/write.

It might be possible to implement this feature in other cgroup managers, but that work is deferred.

Signed-off-by: Fraser Tweedale <ftweedal@redhat.com>
1 parent b6fa634 commit 9391cd3

2 files changed

Lines changed: 46 additions & 0 deletions

File tree

libcontainer/cgroups/systemd/v2.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package systemd
22

33
import (
4+
"bufio"
45
"fmt"
56
"math"
7+
"os"
68
"path/filepath"
79
"strconv"
810
"strings"
@@ -288,9 +290,46 @@ func (m *unifiedManager) Apply(pid int) error {
288290
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
289291
return err
290292
}
293+
294+
if c.OwnerUID != nil {
295+
filesToChown, err := cgroupFilesToChown()
296+
if err != nil {
297+
return err
298+
}
299+
300+
for _, v := range filesToChown {
301+
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
302+
if err != nil {
303+
return err
304+
}
305+
}
306+
}
307+
291308
return nil
292309
}
293310

311+
// cgroupFilesToChown returns the paths, relative to a cgroup directory, whose
// ownership must be changed when the cgroup is delegated to another uid.
//
// The first entry is always ".", i.e. the cgroup directory itself. The
// remaining entries are read, one per line, from /sys/kernel/cgroup/delegate,
// where the kernel exposes the list of files that should be chowned to the
// delegate uid. If that file cannot be opened (it is not present on
// Linux < 4.15), the initial values mentioned in cgroups(7) are used instead.
//
// A non-nil error is returned only when the delegate file was opened but
// could not be read to the end; the returned slice is nil in that case.
func cgroupFilesToChown() ([]string, error) {
	const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"

	filesToChown := []string{"."} // the directory itself must be chowned

	f, err := os.Open(cgroupDelegateFile)
	if err != nil {
		// Deliberately best-effort: any failure to open the kernel's list
		// falls back to the documented defaults rather than failing.
		return append(filesToChown, "cgroup.procs", "cgroup.subtree_control", "cgroup.threads"), nil
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		// Skip blank lines: an empty name appended to the cgroup path just
		// names the directory again, which "." already covers.
		if name := scanner.Text(); name != "" {
			filesToChown = append(filesToChown, name)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, err)
	}

	return filesToChown, nil
}
332+
294333
func (m *unifiedManager) Destroy() error {
295334
m.mu.Lock()
296335
defer m.mu.Unlock()

libcontainer/configs/cgroup_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ type Cgroup struct {
4141

4242
// Rootless tells if rootless cgroups should be used.
4343
Rootless bool
44+
45+
// The host UID that should own the cgroup, or nil to accept
46+
// the default ownership. This should only be set when the
47+
// cgroupfs is to be mounted read/write.
48+
// Not all cgroup manager implementations support changing
49+
// the ownership.
50+
OwnerUID *int `json:"owner_uid,omitempty"`
4451
}
4552

4653
type Resources struct {

0 commit comments

Comments
 (0)