Skip to content

Commit 653207b

Browse files
author
Mrunal Patel
authored
Merge pull request opencontainers#774 from cyphar/rootless-containers
Rootless Containers
2 parents ef9a4b3 + ba38383 commit 653207b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1257
-306
lines changed

Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ RUN echo 'deb http://httpredir.debian.org/debian jessie-backports main' > /etc/a
66
RUN apt-get update && apt-get install -y \
77
build-essential \
88
curl \
9+
sudo \
910
gawk \
1011
iptables \
1112
jq \
@@ -22,6 +23,12 @@ RUN apt-get update && apt-get install -y \
2223
--no-install-recommends \
2324
&& apt-get clean
2425

26+
# Add a dummy user for the rootless integration tests. While runC does
27+
# not require an entry in /etc/passwd to operate, one of the tests uses
28+
# `git clone` -- and `git clone` does not allow you to clone a
29+
# repository if the current uid does not have an entry in /etc/passwd.
30+
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
31+
2532
# install bats
2633
RUN cd /tmp \
2734
&& git clone https://github.com/sstephenson/bats.git \

Makefile

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
66
PREFIX := $(DESTDIR)/usr/local
7-
BINDIR := $(PREFIX)/sbin
7+
BINDIR := $(PREFIX)/bin
88
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
99
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
1010
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
@@ -79,10 +79,10 @@ runcimage:
7979
docker build -t $(RUNC_IMAGE) .
8080

8181
test:
82-
make unittest integration
82+
make unittest integration rootlessintegration
8383

8484
localtest:
85-
make localunittest localintegration
85+
make localunittest localintegration localrootlessintegration
8686

8787
unittest: runcimage
8888
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest
@@ -96,6 +96,13 @@ integration: runcimage
9696
localintegration: all
9797
bats -t tests/integration${TESTFLAGS}
9898

99+
rootlessintegration: runcimage
100+
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) --cap-drop=ALL -u rootless $(RUNC_IMAGE) make localintegration
101+
102+
# FIXME: This should not be separate from rootlessintegration's method of running.
103+
localrootlessintegration: all
104+
sudo -u rootless -H PATH="${PATH}" bats -t tests/integration${TESTFLAGS}
105+
99106
shell: all
100107
docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
101108

checkpoint.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ checkpointed.`,
3939
if err := checkArgs(context, 1, exactArgs); err != nil {
4040
return err
4141
}
42+
// XXX: Currently this is untested with rootless containers.
43+
if isRootless() {
44+
return fmt.Errorf("runc checkpoint requires root")
45+
}
46+
4247
container, err := getContainer(context)
4348
if err != nil {
4449
return err

exec.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,6 @@ following will output a list of processes running in the container:
9090
if err := checkArgs(context, 1, minArgs); err != nil {
9191
return err
9292
}
93-
if os.Geteuid() != 0 {
94-
return fmt.Errorf("runc should be run as root")
95-
}
9693
if err := revisePidFile(context); err != nil {
9794
return err
9895
}

libcontainer/cgroups/fs/apply_raw.go

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
267267
}, nil
268268
}
269269

270-
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
271-
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
272-
// process could in container and shared pid namespace with host, and
273-
// /proc/1/cgroup could point to whole other world of cgroups.
274-
initPath, err := cgroups.GetThisCgroupDir(subsystem)
275-
if err != nil {
276-
return "", err
277-
}
278-
// This is needed for nested containers, because in /proc/self/cgroup we
279-
// see pathes from host, which don't exist in container.
280-
relDir, err := filepath.Rel(root, initPath)
281-
if err != nil {
282-
return "", err
283-
}
284-
return filepath.Join(mountpoint, relDir), nil
285-
}
286-
287270
func (raw *cgroupData) path(subsystem string) (string, error) {
288-
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
271+
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
289272
// If we didn't mount the subsystem, there is no point in making the path.
290273
if err != nil {
291274
return "", err
@@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
297280
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
298281
}
299282

300-
parentPath, err := raw.parentPath(subsystem, mnt, root)
283+
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
284+
// process could be in a container and share a pid namespace with the host, and
285+
// /proc/1/cgroup could point to a whole other world of cgroups.
286+
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
301287
if err != nil {
302288
return "", err
303289
}
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// +build linux
2+
3+
package rootless
4+
5+
import (
6+
"fmt"
7+
8+
"github.com/opencontainers/runc/libcontainer/cgroups"
9+
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
10+
"github.com/opencontainers/runc/libcontainer/configs"
11+
"github.com/opencontainers/runc/libcontainer/configs/validate"
12+
)
13+
14+
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
15+
// needlessly. We should probably export this list.
16+
17+
var subsystems = []subsystem{
18+
&fs.CpusetGroup{},
19+
&fs.DevicesGroup{},
20+
&fs.MemoryGroup{},
21+
&fs.CpuGroup{},
22+
&fs.CpuacctGroup{},
23+
&fs.PidsGroup{},
24+
&fs.BlkioGroup{},
25+
&fs.HugetlbGroup{},
26+
&fs.NetClsGroup{},
27+
&fs.NetPrioGroup{},
28+
&fs.PerfEventGroup{},
29+
&fs.FreezerGroup{},
30+
&fs.NameGroup{GroupName: "name=systemd"},
31+
}
32+
33+
type subsystem interface {
34+
// Name returns the name of the subsystem.
35+
Name() string
36+
37+
// GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'.
38+
GetStats(path string, stats *cgroups.Stats) error
39+
}
40+
41+
// The noop cgroup manager is used for rootless containers, because we currently
42+
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
43+
// by factory if we are in rootless mode. We error out if any cgroup options are
44+
// set in the config -- this may change in the future with upcoming kernel features
45+
// like the cgroup namespace.
46+
47+
type Manager struct {
48+
Cgroups *configs.Cgroup
49+
Paths map[string]string
50+
}
51+
52+
func (m *Manager) Apply(pid int) error {
53+
// If there are no cgroup settings, there's nothing to do.
54+
if m.Cgroups == nil {
55+
return nil
56+
}
57+
58+
// We can't set paths.
59+
// TODO(cyphar): Implement the case where the runner of a rootless container
60+
// owns their own cgroup, which would allow us to set up a
61+
// cgroup for each path.
62+
if m.Cgroups.Paths != nil {
63+
return fmt.Errorf("cannot change cgroup path in rootless container")
64+
}
65+
66+
// We load the paths into the manager.
67+
paths := make(map[string]string)
68+
for _, sys := range subsystems {
69+
name := sys.Name()
70+
71+
path, err := cgroups.GetOwnCgroupPath(name)
72+
if err != nil {
73+
// Ignore paths we couldn't resolve.
74+
continue
75+
}
76+
77+
paths[name] = path
78+
}
79+
80+
m.Paths = paths
81+
return nil
82+
}
83+
84+
func (m *Manager) GetPaths() map[string]string {
85+
return m.Paths
86+
}
87+
88+
func (m *Manager) Set(container *configs.Config) error {
89+
// We have to re-do the validation here, since someone might decide to
90+
// update a rootless container.
91+
return validate.New().Validate(container)
92+
}
93+
94+
func (m *Manager) GetPids() ([]int, error) {
95+
dir, err := cgroups.GetOwnCgroupPath("devices")
96+
if err != nil {
97+
return nil, err
98+
}
99+
return cgroups.GetPids(dir)
100+
}
101+
102+
func (m *Manager) GetAllPids() ([]int, error) {
103+
dir, err := cgroups.GetOwnCgroupPath("devices")
104+
if err != nil {
105+
return nil, err
106+
}
107+
return cgroups.GetAllPids(dir)
108+
}
109+
110+
func (m *Manager) GetStats() (*cgroups.Stats, error) {
111+
// TODO(cyphar): We can make this work if we figure out a way to allow usage
112+
// of cgroups with a rootless container. While this doesn't
113+
// actually require write access to a cgroup directory, the
114+
// statistics are not useful if they can be affected by
115+
// non-container processes.
116+
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
117+
}
118+
119+
func (m *Manager) Freeze(state configs.FreezerState) error {
120+
// TODO(cyphar): We can make this work if we figure out a way to allow usage
121+
// of cgroups with a rootless container.
122+
return fmt.Errorf("cannot use freezer cgroup in rootless container")
123+
}
124+
125+
func (m *Manager) Destroy() error {
126+
// We don't have to do anything here because we didn't do any setup.
127+
return nil
128+
}

libcontainer/cgroups/systemd/apply_systemd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
426426
return "", err
427427
}
428428

429-
initPath, err := cgroups.GetInitCgroupDir(subsystem)
429+
initPath, err := cgroups.GetInitCgroup(subsystem)
430430
if err != nil {
431431
return "", err
432432
}

libcontainer/cgroups/utils.go

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ type Mount struct {
109109
Subsystems []string
110110
}
111111

112-
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
112+
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
113113
if len(m.Subsystems) == 0 {
114114
return "", fmt.Errorf("no subsystem for mount")
115115
}
@@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) {
203203
return subsystems, nil
204204
}
205205

206-
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
207-
func GetThisCgroupDir(subsystem string) (string, error) {
206+
// GetOwnCgroup returns the relative path to the cgroup the calling process is running in.
207+
func GetOwnCgroup(subsystem string) (string, error) {
208208
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
209209
if err != nil {
210210
return "", err
@@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
213213
return getControllerPath(subsystem, cgroups)
214214
}
215215

216-
func GetInitCgroupDir(subsystem string) (string, error) {
216+
func GetOwnCgroupPath(subsystem string) (string, error) {
217+
cgroup, err := GetOwnCgroup(subsystem)
218+
if err != nil {
219+
return "", err
220+
}
217221

222+
return getCgroupPathHelper(subsystem, cgroup)
223+
}
224+
225+
func GetInitCgroup(subsystem string) (string, error) {
218226
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
219227
if err != nil {
220228
return "", err
@@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
223231
return getControllerPath(subsystem, cgroups)
224232
}
225233

234+
func GetInitCgroupPath(subsystem string) (string, error) {
235+
cgroup, err := GetInitCgroup(subsystem)
236+
if err != nil {
237+
return "", err
238+
}
239+
240+
return getCgroupPathHelper(subsystem, cgroup)
241+
}
242+
243+
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
244+
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
245+
if err != nil {
246+
return "", err
247+
}
248+
249+
// This is needed for nested containers, because in /proc/self/cgroup we
250+
// see paths from the host, which don't exist in the container.
251+
relCgroup, err := filepath.Rel(root, cgroup)
252+
if err != nil {
253+
return "", err
254+
}
255+
256+
return filepath.Join(mnt, relCgroup), nil
257+
}
258+
226259
func readProcsFile(dir string) ([]int, error) {
227260
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
228261
if err != nil {

libcontainer/configs/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ type Config struct {
183183
// NoNewKeyring will not allocate a new session keyring for the container. It will use the
184184
// caller's keyring in this case.
185185
NoNewKeyring bool `json:"no_new_keyring"`
186+
187+
// Rootless specifies whether the container is a rootless container.
188+
Rootless bool `json:"rootless"`
186189
}
187190

188191
type Hooks struct {

0 commit comments

Comments
 (0)