Skip to content

Commit ffdf3e3

Browse files
committed
linuxc: try support faster new clone3(CLONE_INTO_CGROUP) syscall
criyle/go-sandbox#13
1 parent 60a1859 commit ffdf3e3

File tree

9 files changed

+93
-23
lines changed

9 files changed

+93
-23
lines changed

README.cn.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,8 @@ interface Output {
538538

539539
在以 `systemd` 作为 `init` 的发行版中运行时,`go-judge` 会使用 `dbus` 通知 `systemd` 来创建一个临时 `scope` 作为 `cgroup` 的根。
540540

541+
在内核版本不低于 5.7 的系统中运行时,`go-judge` 会尝试使用更快的 `clone3(CLONE_INTO_CGROUP)` 方法。
542+
541543
#### 内存使用
542544

543545
控制进程通常会使用 `20M` 内存,每个容器进程最大会使用 `20M` 内存,每个请求最大会使用 `2 * 16M` + 总 copy out max 限制 * 2 内存。请注意,缓存文件会存储在宿主机的共享内存中 (`/dev/shm`),请保证其大小足够存储运行时最大可能文件。

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,8 @@ When running in containers, the `go-judge` will migrate all processed into `/api
762762

763763
When running in Linux distributions powered by `systemd`, the `go-judge` will contact `systemd` via `dbus` to create a transient scope as cgroup root.
764764

765+
When running on a Linux kernel >= 5.7, `go-judge` will try the faster `clone3(CLONE_INTO_CGROUP)` method.
766+
765767
#### Memory Usage
766768

767769
The controller will consume `20M` of memory, and each container will consume `20M` plus the size of its tmpfs (`2 * 128M`). Each request consumes up to the user program limit + the extra limit (`16k`) + the total copy out max. Note that cached files are stored in the host's shared memory (`/dev/shm`), so please ensure that it is large enough.

env/env_linux.go

Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ import (
1010
"github.com/coreos/go-systemd/v22/dbus"
1111
"github.com/criyle/go-judge/env/linuxcontainer"
1212
"github.com/criyle/go-judge/env/pool"
13+
"github.com/criyle/go-judge/envexec"
1314
"github.com/criyle/go-sandbox/container"
1415
"github.com/criyle/go-sandbox/pkg/cgroup"
1516
"github.com/criyle/go-sandbox/pkg/forkexec"
1617
"github.com/criyle/go-sandbox/pkg/mount"
18+
"github.com/criyle/go-sandbox/runner"
1719
ddbus "github.com/godbus/dbus/v5"
1820
"github.com/google/shlex"
1921
"golang.org/x/sys/unix"
@@ -141,26 +143,65 @@ func NewBuilder(c Config) (pool.EnvBuilder, map[string]any, error) {
141143
if ct != nil {
142144
cgroupControllers = ct.Names()
143145
}
144-
return linuxcontainer.NewEnvBuilder(linuxcontainer.Config{
145-
Builder: b,
146-
CgroupPool: cgroupPool,
147-
WorkDir: workDir,
148-
Cpuset: c.Cpuset,
149-
CPURate: c.EnableCPURate,
150-
Seccomp: seccomp,
151-
}), map[string]any{
152-
"cgroupType": cgroupType,
153-
"mount": m,
154-
"symbolicLink": symbolicLinks,
155-
"maskedPaths": maskPaths,
156-
"hostName": hostName,
157-
"domainName": domainName,
158-
"workDir": workDir,
159-
"uid": cUID,
160-
"gid": cGID,
146+
conf := map[string]any{
147+
"cgroupType": cgroupType,
148+
"mount": m,
149+
"symbolicLink": symbolicLinks,
150+
"maskedPaths": maskPaths,
151+
"hostName": hostName,
152+
"domainName": domainName,
153+
"workDir": workDir,
154+
"uid": cUID,
155+
"gid": cGID,
161156

162-
"cgroupControllers": cgroupControllers,
163-
}, nil
157+
"cgroupControllers": cgroupControllers,
158+
}
159+
if cgb != nil && cgroupType == cgroup.TypeV2 && major >= 5 && minor >= 7 {
160+
c.Info("Running kernel ", major, ".", minor, " >= 5.7 with cgroup V2, trying faster clone3(CLONE_INTO_CGROUP)")
161+
if b := func() pool.EnvBuilder {
162+
b := linuxcontainer.NewEnvBuilder(linuxcontainer.Config{
163+
Builder: b,
164+
CgroupPool: cgroupPool,
165+
WorkDir: workDir,
166+
Cpuset: c.Cpuset,
167+
CPURate: c.EnableCPURate,
168+
Seccomp: seccomp,
169+
CgroupFd: true,
170+
})
171+
e, err := b.Build()
172+
if err != nil {
173+
c.Info("Environment build failed: ", err)
174+
return nil
175+
}
176+
defer e.Destroy()
177+
p, err := e.Execve(context.TODO(), envexec.ExecveParam{
178+
Args: []string{"/usr/bin/env"},
179+
})
180+
if err != nil {
181+
c.Info("Environment run failed: ", err)
182+
return nil
183+
}
184+
<-p.Done()
185+
r := p.Result()
186+
if r.Status == runner.StatusRunnerError {
187+
c.Info("Environment result failed: ", r)
188+
return nil
189+
}
190+
return b
191+
}(); b != nil {
192+
conf["clone3"] = true
193+
return b, conf, nil
194+
}
195+
}
196+
197+
return linuxcontainer.NewEnvBuilder(linuxcontainer.Config{
198+
Builder: b,
199+
CgroupPool: cgroupPool,
200+
WorkDir: workDir,
201+
Cpuset: c.Cpuset,
202+
CPURate: c.EnableCPURate,
203+
Seccomp: seccomp,
204+
}), conf, nil
164205
}
165206

166207
func newCgroup(c Config) (cgroup.Cgroup, *cgroup.Controllers, error) {

env/linuxcontainer/cgroup_wrapper_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package linuxcontainer
22

33
import (
4+
"os"
45
"time"
56

67
"github.com/criyle/go-judge/envexec"
@@ -63,3 +64,7 @@ func (c *wCgroup) Reset() error {
6364
func (c *wCgroup) Destroy() error {
6465
return c.cg.Destroy()
6566
}
67+
68+
func (c *wCgroup) Open() (*os.File, error) {
69+
return c.cg.Open()
70+
}

env/linuxcontainer/cgrouppool_linux.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package linuxcontainer
22

33
import (
4+
"os"
45
"sync"
56
"time"
67

@@ -22,6 +23,8 @@ type Cgroup interface {
2223
AddProc(int) error
2324
Reset() error
2425
Destroy() error
26+
27+
Open() (*os.File, error)
2528
}
2629

2730
// CgroupPool implements pool of Cgroup

env/linuxcontainer/envbuilder_linux.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ type Config struct {
1616
Seccomp []syscall.SockFilter
1717
Cpuset string
1818
CPURate bool
19+
CgroupFd bool // whether to enable cgroup fd with clone3, kernel >= 5.7
1920
}
2021

2122
type environmentBuilder struct {
@@ -25,6 +26,7 @@ type environmentBuilder struct {
2526
seccomp []syscall.SockFilter
2627
cpuset string
2728
cpuRate bool
29+
cgFd bool
2830
}
2931

3032
// NewEnvBuilder creates builder for linux container pools
@@ -36,6 +38,7 @@ func NewEnvBuilder(c Config) pool.EnvBuilder {
3638
seccomp: c.Seccomp,
3739
cpuset: c.Cpuset,
3840
cpuRate: c.CPURate,
41+
cgFd: c.CgroupFd,
3942
}
4043
}
4144

@@ -61,5 +64,6 @@ func (b *environmentBuilder) Build() (pool.Environment, error) {
6164
cpuset: b.cpuset,
6265
cpuRate: b.cpuRate,
6366
seccomp: b.seccomp,
67+
cgFd: b.cgFd,
6468
}, nil
6569
}

env/linuxcontainer/environment_linux.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ type environ struct {
2828
cpuset string
2929
seccomp []syscall.SockFilter
3030
cpuRate bool
31+
cgFd bool
3132
}
3233

3334
// Destroy destroys the environment
@@ -45,6 +46,7 @@ func (c *environ) Execve(ctx context.Context, param envexec.ExecveParam) (envexe
4546
cg Cgroup
4647
syncFunc func(int) error
4748
err error
49+
cgFd uintptr
4850
)
4951

5052
limit := param.Limit
@@ -56,7 +58,16 @@ func (c *environ) Execve(ctx context.Context, param envexec.ExecveParam) (envexe
5658
if err := c.setCgroupLimit(cg, limit); err != nil {
5759
return nil, err
5860
}
59-
syncFunc = cg.AddProc
61+
if c.cgFd {
62+
f, err := cg.Open()
63+
if err != nil {
64+
return nil, fmt.Errorf("execve: failed to get cgroup fd %v", err)
65+
}
66+
defer f.Close()
67+
cgFd = f.Fd()
68+
} else {
69+
syncFunc = cg.AddProc
70+
}
6071
}
6172

6273
rLimits := rlimit.RLimits{
@@ -92,6 +103,8 @@ func (c *environ) Execve(ctx context.Context, param envexec.ExecveParam) (envexe
92103
}
93104
return nil
94105
},
106+
SyncAfterExec: syncFunc == nil,
107+
CgroupFD: cgFd,
95108
}
96109
proc := newProcess(func() runner.Result {
97110
return c.Environment.Execve(ctx, p)

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ go 1.23
55
require (
66
github.com/coreos/go-systemd/v22 v22.5.0
77
github.com/creack/pty v1.1.24
8-
github.com/criyle/go-sandbox v0.10.9
8+
github.com/criyle/go-sandbox v0.11.0
99
github.com/elastic/go-seccomp-bpf v1.5.0
1010
github.com/elastic/go-ucfg v0.8.8
1111
github.com/gin-contrib/zap v1.1.4

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8
1818
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
1919
github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
2020
github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
21-
github.com/criyle/go-sandbox v0.10.9 h1:IYXVrfdSi8GgXlxBDNBINpH4VqnznxHi3R5J17d69fs=
22-
github.com/criyle/go-sandbox v0.10.9/go.mod h1:9IZSv7cxcDkVaPSRufhMPLUg+7M7lTPAt8hjd/iMKFo=
21+
github.com/criyle/go-sandbox v0.11.0 h1:1jkfLigilxQza3zPyF1aIGR5WdDuiMYE1rzVYWlJC9I=
22+
github.com/criyle/go-sandbox v0.11.0/go.mod h1:9IZSv7cxcDkVaPSRufhMPLUg+7M7lTPAt8hjd/iMKFo=
2323
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2424
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2525
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

0 commit comments

Comments
 (0)