Skip to content

Commit f0c0766

Browse files
author
yyrdl
committed
support collecting FsUsageMetrics for containerd
1 parent 9c29bca commit f0c0766

File tree

6 files changed

+164
-50
lines changed

6 files changed

+164
-50
lines changed

container/common/fsHandler.go

Lines changed: 82 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"time"
2222

2323
"github.com/google/cadvisor/fs"
24-
2524
"k8s.io/klog/v2"
2625
)
2726

@@ -37,15 +36,21 @@ type FsUsage struct {
3736
InodeUsage uint64
3837
}
3938

39+
type FsUsageProvider interface {
40+
// Usage returns the fs usage
41+
Usage() (*FsUsage, error)
42+
// Targets returns where the fs usage metric is collected,it maybe a directory ,a file or some
43+
// information about the snapshotter(for containerd)
44+
Targets() []string
45+
}
46+
4047
type realFsHandler struct {
4148
sync.RWMutex
42-
lastUpdate time.Time
43-
usage FsUsage
44-
period time.Duration
45-
minPeriod time.Duration
46-
rootfs string
47-
extraDir string
48-
fsInfo fs.FsInfo
49+
lastUpdate time.Time
50+
usage FsUsage
51+
period time.Duration
52+
minPeriod time.Duration
53+
usageProvider FsUsageProvider
4954
// Tells the container to stop.
5055
stopChan chan struct{}
5156
}
@@ -58,51 +63,33 @@ const DefaultPeriod = time.Minute
5863

5964
var _ FsHandler = &realFsHandler{}
6065

61-
func NewFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInfo) FsHandler {
66+
func NewFsHandler(period time.Duration, provider FsUsageProvider) FsHandler {
6267
return &realFsHandler{
63-
lastUpdate: time.Time{},
64-
usage: FsUsage{},
65-
period: period,
66-
minPeriod: period,
67-
rootfs: rootfs,
68-
extraDir: extraDir,
69-
fsInfo: fsInfo,
70-
stopChan: make(chan struct{}, 1),
68+
lastUpdate: time.Time{},
69+
usage: FsUsage{},
70+
period: period,
71+
minPeriod: period,
72+
usageProvider: provider,
73+
stopChan: make(chan struct{}, 1),
7174
}
7275
}
7376

7477
func (fh *realFsHandler) update() error {
75-
var (
76-
rootUsage, extraUsage fs.UsageInfo
77-
rootErr, extraErr error
78-
)
79-
// TODO(vishh): Add support for external mounts.
80-
if fh.rootfs != "" {
81-
rootUsage, rootErr = fh.fsInfo.GetDirUsage(fh.rootfs)
82-
}
8378

84-
if fh.extraDir != "" {
85-
extraUsage, extraErr = fh.fsInfo.GetDirUsage(fh.extraDir)
79+
usage, err := fh.usageProvider.Usage()
80+
81+
if err != nil {
82+
return err
8683
}
8784

88-
// Wait to handle errors until after all operartions are run.
89-
// An error in one will not cause an early return, skipping others
9085
fh.Lock()
9186
defer fh.Unlock()
9287
fh.lastUpdate = time.Now()
93-
if fh.rootfs != "" && rootErr == nil {
94-
fh.usage.InodeUsage = rootUsage.Inodes
95-
fh.usage.BaseUsageBytes = rootUsage.Bytes
96-
fh.usage.TotalUsageBytes = rootUsage.Bytes
97-
}
98-
if fh.extraDir != "" && extraErr == nil {
99-
fh.usage.TotalUsageBytes += extraUsage.Bytes
100-
}
10188

102-
// Combine errors into a single error to return
103-
if rootErr != nil || extraErr != nil {
104-
return fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr)
105-
}
89+
fh.usage.InodeUsage = usage.InodeUsage
90+
fh.usage.BaseUsageBytes = usage.BaseUsageBytes
91+
fh.usage.TotalUsageBytes = usage.TotalUsageBytes
92+
10693
return nil
10794
}
10895

@@ -125,7 +112,8 @@ func (fh *realFsHandler) trackUsage() {
125112
// if the long duration is persistent either because of slow
126113
// disk or lots of containers.
127114
longOp = longOp + time.Second
128-
klog.V(2).Infof("fs: disk usage and inodes count on following dirs took %v: %v; will not log again for this container unless duration exceeds %v", duration, []string{fh.rootfs, fh.extraDir}, longOp)
115+
klog.V(2).Infof(`fs: disk usage and inodes count on targets took %v: %v; `+
116+
`will not log again for this container unless duration exceeds %v`, duration, fh.usageProvider.Targets(), longOp)
129117
}
130118
select {
131119
case <-fh.stopChan:
@@ -148,3 +136,55 @@ func (fh *realFsHandler) Usage() FsUsage {
148136
defer fh.RUnlock()
149137
return fh.usage
150138
}
139+
140+
type fsUsageProvider struct {
141+
fsInfo fs.FsInfo
142+
rootFs string
143+
extraDir string
144+
}
145+
146+
func NewGeneralFsUsageProvider(fsInfo fs.FsInfo, rootFs, extraDir string) FsUsageProvider {
147+
return &fsUsageProvider{
148+
fsInfo: fsInfo,
149+
rootFs: rootFs,
150+
extraDir: extraDir,
151+
}
152+
}
153+
154+
func (f *fsUsageProvider) Targets() []string {
155+
return []string{f.rootFs, f.extraDir}
156+
}
157+
158+
func (f *fsUsageProvider) Usage() (*FsUsage, error) {
159+
var (
160+
rootUsage, extraUsage fs.UsageInfo
161+
rootErr, extraErr error
162+
)
163+
164+
if f.rootFs != "" {
165+
rootUsage, rootErr = f.fsInfo.GetDirUsage(f.rootFs)
166+
}
167+
168+
if f.extraDir != "" {
169+
extraUsage, extraErr = f.fsInfo.GetDirUsage(f.extraDir)
170+
}
171+
172+
usage := &FsUsage{}
173+
174+
if f.rootFs != "" && rootErr == nil {
175+
usage.InodeUsage = rootUsage.Inodes
176+
usage.BaseUsageBytes = rootUsage.Bytes
177+
usage.TotalUsageBytes = rootUsage.Bytes
178+
}
179+
180+
if f.extraDir != "" && extraErr == nil {
181+
usage.TotalUsageBytes += extraUsage.Bytes
182+
}
183+
184+
// Combine errors into a single error to return
185+
if rootErr != nil || extraErr != nil {
186+
return nil, fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr)
187+
}
188+
189+
return usage, nil
190+
}

container/containerd/client.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ import (
2222
"time"
2323

2424
containersapi "github.com/containerd/containerd/api/services/containers/v1"
25+
snaptshotapi "github.com/containerd/containerd/api/services/snapshots/v1"
2526
tasksapi "github.com/containerd/containerd/api/services/tasks/v1"
2627
versionapi "github.com/containerd/containerd/api/services/version/v1"
2728
"github.com/containerd/containerd/containers"
2829
"github.com/containerd/containerd/errdefs"
2930
"github.com/containerd/containerd/pkg/dialer"
3031
ptypes "github.com/gogo/protobuf/types"
32+
"github.com/google/cadvisor/container/common"
3133
"google.golang.org/grpc"
3234
"google.golang.org/grpc/backoff"
3335
)
@@ -36,12 +38,14 @@ type client struct {
3638
containerService containersapi.ContainersClient
3739
taskService tasksapi.TasksClient
3840
versionService versionapi.VersionClient
41+
snapshotsService snaptshotapi.SnapshotsClient
3942
}
4043

4144
type ContainerdClient interface {
4245
LoadContainer(ctx context.Context, id string) (*containers.Container, error)
4346
TaskPid(ctx context.Context, id string) (uint32, error)
4447
Version(ctx context.Context) (string, error)
48+
ContainerFsUsage(ctx context.Context, snapshotter, snapshotkey string) (*common.FsUsage, error)
4549
}
4650

4751
var once sync.Once
@@ -92,6 +96,7 @@ func Client(address, namespace string) (ContainerdClient, error) {
9296
containerService: containersapi.NewContainersClient(conn),
9397
taskService: tasksapi.NewTasksClient(conn),
9498
versionService: versionapi.NewVersionClient(conn),
99+
snapshotsService: snaptshotapi.NewSnapshotsClient(conn),
95100
}
96101
})
97102
return ctrdClient, retErr
@@ -125,6 +130,21 @@ func (c *client) Version(ctx context.Context) (string, error) {
125130
return response.Version, nil
126131
}
127132

133+
func (c *client) ContainerFsUsage(ctx context.Context, snapshotter, snapshotkey string) (*common.FsUsage, error) {
134+
usage, err := c.snapshotsService.Usage(ctx, &snaptshotapi.UsageRequest{
135+
Snapshotter: snapshotter,
136+
Key: snapshotkey,
137+
})
138+
if err != nil {
139+
return nil, err
140+
}
141+
return &common.FsUsage{
142+
BaseUsageBytes: uint64(usage.Size_),
143+
TotalUsageBytes: uint64(usage.Size_),
144+
InodeUsage: uint64(usage.Inodes),
145+
}, nil
146+
}
147+
128148
func containerFromProto(containerpb containersapi.Container) *containers.Container {
129149
var runtime containers.RuntimeInfo
130150
if containerpb.Runtime != nil {

container/containerd/client_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"fmt"
2020

2121
"github.com/containerd/containerd/containers"
22+
"github.com/google/cadvisor/container/common"
2223
)
2324

2425
type containerdClientMock struct {
@@ -45,6 +46,11 @@ func (c *containerdClientMock) TaskPid(ctx context.Context, id string) (uint32,
4546
return 2389, nil
4647
}
4748

49+
func (c *containerdClientMock) ContainerFsUsage(ctx context.Context, snapshotter,
50+
snapshotkey string) (*common.FsUsage, error) {
51+
return &common.FsUsage{}, nil
52+
}
53+
4854
func mockcontainerdClient(cntrs map[string]*containers.Container, returnErr error) ContainerdClient {
4955
return &containerdClientMock{
5056
cntrs: cntrs,

container/containerd/handler.go

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
// Handler for containerd containers.
16+
1617
package containerd
1718

1819
import (
@@ -38,6 +39,7 @@ type containerdContainerHandler struct {
3839
// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
3940
cgroupPaths map[string]string
4041
fsInfo fs.FsInfo
42+
fsHandler common.FsHandler
4143
// Metadata associated with the container.
4244
reference info.ContainerReference
4345
envs map[string]string
@@ -122,9 +124,15 @@ func newContainerdContainerHandler(
122124
libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootfs, int(taskPid), includedMetrics)
123125

124126
handler := &containerdContainerHandler{
125-
machineInfoFactory: machineInfoFactory,
126-
cgroupPaths: cgroupPaths,
127-
fsInfo: fsInfo,
127+
machineInfoFactory: machineInfoFactory,
128+
cgroupPaths: cgroupPaths,
129+
fsInfo: fsInfo,
130+
fsHandler: common.NewFsHandler(common.DefaultPeriod, &fsUsageProvider{
131+
ctx: ctx,
132+
client: client,
133+
snapshotter: cntr.Snapshotter,
134+
snapshotkey: cntr.SnapshotKey,
135+
}),
128136
envs: make(map[string]string),
129137
labels: cntr.Labels,
130138
includedMetrics: includedMetrics,
@@ -169,9 +177,7 @@ func (h *containerdContainerHandler) needNet() bool {
169177
}
170178

171179
func (h *containerdContainerHandler) GetSpec() (info.ContainerSpec, error) {
172-
// TODO: Since we dont collect disk usage stats for containerd, we set hasFilesystem
173-
// to false. Revisit when we support disk usage stats for containerd
174-
hasFilesystem := false
180+
hasFilesystem := true
175181
spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem)
176182
spec.Labels = h.labels
177183
spec.Envs = h.envs
@@ -189,6 +195,25 @@ func (h *containerdContainerHandler) getFsStats(stats *info.ContainerStats) erro
189195
if h.includedMetrics.Has(container.DiskIOMetrics) {
190196
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
191197
}
198+
if !h.includedMetrics.Has(container.DiskUsageMetrics) {
199+
return nil
200+
}
201+
202+
// TODO(yyrdl):for overlay ,the 'upperPath' is:
203+
// `${containerd.Config.Root}/io.containerd.snapshotter.v1.overlayfs/snapshots/${snapshots.ID}/fs`,
204+
// and for other snapshots plugins, we can also find the law from containerd's source code.
205+
206+
// Device 、fsType and fsLimits and other information are not supported yet, unless there is a way to
207+
// know the id of the snapshot , or the `Stat`(snapshotsClient.Stat) method returns these information directly.
208+
209+
fsStat := info.FsStats{}
210+
usage := h.fsHandler.Usage()
211+
fsStat.BaseUsage = usage.BaseUsageBytes
212+
fsStat.Usage = usage.TotalUsageBytes
213+
fsStat.Inodes = usage.InodeUsage
214+
215+
stats.Filesystem = append(stats.Filesystem, fsStat)
216+
192217
return nil
193218
}
194219

@@ -239,12 +264,33 @@ func (h *containerdContainerHandler) Type() container.ContainerType {
239264
}
240265

241266
func (h *containerdContainerHandler) Start() {
267+
if h.fsHandler != nil {
268+
h.fsHandler.Start()
269+
}
242270
}
243271

244272
func (h *containerdContainerHandler) Cleanup() {
273+
if h.fsHandler != nil {
274+
h.fsHandler.Stop()
275+
}
245276
}
246277

247278
func (h *containerdContainerHandler) GetContainerIPAddress() string {
248279
// containerd doesnt take care of networking.So it doesnt maintain networking states
249280
return ""
250281
}
282+
283+
type fsUsageProvider struct {
284+
ctx context.Context
285+
snapshotter string
286+
snapshotkey string
287+
client ContainerdClient
288+
}
289+
290+
func (f *fsUsageProvider) Usage() (*common.FsUsage, error) {
291+
return f.client.ContainerFsUsage(f.ctx, f.snapshotter, f.snapshotkey)
292+
}
293+
294+
func (f *fsUsageProvider) Targets() []string {
295+
return []string{fmt.Sprintf("snapshotter(%s) with key (%s)", f.snapshotter, f.snapshotkey)}
296+
}

container/crio/handler.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,8 @@ func newCrioContainerHandler(
183183

184184
// we optionally collect disk usage metrics
185185
if includedMetrics.Has(container.DiskUsageMetrics) {
186-
handler.fsHandler = common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, storageLogDir, fsInfo)
186+
handler.fsHandler = common.NewFsHandler(common.DefaultPeriod, common.NewGeneralFsUsageProvider(
187+
fsInfo, rootfsStorageDir, storageLogDir))
187188
}
188189
// TODO for env vars we wanted to show from container.Config.Env from whitelist
189190
//for _, exposedEnv := range metadataEnvs {

container/docker/handler.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ func newDockerContainerHandler(
240240

241241
if includedMetrics.Has(container.DiskUsageMetrics) {
242242
handler.fsHandler = &dockerFsHandler{
243-
fsHandler: common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, otherStorageDir, fsInfo),
243+
fsHandler: common.NewFsHandler(common.DefaultPeriod, common.NewGeneralFsUsageProvider(
244+
fsInfo, rootfsStorageDir, otherStorageDir)),
244245
thinPoolWatcher: thinPoolWatcher,
245246
zfsWatcher: zfsWatcher,
246247
deviceID: ctnr.GraphDriver.Data["DeviceId"],

0 commit comments

Comments
 (0)