Skip to content

Commit 8fc9b6d

Browse files
committed
support collecting FsUsageMetrics for containerd
1 parent 9c29bca commit 8fc9b6d

File tree

6 files changed

+162
-49
lines changed

6 files changed

+162
-49
lines changed

container/common/fsHandler.go

Lines changed: 80 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"time"
2222

2323
"github.com/google/cadvisor/fs"
24-
2524
"k8s.io/klog/v2"
2625
)
2726

@@ -37,15 +36,18 @@ type FsUsage struct {
3736
InodeUsage uint64
3837
}
3938

39+
type FsUsageProvider interface {
40+
Usage() (*FsUsage, error)
41+
Targets() []string
42+
}
43+
4044
type realFsHandler struct {
4145
sync.RWMutex
42-
lastUpdate time.Time
43-
usage FsUsage
44-
period time.Duration
45-
minPeriod time.Duration
46-
rootfs string
47-
extraDir string
48-
fsInfo fs.FsInfo
46+
lastUpdate time.Time
47+
usage FsUsage
48+
period time.Duration
49+
minPeriod time.Duration
50+
usageProvider FsUsageProvider
4951
// Tells the container to stop.
5052
stopChan chan struct{}
5153
}
@@ -58,51 +60,35 @@ const DefaultPeriod = time.Minute
5860

5961
var _ FsHandler = &realFsHandler{}
6062

61-
func NewFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInfo) FsHandler {
63+
func NewFsHandler(period time.Duration, provider FsUsageProvider) FsHandler {
6264
return &realFsHandler{
63-
lastUpdate: time.Time{},
64-
usage: FsUsage{},
65-
period: period,
66-
minPeriod: period,
67-
rootfs: rootfs,
68-
extraDir: extraDir,
69-
fsInfo: fsInfo,
70-
stopChan: make(chan struct{}, 1),
65+
lastUpdate: time.Time{},
66+
usage: FsUsage{},
67+
period: period,
68+
minPeriod: period,
69+
usageProvider: provider,
70+
stopChan: make(chan struct{}, 1),
7171
}
7272
}
7373

7474
func (fh *realFsHandler) update() error {
75-
var (
76-
rootUsage, extraUsage fs.UsageInfo
77-
rootErr, extraErr error
78-
)
79-
// TODO(vishh): Add support for external mounts.
80-
if fh.rootfs != "" {
81-
rootUsage, rootErr = fh.fsInfo.GetDirUsage(fh.rootfs)
82-
}
8375

84-
if fh.extraDir != "" {
85-
extraUsage, extraErr = fh.fsInfo.GetDirUsage(fh.extraDir)
76+
usage, err := fh.usageProvider.Usage()
77+
78+
if err != nil {
79+
return err
8680
}
8781

88-
// Wait to handle errors until after all operartions are run.
82+
// Wait to handle errors until after all operations are run.
8983
// An error in one will not cause an early return, skipping others
9084
fh.Lock()
9185
defer fh.Unlock()
9286
fh.lastUpdate = time.Now()
93-
if fh.rootfs != "" && rootErr == nil {
94-
fh.usage.InodeUsage = rootUsage.Inodes
95-
fh.usage.BaseUsageBytes = rootUsage.Bytes
96-
fh.usage.TotalUsageBytes = rootUsage.Bytes
97-
}
98-
if fh.extraDir != "" && extraErr == nil {
99-
fh.usage.TotalUsageBytes += extraUsage.Bytes
100-
}
10187

102-
// Combine errors into a single error to return
103-
if rootErr != nil || extraErr != nil {
104-
return fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr)
105-
}
88+
fh.usage.InodeUsage = usage.InodeUsage
89+
fh.usage.BaseUsageBytes = usage.BaseUsageBytes
90+
fh.usage.TotalUsageBytes = usage.TotalUsageBytes
91+
10692
return nil
10793
}
10894

@@ -125,7 +111,8 @@ func (fh *realFsHandler) trackUsage() {
125111
// if the long duration is persistent either because of slow
126112
// disk or lots of containers.
127113
longOp = longOp + time.Second
128-
klog.V(2).Infof("fs: disk usage and inodes count on following dirs took %v: %v; will not log again for this container unless duration exceeds %v", duration, []string{fh.rootfs, fh.extraDir}, longOp)
114+
klog.V(2).Infof(`fs: disk usage and inodes count on targets took %v: %v; `+
115+
`will not log again for this container unless duration exceeds %v`, duration, fh.usageProvider.Targets(), longOp)
129116
}
130117
select {
131118
case <-fh.stopChan:
@@ -148,3 +135,55 @@ func (fh *realFsHandler) Usage() FsUsage {
148135
defer fh.RUnlock()
149136
return fh.usage
150137
}
138+
139+
type fsUsageProvider struct {
140+
fsInfo fs.FsInfo
141+
rootFs string
142+
extraDir string
143+
}
144+
145+
func NewGeneralFsUsageProvider(fsInfo fs.FsInfo, rootFs, extraDir string) FsUsageProvider {
146+
return &fsUsageProvider{
147+
fsInfo: fsInfo,
148+
rootFs: rootFs,
149+
extraDir: extraDir,
150+
}
151+
}
152+
153+
func (f *fsUsageProvider) Targets() []string {
154+
return []string{f.rootFs, f.extraDir}
155+
}
156+
157+
func (f *fsUsageProvider) Usage() (*FsUsage, error) {
158+
var (
159+
rootUsage, extraUsage fs.UsageInfo
160+
rootErr, extraErr error
161+
)
162+
163+
if f.rootFs != "" {
164+
rootUsage, rootErr = f.fsInfo.GetDirUsage(f.rootFs)
165+
}
166+
167+
if f.extraDir != "" {
168+
extraUsage, extraErr = f.fsInfo.GetDirUsage(f.extraDir)
169+
}
170+
171+
usage := &FsUsage{}
172+
173+
if f.rootFs != "" && rootErr == nil {
174+
usage.InodeUsage = rootUsage.Inodes
175+
usage.BaseUsageBytes = rootUsage.Bytes
176+
usage.TotalUsageBytes = rootUsage.Bytes
177+
}
178+
179+
if f.extraDir != "" && extraErr == nil {
180+
usage.TotalUsageBytes += extraUsage.Bytes
181+
}
182+
183+
// Combine errors into a single error to return
184+
if rootErr != nil || extraErr != nil {
185+
return nil, fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr)
186+
}
187+
188+
return usage, nil
189+
}

container/containerd/client.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ import (
2222
"time"
2323

2424
containersapi "github.com/containerd/containerd/api/services/containers/v1"
25+
snaptshotapi "github.com/containerd/containerd/api/services/snapshots/v1"
2526
tasksapi "github.com/containerd/containerd/api/services/tasks/v1"
2627
versionapi "github.com/containerd/containerd/api/services/version/v1"
2728
"github.com/containerd/containerd/containers"
2829
"github.com/containerd/containerd/errdefs"
2930
"github.com/containerd/containerd/pkg/dialer"
3031
ptypes "github.com/gogo/protobuf/types"
32+
"github.com/google/cadvisor/container/common"
3133
"google.golang.org/grpc"
3234
"google.golang.org/grpc/backoff"
3335
)
@@ -36,12 +38,14 @@ type client struct {
3638
containerService containersapi.ContainersClient
3739
taskService tasksapi.TasksClient
3840
versionService versionapi.VersionClient
41+
snapshotsService snaptshotapi.SnapshotsClient
3942
}
4043

4144
type ContainerdClient interface {
4245
LoadContainer(ctx context.Context, id string) (*containers.Container, error)
4346
TaskPid(ctx context.Context, id string) (uint32, error)
4447
Version(ctx context.Context) (string, error)
48+
ContainerFsUsage(ctx context.Context, snapshotter, snapshotkey string) (*common.FsUsage, error)
4549
}
4650

4751
var once sync.Once
@@ -92,6 +96,7 @@ func Client(address, namespace string) (ContainerdClient, error) {
9296
containerService: containersapi.NewContainersClient(conn),
9397
taskService: tasksapi.NewTasksClient(conn),
9498
versionService: versionapi.NewVersionClient(conn),
99+
snapshotsService: snaptshotapi.NewSnapshotsClient(conn),
95100
}
96101
})
97102
return ctrdClient, retErr
@@ -125,6 +130,21 @@ func (c *client) Version(ctx context.Context) (string, error) {
125130
return response.Version, nil
126131
}
127132

133+
func (c *client) ContainerFsUsage(ctx context.Context, snapshotter, snapshotkey string) (*common.FsUsage, error) {
134+
usage, err := c.snapshotsService.Usage(ctx, &snaptshotapi.UsageRequest{
135+
Snapshotter: snapshotter,
136+
Key: snapshotkey,
137+
})
138+
if err != nil {
139+
return nil, err
140+
}
141+
return &common.FsUsage{
142+
BaseUsageBytes: uint64(usage.Size_),
143+
TotalUsageBytes: uint64(usage.Size_),
144+
InodeUsage: uint64(usage.Inodes),
145+
}, nil
146+
}
147+
128148
func containerFromProto(containerpb containersapi.Container) *containers.Container {
129149
var runtime containers.RuntimeInfo
130150
if containerpb.Runtime != nil {

container/containerd/client_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"fmt"
2020

2121
"github.com/containerd/containerd/containers"
22+
"github.com/google/cadvisor/container/common"
2223
)
2324

2425
type containerdClientMock struct {
@@ -45,6 +46,11 @@ func (c *containerdClientMock) TaskPid(ctx context.Context, id string) (uint32,
4546
return 2389, nil
4647
}
4748

49+
func (c *containerdClientMock) ContainerFsUsage(ctx context.Context, snapshotter,
50+
snapshotkey string) (*common.FsUsage, error) {
51+
return &common.FsUsage{}, nil
52+
}
53+
4854
func mockcontainerdClient(cntrs map[string]*containers.Container, returnErr error) ContainerdClient {
4955
return &containerdClientMock{
5056
cntrs: cntrs,

container/containerd/handler.go

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
// Handler for containerd containers.
16+
1617
package containerd
1718

1819
import (
@@ -38,6 +39,7 @@ type containerdContainerHandler struct {
3839
// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
3940
cgroupPaths map[string]string
4041
fsInfo fs.FsInfo
42+
fsHandler common.FsHandler
4143
// Metadata associated with the container.
4244
reference info.ContainerReference
4345
envs map[string]string
@@ -122,9 +124,15 @@ func newContainerdContainerHandler(
122124
libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootfs, int(taskPid), includedMetrics)
123125

124126
handler := &containerdContainerHandler{
125-
machineInfoFactory: machineInfoFactory,
126-
cgroupPaths: cgroupPaths,
127-
fsInfo: fsInfo,
127+
machineInfoFactory: machineInfoFactory,
128+
cgroupPaths: cgroupPaths,
129+
fsInfo: fsInfo,
130+
fsHandler: common.NewFsHandler(common.DefaultPeriod, &fsUsageProvider{
131+
ctx: ctx,
132+
client: client,
133+
snapshotter: cntr.Snapshotter,
134+
snapshotkey: cntr.SnapshotKey,
135+
}),
128136
envs: make(map[string]string),
129137
labels: cntr.Labels,
130138
includedMetrics: includedMetrics,
@@ -169,9 +177,7 @@ func (h *containerdContainerHandler) needNet() bool {
169177
}
170178

171179
func (h *containerdContainerHandler) GetSpec() (info.ContainerSpec, error) {
172-
// TODO: Since we dont collect disk usage stats for containerd, we set hasFilesystem
173-
// to false. Revisit when we support disk usage stats for containerd
174-
hasFilesystem := false
180+
hasFilesystem := true
175181
spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem)
176182
spec.Labels = h.labels
177183
spec.Envs = h.envs
@@ -189,6 +195,25 @@ func (h *containerdContainerHandler) getFsStats(stats *info.ContainerStats) erro
189195
if h.includedMetrics.Has(container.DiskIOMetrics) {
190196
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
191197
}
198+
if !h.includedMetrics.Has(container.DiskUsageMetrics) {
199+
return nil
200+
}
201+
202+
// TODO(yyrdl):for overlay ,the 'upperPath' is:
203+
// `${containerd.Config.Root}/io.containerd.snapshotter.v1.overlayfs/snapshots/${snapshots.ID}/fs`,
204+
// and for other snapshots plugins, we can also find the law from containerd's source code.
205+
206+
// Device 、fsType and fsLimits and other information are not supported yet, unless there is a way to
207+
// know the id of the snapshot , or the `Stat`(snapshotsClient.Stat) method returns these information directly.
208+
209+
fsStat := info.FsStats{}
210+
usage := h.fsHandler.Usage()
211+
fsStat.BaseUsage = usage.BaseUsageBytes
212+
fsStat.Usage = usage.TotalUsageBytes
213+
fsStat.Inodes = usage.InodeUsage
214+
215+
stats.Filesystem = append(stats.Filesystem, fsStat)
216+
192217
return nil
193218
}
194219

@@ -239,12 +264,33 @@ func (h *containerdContainerHandler) Type() container.ContainerType {
239264
}
240265

241266
func (h *containerdContainerHandler) Start() {
267+
if h.fsHandler != nil {
268+
h.fsHandler.Start()
269+
}
242270
}
243271

244272
func (h *containerdContainerHandler) Cleanup() {
273+
if h.fsHandler != nil {
274+
h.fsHandler.Stop()
275+
}
245276
}
246277

247278
func (h *containerdContainerHandler) GetContainerIPAddress() string {
248279
// containerd doesnt take care of networking.So it doesnt maintain networking states
249280
return ""
250281
}
282+
283+
type fsUsageProvider struct {
284+
ctx context.Context
285+
snapshotter string
286+
snapshotkey string
287+
client ContainerdClient
288+
}
289+
290+
func (f *fsUsageProvider) Usage() (*common.FsUsage, error) {
291+
return f.client.ContainerFsUsage(f.ctx, f.snapshotter, f.snapshotkey)
292+
}
293+
294+
func (f *fsUsageProvider) Targets() []string {
295+
return []string{fmt.Sprintf("snapshotter(%s) with key (%s)", f.snapshotter, f.snapshotkey)}
296+
}

container/crio/handler.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,8 @@ func newCrioContainerHandler(
183183

184184
// we optionally collect disk usage metrics
185185
if includedMetrics.Has(container.DiskUsageMetrics) {
186-
handler.fsHandler = common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, storageLogDir, fsInfo)
186+
handler.fsHandler = common.NewFsHandler(common.DefaultPeriod, common.NewGeneralFsUsageProvider(
187+
fsInfo, rootfsStorageDir, storageLogDir))
187188
}
188189
// TODO for env vars we wanted to show from container.Config.Env from whitelist
189190
//for _, exposedEnv := range metadataEnvs {

container/docker/handler.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ func newDockerContainerHandler(
240240

241241
if includedMetrics.Has(container.DiskUsageMetrics) {
242242
handler.fsHandler = &dockerFsHandler{
243-
fsHandler: common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, otherStorageDir, fsInfo),
243+
fsHandler: common.NewFsHandler(common.DefaultPeriod, common.NewGeneralFsUsageProvider(
244+
fsInfo, rootfsStorageDir, otherStorageDir)),
244245
thinPoolWatcher: thinPoolWatcher,
245246
zfsWatcher: zfsWatcher,
246247
deviceID: ctnr.GraphDriver.Data["DeviceId"],

0 commit comments

Comments
 (0)