-
Notifications
You must be signed in to change notification settings - Fork 2.4k
support collecting FsUsageMetrics for containerd #2872
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,6 @@ import ( | |
"time" | ||
|
||
"github.com/google/cadvisor/fs" | ||
|
||
"k8s.io/klog/v2" | ||
) | ||
|
||
|
@@ -37,15 +36,21 @@ type FsUsage struct { | |
InodeUsage uint64 | ||
} | ||
|
||
type FsUsageProvider interface { | ||
// Usage returns the fs usage | ||
Usage() (*FsUsage, error) | ||
// Targets returns where the fs usage metric is collected,it maybe a directory ,a file or some | ||
// information about the snapshotter(for containerd) | ||
Targets() []string | ||
} | ||
|
||
type realFsHandler struct { | ||
sync.RWMutex | ||
lastUpdate time.Time | ||
usage FsUsage | ||
period time.Duration | ||
minPeriod time.Duration | ||
rootfs string | ||
extraDir string | ||
fsInfo fs.FsInfo | ||
lastUpdate time.Time | ||
usage FsUsage | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
period time.Duration | ||
minPeriod time.Duration | ||
usageProvider FsUsageProvider | ||
// Tells the container to stop. | ||
stopChan chan struct{} | ||
} | ||
|
@@ -58,51 +63,33 @@ const DefaultPeriod = time.Minute | |
|
||
var _ FsHandler = &realFsHandler{} | ||
|
||
func NewFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInfo) FsHandler { | ||
func NewFsHandler(period time.Duration, provider FsUsageProvider) FsHandler { | ||
return &realFsHandler{ | ||
lastUpdate: time.Time{}, | ||
usage: FsUsage{}, | ||
period: period, | ||
minPeriod: period, | ||
rootfs: rootfs, | ||
extraDir: extraDir, | ||
fsInfo: fsInfo, | ||
stopChan: make(chan struct{}, 1), | ||
lastUpdate: time.Time{}, | ||
usage: FsUsage{}, | ||
period: period, | ||
minPeriod: period, | ||
usageProvider: provider, | ||
stopChan: make(chan struct{}, 1), | ||
} | ||
} | ||
|
||
func (fh *realFsHandler) update() error { | ||
var ( | ||
rootUsage, extraUsage fs.UsageInfo | ||
rootErr, extraErr error | ||
) | ||
// TODO(vishh): Add support for external mounts. | ||
if fh.rootfs != "" { | ||
rootUsage, rootErr = fh.fsInfo.GetDirUsage(fh.rootfs) | ||
} | ||
|
||
if fh.extraDir != "" { | ||
extraUsage, extraErr = fh.fsInfo.GetDirUsage(fh.extraDir) | ||
usage, err := fh.usageProvider.Usage() | ||
|
||
if err != nil { | ||
return err | ||
} | ||
|
||
// Wait to handle errors until after all operartions are run. | ||
// An error in one will not cause an early return, skipping others | ||
fh.Lock() | ||
defer fh.Unlock() | ||
fh.lastUpdate = time.Now() | ||
if fh.rootfs != "" && rootErr == nil { | ||
fh.usage.InodeUsage = rootUsage.Inodes | ||
fh.usage.BaseUsageBytes = rootUsage.Bytes | ||
fh.usage.TotalUsageBytes = rootUsage.Bytes | ||
} | ||
if fh.extraDir != "" && extraErr == nil { | ||
fh.usage.TotalUsageBytes += extraUsage.Bytes | ||
} | ||
|
||
// Combine errors into a single error to return | ||
if rootErr != nil || extraErr != nil { | ||
return fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr) | ||
} | ||
fh.usage.InodeUsage = usage.InodeUsage | ||
fh.usage.BaseUsageBytes = usage.BaseUsageBytes | ||
fh.usage.TotalUsageBytes = usage.TotalUsageBytes | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -125,7 +112,8 @@ func (fh *realFsHandler) trackUsage() { | |
// if the long duration is persistent either because of slow | ||
// disk or lots of containers. | ||
longOp = longOp + time.Second | ||
klog.V(2).Infof("fs: disk usage and inodes count on following dirs took %v: %v; will not log again for this container unless duration exceeds %v", duration, []string{fh.rootfs, fh.extraDir}, longOp) | ||
klog.V(2).Infof(`fs: disk usage and inodes count on targets took %v: %v; `+ | ||
`will not log again for this container unless duration exceeds %v`, duration, fh.usageProvider.Targets(), longOp) | ||
} | ||
select { | ||
case <-fh.stopChan: | ||
|
@@ -148,3 +136,55 @@ func (fh *realFsHandler) Usage() FsUsage { | |
defer fh.RUnlock() | ||
return fh.usage | ||
} | ||
|
||
type fsUsageProvider struct { | ||
fsInfo fs.FsInfo | ||
rootFs string | ||
extraDir string | ||
} | ||
|
||
func NewGeneralFsUsageProvider(fsInfo fs.FsInfo, rootFs, extraDir string) FsUsageProvider { | ||
return &fsUsageProvider{ | ||
fsInfo: fsInfo, | ||
rootFs: rootFs, | ||
extraDir: extraDir, | ||
} | ||
} | ||
|
||
func (f *fsUsageProvider) Targets() []string { | ||
return []string{f.rootFs, f.extraDir} | ||
} | ||
|
||
func (f *fsUsageProvider) Usage() (*FsUsage, error) { | ||
var ( | ||
rootUsage, extraUsage fs.UsageInfo | ||
rootErr, extraErr error | ||
) | ||
|
||
if f.rootFs != "" { | ||
rootUsage, rootErr = f.fsInfo.GetDirUsage(f.rootFs) | ||
} | ||
|
||
if f.extraDir != "" { | ||
extraUsage, extraErr = f.fsInfo.GetDirUsage(f.extraDir) | ||
} | ||
|
||
usage := &FsUsage{} | ||
|
||
if f.rootFs != "" && rootErr == nil { | ||
usage.InodeUsage = rootUsage.Inodes | ||
usage.BaseUsageBytes = rootUsage.Bytes | ||
usage.TotalUsageBytes = rootUsage.Bytes | ||
} | ||
|
||
if f.extraDir != "" && extraErr == nil { | ||
usage.TotalUsageBytes += extraUsage.Bytes | ||
} | ||
|
||
// Combine errors into a single error to return | ||
if rootErr != nil || extraErr != nil { | ||
return nil, fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr) | ||
} | ||
|
||
return usage, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,12 +22,14 @@ import ( | |
"time" | ||
|
||
containersapi "github.com/containerd/containerd/api/services/containers/v1" | ||
snaptshotapi "github.com/containerd/containerd/api/services/snapshots/v1" | ||
tasksapi "github.com/containerd/containerd/api/services/tasks/v1" | ||
versionapi "github.com/containerd/containerd/api/services/version/v1" | ||
"github.com/containerd/containerd/containers" | ||
"github.com/containerd/containerd/errdefs" | ||
"github.com/containerd/containerd/pkg/dialer" | ||
ptypes "github.com/gogo/protobuf/types" | ||
"github.com/google/cadvisor/container/common" | ||
"google.golang.org/grpc" | ||
"google.golang.org/grpc/backoff" | ||
) | ||
|
@@ -36,12 +38,14 @@ type client struct { | |
containerService containersapi.ContainersClient | ||
taskService tasksapi.TasksClient | ||
versionService versionapi.VersionClient | ||
snapshotsService snaptshotapi.SnapshotsClient | ||
} | ||
|
||
type ContainerdClient interface { | ||
LoadContainer(ctx context.Context, id string) (*containers.Container, error) | ||
TaskPid(ctx context.Context, id string) (uint32, error) | ||
Version(ctx context.Context) (string, error) | ||
ContainerFsUsage(ctx context.Context, snapshotter, snapshotkey string) (*common.FsUsage, error) | ||
} | ||
|
||
var once sync.Once | ||
|
@@ -92,6 +96,7 @@ func Client(address, namespace string) (ContainerdClient, error) { | |
containerService: containersapi.NewContainersClient(conn), | ||
taskService: tasksapi.NewTasksClient(conn), | ||
versionService: versionapi.NewVersionClient(conn), | ||
snapshotsService: snaptshotapi.NewSnapshotsClient(conn), | ||
} | ||
}) | ||
return ctrdClient, retErr | ||
|
@@ -125,6 +130,21 @@ func (c *client) Version(ctx context.Context) (string, error) { | |
return response.Version, nil | ||
} | ||
|
||
func (c *client) ContainerFsUsage(ctx context.Context, snapshotter, snapshotkey string) (*common.FsUsage, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes , this client is delivered as interface, if this method is not exported, we can't get the fs usage. func newContainerdContainerHandler(
client ContainerdClient,// Here
name string,
machineInfoFactory info.MachineInfoFactory,
fsInfo fs.FsInfo,
cgroupSubsystems *containerlibcontainer.CgroupSubsystems,
inHostNamespace bool,
metadataEnvs []string,
includedMetrics container.MetricSet,
) (container.ContainerHandler, error) |
||
usage, err := c.snapshotsService.Usage(ctx, &snaptshotapi.UsageRequest{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how expensive is this? will containerd snapshotter have to recalculate the usage every time this is called or does it cache the usage internally? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, there is a cache in its CRI implementation. And |
||
Snapshotter: snapshotter, | ||
Key: snapshotkey, | ||
}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &common.FsUsage{ | ||
BaseUsageBytes: uint64(usage.Size_), | ||
TotalUsageBytes: uint64(usage.Size_), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it correct that these are both set to
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, for containerd there is no log file (used to store logs from the container's stdout or stderr) (at least, I didn't find it (T_T)). But for docker and crio there are log files, so when collecting fs usage, the usage of the log file is counted. By the way, kubelet stores container logs in another place ( // NOTE: This doesn't support the old pod log path, `/var/log/pods/UID`. For containers
// using old log path, empty log stats are returned. This is fine, because we don't
// officially support in-place upgrade anyway.
var (
containerLogPath = kuberuntime.BuildContainerLogsDirectory(meta.GetNamespace(),
meta.GetName(), types.UID(meta.GetUid()), container.GetMetadata().GetName())
err error
)
result.Logs, err = p.getPathFsStats(containerLogPath, rootFsInfo)
if err != nil {
klog.Errorf("Unable to fetch container log stats for path %s: %v ", containerLogPath, err)
}
return result There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't cadvisor take the log size of |
||
InodeUsage: uint64(usage.Inodes), | ||
}, nil | ||
} | ||
|
||
func containerFromProto(containerpb containersapi.Container) *containers.Container { | ||
var runtime containers.RuntimeInfo | ||
if containerpb.Runtime != nil { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please add godoc on what "Targets" is referring to
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added.
This method is used to report where the fs usage is collected from.