Skip to content

Commit 74e2fee

Browse files
authored
[ws-daemon] Expose controller metrics (#16465)
1 parent c487681 commit 74e2fee

File tree

2 files changed

+53
-28
lines changed

2 files changed

+53
-28
lines changed

components/ws-daemon/cmd/run.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717

1818
"github.com/bombsimon/logrusr/v2"
1919
"github.com/heptiolabs/healthcheck"
20-
"github.com/prometheus/client_golang/prometheus"
2120
"github.com/spf13/cobra"
2221
"google.golang.org/grpc"
2322
"google.golang.org/grpc/credentials"
@@ -49,21 +48,22 @@ var runCmd = &cobra.Command{
4948

5049
ctrl.SetLogger(logrusr.New(log.Log))
5150

51+
dmn, err := daemon.NewDaemon(cfg.Daemon)
52+
if err != nil {
53+
log.WithError(err).Fatal("Cannot create daemon.")
54+
}
55+
5256
health := healthcheck.NewHandler()
5357
srv, err := baseserver.New(grpcServerName,
5458
baseserver.WithGRPC(&cfg.Service),
5559
baseserver.WithHealthHandler(health),
60+
baseserver.WithMetricsRegistry(dmn.MetricsRegistry()),
5661
baseserver.WithVersion(Version),
5762
)
5863
if err != nil {
5964
log.WithError(err).Fatal("Cannot set up server.")
6065
}
6166

62-
dmn, err := daemon.NewDaemon(cfg.Daemon, prometheus.WrapRegistererWithPrefix("gitpod_ws_daemon_", srv.MetricsRegistry()))
63-
if err != nil {
64-
log.WithError(err).Fatal("Cannot create daemon.")
65-
}
66-
6767
health.AddReadinessCheck("grpc-server", grpcProbe(cfg.Service))
6868
health.AddReadinessCheck("ws-daemon", dmn.ReadinessProbe())
6969
health.AddReadinessCheck("disk-space", freeDiskSpace(cfg.Daemon))

components/ws-daemon/pkg/daemon/daemon.go

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
1414
"github.com/prometheus/client_golang/prometheus"
15+
"github.com/prometheus/client_golang/prometheus/collectors"
1516
"golang.org/x/xerrors"
1617
"google.golang.org/grpc"
1718
"k8s.io/apimachinery/pkg/runtime"
@@ -22,6 +23,7 @@ import (
2223
"k8s.io/client-go/tools/clientcmd"
2324
ctrl "sigs.k8s.io/controller-runtime"
2425
"sigs.k8s.io/controller-runtime/pkg/manager"
26+
"sigs.k8s.io/controller-runtime/pkg/metrics"
2527

2628
"github.com/gitpod-io/gitpod/common-go/log"
2729
"github.com/gitpod-io/gitpod/ws-daemon/api"
@@ -48,7 +50,24 @@ func init() {
4850
}
4951

5052
// NewDaemon produces a new daemon
51-
func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
53+
func NewDaemon(config Config) (*Daemon, error) {
54+
// Use the metrics registry from the controller manager. The manager's registry
55+
// isn't configurable so we use this instead of the baseserver's default registry.
56+
// Hack: type-assert the registry to *prometheus.Registry, as that's the type required
57+
// by baseserver.
58+
registry, ok := metrics.Registry.(*prometheus.Registry)
59+
if ok {
60+
// These collectors are also registered by baseserver. Use the ones from baseserver
61+
// and remove the collectors registered by controller-manager, to prevent an error
62+
// for duplicate collectors.
63+
registry.Unregister(collectors.NewGoCollector())
64+
registry.Unregister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
65+
} else {
66+
log.Error("failed to use controller-runtime metrics registry, not of expected type. Using default registry instead, but will not collect controller metrics...")
67+
registry = prometheus.NewRegistry()
68+
}
69+
wrappedReg := prometheus.WrapRegistererWithPrefix("gitpod_ws_daemon_", registry)
70+
5271
restCfg, err := newClientConfig(config.Runtime.Kubeconfig)
5372
if err != nil {
5473
return nil, err
@@ -71,7 +90,7 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
7190
return nil, xerrors.Errorf("NODENAME env var isn't set")
7291
}
7392

74-
markUnmountFallback, err := NewMarkUnmountFallback(reg)
93+
markUnmountFallback, err := NewMarkUnmountFallback(wrappedReg)
7594
if err != nil {
7695
return nil, err
7796
}
@@ -111,7 +130,7 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
111130
},
112131
},
113132
procV2Plugin,
114-
cgroup.NewPSIMetrics(reg),
133+
cgroup.NewPSIMetrics(wrappedReg),
115134
)
116135
if err != nil {
117136
return nil, err
@@ -121,18 +140,18 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
121140
return nil, xerrors.Errorf("only cgroup v2 is supported")
122141
}
123142

124-
err = reg.Register(cgroupPlugins)
143+
err = wrappedReg.Register(cgroupPlugins)
125144
if err != nil {
126145
return nil, xerrors.Errorf("cannot register cgroup plugin metrics: %w", err)
127146
}
128147

129148
listener := []dispatch.Listener{
130-
cpulimit.NewDispatchListener(&config.CPULimit, reg),
149+
cpulimit.NewDispatchListener(&config.CPULimit, wrappedReg),
131150
markUnmountFallback,
132151
cgroupPlugins,
133152
}
134153

135-
netlimiter := netlimit.NewConnLimiter(config.NetLimit, reg)
154+
netlimiter := netlimit.NewConnLimiter(config.NetLimit, wrappedReg)
136155
if config.NetLimit.Enabled {
137156
listener = append(listener, netlimiter)
138157
}
@@ -154,7 +173,7 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
154173
Port: 9443,
155174
Namespace: config.Runtime.KubernetesNamespace,
156175
HealthProbeBindAddress: "0",
157-
MetricsBindAddress: "0",
176+
MetricsBindAddress: "0", // Metrics are exposed through baseserver.
158177
})
159178
if err != nil {
160179
return nil, err
@@ -172,7 +191,7 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
172191
UIDMapperConfig: config.Uidmapper,
173192
ContainerRuntime: containerRuntime,
174193
CGroupMountPoint: config.CPULimit.CGroupBasePath,
175-
MetricsRegistry: reg,
194+
MetricsRegistry: wrappedReg,
176195
})
177196
if err != nil {
178197
return nil, err
@@ -195,7 +214,7 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
195214
dsptch.WorkspaceExistsOnNode,
196215
&iws.Uidmapper{Config: config.Uidmapper, Runtime: containerRuntime},
197216
config.CPULimit.CGroupBasePath,
198-
reg,
217+
wrappedReg,
199218
)
200219
if err != nil {
201220
return nil, xerrors.Errorf("cannot create content service: %w", err)
@@ -209,13 +228,14 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
209228
}
210229

211230
return &Daemon{
212-
Config: config,
213-
dispatch: dsptch,
214-
content: contentService,
215-
diskGuards: dsk,
216-
hosts: hsts,
217-
configReloader: configReloader,
218-
mgr: mgr,
231+
Config: config,
232+
dispatch: dsptch,
233+
content: contentService,
234+
diskGuards: dsk,
235+
hosts: hsts,
236+
configReloader: configReloader,
237+
mgr: mgr,
238+
metricsRegistry: registry,
219239
}, nil
220240
}
221241

@@ -231,12 +251,13 @@ func newClientConfig(kubeconfig string) (*rest.Config, error) {
231251
type Daemon struct {
232252
Config Config
233253

234-
dispatch *dispatch.Dispatch
235-
content *content.WorkspaceService
236-
diskGuards []*diskguard.Guard
237-
hosts hosts.Controller
238-
configReloader ConfigReloader
239-
mgr ctrl.Manager
254+
dispatch *dispatch.Dispatch
255+
content *content.WorkspaceService
256+
diskGuards []*diskguard.Guard
257+
hosts hosts.Controller
258+
configReloader ConfigReloader
259+
mgr ctrl.Manager
260+
metricsRegistry *prometheus.Registry
240261

241262
cancel context.CancelFunc
242263
}
@@ -325,3 +346,7 @@ func (d *Daemon) ReadinessProbe() func() error {
325346
return nil
326347
}
327348
}
349+
350+
func (d *Daemon) MetricsRegistry() *prometheus.Registry {
351+
return d.metricsRegistry
352+
}

0 commit comments

Comments
 (0)