Skip to content

Commit 81303d2

Browse files
committed
[tmpnet] Enable monitoring of local kind cluster
1 parent da195d7 commit 81303d2

File tree

14 files changed

+734
-41
lines changed

14 files changed

+734
-41
lines changed

scripts/start_kind_cluster.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ set -euo pipefail
88
# Enable reuse of the arguments to ginkgo relevant to starting a cluster
99
START_CLUSTER_ARGS=()
1010
for arg in "$@"; do
11-
if [[ "${arg}" =~ "--kubeconfig=" || "${arg}" =~ "--kubeconfig-context=" ]]; then
11+
if [[ "${arg}" =~ "--kubeconfig=" || "${arg}" =~ "--kubeconfig-context=" || "${arg}" =~ "--start-metrics-collector" || "${arg}" =~ "--start-logs-controller" ]]; then
1212
START_CLUSTER_ARGS+=("${arg}")
1313
fi
1414
done

tests/fixture/bootstrapmonitor/e2e/e2e_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ func newNodeStatefulSet(name string, flags tmpnet.FlagsMap) *appsv1.StatefulSet
273273
volumeSize,
274274
nodeDataDir,
275275
flags,
276+
nil, /* labels */
276277
)
277278

278279
// The version annotations key needs to be present to ensure compatibility with json patch replace

tests/fixture/e2e/env.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,15 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
193193

194194
// Once one or more nodes are running it should be safe to wait for promtail to report readiness
195195
if flagVars.StartLogsCollector() {
196-
require.NoError(tmpnet.WaitForPromtailReadiness(tc.DefaultContext(), tc.Log()))
196+
runtimeConfig, err := flagVars.NodeRuntimeConfig()
197+
require.NoError(err)
198+
if runtimeConfig.Kube != nil {
199+
// TODO(marun) Maybe make this configurable to enable the check for a test suite that writes service
200+
// discovery configuration for its own metrics endpoint?
201+
tc.Log().Warn("skipping check for logs collection readiness since kube nodes won't create have created the required service discovery config")
202+
} else {
203+
require.NoError(tmpnet.WaitForPromtailReadiness(tc.DefaultContext(), tc.Log()))
204+
}
197205
}
198206

199207
if networkCmd == StartNetworkCmd {

tests/fixture/e2e/flags.go

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@ type FlagVars struct {
3030
startNetwork bool
3131
startNetworkVars *flags.StartNetworkVars
3232

33-
// The collectors configured by these flags run as local processes
34-
startMetricsCollector bool
35-
startLogsCollector bool
33+
collectorVars *flags.CollectorVars
3634

3735
checkMetricsCollected bool
3836
checkLogsCollected bool
@@ -88,21 +86,19 @@ func (v *FlagVars) NodeRuntimeConfig() (*tmpnet.NodeRuntimeConfig, error) {
8886
}
8987

9088
func (v *FlagVars) StartMetricsCollector() bool {
91-
return v.startMetricsCollector
89+
return v.collectorVars.StartMetricsCollector
9290
}
9391

9492
func (v *FlagVars) StartLogsCollector() bool {
95-
return v.startLogsCollector
93+
return v.collectorVars.StartLogsCollector
9694
}
9795

9896
func (v *FlagVars) CheckMetricsCollected() bool {
99-
// TODO(marun) Enable this check for kube in a subsequent PR
100-
return v.startNetworkVars.ProcessRuntimeConfigured() && v.checkMetricsCollected
97+
return v.checkMetricsCollected
10198
}
10299

103100
func (v *FlagVars) CheckLogsCollected() bool {
104-
// TODO(marun) Enable this check for kube in a subsequent PR
105-
return v.startNetworkVars.ProcessRuntimeConfigured() && v.checkLogsCollected
101+
return v.checkLogsCollected
106102
}
107103

108104
func (v *FlagVars) NetworkDir() string {
@@ -144,9 +140,9 @@ func RegisterFlagsWithDefaultOwner(defaultOwner string) *FlagVars {
144140

145141
vars.startNetworkVars = flags.NewStartNetworkFlagVars(defaultOwner)
146142

147-
SetMonitoringFlags(
148-
&vars.startMetricsCollector,
149-
&vars.startLogsCollector,
143+
vars.collectorVars = flags.NewCollectorFlagVars()
144+
145+
SetCheckCollectionFlags(
150146
&vars.checkMetricsCollected,
151147
&vars.checkLogsCollected,
152148
)
@@ -189,19 +185,7 @@ func RegisterFlagsWithDefaultOwner(defaultOwner string) *FlagVars {
189185
return &vars
190186
}
191187

192-
func SetMonitoringFlags(startMetricsCollector, startLogsCollector, checkMetricsCollected, checkLogsCollected *bool) {
193-
flag.BoolVar(
194-
startMetricsCollector,
195-
"start-metrics-collector",
196-
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_METRICS_COLLECTOR", "false")),
197-
"[optional] whether to start a local collector of metrics from nodes of the temporary network.",
198-
)
199-
flag.BoolVar(
200-
startLogsCollector,
201-
"start-logs-collector",
202-
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_LOGS_COLLECTOR", "false")),
203-
"[optional] whether to start a local collector of logs from nodes of the temporary network.",
204-
)
188+
func SetCheckCollectionFlags(checkMetricsCollected *bool, checkLogsCollected *bool) {
205189
flag.BoolVar(
206190
checkMetricsCollected,
207191
"check-metrics-collected",

tests/fixture/tmpnet/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,13 @@ the following non-test files:
5959
| Filename | Types | Purpose |
6060
|:----------------------------|:---------------|:-----------------------------------------------------------------------|
6161
| flags/ | | Directory defining flags usable with both stdlib flags and spf13/pflag |
62+
| flags/collector.go | | Defines flags configuring collection of logs and metrics |
6263
| flags/common.go | | Defines type definitions common across other files |
6364
| flags/process_runtime.go | | Defines flags configuring the process node runtime |
6465
| flags/runtime.go | | Defines flags configuring node runtime |
6566
| flags/start_network.go | | Defines flags configuring network start |
6667
| tmpnetctl/ | | Directory containing main entrypoint for tmpnetctl command |
68+
| yaml/ | | Directory defining kubernetes resources in yaml format |
6769
| check_monitoring.go | | Enables checking if logs and metrics were collected |
6870
| defaults.go | | Defines common default configuration |
6971
| detached_process_default.go | | Configures detached processes for darwin and linux |
@@ -72,6 +74,7 @@ the following non-test files:
7274
| genesis.go | | Creates test genesis |
7375
| kube.go | | Library for Kubernetes interaction |
7476
| local_network.go | | Defines configuration for the default local network |
77+
| monitor_kube.go | | Enables collection of logs and metrics from kube pods |
7578
| monitor_processes.go | | Enables collection of logs and metrics from local processes |
7679
| network.go | Network | Orchestrates and configures temporary networks |
7780
| network_config.go | Network | Reads and writes network configuration |
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package flags
5+
6+
import (
7+
"flag"
8+
9+
"github.com/spf13/cast"
10+
"github.com/spf13/pflag"
11+
12+
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
13+
)
14+
15+
type CollectorVars struct {
16+
StartMetricsCollector bool
17+
StartLogsCollector bool
18+
}
19+
20+
// NewCollectorFlagVars registers collector flag variables for stdlib flag
21+
func NewCollectorFlagVars() *CollectorVars {
22+
v := &CollectorVars{}
23+
v.register(flag.BoolVar)
24+
return v
25+
}
26+
27+
// NewCollectorFlagSetVars registers collector flag variables for pflag
28+
func NewCollectorFlagSetVars(flagSet *pflag.FlagSet) *CollectorVars {
29+
v := &CollectorVars{}
30+
v.register(flagSet.BoolVar)
31+
return v
32+
}
33+
34+
func (v *CollectorVars) register(boolVar varFunc[bool]) {
35+
boolVar(
36+
&v.StartMetricsCollector,
37+
"start-metrics-collector",
38+
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_METRICS_COLLECTOR", "false")),
39+
"[optional] whether to start a local collector of metrics from nodes of the temporary network.",
40+
)
41+
42+
boolVar(
43+
&v.StartLogsCollector,
44+
"start-logs-collector",
45+
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_LOGS_COLLECTOR", "false")),
46+
"[optional] whether to start a local collector of logs from nodes of the temporary network.",
47+
)
48+
}

tests/fixture/tmpnet/kube.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,33 @@ func NewNodeStatefulSet(
5858
volumeSize string,
5959
volumeMountPath string,
6060
flags FlagsMap,
61+
labels map[string]string,
6162
) *appsv1.StatefulSet {
6263
objectMeta := metav1.ObjectMeta{}
6364
if generateName {
6465
objectMeta.GenerateName = name + "-"
6566
} else {
6667
objectMeta.Name = name
6768
}
69+
70+
podAnnotations := map[string]string{
71+
"prometheus.io/scrape": "true",
72+
"prometheus.io/path": "/ext/metrics",
73+
"promtail/collect": "true",
74+
}
75+
76+
podLabels := map[string]string{
77+
"app": name,
78+
}
79+
for label, value := range labels {
80+
// These labels may contain values invalid for use in labels. Set them as annotations instead.
81+
if label == "gh_repo" || label == "gh_workflow" {
82+
podAnnotations[label] = value
83+
continue
84+
}
85+
podLabels[label] = value
86+
}
87+
6888
return &appsv1.StatefulSet{
6989
ObjectMeta: objectMeta,
7090
Spec: appsv1.StatefulSetSpec{
@@ -94,9 +114,8 @@ func NewNodeStatefulSet(
94114
},
95115
Template: corev1.PodTemplateSpec{
96116
ObjectMeta: metav1.ObjectMeta{
97-
Labels: map[string]string{
98-
"app": name,
99-
},
117+
Labels: podLabels,
118+
Annotations: podAnnotations,
100119
},
101120
Spec: corev1.PodSpec{
102121
Containers: []corev1.Container{

tests/fixture/tmpnet/kube_runtime.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ func (p *KubeRuntime) Start(ctx context.Context) error {
249249
fmt.Sprintf("%dGi", runtimeConfig.VolumeSizeGB),
250250
volumeMountPath,
251251
flags,
252+
p.node.getMonitoringLabels(),
252253
)
253254

254255
_, err = clientset.AppsV1().StatefulSets(runtimeConfig.Namespace).Create(

0 commit comments

Comments
 (0)