Skip to content

Commit 8932fa3

Browse files
committed
[tmpnet] Enable monitoring of local kind cluster
1 parent e8c9f86 commit 8932fa3

File tree

15 files changed

+734
-45
lines changed

15 files changed

+734
-45
lines changed

scripts/start_kind_cluster.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ set -euo pipefail
88
# Enable reuse of the arguments to ginkgo relevant to starting a cluster
99
START_CLUSTER_ARGS=()
1010
for arg in "$@"; do
11-
if [[ "${arg}" =~ "--kubeconfig=" || "${arg}" =~ "--kubeconfig-context=" ]]; then
11+
# Forward only the flags relevant to starting a cluster: kubeconfig selection and collector startup.
if [[ "${arg}" =~ "--kubeconfig=" || "${arg}" =~ "--kubeconfig-context=" || "${arg}" =~ "--start-metrics-collector" || "${arg}" =~ "--start-logs-collector" ]]; then
1212
START_CLUSTER_ARGS+=("${arg}")
1313
fi
1414
done

tests/fixture/bootstrapmonitor/e2e/e2e_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ func newNodeStatefulSet(name string, flags tmpnet.FlagsMap) *appsv1.StatefulSet
273273
volumeSize,
274274
nodeDataDir,
275275
flags,
276+
nil, /* labels */
276277
)
277278

278279
// The version annotations key needs to be present to ensure compatibility with json patch replace

tests/fixture/e2e/env.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,15 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
193193

194194
// Once one or more nodes are running it should be safe to wait for promtail to report readiness
195195
if flagVars.StartLogsCollector() {
196-
require.NoError(tmpnet.WaitForPromtailReadiness(tc.DefaultContext(), tc.Log()))
196+
runtimeConfig, err := flagVars.NodeRuntimeConfig()
197+
require.NoError(err)
198+
if runtimeConfig.Kube != nil {
199+
// TODO(marun) Maybe make this configurable to enable the check for a test suite that writes service
200+
// discovery configuration for its own metrics endpoint?
201+
tc.Log().Warn("skipping check for logs collection readiness since kube nodes won't have created the required service discovery config")
202+
} else {
203+
require.NoError(tmpnet.WaitForPromtailReadiness(tc.DefaultContext(), tc.Log()))
204+
}
197205
}
198206

199207
if networkCmd == StartNetworkCmd {

tests/fixture/e2e/flags.go

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@ type FlagVars struct {
3030
startNetwork bool
3131
startNetworkVars *flags.StartNetworkVars
3232

33-
// The collectors configured by these flags run as local processes
34-
startMetricsCollector bool
35-
startLogsCollector bool
33+
collectorVars *flags.CollectorVars
3634

3735
checkMetricsCollected bool
3836
checkLogsCollected bool
@@ -88,21 +86,19 @@ func (v *FlagVars) NodeRuntimeConfig() (*tmpnet.NodeRuntimeConfig, error) {
8886
}
8987

9088
func (v *FlagVars) StartMetricsCollector() bool {
91-
return v.startMetricsCollector
89+
return v.collectorVars.StartMetricsCollector
9290
}
9391

9492
func (v *FlagVars) StartLogsCollector() bool {
95-
return v.startLogsCollector
93+
return v.collectorVars.StartLogsCollector
9694
}
9795

9896
func (v *FlagVars) CheckMetricsCollected() bool {
99-
// TODO(marun) Enable this check for kube in a subsequent PR
100-
return v.startNetworkVars.ProcessRuntimeConfigured() && v.checkMetricsCollected
97+
return v.checkMetricsCollected
10198
}
10299

103100
func (v *FlagVars) CheckLogsCollected() bool {
104-
// TODO(marun) Enable this check for kube in a subsequent PR
105-
return v.startNetworkVars.ProcessRuntimeConfigured() && v.checkLogsCollected
101+
return v.checkLogsCollected
106102
}
107103

108104
func (v *FlagVars) NetworkDir() string {
@@ -144,9 +140,9 @@ func RegisterFlagsWithDefaultOwner(defaultOwner string) *FlagVars {
144140

145141
vars.startNetworkVars = flags.NewStartNetworkFlagVars(defaultOwner)
146142

147-
SetMonitoringFlags(
148-
&vars.startMetricsCollector,
149-
&vars.startLogsCollector,
143+
vars.collectorVars = flags.NewCollectorFlagVars()
144+
145+
SetCheckCollectionFlags(
150146
&vars.checkMetricsCollected,
151147
&vars.checkLogsCollected,
152148
)
@@ -189,19 +185,7 @@ func RegisterFlagsWithDefaultOwner(defaultOwner string) *FlagVars {
189185
return &vars
190186
}
191187

192-
func SetMonitoringFlags(startMetricsCollector, startLogsCollector, checkMetricsCollected, checkLogsCollected *bool) {
193-
flag.BoolVar(
194-
startMetricsCollector,
195-
"start-metrics-collector",
196-
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_METRICS_COLLECTOR", "false")),
197-
"[optional] whether to start a local collector of metrics from nodes of the temporary network.",
198-
)
199-
flag.BoolVar(
200-
startLogsCollector,
201-
"start-logs-collector",
202-
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_LOGS_COLLECTOR", "false")),
203-
"[optional] whether to start a local collector of logs from nodes of the temporary network.",
204-
)
188+
func SetCheckCollectionFlags(checkMetricsCollected *bool, checkLogsCollected *bool) {
205189
flag.BoolVar(
206190
checkMetricsCollected,
207191
"check-metrics-collected",

tests/fixture/tmpnet/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,13 @@ the following non-test files:
5959
| Filename | Types | Purpose |
6060
|:----------------------------|:---------------|:-----------------------------------------------------------------------|
6161
| flags/ | | Directory defining flags usable with both stdlib flags and spf13/pflag |
62+
| flags/collector.go | | Defines flags configuring collection of logs and metrics |
6263
| flags/common.go | | Defines type definitions common across other files |
6364
| flags/process_runtime.go | | Defines flags configuring the process node runtime |
6465
| flags/runtime.go | | Defines flags configuring node runtime |
6566
| flags/start_network.go | | Defines flags configuring network start |
6667
| tmpnetctl/ | | Directory containing main entrypoint for tmpnetctl command |
68+
| yaml/ | | Directory defining kubernetes resources in yaml format |
6769
| check_monitoring.go | | Enables checking if logs and metrics were collected |
6870
| defaults.go | | Defines common default configuration |
6971
| detached_process_default.go | | Configures detached processes for darwin and linux |
@@ -72,6 +74,7 @@ the following non-test files:
7274
| genesis.go | | Creates test genesis |
7375
| kube.go | | Library for Kubernetes interaction |
7476
| local_network.go | | Defines configuration for the default local network |
77+
| monitor_kube.go | | Enables collection of logs and metrics from kube pods |
7578
| monitor_processes.go | | Enables collection of logs and metrics from local processes |
7679
| network.go | Network | Orchestrates and configures temporary networks |
7780
| network_config.go | Network | Reads and writes network configuration |
tests/fixture/tmpnet/flags/collector.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package flags
5+
6+
import (
7+
"flag"
8+
9+
"github.com/spf13/cast"
10+
"github.com/spf13/pflag"
11+
12+
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
13+
)
14+
15+
type CollectorVars struct {
16+
StartMetricsCollector bool
17+
StartLogsCollector bool
18+
}
19+
20+
// NewCollectorFlagVars registers collector flag variables for stdlib flag
21+
func NewCollectorFlagVars() *CollectorVars {
22+
v := &CollectorVars{}
23+
v.register(flag.BoolVar)
24+
return v
25+
}
26+
27+
// NewCollectorFlagSetVars registers collector flag variables for pflag
28+
func NewCollectorFlagSetVars(flagSet *pflag.FlagSet) *CollectorVars {
29+
v := &CollectorVars{}
30+
v.register(flagSet.BoolVar)
31+
return v
32+
}
33+
34+
func (v *CollectorVars) register(boolVar varFunc[bool]) {
35+
boolVar(
36+
&v.StartMetricsCollector,
37+
"start-metrics-collector",
38+
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_METRICS_COLLECTOR", "false")),
39+
"[optional] whether to start a local collector of metrics from nodes of the temporary network.",
40+
)
41+
42+
boolVar(
43+
&v.StartLogsCollector,
44+
"start-logs-collector",
45+
cast.ToBool(tmpnet.GetEnvWithDefault("TMPNET_START_LOGS_COLLECTOR", "false")),
46+
"[optional] whether to start a local collector of logs from nodes of the temporary network.",
47+
)
48+
}

tests/fixture/tmpnet/flags/start_network.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,6 @@ func (v *StartNetworkVars) register(stringVar varFunc[string], intVar varFunc[in
5959
)
6060
}
6161

62-
func (v *StartNetworkVars) ProcessRuntimeConfigured() bool {
63-
return v.runtimeVars.runtime == processRuntime
64-
}
65-
6662
func (v *StartNetworkVars) GetNodeCount() (int, error) {
6763
if v.nodeCount < 1 {
6864
return 0, fmt.Errorf("--node-count must be greater than 0 but got %d", v.nodeCount)

tests/fixture/tmpnet/kube.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,33 @@ func NewNodeStatefulSet(
5959
volumeSize string,
6060
volumeMountPath string,
6161
flags FlagsMap,
62+
labels map[string]string,
6263
) *appsv1.StatefulSet {
6364
objectMeta := metav1.ObjectMeta{}
6465
if generateName {
6566
objectMeta.GenerateName = name + "-"
6667
} else {
6768
objectMeta.Name = name
6869
}
70+
71+
podAnnotations := map[string]string{
72+
"prometheus.io/scrape": "true",
73+
"prometheus.io/path": "/ext/metrics",
74+
"promtail/collect": "true",
75+
}
76+
77+
podLabels := map[string]string{
78+
"app": name,
79+
}
80+
for label, value := range labels {
81+
// These labels may contain values invalid for use in labels. Set them as annotations instead.
82+
if label == "gh_repo" || label == "gh_workflow" {
83+
podAnnotations[label] = value
84+
continue
85+
}
86+
podLabels[label] = value
87+
}
88+
6989
return &appsv1.StatefulSet{
7090
ObjectMeta: objectMeta,
7191
Spec: appsv1.StatefulSetSpec{
@@ -95,9 +115,8 @@ func NewNodeStatefulSet(
95115
},
96116
Template: corev1.PodTemplateSpec{
97117
ObjectMeta: metav1.ObjectMeta{
98-
Labels: map[string]string{
99-
"app": name,
100-
},
118+
Labels: podLabels,
119+
Annotations: podAnnotations,
101120
},
102121
Spec: corev1.PodSpec{
103122
Containers: []corev1.Container{

tests/fixture/tmpnet/kube_runtime.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ func (p *KubeRuntime) Start(ctx context.Context) error {
262262
fmt.Sprintf("%dGi", runtimeConfig.VolumeSizeGB),
263263
volumeMountPath,
264264
flags,
265+
p.node.getMonitoringLabels(),
265266
)
266267

267268
_, err = clientset.AppsV1().StatefulSets(runtimeConfig.Namespace).Create(

0 commit comments

Comments
 (0)