Skip to content

Commit 93a22bd

Browse files
marunmaru-avaStephenButtolph
authored
[tmpnet] Enable deployment to kube (#3615)
Signed-off-by: maru <[email protected]> Co-authored-by: Maru Newby <[email protected]> Co-authored-by: Stephen Buttolph <[email protected]>
1 parent a9ea4d6 commit 93a22bd

File tree

19 files changed

+1172
-51
lines changed

19 files changed

+1172
-51
lines changed

.github/actions/run-monitored-tmpnet-cmd/action.yml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ inputs:
88
run_env:
99
description: 'a string containing env vars for the command e.g. "MY_VAR1=foo MY_VAR2=bar"'
1010
default: ''
11+
runtime:
12+
description: 'the tmpnet runtime being used'
13+
default: 'process'
1114
filter_by_owner:
1215
default: ''
1316
artifact_prefix:
@@ -67,8 +70,10 @@ runs:
6770
# --impure ensures the env vars are accessible to the command
6871
run: ${{ inputs.run_env }} ${{ github.action_path }}/nix-develop.sh --impure --command bash -x ${{ inputs.run }}
6972
env:
73+
# Always collect metrics locally even when nodes are running in kube to enable collection from the test workload
7074
TMPNET_START_METRICS_COLLECTOR: ${{ inputs.prometheus_username != '' }}
71-
TMPNET_START_LOGS_COLLECTOR: ${{ inputs.loki_username != '' }}
75+
# Skip local log collection when nodes are running in kube since collection will occur in-cluster.
76+
TMPNET_START_LOGS_COLLECTOR: ${{ inputs.loki_username != '' && inputs.runtime == 'process' }}
7277
TMPNET_CHECK_METRICS_COLLECTED: ${{ inputs.prometheus_username != '' }}
7378
TMPNET_CHECK_LOGS_COLLECTED: ${{ inputs.loki_username != '' }}
7479
LOKI_USERNAME: ${{ inputs.loki_username }}
@@ -86,7 +91,7 @@ runs:
8691
# easy way to compose custom actions for use by other repos
8792
# without running into versioning issues.
8893
- name: Upload tmpnet data
89-
if: always()
94+
if: always() && (inputs.runtime == 'process')
9095
uses: actions/upload-artifact@v4
9196
with:
9297
name: ${{ inputs.artifact_prefix }}-tmpnet-data
@@ -95,3 +100,14 @@ runs:
95100
~/.tmpnet/prometheus/prometheus.log
96101
~/.tmpnet/promtail/promtail.log
97102
if-no-files-found: error
103+
- name: Export kind logs
104+
if: always() && (inputs.runtime == 'kube')
105+
shell: bash
106+
run: kind export logs /tmp/kind-logs
107+
- name: Upload kind logs
108+
if: always() && (inputs.runtime == 'kube')
109+
uses: actions/upload-artifact@v4
110+
with:
111+
name: ${{ inputs.artifact_prefix }}-kind-logs
112+
path: /tmp/kind-logs
113+
if-no-files-found: error

.github/workflows/ci.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,21 @@ jobs:
7676
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
7777
loki_username: ${{ secrets.LOKI_ID || '' }}
7878
loki_password: ${{ secrets.LOKI_PASSWORD || '' }}
79+
e2e_kube:
80+
runs-on: ubuntu-latest
81+
steps:
82+
- uses: actions/checkout@v4
83+
- uses: ./.github/actions/setup-go-for-project
84+
- uses: ./.github/actions/run-monitored-tmpnet-cmd
85+
with:
86+
run: ./scripts/run_task.sh test-e2e-kube-ci
87+
runtime: kube
88+
artifact_prefix: e2e-kube
89+
filter_by_owner: avalanchego-e2e
90+
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
91+
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
92+
loki_username: ${{ secrets.LOKI_ID || '' }}
93+
loki_password: ${{ secrets.LOKI_PASSWORD || '' }}
7994
e2e_existing_network:
8095
runs-on: ubuntu-latest
8196
steps:

Taskfile.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ tasks:
4747
desc: Builds xsvm plugin
4848
cmd: ./scripts/build_xsvm.sh
4949

50+
build-xsvm-image:
51+
desc: Builds xsvm image
52+
cmd: ./scripts/build_xsvm_image.sh
53+
5054
check-clean-branch:
5155
desc: Checks that the git working tree is clean
5256
cmd: .github/workflows/check-clean-branch.sh
@@ -177,6 +181,18 @@ tasks:
177181
- task: build-xsvm
178182
- cmd: bash -x ./scripts/tests.e2e.existing.sh {{.CLI_ARGS}}
179183

184+
test-e2e-kube:
185+
desc: Runs e2e tests against a network deployed to kube
186+
cmds:
187+
- cmd: bash -x ./scripts/tests.e2e.kube.sh {{.CLI_ARGS}}
188+
189+
test-e2e-kube-ci:
190+
desc: Runs e2e tests against a network deployed to kube [serially]
191+
env:
192+
E2E_SERIAL: 1
193+
cmds:
194+
- task: test-e2e-kube
195+
180196
# To use a different fuzz time, run `task test-fuzz FUZZTIME=[value in seconds]`.
181197
# A value of `-1` will run until it encounters a failing output.
182198

scripts/build_xsvm_image.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/usr/bin/env bash
2+
3+
set -euo pipefail
4+
5+
# e.g.,
6+
# ./scripts/build_xsvm_image.sh # Build local single-arch image
7+
# AVALANCHEGO_IMAGE=localhost:5001/avalanchego ./scripts/build_xsvm_image.sh # Build and push image to private registry
8+
9+
if ! [[ "$0" =~ scripts/build_xsvm_image.sh ]]; then
10+
echo "must be run from repository root"
11+
exit 255
12+
fi
13+
14+
source ./scripts/image_tag.sh
15+
16+
AVALANCHEGO_IMAGE="${AVALANCHEGO_IMAGE:-avalanchego}"
17+
XSVM_IMAGE="${XSVM_IMAGE:-avalanchego-xsvm}"
18+
19+
# Build the avalanchego base image
20+
SKIP_BUILD_RACE=1 DOCKER_IMAGE="${AVALANCHEGO_IMAGE}" bash -x ./scripts/build_image.sh
21+
22+
DOCKER_CMD=("docker" "buildx" "build")
23+
if [[ "${XSVM_IMAGE}" == *"/"* ]]; then
24+
# Push to a registry when the image name includes a slash which indicates the
25+
# use of a registry e.g.
26+
#
27+
# - dockerhub: [repo]/[image name]:[tag]
28+
# - private registry: [private registry hostname]/[image name]:[tag]
29+
DOCKER_CMD+=("--push")
30+
fi
31+
32+
GO_VERSION="$(go list -m -f '{{.GoVersion}}')"
33+
34+
"${DOCKER_CMD[@]}" --build-arg GO_VERSION="${GO_VERSION}" --build-arg AVALANCHEGO_NODE_IMAGE="${AVALANCHEGO_IMAGE}:${image_tag}" \
35+
-t "${XSVM_IMAGE}" -f ./vms/example/xsvm/Dockerfile .

scripts/tests.e2e.kube.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env bash
2+
3+
set -euo pipefail
4+
5+
# Run e2e tests against nodes deployed to a kind cluster.
6+
7+
# TODO(marun) Support testing against a remote cluster
8+
9+
if ! [[ "$0" =~ scripts/tests.e2e.kube.sh ]]; then
10+
echo "must be run from repository root"
11+
exit 255
12+
fi
13+
14+
# This script will use kubeconfig arguments if supplied
15+
./scripts/start_kind_cluster.sh "$@"
16+
17+
# Use an image that will be pushed to the local registry that the kind cluster is configured to use.
18+
AVALANCHEGO_IMAGE="localhost:5001/avalanchego"
19+
XSVM_IMAGE="${AVALANCHEGO_IMAGE}-xsvm"
20+
if [[ -n "${SKIP_BUILD_IMAGE:-}" ]]; then
21+
echo "Skipping build of xsvm image due to SKIP_BUILD_IMAGE=${SKIP_BUILD_IMAGE}"
22+
else
23+
XSVM_IMAGE="${XSVM_IMAGE}" AVALANCHEGO_IMAGE="${AVALANCHEGO_IMAGE}" bash -x ./scripts/build_xsvm_image.sh
24+
fi
25+
26+
bash -x ./scripts/tests.e2e.sh --runtime=kube --kube-image="${XSVM_IMAGE}" "$@"

scripts/tests.e2e.sh

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,14 @@ fi
2020
# the instructions to build non-portable BLST.
2121
source ./scripts/constants.sh
2222

23-
# Ensure an absolute path to avoid dependency on the working directory
24-
# of script execution.
25-
AVALANCHEGO_PATH="$(realpath "${AVALANCHEGO_PATH:-./build/avalanchego}")"
26-
E2E_ARGS="--avalanchego-path=${AVALANCHEGO_PATH}"
23+
# Start from the caller-supplied arguments so that any default added below is
# appended to them rather than replacing them.
E2E_ARGS=("${@}")

# Default to a local avalanchego binary only when the caller has neither
# selected the kube runtime nor supplied --avalanchego-path explicitly.
# Both checks are literal substring matches against the joined argument list.
if [[ ! "${E2E_ARGS[*]}" =~ "--runtime=kube" && ! "${E2E_ARGS[*]}" =~ "--avalanchego-path" ]]; then
  # Ensure an absolute path to avoid dependency on the working directory of script execution.
  AVALANCHEGO_PATH="$(realpath "${AVALANCHEGO_PATH:-./build/avalanchego}")"
  E2E_ARGS+=("--avalanchego-path=${AVALANCHEGO_PATH}")
fi
fi
2731

2832
#################################
2933
# Determine ginkgo args
@@ -55,4 +59,4 @@ fi
5559

5660
#################################
5761
# shellcheck disable=SC2086
58-
./bin/ginkgo ${GINKGO_ARGS} -v ./tests/e2e -- "${E2E_ARGS[@]}" "${@}"
62+
./bin/ginkgo ${GINKGO_ARGS} -v ./tests/e2e -- "${E2E_ARGS[@]}"

tests/e2e/faultinjection/duplicate_node_id.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ var _ = ginkgo.Describe("Duplicate node handling", func() {
2525
ginkgo.It("should ensure that a given Node ID (i.e. staking keypair) can be used at most once on a network", func() {
2626
network := e2e.GetEnv(tc).GetNetwork()
2727

28+
if network.DefaultRuntimeConfig.Kube != nil {
29+
// Enabling this test for kube requires supporting a flexible name mapping
30+
ginkgo.Skip("This test is not supported on kube to avoid having to deviate from composing the statefulset name with the network uuid + nodeid")
31+
}
32+
2833
tc.By("creating new node")
2934
node1 := e2e.AddEphemeralNode(tc, network, tmpnet.NewEphemeralNode(tmpnet.FlagsMap{}))
3035
e2e.WaitForHealthy(tc, node1)

tests/fixture/bootstrapmonitor/e2e/e2e_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,10 @@ func buildImage(tc tests.TestContext, imageName string, forceNewHash bool, scrip
263263
require.NoError(err, "Image build failed: %s", output)
264264
}
265265

266-
func newNodeStatefulSet(name string, flags map[string]string) *appsv1.StatefulSet {
266+
func newNodeStatefulSet(name string, flags tmpnet.FlagsMap) *appsv1.StatefulSet {
267267
statefulSet := tmpnet.NewNodeStatefulSet(
268268
name,
269+
true, // generateName
269270
latestAvalanchegoImage,
270271
nodeContainerName,
271272
volumeName,

tests/fixture/e2e/flags.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,13 @@ func (v *FlagVars) StartLogsCollector() bool {
9696
}
9797

9898
func (v *FlagVars) CheckMetricsCollected() bool {
99-
return v.checkMetricsCollected
99+
// TODO(marun) Enable this check for kube in a subsequent PR
100+
return v.startNetworkVars.ProcessRuntimeConfigured() && v.checkMetricsCollected
100101
}
101102

102103
func (v *FlagVars) CheckLogsCollected() bool {
103-
return v.checkLogsCollected
104+
// TODO(marun) Enable this check for kube in a subsequent PR
105+
return v.startNetworkVars.ProcessRuntimeConfigured() && v.checkLogsCollected
104106
}
105107

106108
func (v *FlagVars) NetworkDir() string {

tests/fixture/e2e/ginkgo_test_context.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func newGinkgoLogger(cfg zapcore.Encoder) logging.Logger {
4646
return logging.NewLogger(
4747
"",
4848
logging.NewWrappedCore(
49-
logging.Verbo,
49+
logging.Info,
5050
&ginkgoWriteCloser{},
5151
cfg,
5252
),
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package flags
5+
6+
import (
7+
"errors"
8+
"flag"
9+
"fmt"
10+
11+
"github.com/spf13/pflag"
12+
13+
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
14+
)
15+
16+
const (
17+
kubeRuntime = "kube"
18+
kubeFlagsPrefix = kubeRuntime + "-"
19+
kubeDocPrefix = "[kube runtime] "
20+
)
21+
22+
var (
23+
errKubeNamespaceRequired = errors.New("--kube-namespace is required")
24+
errKubeImageRequired = errors.New("--kube-image is required")
25+
errKubeMinVolumeSizeRequired = fmt.Errorf("--kube-volume-size must be >= %d", tmpnet.MinimumVolumeSizeGB)
26+
)
27+
28+
// kubeRuntimeVars holds the flag-populated settings for the kube runtime.
// getKubeRuntimeConfig validates these values and copies them into a
// tmpnet.KubeRuntimeConfig.
type kubeRuntimeVars struct {
29+
// namespace is bound to the kube-namespace flag.
namespace string
30+
// image is bound to the kube-image flag.
image string
31+
// volumeSizeGB is bound to the kube-volume-size flag.
volumeSizeGB uint
32+
// config supplies the kubeconfig path and context used in the runtime config.
config *KubeconfigVars
33+
}
34+
35+
func (v *kubeRuntimeVars) registerWithFlag() {
36+
v.config = newKubeconfigFlagVars(kubeDocPrefix)
37+
v.register(flag.StringVar, flag.UintVar)
38+
}
39+
40+
func (v *kubeRuntimeVars) registerWithFlagSet(flagSet *pflag.FlagSet) {
41+
v.config = newKubeconfigFlagSetVars(flagSet, kubeDocPrefix)
42+
v.register(flagSet.StringVar, flagSet.UintVar)
43+
}
44+
45+
func (v *kubeRuntimeVars) register(stringVar varFunc[string], uintVar varFunc[uint]) {
46+
stringVar(
47+
&v.namespace,
48+
"kube-namespace",
49+
tmpnet.DefaultTmpnetNamespace,
50+
kubeDocPrefix+"The namespace in the target cluster to create nodes in",
51+
)
52+
stringVar(
53+
&v.image,
54+
"kube-image",
55+
"avaplatform/avalanchego:latest",
56+
kubeDocPrefix+"The name of the docker image to use for creating nodes",
57+
)
58+
uintVar(
59+
&v.volumeSizeGB,
60+
"kube-volume-size",
61+
tmpnet.MinimumVolumeSizeGB,
62+
kubeDocPrefix+fmt.Sprintf(
63+
"The size in gigabytes of the PeristentVolumeClaim to create for the data directory of each node. Value must be >= %d.",
64+
tmpnet.MinimumVolumeSizeGB,
65+
),
66+
)
67+
}
68+
69+
func (v *kubeRuntimeVars) getKubeRuntimeConfig() (*tmpnet.KubeRuntimeConfig, error) {
70+
if len(v.namespace) == 0 {
71+
return nil, errKubeNamespaceRequired
72+
}
73+
if len(v.image) == 0 {
74+
return nil, errKubeImageRequired
75+
}
76+
if v.volumeSizeGB < tmpnet.MinimumVolumeSizeGB {
77+
return nil, errKubeMinVolumeSizeRequired
78+
}
79+
return &tmpnet.KubeRuntimeConfig{
80+
ConfigPath: v.config.Path,
81+
ConfigContext: v.config.Context,
82+
Namespace: v.namespace,
83+
Image: v.image,
84+
VolumeSizeGB: v.volumeSizeGB,
85+
}, nil
86+
}

tests/fixture/tmpnet/flags/runtime.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,20 @@ import (
1414

1515
var validRuntimes = []string{
1616
processRuntime,
17+
kubeRuntime,
1718
}
1819

1920
type RuntimeConfigVars struct {
2021
runtime string
2122
processRuntimeVars processRuntimeVars
23+
kubeRuntimeVars kubeRuntimeVars
2224
}
2325

2426
// NewRuntimeConfigFlagVars registers runtime config flag variables for stdlib flag
2527
func NewRuntimeConfigFlagVars() *RuntimeConfigVars {
2628
v := &RuntimeConfigVars{}
2729
v.processRuntimeVars.registerWithFlag()
30+
v.kubeRuntimeVars.registerWithFlag()
2831
v.register(flag.StringVar)
2932
return v
3033
}
@@ -33,6 +36,7 @@ func NewRuntimeConfigFlagVars() *RuntimeConfigVars {
3336
func NewRuntimeConfigFlagSetVars(flagSet *pflag.FlagSet) *RuntimeConfigVars {
3437
v := &RuntimeConfigVars{}
3538
v.processRuntimeVars.registerWithFlagSet(flagSet)
39+
v.kubeRuntimeVars.registerWithFlagSet(flagSet)
3640
v.register(flagSet.StringVar)
3741
return v
3842
}
@@ -60,6 +64,14 @@ func (v *RuntimeConfigVars) GetNodeRuntimeConfig() (*tmpnet.NodeRuntimeConfig, e
6064
return &tmpnet.NodeRuntimeConfig{
6165
Process: processRuntimeConfig,
6266
}, nil
67+
case kubeRuntime:
68+
kubeRuntimeConfig, err := v.kubeRuntimeVars.getKubeRuntimeConfig()
69+
if err != nil {
70+
return nil, err
71+
}
72+
return &tmpnet.NodeRuntimeConfig{
73+
Kube: kubeRuntimeConfig,
74+
}, nil
6375
default:
6476
return nil, fmt.Errorf("--runtime expected one of %v, got: %s", validRuntimes, v.runtime)
6577
}

tests/fixture/tmpnet/flags/start_network.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ func (v *StartNetworkVars) register(stringVar varFunc[string], intVar varFunc[in
5959
)
6060
}
6161

62+
func (v *StartNetworkVars) ProcessRuntimeConfigured() bool {
63+
return v.runtimeVars.runtime == processRuntime
64+
}
65+
6266
func (v *StartNetworkVars) GetNodeCount() (int, error) {
6367
if v.nodeCount < 1 {
6468
return 0, fmt.Errorf("--node-count must be greater than 0 but got %d", v.nodeCount)

0 commit comments

Comments
 (0)