Skip to content

Commit e4d85ac

Browse files
authored
feat: add "node" destination for NodeDiagnostic (#58)
1 parent 47e1c89 commit e4d85ac

3 files changed

Lines changed: 172 additions & 0 deletions

File tree

e2e/setup/e2e.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,4 +169,8 @@ func TestWrapper(t *testing.T, Testenv env.Environment) {
169169
// log collection runs at the end, which effectively makes it collects logs
170170
// and data from prior tests.
171171
Testenv.Test(t, nodediagnostic.LogCollection(awsCfg))
172+
Testenv.Test(t,
173+
nodediagnostic.LogCollection(awsCfg),
174+
nodediagnostic.NodeDestination(),
175+
)
172176
}

e2e/suites/nodediagnostic/nodediagnostic.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,93 @@ func assertLogsValid(t *testing.T, reader io.Reader) {
199199
t.Logf("found the following paths from the log archive: %s", strings.Join(fileNames, ","))
200200
}
201201
}
202+
203+
func NodeDestination() types.Feature {
204+
var nodeDiagnostics []v1alpha1.NodeDiagnostic
205+
206+
return features.New("NodeDestination").
207+
WithLabel("type", "log-collection").
208+
Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
209+
client, err := cfg.NewClient()
210+
if err != nil {
211+
t.Fatal(err)
212+
}
213+
if err := v1alpha1.SchemeBuilder.AddToScheme(client.Resources().GetScheme()); err != nil {
214+
t.Fatal(err)
215+
}
216+
var nodes corev1.NodeList
217+
if err := client.Resources().List(ctx, &nodes); err != nil {
218+
t.Fatal(err)
219+
}
220+
if len(nodes.Items) == 0 {
221+
t.Fatal("no nodes were found in the cluster")
222+
}
223+
for _, node := range nodes.Items {
224+
if node.DeletionTimestamp != nil {
225+
t.Logf("skipping node %q because it is being deleted", node.Name)
226+
continue
227+
}
228+
229+
nodeDiagnostic := v1alpha1.NodeDiagnostic{
230+
ObjectMeta: metav1.ObjectMeta{
231+
Name: node.Name,
232+
},
233+
Spec: v1alpha1.NodeDiagnosticSpec{
234+
LogCapture: &v1alpha1.LogCapture{
235+
UploadDestination: "node",
236+
},
237+
},
238+
}
239+
t.Logf("creating NodeDiagnostic for node %q with node destination", node.Name)
240+
if err := client.Resources().Create(ctx, &nodeDiagnostic); err != nil {
241+
t.Fatal(err)
242+
}
243+
nodeDiagnostics = append(nodeDiagnostics, nodeDiagnostic)
244+
}
245+
if len(nodeDiagnostics) == 0 {
246+
t.Fatal("no non-terminating nodes were found in the cluster")
247+
}
248+
return ctx
249+
}).
250+
Assess("CollectLogs", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
251+
for _, nodeDiagnostic := range nodeDiagnostics {
252+
t.Run(nodeDiagnostic.Name, func(t *testing.T) {
253+
if err := cfg.Client().Resources().Get(ctx, nodeDiagnostic.Name, nodeDiagnostic.Namespace, &nodeDiagnostic); err != nil {
254+
t.Fatal(err)
255+
}
256+
257+
if err := wait.For(
258+
conditions.New(cfg.Client().Resources()).ResourceMatch(&nodeDiagnostic, func(object k8s.Object) bool {
259+
nd := object.(*v1alpha1.NodeDiagnostic)
260+
return len(nd.Status.CaptureStatuses) > 0 && nd.Status.CaptureStatuses[0].State.Completed != nil
261+
}),
262+
wait.WithTimeout(time.Minute),
263+
wait.WithContext(ctx),
264+
); err != nil {
265+
t.Error(err)
266+
}
267+
for _, status := range nodeDiagnostic.Status.CaptureStatuses {
268+
if status.State.Completed == nil {
269+
t.Errorf("capture was not complete: %+v", status.State)
270+
} else if status.State.Completed.Reason == v1alpha1.CaptureStateFailure {
271+
t.Errorf("capture failed with reason: %s, message: %s", status.State.Completed.Reason, status.State.Completed.Message)
272+
} else {
273+
t.Logf("capture succeeded with reason: %s, message: %s", status.State.Completed.Reason, status.State.Completed.Message)
274+
}
275+
}
276+
})
277+
}
278+
return ctx
279+
}).
280+
Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
281+
for _, nodeDiagnostic := range nodeDiagnostics {
282+
t.Run(nodeDiagnostic.Name, func(t *testing.T) {
283+
if err := cfg.Client().Resources().Delete(ctx, &nodeDiagnostic); err != nil {
284+
t.Fatal(err)
285+
}
286+
})
287+
}
288+
return ctx
289+
}).
290+
Feature()
291+
}

pkg/controllers/nodediagnostic.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"path/filepath"
1111
"reflect"
12+
"time"
1213

1314
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1415
controllerruntime "sigs.k8s.io/controller-runtime"
@@ -100,6 +101,83 @@ func (c *nodeDiagnosticController) Reconcile(ctx context.Context, nodeDiagnostic
100101
}
101102

102103
log.Info("uploading logs", "url", nodeDiagnostic.Spec.UploadDestination)
104+
105+
if nodeDiagnostic.Spec.UploadDestination == "node" {
106+
log.Info("saving logs to /var/log/support for download via node proxy")
107+
supportDir := filepath.Join(config.HostRoot(), "var/log/support")
108+
if err := os.MkdirAll(supportDir, 0600); err != nil {
109+
log.Error(err, "failed to create support directory")
110+
captureStatus.State = v1alpha1.CaptureState{
111+
Completed: &v1alpha1.CaptureStateCompleted{
112+
Reason: v1alpha1.CaptureStateFailure,
113+
Message: "failed to create support directory",
114+
StartedAt: captureStatus.State.Running.StartedAt,
115+
FinishedAt: metav1.Now(),
116+
},
117+
}
118+
stored := nodeDiagnostic.DeepCopy()
119+
nodeDiagnostic.Status.SetCaptureStatus(captureStatus)
120+
return reconcile.Result{}, c.kubeClient.Status().Patch(ctx, nodeDiagnostic, client.MergeFrom(stored))
121+
}
122+
destPath := filepath.Join(supportDir, fmt.Sprintf("%s-logs.tar.gz", c.nodeName))
123+
destFile, err := os.Create(destPath)
124+
if err != nil {
125+
log.Error(err, "failed to create log file")
126+
captureStatus.State = v1alpha1.CaptureState{
127+
Completed: &v1alpha1.CaptureStateCompleted{
128+
Reason: v1alpha1.CaptureStateFailure,
129+
Message: "failed to create log file",
130+
StartedAt: captureStatus.State.Running.StartedAt,
131+
FinishedAt: metav1.Now(),
132+
},
133+
}
134+
stored := nodeDiagnostic.DeepCopy()
135+
nodeDiagnostic.Status.SetCaptureStatus(captureStatus)
136+
return reconcile.Result{}, c.kubeClient.Status().Patch(ctx, nodeDiagnostic, client.MergeFrom(stored))
137+
}
138+
defer destFile.Close()
139+
if _, err := io.Copy(destFile, archiveReader); err != nil {
140+
log.Error(err, "failed to write logs to file")
141+
captureStatus.State = v1alpha1.CaptureState{
142+
Completed: &v1alpha1.CaptureStateCompleted{
143+
Reason: v1alpha1.CaptureStateFailure,
144+
Message: "failed to write logs to file",
145+
StartedAt: captureStatus.State.Running.StartedAt,
146+
FinishedAt: metav1.Now(),
147+
},
148+
}
149+
stored := nodeDiagnostic.DeepCopy()
150+
nodeDiagnostic.Status.SetCaptureStatus(captureStatus)
151+
return reconcile.Result{}, c.kubeClient.Status().Patch(ctx, nodeDiagnostic, client.MergeFrom(stored))
152+
}
153+
log.Info("logs saved successfully", "path", destPath)
154+
captureStatus.State = v1alpha1.CaptureState{
155+
Completed: &v1alpha1.CaptureStateCompleted{
156+
Reason: v1alpha1.CaptureStateSuccess,
157+
Message: fmt.Sprintf("successfully saved logs to %s", destPath),
158+
StartedAt: captureStatus.State.Running.StartedAt,
159+
FinishedAt: metav1.Now(),
160+
},
161+
}
162+
if issueCount > 0 {
163+
captureStatus.State.Completed.Reason = v1alpha1.CaptureStateSuccessWithErrors
164+
captureStatus.State.Completed.Message = fmt.Sprintf("successfully saved logs to %s with some errors", destPath)
165+
}
166+
stored := nodeDiagnostic.DeepCopy()
167+
nodeDiagnostic.Status.SetCaptureStatus(captureStatus)
168+
// Delete file from /var/log/support after 10 minutes
169+
go func() {
170+
time.Sleep(600 * time.Second)
171+
if err := os.Remove(destPath); err != nil {
172+
if !os.IsNotExist(err) {
173+
log.Error(err, "failed to delete log file after timeout", "path", destPath)
174+
}
175+
} else {
176+
log.Info("successfully deleted log file after timeout", "path", destPath)
177+
}
178+
}()
179+
return reconcile.Result{}, c.kubeClient.Status().Patch(ctx, nodeDiagnostic, client.MergeFrom(stored))
180+
}
103181
// wrapping this setup into one function to avoid redundant failure code
104182
doUpload := func() error {
105183
uploadRequest, err := http.NewRequestWithContext(ctx, http.MethodPut, string(nodeDiagnostic.Spec.UploadDestination), archiveReader)

0 commit comments

Comments
 (0)