1
1
package healthcheck
2
2
3
3
import (
4
+ "context"
5
+ "fmt"
4
6
"os"
5
7
"path/filepath"
6
8
"strings"
7
9
8
10
"github.com/openshift/microshift/pkg/config"
11
+ corev1 "k8s.io/api/core/v1"
12
+ v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13
+ "k8s.io/apimachinery/pkg/util/sets"
9
14
"k8s.io/cli-runtime/pkg/genericclioptions"
15
+ coreclientv1 "k8s.io/client-go/kubernetes/typed/core/v1"
10
16
"k8s.io/client-go/util/homedir"
11
17
"k8s.io/klog/v2"
12
18
"k8s.io/kubectl/pkg/cmd/get"
13
19
cmdutil "k8s.io/kubectl/pkg/cmd/util"
14
20
"k8s.io/utils/ptr"
15
21
)
16
22
17
- func logPodsAndEvents () {
23
+ func printPostFailureDebugInfo (ctx context.Context , coreClient * coreclientv1.CoreV1Client ) {
24
+ output := strings.Builder {}
25
+
26
+ unpulledOrFailedImages (ctx , coreClient , & output )
27
+ allPodsAndEvents (& output )
28
+
29
+ klog .Infof ("DEBUG INFORMATION\n %s" , output .String ())
30
+ }
31
+
32
+ func allPodsAndEvents (output * strings.Builder ) {
18
33
cliOptions := genericclioptions .NewConfigFlags (true ).WithDeprecatedPasswordFlag ()
19
34
cliOptions .KubeConfig = ptr .To (filepath .Join (config .DataDir , "resources" , string (config .KubeAdmin ), "kubeconfig" ))
20
35
if homedir .HomeDir () == "" {
@@ -27,8 +42,7 @@ func logPodsAndEvents() {
27
42
matchVersionKubeConfigFlags := cmdutil .NewMatchVersionFlags (cliOptions )
28
43
f := cmdutil .NewFactory (matchVersionKubeConfigFlags )
29
44
30
- output := strings.Builder {}
31
- ioStreams := genericclioptions.IOStreams {In : os .Stdin , Out : & output , ErrOut : & output }
45
+ ioStreams := genericclioptions.IOStreams {In : os .Stdin , Out : output , ErrOut : output }
32
46
33
47
cmdGet := get .NewCmdGet ("" , f , ioStreams )
34
48
opts := get .NewGetOptions ("" , ioStreams )
@@ -49,12 +63,118 @@ func logPodsAndEvents() {
49
63
klog .Errorf ("Failed to run 'get pods': %v" , err )
50
64
return
51
65
}
66
+ output .WriteString ("\n " )
52
67
output .WriteString ("\n ---------- EVENTS:\n " )
53
68
opts .SortBy = ".metadata.creationTimestamp"
54
69
if err := opts .Run (f , []string {"events" }); err != nil {
55
70
klog .Errorf ("Failed to run 'get events': %v" , err )
56
71
return
57
72
}
73
+ output .WriteString ("\n " )
74
+ }
58
75
59
- klog .Infof ("DEBUG INFORMATION\n %s" , output .String ())
76
+ // unpulledOrFailedImages prepares a debug log with information about images that are still being pulled or failed to be pulled.
77
+ func unpulledOrFailedImages (ctx context.Context , coreClient * coreclientv1.CoreV1Client , output * strings.Builder ) {
78
+ // Get list of existing Pods to skip Events belonging to non-existing Pods to avoid false positives:
79
+ // If someone creates and deletes a lot of workloads, there might be "Pulling" events for each Pod without
80
+ // the corresponding "Pulled" event.
81
+ pods , err := coreClient .Pods ("" ).List (ctx , v1.ListOptions {})
82
+ if err != nil {
83
+ klog .Errorf ("Failed to retrieve pods: %v" , err )
84
+ return
85
+ }
86
+ existingPodsNames := sets .New [string ]()
87
+ for _ , pod := range pods .Items {
88
+ existingPodsNames .Insert (pod .Name )
89
+ }
90
+
91
+ var pullingEvents , pulledEvents , failedEvents * corev1.EventList
92
+ if pullingEvents , err = coreClient .Events ("" ).List (ctx , v1.ListOptions {FieldSelector : "reportingComponent=kubelet,reason=Pulling" }); err != nil {
93
+ klog .Errorf ("Failed to retrieve Pulling events: %v" , err )
94
+ return
95
+ }
96
+ if pulledEvents , err = coreClient .Events ("" ).List (ctx , v1.ListOptions {FieldSelector : "reportingComponent=kubelet,reason=Pulled" }); err != nil {
97
+ klog .Errorf ("Failed to retrieve Pulled events: %v" , err )
98
+ return
99
+ }
100
+ if failedEvents , err = coreClient .Events ("" ).List (ctx , v1.ListOptions {FieldSelector : "reportingComponent=kubelet,reason=Failed" }); err != nil {
101
+ klog .Errorf ("Failed to retrieve Failed events: %v" , err )
102
+ return
103
+ }
104
+
105
+ unpulledImages , failedImages := analyzeEventsLookingForUnpulledOrFailedImages (existingPodsNames , pullingEvents , pulledEvents , failedEvents )
106
+
107
+ if len (unpulledImages ) > 0 {
108
+ output .WriteString ("---------- IMAGES THAT ARE STILL BEING PULLED:\n " )
109
+ for _ , unpulledImage := range unpulledImages {
110
+ output .WriteString (fmt .Sprintf ("- %q for Pod %q in namespace %q\n " , unpulledImage .Image , unpulledImage .PodName , unpulledImage .Namespace ))
111
+ }
112
+ output .WriteString ("\n " )
113
+ }
114
+
115
+ if len (failedImages ) > 0 {
116
+ output .WriteString ("---------- IMAGES THAT FAILED TO BE PULLED:\n " )
117
+ for _ , failedImage := range failedImages {
118
+ output .WriteString (fmt .Sprintf ("- %q for Pod %q in namespace %q: %s\n " , failedImage .Image , failedImage .PodName , failedImage .Namespace , failedImage .Message ))
119
+ }
120
+ output .WriteString ("\n " )
121
+ }
122
+ }
123
+
124
type (
	// unpulledImage identifies an image reference together with the Pod
	// (namespace and name) that an image related Event was reported for.
	// It contains only strings, so it is comparable and can be stored in a set.
	unpulledImage struct {
		Namespace string
		PodName   string
		Image     string
	}

	// failedImage is an unpulledImage extended with the message of the Event
	// that reported the pull failure.
	failedImage struct {
		unpulledImage
		Message string
	}
)
134
+
135
+ // analyzeEventsLookingForUnpulledOrFailedImages goes through and tries to match
136
+ // image related Events to find images that are still being pulled
137
+ // and images that failed to be pulled.
138
+ func analyzeEventsLookingForUnpulledOrFailedImages (existingPodsNames sets.Set [string ], pullingEvents , pulledEvents , failedEvents * corev1.EventList ) ([]unpulledImage , []failedImage ) {
139
+ getImageInfo := func (event corev1.Event ) (string , string , string ) {
140
+ pod := event .InvolvedObject .Name
141
+ ns := event .InvolvedObject .Namespace
142
+ img := strings .Split (event .Message , "\" " )[1 ]
143
+ return ns , pod , img
144
+ }
145
+
146
+ unpulledImages := sets .New [unpulledImage ]()
147
+
148
+ for _ , event := range pullingEvents .Items {
149
+ ns , pod , img := getImageInfo (event )
150
+ if ! existingPodsNames .Has (pod ) {
151
+ continue
152
+ }
153
+ unpulledImages .Insert (unpulledImage {Namespace : ns , PodName : pod , Image : img })
154
+ }
155
+
156
+ for _ , event := range pulledEvents .Items {
157
+ ns , pod , img := getImageInfo (event )
158
+ unpulledImages .Delete (unpulledImage {Namespace : ns , PodName : pod , Image : img })
159
+ }
160
+
161
+ failedImages := sets .New [failedImage ]()
162
+
163
+ for _ , event := range failedEvents .Items {
164
+ if ! strings .HasPrefix (event .Message , "Failed to pull image" ) {
165
+ continue
166
+ }
167
+ ns , pod , img := getImageInfo (event )
168
+ if ! existingPodsNames .Has (pod ) {
169
+ continue
170
+ }
171
+ unpulledImages .Delete (unpulledImage {Namespace : ns , PodName : pod , Image : img })
172
+
173
+ failedImages .Insert (failedImage {
174
+ unpulledImage : unpulledImage {Namespace : ns , PodName : pod , Image : img },
175
+ Message : event .Message ,
176
+ })
177
+ }
178
+
179
+ return unpulledImages .UnsortedList (), failedImages .UnsortedList ()
60
180
}
0 commit comments