From 87d7018d3bc7c55a8f61edb1f6c39218e9521341 Mon Sep 17 00:00:00 2001 From: Enxebre Date: Tue, 26 Feb 2019 17:20:42 +0100 Subject: [PATCH 1/2] Improve logging messages --- cmd/machine-api-operator/helpers.go | 4 +-- cmd/machine-api-operator/start.go | 2 +- cmd/nodelink-controller/main.go | 42 ++++++++++++++--------------- pkg/operator/operator.go | 22 ++++++--------- pkg/operator/status.go | 10 ++++--- pkg/operator/sync.go | 20 ++++++-------- 6 files changed, 47 insertions(+), 53 deletions(-) diff --git a/cmd/machine-api-operator/helpers.go b/cmd/machine-api-operator/helpers.go index 813b3f8b53..386c1e95d2 100644 --- a/cmd/machine-api-operator/helpers.go +++ b/cmd/machine-api-operator/helpers.go @@ -7,7 +7,7 @@ import ( "github.com/golang/glog" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/kubernetes/scheme" @@ -41,7 +41,7 @@ func CreateResourceLock(cb *ClientBuilder, componentNamespace, componentName str id, err := os.Hostname() if err != nil { - glog.Fatalf("error creating lock: %v", err) + glog.Fatalf("Error creating lock: %v", err) } // add a uniquifier so that two processes on the same host don't accidentally both become active diff --git a/cmd/machine-api-operator/start.go b/cmd/machine-api-operator/start.go index b8bc7f853c..11cc336584 100644 --- a/cmd/machine-api-operator/start.go +++ b/cmd/machine-api-operator/start.go @@ -67,7 +67,7 @@ func runStartCmd(cmd *cobra.Command, args []string) { Callbacks: leaderelection.LeaderCallbacks{ OnStartedLeading: run, OnStoppedLeading: func() { - glog.Fatalf("leaderelection lost") + glog.Fatalf("Leader election lost") }, }, }) diff --git a/cmd/nodelink-controller/main.go b/cmd/nodelink-controller/main.go index 014decc3ba..e07fe005a0 100644 --- a/cmd/nodelink-controller/main.go +++ b/cmd/nodelink-controller/main.go @@ -136,7 +136,7 @@ type Controller struct { func (c *Controller) addNode(obj interface{}) { node := obj.(*corev1.Node) - glog.V(3).Infof("adding node: %s", node.Name) + glog.V(3).Infof("Adding node: %q", node.Name) c.enqueueNode(node) } @@ -144,7 +144,7 @@ func (c *Controller) updateNode(old, cur interface{}) { //oldNode := old.(*corev1.Node) curNode := cur.(*corev1.Node) - glog.V(3).Infof("updating node: %s", curNode.Name) + glog.V(3).Infof("Updating node: %q", curNode.Name) c.enqueueNode(curNode) } @@ -164,7 +164,7 @@ func (c *Controller) deleteNode(obj interface{}) { } } - glog.V(3).Infof("deleting node") + glog.V(3).Infof("Deleting node") c.enqueueNode(node) } @@ -177,7 +177,7 @@ func (c *Controller) addMachine(obj interface{}) { for _, a := range machine.Status.Addresses { // Use the internal IP to look for matches: if a.Type == corev1.NodeInternalIP { - glog.V(3).Infof("adding machine %s into machineAddress list for %s", machine.Name, a.Address) + glog.V(3).Infof("Adding machine %q into machineAddress list for %q", machine.Name, a.Address) c.machineAddress[a.Address] = machine break } @@ -194,7 +194,7 @@ func (c *Controller) updateMachine(old, cur interface{}) { // Use the internal IP to look for matches: if a.Type == corev1.NodeInternalIP { c.machineAddress[a.Address] = machine - glog.V(3).Infof("updating machine addresses list. Machine: %s, address: %s", machine.Name, a.Address) + glog.V(3).Infof("Updating machine addresses list. Machine: %q, address: %q", machine.Name, a.Address) break } } @@ -213,8 +213,7 @@ func (c *Controller) deleteMachine(obj interface{}) { break } } - - glog.V(3).Infof("delete obsolete machines from machine adressses list") + glog.V(3).Infof("Delete obsolete machines from machine addresses list") } // WaitForCacheSync is a wrapper around cache.WaitForCacheSync that generates log messages @@ -307,9 +306,9 @@ func (c *Controller) handleErr(err error, key interface{}) { // This function is not meant to be invoked concurrently with the same key. func (c *Controller) syncNode(key string) error { startTime := time.Now() - glog.V(3).Infof("syncing node") + glog.V(3).Infof("Syncing node") defer func() { - glog.V(3).Infof("finished syncing node, duration: %s", time.Now().Sub(startTime)) + glog.V(3).Infof("Finished syncing node, duration: %s", time.Now().Sub(startTime)) }() _, _, err := cache.SplitMetaNamespaceKey(key) @@ -319,7 +318,7 @@ func (c *Controller) syncNode(key string) error { node, err := c.nodeLister.Get(key) if errors.IsNotFound(err) { - glog.Info("node has been deleted") + glog.Infof("Error syncing, Node %s has been deleted", key) return nil } if err != nil { @@ -337,14 +336,14 @@ func (c *Controller) processNode(node *corev1.Node) error { var err error namespace, machineName, err := cache.SplitMetaNamespaceKey(machineKey) if err != nil { - glog.Infof("machine annotation format is incorrect %v: %v\n", machineKey, err) + glog.Infof("Error processing node %q. Machine annotation format is incorrect %q: %v", node.Name, machineKey, err) return err } matchingMachine, err = c.machinesLister.Machines(namespace).Get(machineName) // If machine matching annotation is not found, we'll still try to find one via IP matching: if err != nil { if errors.IsNotFound(err) { - glog.Warning("machine matching node has been deleted, will attempt to find new machine by IP") + glog.Warningf("Machine %q associated to node %q has been deleted, will attempt to find new machine by IP", machineKey, node.Name) } else { return err } @@ -361,11 +360,11 @@ func (c *Controller) processNode(node *corev1.Node) error { } } if nodeInternalIP == "" { - glog.Warning("unable to find InternalIP for node") - return fmt.Errorf("unable to find InternalIP for node: %s", node.Name) + glog.Warningf("Unable to find InternalIP for node %q", node.Name) + return fmt.Errorf("unable to find InternalIP for node: %q", node.Name) } - glog.V(3).Infof("searching machine cache for IP match for node") + glog.V(4).Infof("Searching machine cache for IP match for node %q", node.Name) c.machineAddressMux.Lock() machine, found := c.machineAddress[nodeInternalIP] c.machineAddressMux.Unlock() @@ -376,10 +375,11 @@ func (c *Controller) processNode(node *corev1.Node) error { } if matchingMachine == nil { - glog.Warning("no matching machine found for node") - return fmt.Errorf("no matching machine found for node: %s", node.Name) + glog.Warning("No machine was found for node %q", node.Name) + return fmt.Errorf("no machine was found for node: %q", node.Name) } + glog.V(3).Infof("Found machine %s for node %s", machineKey, node.Name) modNode := node.DeepCopy() modNode.Annotations[machineAnnotationKey] = fmt.Sprintf("%s/%s", matchingMachine.Namespace, matchingMachine.Name) @@ -388,17 +388,17 @@ func (c *Controller) processNode(node *corev1.Node) error { } for k, v := range matchingMachine.Spec.Labels { - glog.V(3).Infof("copying label %s = %s", k, v) + glog.V(3).Infof("Copying label %s = %s", k, v) modNode.Labels[k] = v } addTaintsToNode(modNode, matchingMachine) if !reflect.DeepEqual(node, modNode) { - glog.V(3).Infof("node has changed, updating") + glog.V(3).Infof("Node %q has changed, updating", modNode.Name) _, err := c.kubeClient.CoreV1().Nodes().Update(modNode) if err != nil { - glog.Errorf("error updating node: %v", err) + glog.Errorf("Error updating node: %v", err) return err } } @@ -410,7 +410,7 @@ func (c *Controller) processNode(node *corev1.Node) error { // However, we believe many components can directly taint a node and there is no direct source of truth that should enforce a single writer of taints func addTaintsToNode(node *corev1.Node, machine *mapiv1.Machine) { for _, mTaint := range machine.Spec.Taints { - glog.V(3).Infof("machine taint: %v", mTaint) + glog.V(3).Infof("Adding taint %v from machine %q to node %q", mTaint, machine.Name, node.Name) alreadyPresent := false for _, nTaint := range node.Spec.Taints { if nTaint.Key == mTaint.Key && nTaint.Effect == mTaint.Effect { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 082bf40415..486380d223 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -103,15 +103,15 @@ func (optr *Operator) Run(workers int, stopCh <-chan struct{}) { defer utilruntime.HandleCrash() defer optr.queue.ShutDown() - glog.Info("Starting MachineAPIOperator") - defer glog.Info("Shutting down MachineAPIOperator") + glog.Info("Starting Machine API Operator") + defer glog.Info("Shutting down Machine API Operator") if !cache.WaitForCacheSync(stopCh, optr.deployListerSynced) { - glog.Error("failed to sync caches") + glog.Error("Failed to sync caches") return } - glog.Info("Synched up caches") + glog.Info("Synced up caches") for i := 0; i < workers; i++ { go wait.Until(optr.worker, time.Second, stopCh) } @@ -134,14 +134,13 @@ func (optr *Operator) worker() { } func (optr *Operator) processNextWorkItem() bool { - glog.V(4).Info("processing next work item") key, quit := optr.queue.Get() if quit { return false } defer optr.queue.Done(key) - glog.V(4).Infof("processing key %s", key) + glog.V(4).Infof("Processing key %s", key) err := optr.syncHandler(key.(string)) optr.handleErr(err, key) @@ -150,22 +149,18 @@ func (optr *Operator) processNextWorkItem() bool { func (optr *Operator) handleErr(err error, key interface{}) { if err == nil { - //TODO: set operator Done. - optr.queue.Forget(key) return } - //TODO: set operator degraded. - if optr.queue.NumRequeues(key) < maxRetries { - glog.V(2).Infof("Error syncing operator %v: %v", key, err) + glog.V(1).Infof("Error syncing operator %v: %v", key, err) optr.queue.AddRateLimited(key) return } utilruntime.HandleError(err) - glog.V(2).Infof("Dropping operator %q out of the queue: %v", key, err) + glog.V(1).Infof("Dropping operator %q out of the queue: %v", key, err) optr.queue.Forget(key) } @@ -176,10 +171,9 @@ func (optr *Operator) sync(key string) error { glog.V(4).Infof("Finished syncing operator %q (%v)", key, time.Since(startTime)) }() - glog.Infof("Getting operator config using kubeclient") operatorConfig, err := optr.maoConfigFromInstallConfig() if err != nil { - glog.Errorf("failed getting operator config: %v", err) + glog.Errorf("Failed getting operator config: %v", err) return err } diff --git a/pkg/operator/status.go b/pkg/operator/status.go index d8e2d9d565..b91decd80a 100644 --- a/pkg/operator/status.go +++ b/pkg/operator/status.go @@ -35,15 +35,17 @@ func (optr *Operator) statusProgressing() error { desiredVersions := optr.operandVersions currentVersions, err := optr.getCurrentVersions() if err != nil { - glog.Errorf("error getting current versions: %v", err) + glog.Errorf("Error getting operator current versions: %v", err) return err } var isProgressing osconfigv1.ConditionStatus var message string if !reflect.DeepEqual(desiredVersions, currentVersions) { + glog.V(2).Info("Syncing status: progressing") message = fmt.Sprintf("Progressing towards %s", optr.printOperandVersions()) isProgressing = osconfigv1.ConditionTrue } else { + glog.V(2).Info("Syncing status: re-syncing") message = fmt.Sprintf("Running resync for %s", optr.printOperandVersions()) isProgressing = osconfigv1.ConditionFalse } @@ -67,7 +69,7 @@ func (optr *Operator) statusProgressing() error { co, err := optr.getOrCreateClusterOperator() if err != nil { - glog.Errorf("failed to get or create Cluster Operator: %v", err) + glog.Errorf("Failed to get or create Cluster Operator: %v", err) return err } return optr.syncStatus(co, conds) @@ -99,6 +101,7 @@ func (optr *Operator) statusAvailable() error { return err } co.Status.Versions = optr.operandVersions + glog.V(2).Info("Syncing status: available") return optr.syncStatus(co, conds) } @@ -110,7 +113,7 @@ func (optr *Operator) statusFailing(error string) error { desiredVersions := optr.operandVersions currentVersions, err := optr.getCurrentVersions() if err != nil { - glog.Errorf("error getting current versions: %v", err) + glog.Errorf("Error getting current versions: %v", err) return err } @@ -142,6 +145,7 @@ func (optr *Operator) statusFailing(error string) error { if err != nil { return err } + glog.V(2).Info("Syncing status: failing") return optr.syncStatus(co, conds) } diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 553e5827d2..c85ecdcd02 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -22,10 +22,8 @@ const ( ) func (optr *Operator) syncAll(config OperatorConfig) error { - glog.Infof("Syncing ClusterOperatorStatus") - if err := optr.statusProgressing(); err != nil { - glog.Errorf("Error synching ClusterOperatorStatus: %v", err) + glog.Errorf("Error syncing ClusterOperatorStatus: %v", err) return fmt.Errorf("error syncing ClusterOperatorStatus: %v", err) } @@ -33,17 +31,15 @@ func (optr *Operator) syncAll(config OperatorConfig) error { if err := optr.statusFailing(err.Error()); err != nil { // Just log the error here. We still want to // return the outer error. - glog.Errorf("Error synching ClusterOperatorStatus: %v", err) + glog.Errorf("Error syncing ClusterOperatorStatus: %v", err) } - - glog.Errorf("Failed sync-up cluster api controller: %v", err) + glog.Errorf("Error syncing cluster api controller: %v", err) return err } - - glog.Info("Synched up cluster api controller") + glog.V(3).Info("Synced up all components") if err := optr.statusAvailable(); err != nil { - glog.Errorf("Error synching ClusterOperatorStatus: %v", err) + glog.Errorf("Error syncing ClusterOperatorStatus: %v", err) return fmt.Errorf("error syncing ClusterOperatorStatus: %v", err) } @@ -78,18 +74,18 @@ func (optr *Operator) waitForDeploymentRollout(resource *appsv1.Deployment) erro if err != nil { // Do not return error here, as we could be updating the API Server itself, in which case we // want to continue waiting. - glog.Errorf("error getting Deployment %s during rollout: %v", resource.Name, err) + glog.Errorf("Error getting Deployment %q during rollout: %v", resource.Name, err) return false, nil } if d.DeletionTimestamp != nil { - return false, fmt.Errorf("deployment %s is being deleted", resource.Name) + return false, fmt.Errorf("deployment %q is being deleted", resource.Name) } if d.Generation <= d.Status.ObservedGeneration && d.Status.UpdatedReplicas == d.Status.Replicas && d.Status.UnavailableReplicas == 0 { return true, nil } - glog.V(4).Infof("Deployment %s is not ready. status: (replicas: %d, updated: %d, ready: %d, unavailable: %d)", d.Name, d.Status.Replicas, d.Status.UpdatedReplicas, d.Status.ReadyReplicas, d.Status.UnavailableReplicas) + glog.V(4).Infof("Deployment %q is not ready. status: (replicas: %d, updated: %d, ready: %d, unavailable: %d)", d.Name, d.Status.Replicas, d.Status.UpdatedReplicas, d.Status.ReadyReplicas, d.Status.UnavailableReplicas) return false, nil }) } From a5b6fb5972cc8564d5fa3e201edafefdb53c55e9 Mon Sep 17 00:00:00 2001 From: Enxebre Date: Wed, 27 Feb 2019 10:49:10 +0100 Subject: [PATCH 2/2] Record events for cluster operator status --- pkg/operator/status.go | 16 +++++++++++----- pkg/operator/status_test.go | 5 ++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pkg/operator/status.go b/pkg/operator/status.go index b91decd80a..49b82f4d68 100644 --- a/pkg/operator/status.go +++ b/pkg/operator/status.go @@ -5,6 +5,8 @@ import ( "reflect" "strings" + v1 "k8s.io/api/core/v1" + "github.com/golang/glog" osconfigv1 "github.com/openshift/api/config/v1" @@ -39,10 +41,18 @@ func (optr *Operator) statusProgressing() error { return err } var isProgressing osconfigv1.ConditionStatus + + co, err := optr.getOrCreateClusterOperator() + if err != nil { + glog.Errorf("Failed to get or create Cluster Operator: %v", err) + return err + } + var message string if !reflect.DeepEqual(desiredVersions, currentVersions) { glog.V(2).Info("Syncing status: progressing") message = fmt.Sprintf("Progressing towards %s", optr.printOperandVersions()) + optr.eventRecorder.Eventf(co, v1.EventTypeNormal, "Status upgrade", message) isProgressing = osconfigv1.ConditionTrue } else { glog.V(2).Info("Syncing status: re-syncing") @@ -67,11 +77,6 @@ func (optr *Operator) statusProgressing() error { }, } - co, err := optr.getOrCreateClusterOperator() - if err != nil { - glog.Errorf("Failed to get or create Cluster Operator: %v", err) - return err - } return optr.syncStatus(co, conds) } @@ -145,6 +150,7 @@ func (optr *Operator) statusFailing(error string) error { if err != nil { return err } + optr.eventRecorder.Eventf(co, v1.EventTypeWarning, "Status failing", error) glog.V(2).Info("Syncing status: failing") return optr.syncStatus(co, conds) } diff --git a/pkg/operator/status_test.go b/pkg/operator/status_test.go index 6c83c4e74a..18b34b4c4e 100644 --- a/pkg/operator/status_test.go +++ b/pkg/operator/status_test.go @@ -1,11 +1,13 @@ package operator import ( + "testing" + osconfigv1 "github.com/openshift/api/config/v1" fakeconfigclientset "github.com/openshift/client-go/config/clientset/versioned/fake" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "testing" + "k8s.io/client-go/tools/record" ) func TestPrintOperandVersions(t *testing.T) { @@ -61,6 +63,7 @@ func TestOperatorStatusProgressing(t *testing.T) { Version: "1.0", }, }, + eventRecorder: record.NewFakeRecorder(5), } for i, tc := range tCases { co := &osconfigv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}}