Skip to content

Commit dd7309a

Browse files
Merge pull request #4302 from pmtk/go-healthcheck/deployments-daemonsets
USHIFT-4918 USHIFT-4920: microshift healthcheck - wait for workloads
2 parents 1caeae3 + 83bb8a6 commit dd7309a

14 files changed

+518
-283
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ require (
124124
github.com/karrick/godirwalk v1.17.0 // indirect
125125
github.com/libopenstorage/openstorage v1.0.0 // indirect
126126
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
127+
github.com/lithammer/dedent v1.1.0 // indirect
127128
github.com/mailru/easyjson v0.7.7 // indirect
128129
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible // indirect
129130
github.com/mitchellh/go-wordwrap v1.0.1 // indirect

packaging/greenboot/functions.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@ OCGET_OPT="--no-headers"
1111
OCGET_CMD="oc get ${OCCONFIG_OPT}"
1212
OCROLLOUT_CMD="oc rollout ${OCCONFIG_OPT}"
1313

14+
echo "--------------------"
15+
echo "DEPRECATION NOTICE:"
16+
echo "/usr/share/microshift/functions/greenboot.sh is now deprecated and will be removed in future release."
17+
echo "Planned removal: MicroShift 4.21"
18+
echo ""
19+
echo "As a replacement consider using 'microshift healthcheck --namespaces' command"
20+
echo "--------------------"
21+
echo ""
22+
1423
# Note about the output
1524
# This file runs as part of a systemd unit, greenboot-healthcheck. All of the
1625
# output is captured by journald, and in order to link it to the unit it

packaging/greenboot/microshift-running-check-gateway-api.sh

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,11 @@
88
set -eu -o pipefail
99

1010
SCRIPT_NAME=$(basename "$0")
11-
SCRIPT_PID=$$
12-
CHECK_DEPLOY_NS="openshift-gateway-api"
13-
LOG_POD_EVENTS=false
1411

1512
# Source the MicroShift health check functions library
1613
# shellcheck source=packaging/greenboot/functions.sh
1714
source /usr/share/microshift/functions/greenboot.sh
1815

19-
# Set the term handler to convert exit code to 1
20-
trap 'forced_termination' TERM SIGINT
21-
22-
# Set the exit handler to log the exit status
23-
trap 'log_script_exit' EXIT
24-
25-
# Handler that will be called when the script is terminated by sending TERM or
26-
# INT signals. To override default exit codes it forces returning 1 like the
27-
# rest of the error conditions throughout the health check.
28-
function forced_termination() {
29-
echo "Signal received, terminating."
30-
exit 1
31-
}
32-
33-
#
34-
# Main
35-
#
36-
3716
# Exit if the current user is not 'root'
3817
if [ "$(id -u)" -ne 0 ] ; then
3918
echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges"
@@ -45,22 +24,7 @@ echo "STARTED"
4524
# Print the boot variable status
4625
print_boot_status
4726

48-
# Exit if the MicroShift service is not enabled
49-
if [ "$(systemctl is-enabled microshift.service 2>/dev/null)" != "enabled" ] ; then
50-
echo "MicroShift service is not enabled. Exiting..."
51-
exit 0
52-
fi
53-
5427
# Set the wait timeout for the current check based on the boot counter
5528
WAIT_TIMEOUT_SECS=$(get_wait_timeout)
5629

57-
# Starting pod-specific checks
58-
# Log list of pods and their events on failure
59-
LOG_POD_EVENTS=true
60-
61-
# Wait for the Deployments to be ready
62-
echo "Waiting ${WAIT_TIMEOUT_SECS}s for '${CHECK_DEPLOY_NS}' Deployments to be ready"
63-
if ! wait_for "${WAIT_TIMEOUT_SECS}" namespace_deployment_ready ; then
64-
echo "Error: Timed out waiting for '${CHECK_DEPLOY_NS}' Deployments to be ready"
65-
exit 1
66-
fi
30+
/usr/bin/microshift healthcheck -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" --namespace openshift-gateway-api --deployments servicemesh-operator3,istiod-openshift-gateway-api

packaging/greenboot/microshift-running-check-multus.sh

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,11 @@
88
set -eu -o pipefail
99

1010
SCRIPT_NAME=$(basename "$0")
11-
SCRIPT_PID=$$
12-
CHECK_DAEMONSET_NS="openshift-multus"
13-
LOG_POD_EVENTS=false
1411

1512
# Source the MicroShift health check functions library
1613
# shellcheck source=packaging/greenboot/functions.sh
1714
source /usr/share/microshift/functions/greenboot.sh
1815

19-
# Set the term handler to convert exit code to 1
20-
trap 'forced_termination' TERM SIGINT
21-
22-
# Set the exit handler to log the exit status
23-
trap 'log_script_exit' EXIT
24-
25-
# Handler that will be called when the script is terminated by sending TERM or
26-
# INT signals. To override default exit codes it forces returning 1 like the
27-
# rest of the error conditions throughout the health check.
28-
function forced_termination() {
29-
echo "Signal received, terminating."
30-
exit 1
31-
}
32-
33-
#
34-
# Main
35-
#
36-
3716
# Exit if the current user is not 'root'
3817
if [ "$(id -u)" -ne 0 ] ; then
3918
echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges"
@@ -45,22 +24,7 @@ echo "STARTED"
4524
# Print the boot variable status
4625
print_boot_status
4726

48-
# Exit if the MicroShift service is not enabled
49-
if [ "$(systemctl is-enabled microshift.service 2>/dev/null)" != "enabled" ] ; then
50-
echo "MicroShift service is not enabled. Exiting..."
51-
exit 0
52-
fi
53-
5427
# Set the wait timeout for the current check based on the boot counter
5528
WAIT_TIMEOUT_SECS=$(get_wait_timeout)
5629

57-
# Starting pod-specific checks
58-
# Log list of pods and their events on failure
59-
LOG_POD_EVENTS=true
60-
61-
# Wait for the DaemonSets to be ready
62-
echo "Waiting ${WAIT_TIMEOUT_SECS}s for '${CHECK_DAEMONSET_NS}' DaemonSets to be ready"
63-
if ! wait_for "${WAIT_TIMEOUT_SECS}" namespace_daemonset_ready ; then
64-
echo "Error: Timed out waiting for '${CHECK_DAEMONSET_NS}' DaemonSets to be ready"
65-
exit 1
66-
fi
30+
/usr/bin/microshift healthcheck -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" --namespace openshift-multus --daemonsets multus,dhcp-daemon

packaging/greenboot/microshift-running-check-olm.sh

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,11 @@
88
set -eu -o pipefail
99

1010
SCRIPT_NAME=$(basename "$0")
11-
SCRIPT_PID=$$
12-
CHECK_DEPLOY_NS="openshift-operator-lifecycle-manager"
13-
LOG_POD_EVENTS=false
1411

1512
# Source the MicroShift health check functions library
1613
# shellcheck source=packaging/greenboot/functions.sh
1714
source /usr/share/microshift/functions/greenboot.sh
1815

19-
# Set the term handler to convert exit code to 1
20-
trap 'forced_termination' TERM SIGINT
21-
22-
# Set the exit handler to log the exit status
23-
trap 'log_script_exit' EXIT
24-
25-
# Handler that will be called when the script is terminated by sending TERM or
26-
# INT signals. To override default exit codes it forces returning 1 like the
27-
# rest of the error conditions throughout the health check.
28-
function forced_termination() {
29-
echo "Signal received, terminating."
30-
exit 1
31-
}
32-
33-
#
34-
# Main
35-
#
36-
3716
# Exit if the current user is not 'root'
3817
if [ "$(id -u)" -ne 0 ] ; then
3918
echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges"
@@ -45,22 +24,7 @@ echo "STARTED"
4524
# Print the boot variable status
4625
print_boot_status
4726

48-
# Exit if the MicroShift service is not enabled
49-
if [ "$(systemctl is-enabled microshift.service 2>/dev/null)" != "enabled" ] ; then
50-
echo "MicroShift service is not enabled. Exiting..."
51-
exit 0
52-
fi
53-
5427
# Set the wait timeout for the current check based on the boot counter
5528
WAIT_TIMEOUT_SECS=$(get_wait_timeout)
5629

57-
# Starting pod-specific checks
58-
# Log list of pods and their events on failure
59-
LOG_POD_EVENTS=true
60-
61-
# Wait for the deployments to be ready
62-
echo "Waiting ${WAIT_TIMEOUT_SECS}s for '${CHECK_DEPLOY_NS}' deployments to be ready"
63-
if ! wait_for "${WAIT_TIMEOUT_SECS}" namespace_deployment_ready ; then
64-
echo "Error: Timed out waiting for '${CHECK_DEPLOY_NS}' deployments to be ready"
65-
exit 1
66-
fi
30+
/usr/bin/microshift healthcheck -v=2 --timeout="${WAIT_TIMEOUT_SECS}s" --namespace openshift-operator-lifecycle-manager --deployments olm-operator,catalog-operator

packaging/greenboot/microshift-running-check.sh

Lines changed: 0 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,10 @@
11
#!/bin/bash
22
set -e
33

4-
SCRIPT_NAME=$(basename "$0")
5-
SCRIPT_PID=$$
6-
PODS_NS_LIST=(openshift-ovn-kubernetes openshift-service-ca openshift-ingress openshift-dns)
7-
PODS_CT_LIST=(2 1 1 2)
8-
LOG_POD_EVENTS=false
9-
104
# Source the MicroShift health check functions library
115
# shellcheck source=packaging/greenboot/functions.sh
126
source /usr/share/microshift/functions/greenboot.sh
137

14-
# Set the term handler to convert exit code to 1
15-
trap 'forced_termination' TERM SIGINT
16-
17-
# Set the exit handler to log the exit status
18-
trap 'log_script_exit' EXIT
19-
20-
# Handler that will be called when the script is terminated by sending TERM or
21-
# INT signals. To override default exit codes it forces returning 1 like the
22-
# rest of the error conditions throughout the health check.
23-
function forced_termination() {
24-
echo "Signal received, terminating."
25-
exit 1
26-
}
27-
28-
# Check preconditions for existence of lvms deployment.
29-
# Adapted from MicroShift code.
30-
#
31-
# args: None
32-
# return: 0 if lvms readiness should be checked, 1 otherwise
33-
function lvmsShouldBeDeployed() {
34-
if ! hash vgs 2>/dev/null; then
35-
return 1
36-
fi
37-
if [ -f /etc/microshift/lvmd.yaml ]; then
38-
return 0
39-
fi
40-
if ! lvmsDriverShouldExist; then
41-
return 1
42-
fi
43-
44-
local -r volume_groups=$(vgs --readonly --options=name --noheadings)
45-
local -r volume_groups_count=$(echo "${volume_groups}" | wc -w)
46-
if [ "${volume_groups_count}" -eq 0 ]; then
47-
return 1
48-
elif [ "${volume_groups_count}" -eq 1 ]; then
49-
return 0
50-
elif echo "${volume_groups}" | grep -qw "microshift"; then
51-
return 0
52-
else
53-
return 1
54-
fi
55-
}
56-
57-
# Check if MicroShift API 'readyz' and 'livez' health endpoints are OK
58-
#
59-
# args: None
60-
# return: 0 if all API health endpoints are OK, or 1 otherwise
61-
function microshift_health_endpoints_ok() {
62-
local -r check_rd=$(${OCGET_CMD} --raw='/readyz?verbose' | awk '$2 != "ok"')
63-
local -r check_lv=$(${OCGET_CMD} --raw='/livez?verbose' | awk '$2 != "ok"')
64-
65-
[ "${check_rd}" != "readyz check passed" ] && return 1
66-
[ "${check_lv}" != "livez check passed" ] && return 1
67-
return 0
68-
}
69-
70-
# Check if any MicroShift pods are in the 'Running' status
71-
#
72-
# args: None
73-
# return: 0 if any pods are in the 'Running' status, or 1 otherwise
74-
function any_pods_running() {
75-
local -r count=$(${OCGET_CMD} pods ${OCGET_OPT} -A 2>/dev/null | awk '$4~/Running/' | wc -l)
76-
77-
[ "${count}" -gt 0 ] && return 0
78-
return 1
79-
}
80-
81-
#
82-
# Main
83-
#
84-
858
# Exit if the current user is not 'root'
869
if [ "$(id -u)" -ne 0 ] ; then
8710
echo "The '${SCRIPT_NAME}' script must be run with the 'root' user privileges"
@@ -93,102 +16,7 @@ echo "STARTED"
9316
# Print the boot variable status
9417
print_boot_status
9518

96-
# Exit if the MicroShift service is not enabled
97-
# TODO: Remove when `microshift healthcheck` is complete.
98-
if [ "$(systemctl is-enabled microshift.service 2>/dev/null)" != "enabled" ] ; then
99-
echo "MicroShift service is not enabled. Exiting..."
100-
exit 0
101-
fi
102-
10319
# Set the wait timeout for the current check based on the boot counter
10420
WAIT_TIMEOUT_SECS=$(get_wait_timeout)
10521

10622
/usr/bin/microshift healthcheck -v=2 --timeout="${WAIT_TIMEOUT_SECS}s"
107-
108-
# Wait for MicroShift API health endpoints to be OK
109-
echo "Waiting ${WAIT_TIMEOUT_SECS}s for MicroShift API health endpoints to be OK"
110-
if ! wait_for "${WAIT_TIMEOUT_SECS}" microshift_health_endpoints_ok ; then
111-
log_failure_cmd "health-readyz" "${OCGET_CMD} --raw=/readyz?verbose"
112-
log_failure_cmd "health-livez" "${OCGET_CMD} --raw=/livez?verbose"
113-
114-
echo "Error: Timed out waiting for MicroShift API health endpoints to be OK"
115-
exit 1
116-
fi
117-
118-
if lvmsShouldBeDeployed; then
119-
PODS_NS_LIST+=(openshift-storage)
120-
PODS_CT_LIST+=(2)
121-
fi
122-
declare -a csi_components=('csi-snapshot-controller' 'csi-snapshot-webhook')
123-
csi_pods_ct=0
124-
for csi_c in "${csi_components[@]}"; do
125-
if csiComponentShouldBeDeployed "${csi_c}"; then
126-
(( csi_pods_ct += 1 ))
127-
fi
128-
done
129-
if [ ${csi_pods_ct} -gt 0 ]; then
130-
PODS_NS_LIST+=(kube-system)
131-
PODS_CT_LIST+=("${csi_pods_ct}")
132-
fi
133-
134-
# Starting pod-specific checks
135-
# Log list of pods and their events on failure
136-
LOG_POD_EVENTS=true
137-
138-
# Wait for any pods to enter running state
139-
echo "Waiting ${WAIT_TIMEOUT_SECS}s for any pods to be running"
140-
if ! wait_for "${WAIT_TIMEOUT_SECS}" any_pods_running ; then
141-
echo "Error: Timed out waiting for any MicroShift pod to be running"
142-
exit 1
143-
fi
144-
145-
# Wait for MicroShift core pod images to be downloaded
146-
for i in "${!PODS_NS_LIST[@]}"; do
147-
CHECK_PODS_NS=${PODS_NS_LIST[${i}]}
148-
149-
echo "Waiting ${WAIT_TIMEOUT_SECS}s for pod image(s) from the '${CHECK_PODS_NS}' namespace to be downloaded"
150-
if ! wait_for "${WAIT_TIMEOUT_SECS}" namespace_images_downloaded; then
151-
echo "Error: Timed out waiting for pod image(s) from the '${CHECK_PODS_NS}' namespace to be downloaded"
152-
exit 1
153-
fi
154-
done
155-
156-
# Wait for MicroShift core pods to enter ready state
157-
for i in "${!PODS_NS_LIST[@]}"; do
158-
CHECK_PODS_NS=${PODS_NS_LIST[${i}]}
159-
CHECK_PODS_CT=${PODS_CT_LIST[${i}]}
160-
161-
echo "Waiting ${WAIT_TIMEOUT_SECS}s for ${CHECK_PODS_CT} pod(s) from the '${CHECK_PODS_NS}' namespace to be in 'Ready' state"
162-
if ! wait_for "${WAIT_TIMEOUT_SECS}" namespace_pods_ready; then
163-
echo "Error: Timed out waiting for ${CHECK_PODS_CT} pod(s) in the '${CHECK_PODS_NS}' namespace to be in 'Ready' state"
164-
exit 1
165-
fi
166-
done
167-
168-
# Verify that MicroShift core pods are not restarting
169-
declare -A pid2name
170-
for i in "${!PODS_NS_LIST[@]}"; do
171-
CHECK_PODS_NS=${PODS_NS_LIST[${i}]}
172-
173-
echo "Checking pod restart count in the '${CHECK_PODS_NS}' namespace"
174-
namespace_pods_not_restarting "${CHECK_PODS_NS}" &
175-
pid=$!
176-
177-
pid2name["${pid}"]="${CHECK_PODS_NS}"
178-
done
179-
180-
# Wait for the restart check functions to complete, printing errors in case of a failure
181-
check_failed=false
182-
for pid in "${!pid2name[@]}"; do
183-
if ! wait "${pid}" ; then
184-
check_failed=true
185-
186-
name=${pid2name["${pid}"]}
187-
echo "Error: Pods are restarting too frequently in the '${name}' namespace"
188-
fi
189-
done
190-
191-
# Exit with an error code if the pod restart check failed
192-
if ${check_failed} ; then
193-
exit 1
194-
fi

0 commit comments

Comments
 (0)