4
4
"context"
5
5
"fmt"
6
6
7
+ configv1 "github.com/openshift/api/config/v1"
7
8
configv1listers "github.com/openshift/client-go/config/listers/config/v1"
8
9
"github.com/openshift/library-go/pkg/operator/bootstrap"
9
10
"github.com/openshift/library-go/pkg/operator/v1helpers"
@@ -35,6 +36,22 @@ const (
35
36
// annotation to the openshift-etcd namespace.
36
37
DelayedHAScalingStrategy BootstrapScalingStrategy = "DelayedHAScalingStrategy"
37
38
39
+ // TwoNodeScalingStrategy means the etcd cluster will only be scaled up when at least
40
+ // 2 nodes are available so that quorum is maintained at all times. This rule applies
41
+ // during bootstrapping and the steady state.
42
+ //
43
+ // This strategy is used for deployments of Two Node OpenShift with Fencing.
44
+ TwoNodeScalingStrategy BootstrapScalingStrategy = "TwoNodeScalingStrategy"
45
+
46
+ // DelayedTwoNodeScalingStrategy means that during bootstrapping, the etcd cluster will
47
+ // be allowed to scale when at least 1 member is available (which is unsafe),
48
+ // but after bootstrapping any further scaling will require 2 nodes in the same
49
+ // way as TwoNodeScalingStrategy.
50
+ //
51
+ // This strategy is intended for deploys of Two Node OpenShift with Fencing via
52
+ // the assisted or agent-based installers.
53
+ DelayedTwoNodeScalingStrategy BootstrapScalingStrategy = "DelayedTwoNodeScalingStrategy"
54
+
38
55
// BootstrapInPlaceStrategy means that the bootstrap node will never exist
39
56
// during the lifecycle of the cluster. Bootkube will run on a live iso
40
57
// afterwards the node will pivot into the manifests generated during that
@@ -54,9 +71,18 @@ const (
54
71
)
55
72
56
73
const (
57
- // DelayedHABootstrapScalingStrategyAnnotation is an annotation on the openshift-etcd
58
- // namespace which, if present indicates the DelayedHAScalingStrategy strategy
59
- // should be used.
74
+ // DelayedBootstrapScalingStrategyAnnotation is an annotation on the openshift-etcd
75
+ // namespace which, if present, indicates that one of the delayed scaling strategies
76
+ // should be used. This is generally used by the assisted installer to ensure that
77
+ // the bootstrap node can reboot into a cluster node.
78
+ //
79
+ // For HA clusters, this will be set to DelayedHAScalingStrategy.
80
+ //
81
+ // For Two Node OpenShift with Fencing, this is set to DelayedTwoNodeScalingStrategy.
82
+ DelayedBootstrapScalingStrategyAnnotation = "openshift.io/delayed-bootstrap"
83
+
84
+ // DelayedHABootstrapScalingStrategyAnnotation performs the same function as the annotation
85
+ // above, and is kept for backwards compatibility.
60
86
DelayedHABootstrapScalingStrategyAnnotation = "openshift.io/delayed-ha-bootstrap"
61
87
)
62
88
@@ -78,17 +104,25 @@ func GetBootstrapScalingStrategy(staticPodClient v1helpers.StaticPodOperatorClie
78
104
if err != nil {
79
105
return strategy , fmt .Errorf ("failed to get %s namespace: %w" , operatorclient .TargetNamespace , err )
80
106
}
107
+
108
+ // Check for both the delayed annotation and the legacy DelayedHABootstrapScalingStrategyAnnotation
109
+ _ , hasDelayedAnnotation := etcdNamespace .Annotations [DelayedBootstrapScalingStrategyAnnotation ]
81
110
_ , hasDelayedHAAnnotation := etcdNamespace .Annotations [DelayedHABootstrapScalingStrategyAnnotation ]
111
+ hasDelayedAnnotation = hasDelayedAnnotation || hasDelayedHAAnnotation
82
112
83
- singleNode , err := IsSingleNodeTopology (infraLister )
113
+ topology , err := GetControlPlaneTopology (infraLister )
84
114
if err != nil {
85
115
return strategy , fmt .Errorf ("failed to get control plane topology: %w" , err )
86
116
}
87
117
88
118
switch {
89
- case isUnsupportedUnsafeEtcd || singleNode :
119
+ case isUnsupportedUnsafeEtcd || topology == configv1 . SingleReplicaTopologyMode :
90
120
strategy = UnsafeScalingStrategy
91
- case hasDelayedHAAnnotation :
121
+ case topology == configv1 .DualReplicaTopologyMode && hasDelayedAnnotation :
122
+ strategy = DelayedTwoNodeScalingStrategy
123
+ case topology == configv1 .DualReplicaTopologyMode && ! hasDelayedAnnotation :
124
+ strategy = TwoNodeScalingStrategy
125
+ case hasDelayedAnnotation :
92
126
strategy = DelayedHAScalingStrategy
93
127
default :
94
128
strategy = HAScalingStrategy
@@ -126,10 +160,10 @@ func CheckSafeToScaleCluster(
126
160
127
161
var minimumNodes int
128
162
switch scalingStrategy {
129
- case HAScalingStrategy :
130
- minimumNodes = 3
131
- case DelayedHAScalingStrategy :
163
+ case HAScalingStrategy , DelayedHAScalingStrategy :
132
164
minimumNodes = 3
165
+ case TwoNodeScalingStrategy , DelayedTwoNodeScalingStrategy :
166
+ minimumNodes = 2
133
167
default :
134
168
return fmt .Errorf ("CheckSafeToScaleCluster unrecognized scaling strategy %q" , scalingStrategy )
135
169
}
@@ -139,8 +173,19 @@ func CheckSafeToScaleCluster(
139
173
return fmt .Errorf ("CheckSafeToScaleCluster couldn't determine member health: %w" , err )
140
174
}
141
175
176
+ if len (memberHealth .GetHealthyMembers ()) < minimumNodes {
177
+ return fmt .Errorf ("CheckSafeToScaleCluster found %d healthy member(s) out of the %d required by the %s" ,
178
+ len (memberHealth .GetHealthyMembers ()), minimumNodes , scalingStrategy )
179
+ }
180
+
181
+ // Fault tolerance protection is only enforced for HA topologies
182
+ //
183
+ // TwoNodeScalingStrategy and DelayedTwoNodeScalingStrategy are used by Two Node OpenShift with
184
+ // Fencing (TNF), which protects etcd using a service called pacemaker that is running on the nodes.
185
+ // This service will intercept the static pod rollout, have that member of etcd leave the cluster,
186
+ // restart the static pod with the updates, and have it rejoin the cluster as a learner
142
187
err = etcdcli .IsQuorumFaultTolerantErr (memberHealth )
143
- if err != nil {
188
+ if err != nil && len ( memberHealth ) != 2 && ! ( scalingStrategy == TwoNodeScalingStrategy || scalingStrategy == DelayedTwoNodeScalingStrategy ) {
144
189
return err
145
190
}
146
191
0 commit comments