Skip to content

Commit 9cc6039

Browse files
committed
OCPBUGS-1500: Added scaling strategies for TNF
- TwoNodeScalingStrategy is for core installer installations fo Two Node OpenShift with Fencing - DelayedTwoNodeScalingStrategy is for assisted installs of Two Node OpenShift with Fencing
1 parent 5bbe494 commit 9cc6039

32 files changed

+332
-85
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,5 @@ replace (
146146
)
147147

148148
replace github.com/openshift/library-go => github.com/benluddy/library-go v0.0.0-20250129150747-314ad28512db
149+
150+
replace github.com/openshift/api => github.com/eggfoobar/api v0.0.0-20250207054050-9a92c12ec7ba

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8
9191
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
9292
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
9393
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
94+
github.com/eggfoobar/api v0.0.0-20250207054050-9a92c12ec7ba h1:tzadVk7yPlwIK3s16JrjxrT/JgF2tGRS8A+nKllh+aQ=
95+
github.com/eggfoobar/api v0.0.0-20250207054050-9a92c12ec7ba/go.mod h1:yk60tHAmHhtVpJQo3TwVYq2zpuP70iJIFDCmeKMIzPw=
9496
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
9597
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
9698
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -321,8 +323,6 @@ github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM
321323
github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
322324
github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4=
323325
github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
324-
github.com/openshift/api v0.0.0-20250124212313-a770960d61e0 h1:dCvNfygMrPLVNQ06bpHXrxKfrXHiprO4+etHrRUqI8g=
325-
github.com/openshift/api v0.0.0-20250124212313-a770960d61e0/go.mod h1:yk60tHAmHhtVpJQo3TwVYq2zpuP70iJIFDCmeKMIzPw=
326326
github.com/openshift/build-machinery-go v0.0.0-20250102153059-e85a1a7ecb5c h1:6XcszPFZpan4qll5XbdLll7n1So3IsPn28aw2j1obMo=
327327
github.com/openshift/build-machinery-go v0.0.0-20250102153059-e85a1a7ecb5c/go.mod h1:8jcm8UPtg2mCAsxfqKil1xrmRMI3a+XU2TZ9fF8A7TE=
328328
github.com/openshift/client-go v0.0.0-20250125113824-8e1f0b8fa9a7 h1:4iliLcvr1P9EUMZgIaSNEKNQQzBn+L6PSequlFOuB6Q=

pkg/cmd/render/render.go

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,10 @@ func newTemplateData(opts *renderOpts) (*TemplateData, error) {
256256
}
257257

258258
// If bootstrap scaling strategy is delayed HA set annotation signal
259-
if templateData.BootstrapScalingStrategy == ceohelpers.DelayedHAScalingStrategy {
259+
if templateData.BootstrapScalingStrategy == ceohelpers.DelayedHAScalingStrategy ||
260+
templateData.BootstrapScalingStrategy == ceohelpers.DelayedTwoNodeScalingStrategy {
260261
templateData.NamespaceAnnotations = map[string]string{
261-
ceohelpers.DelayedHABootstrapScalingStrategyAnnotation: "",
262+
ceohelpers.DelayedBootstrapScalingStrategyAnnotation: "",
262263
}
263264
}
264265

@@ -721,26 +722,45 @@ func getInfrastructure(file string) (*configv1.Infrastructure, error) {
721722
}
722723

723724
func getBootstrapScalingStrategy(installConfig map[string]interface{}, delayedHAMarkerFile string) (ceohelpers.BootstrapScalingStrategy, error) {
724-
// Delayed HA strategy is set if marker file exists on disk.
725-
if _, err := os.Stat(delayedHAMarkerFile); err == nil {
726-
return ceohelpers.DelayedHAScalingStrategy, nil
727-
}
728-
729725
controlPlane, found := installConfig["controlPlane"].(map[string]interface{})
730726
if !found {
731727
return "", fmt.Errorf("unrecognized data structure in controlPlane field")
732728
}
733-
replicaCount, found := controlPlane["replicas"].(float64)
729+
cpReplicaCount, found := controlPlane["replicas"].(float64)
734730
if !found {
735731
return "", fmt.Errorf("unrecognized data structure in controlPlane replica field")
736732
}
737733

738734
// Bootstrap in place strategy when bootstrapInPlace root key exists in the install-config
739735
// and controlPlane replicas is 1.
740-
if _, found := installConfig["bootstrapInPlace"]; found && int(replicaCount) == 1 {
736+
if _, found := installConfig["bootstrapInPlace"]; found && int(cpReplicaCount) == 1 {
741737
return ceohelpers.BootstrapInPlaceStrategy, nil
742738
}
743739

740+
// Delayed HA strategy is set if marker file exists on disk.
741+
if _, err := os.Stat(delayedHAMarkerFile); err == nil {
742+
743+
// Handle two-node topologies; if an arbiter node is not present
744+
// then use DelayedTwoNodeScalingStrategy to allow bootstrap to proceed
745+
// without a third etcd member
746+
if int(cpReplicaCount) == 2 {
747+
arbiter, arbiterDefined := installConfig["aribiter"].(map[string]interface{})
748+
if !arbiterDefined {
749+
return ceohelpers.DelayedTwoNodeScalingStrategy, nil
750+
}
751+
752+
arbReplicaCount, arbReplicasDefined := arbiter["replicas"].(float64)
753+
if !arbReplicasDefined || arbReplicaCount < 1 {
754+
return ceohelpers.DelayedTwoNodeScalingStrategy, nil
755+
}
756+
}
757+
758+
// TODO check for SNO here?
759+
760+
// This should handle both delayed two-node with arbiter and delayed HA clusters
761+
return ceohelpers.DelayedHAScalingStrategy, nil
762+
}
763+
744764
// HA "default".
745765
return ceohelpers.HAScalingStrategy, nil
746766
}

pkg/cmd/render/render_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ func TestRenderScalingStrategyBootstrapInPlace(t *testing.T) {
298298
func TestRenderScalingStrategyDelayedHA(t *testing.T) {
299299
want := TemplateData{
300300
BootstrapScalingStrategy: ceohelpers.DelayedHAScalingStrategy,
301-
NamespaceAnnotations: map[string]string{ceohelpers.DelayedHABootstrapScalingStrategyAnnotation: ""},
301+
NamespaceAnnotations: map[string]string{ceohelpers.DelayedBootstrapScalingStrategyAnnotation: ""},
302302
}
303303
config := &testConfig{
304304
t: t,

pkg/etcdcli/health.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,16 @@ func IsQuorumFaultTolerant(memberHealth []healthCheck) bool {
227227
}
228228
healthyMembers := len(GetHealthyMemberNames(memberHealth))
229229
switch {
230+
// This case should never occur when this function is called by CheckSafeToScaleCluster
231+
// since this function is never called for the UnsafeScalingStrategy (which covers Single Node OpenShift)
232+
// and the TwoNodeScalingStrategy and DelayedTwoNodeScalingStrategy when the cluster has two etcd members
233+
// which is a special expection we make for TwoNodeOpenShift with fencing.
234+
//
235+
// It is also never triggered by the HAScalingStrategy and DelayedHAScalingStrategy because having less
236+
// than 3 healthy nodes violates these scaling strategies, which is checked before this function is called.
237+
//
238+
// The reason this is here is to ensure protection against 1 and 2 node membership if ever this function
239+
// is called directly.
230240
case totalMembers-quorum < 1:
231241
klog.Errorf("etcd cluster has quorum of %d which is not fault tolerant: %+v", quorum, memberHealth)
232242
return false

pkg/operator/bootstrapteardown/bootstrap_teardown_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,11 @@ func (c *BootstrapTeardownController) canRemoveEtcdBootstrap(ctx context.Context
194194
if len(members) < 4 {
195195
return false, hasBootstrap, bootstrapMemberID, nil
196196
}
197-
case ceohelpers.DelayedHAScalingStrategy:
197+
case ceohelpers.DelayedHAScalingStrategy, ceohelpers.TwoNodeScalingStrategy:
198198
if len(members) < 3 {
199199
return false, hasBootstrap, bootstrapMemberID, nil
200200
}
201-
case ceohelpers.UnsafeScalingStrategy:
201+
case ceohelpers.UnsafeScalingStrategy, ceohelpers.DelayedTwoNodeScalingStrategy:
202202
if len(members) < 2 {
203203
return false, hasBootstrap, bootstrapMemberID, nil
204204
}

pkg/operator/ceohelpers/bootstrap.go

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66

7+
configv1 "github.com/openshift/api/config/v1"
78
configv1listers "github.com/openshift/client-go/config/listers/config/v1"
89
"github.com/openshift/library-go/pkg/operator/bootstrap"
910
"github.com/openshift/library-go/pkg/operator/v1helpers"
@@ -35,6 +36,22 @@ const (
3536
// annotation to the openshift-etcd namesapce.
3637
DelayedHAScalingStrategy BootstrapScalingStrategy = "DelayedHAScalingStrategy"
3738

39+
// TwoNodeScalingStrategy means the etcd cluster will only be scaled up when at least
40+
// 2 nodes are available so that quorum is maintained at all times. This rule applies
41+
// during bootstrapping and the steady state.
42+
//
43+
// This strategy is used for deployments of Two Node OpenShift with Fencing.
44+
TwoNodeScalingStrategy BootstrapScalingStrategy = "TwoNodeScalingStrategy"
45+
46+
// DelayedTwoNodeScalingStrategy means that during bootstrapping, the etcd cluster will
47+
// be allowed to scale when at least 1 member is available (which is unsafe),
48+
// but after bootstrapping any further scaling will require 2 nodes in the same
49+
// way as TwoNodeScalingStrategy.
50+
//
51+
// This strategy is intended for deploys of Two Node OpenShift with Fencing via
52+
// the assisted or agent-based installers.
53+
DelayedTwoNodeScalingStrategy BootstrapScalingStrategy = "DelayedTwoNodeScalingStrategy"
54+
3855
// BootstrapInPlaceStrategy means that the bootstrap node will never exist
3956
// during the lifecycle of the cluster. Bootkube will run on a live iso
4057
// afterwards the node will pivot into the manifests generated during that
@@ -54,9 +71,18 @@ const (
5471
)
5572

5673
const (
57-
// DelayedHABootstrapScalingStrategyAnnotation is an annotation on the openshift-etcd
58-
// namespace which, if present indicates the DelayedHAScalingStrategy strategy
59-
// should be used.
74+
// DelayedBootstrapScalingStrategyAnnotation is an annotation on the openshift-etcd
75+
// namespace which, if present, indicates that one of the delayed scaling strategies
76+
// should be used. This is generally used by the assisted installer to ensure that
77+
// the bootstrap node can reboot into a cluster node.
78+
//
79+
// For HA clusters, this will be set to DelayedHAScalingStrategy.
80+
//
81+
// For Two Node OpenShift with Fencing, this is set to DelayedTwoNodeScalingStrategy.
82+
DelayedBootstrapScalingStrategyAnnotation = "openshift.io/delayed-bootstrap"
83+
84+
// DelayedHABootstrapScalingStrategyAnnotation performs the same function as the annotation
85+
// above, and is kept for backwards compatibility.
6086
DelayedHABootstrapScalingStrategyAnnotation = "openshift.io/delayed-ha-bootstrap"
6187
)
6288

@@ -78,17 +104,25 @@ func GetBootstrapScalingStrategy(staticPodClient v1helpers.StaticPodOperatorClie
78104
if err != nil {
79105
return strategy, fmt.Errorf("failed to get %s namespace: %w", operatorclient.TargetNamespace, err)
80106
}
107+
108+
// Check for both the delayed annotation and the legacy DelayedHABootrapScalingStrategyAnnotation
109+
_, hasDelayedAnnotation := etcdNamespace.Annotations[DelayedBootstrapScalingStrategyAnnotation]
81110
_, hasDelayedHAAnnotation := etcdNamespace.Annotations[DelayedHABootstrapScalingStrategyAnnotation]
111+
hasDelayedAnnotation = hasDelayedAnnotation || hasDelayedHAAnnotation
82112

83-
singleNode, err := IsSingleNodeTopology(infraLister)
113+
topology, err := GetControlPlaneTopology(infraLister)
84114
if err != nil {
85115
return strategy, fmt.Errorf("failed to get control plane topology: %w", err)
86116
}
87117

88118
switch {
89-
case isUnsupportedUnsafeEtcd || singleNode:
119+
case isUnsupportedUnsafeEtcd || topology == configv1.SingleReplicaTopologyMode:
90120
strategy = UnsafeScalingStrategy
91-
case hasDelayedHAAnnotation:
121+
case topology == configv1.DualReplicaTopologyMode && hasDelayedAnnotation:
122+
strategy = DelayedTwoNodeScalingStrategy
123+
case topology == configv1.DualReplicaTopologyMode && !hasDelayedAnnotation:
124+
strategy = TwoNodeScalingStrategy
125+
case hasDelayedAnnotation:
92126
strategy = DelayedHAScalingStrategy
93127
default:
94128
strategy = HAScalingStrategy
@@ -126,10 +160,10 @@ func CheckSafeToScaleCluster(
126160

127161
var minimumNodes int
128162
switch scalingStrategy {
129-
case HAScalingStrategy:
130-
minimumNodes = 3
131-
case DelayedHAScalingStrategy:
163+
case HAScalingStrategy, DelayedHAScalingStrategy:
132164
minimumNodes = 3
165+
case TwoNodeScalingStrategy, DelayedTwoNodeScalingStrategy:
166+
minimumNodes = 2
133167
default:
134168
return fmt.Errorf("CheckSafeToScaleCluster unrecognized scaling strategy %q", scalingStrategy)
135169
}
@@ -139,8 +173,19 @@ func CheckSafeToScaleCluster(
139173
return fmt.Errorf("CheckSafeToScaleCluster couldn't determine member health: %w", err)
140174
}
141175

176+
if len(memberHealth.GetHealthyMembers()) < minimumNodes {
177+
return fmt.Errorf("CheckSafeToScaleCluster found %d healthy member(s) out of the %d required by the %s",
178+
len(memberHealth.GetHealthyMembers()), minimumNodes, scalingStrategy)
179+
}
180+
181+
// Fault tolerance protection is only enforced by for HA topologies
182+
//
183+
// TwoNodeScalingStrategy and DelayedTwoNodeScalingStrategy are used by Two Node OpenShift with
184+
// Fencing (TNF), which protects etcd using a service called pacemaker that is running on the nodes.
185+
// This service will intercept the static pod rollout, have that member of etcd leave the cluster,
186+
// restart the static pod with the updates, and have it rejoin the cluster as a learner
142187
err = etcdcli.IsQuorumFaultTolerantErr(memberHealth)
143-
if err != nil {
188+
if err != nil && len(memberHealth) != 2 && !(scalingStrategy == TwoNodeScalingStrategy || scalingStrategy == DelayedTwoNodeScalingStrategy) {
144189
return err
145190
}
146191

0 commit comments

Comments
 (0)