Skip to content

Commit f6bf5d8

Browse files
committed
OCPBUGS-1500: Added scaling strategies for TNF
1 parent 5bbe494 commit f6bf5d8

29 files changed

+162
-58
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,5 @@ replace (
146146
)
147147

148148
replace github.com/openshift/library-go => github.com/benluddy/library-go v0.0.0-20250129150747-314ad28512db
149+
150+
replace github.com/openshift/api => github.com/eggfoobar/api v0.0.0-20250207054050-9a92c12ec7ba

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8
9191
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
9292
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
9393
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
94+
github.com/eggfoobar/api v0.0.0-20250207054050-9a92c12ec7ba h1:tzadVk7yPlwIK3s16JrjxrT/JgF2tGRS8A+nKllh+aQ=
95+
github.com/eggfoobar/api v0.0.0-20250207054050-9a92c12ec7ba/go.mod h1:yk60tHAmHhtVpJQo3TwVYq2zpuP70iJIFDCmeKMIzPw=
9496
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
9597
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
9698
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -321,8 +323,6 @@ github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM
321323
github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
322324
github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4=
323325
github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
324-
github.com/openshift/api v0.0.0-20250124212313-a770960d61e0 h1:dCvNfygMrPLVNQ06bpHXrxKfrXHiprO4+etHrRUqI8g=
325-
github.com/openshift/api v0.0.0-20250124212313-a770960d61e0/go.mod h1:yk60tHAmHhtVpJQo3TwVYq2zpuP70iJIFDCmeKMIzPw=
326326
github.com/openshift/build-machinery-go v0.0.0-20250102153059-e85a1a7ecb5c h1:6XcszPFZpan4qll5XbdLll7n1So3IsPn28aw2j1obMo=
327327
github.com/openshift/build-machinery-go v0.0.0-20250102153059-e85a1a7ecb5c/go.mod h1:8jcm8UPtg2mCAsxfqKil1xrmRMI3a+XU2TZ9fF8A7TE=
328328
github.com/openshift/client-go v0.0.0-20250125113824-8e1f0b8fa9a7 h1:4iliLcvr1P9EUMZgIaSNEKNQQzBn+L6PSequlFOuB6Q=

pkg/cmd/render/render.go

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,10 @@ func newTemplateData(opts *renderOpts) (*TemplateData, error) {
256256
}
257257

258258
// If bootstrap scaling strategy is delayed HA set annotation signal
259-
if templateData.BootstrapScalingStrategy == ceohelpers.DelayedHAScalingStrategy {
259+
if templateData.BootstrapScalingStrategy == ceohelpers.DelayedHAScalingStrategy ||
260+
templateData.BootstrapScalingStrategy == ceohelpers.DelayedTwoNodeScalingStrategy {
260261
templateData.NamespaceAnnotations = map[string]string{
261-
ceohelpers.DelayedHABootstrapScalingStrategyAnnotation: "",
262+
ceohelpers.DelayedBootstrapScalingStrategyAnnotation: "",
262263
}
263264
}
264265

@@ -721,26 +722,45 @@ func getInfrastructure(file string) (*configv1.Infrastructure, error) {
721722
}
722723

723724
func getBootstrapScalingStrategy(installConfig map[string]interface{}, delayedHAMarkerFile string) (ceohelpers.BootstrapScalingStrategy, error) {
724-
// Delayed HA strategy is set if marker file exists on disk.
725-
if _, err := os.Stat(delayedHAMarkerFile); err == nil {
726-
return ceohelpers.DelayedHAScalingStrategy, nil
727-
}
728-
729725
controlPlane, found := installConfig["controlPlane"].(map[string]interface{})
730726
if !found {
731727
return "", fmt.Errorf("unrecognized data structure in controlPlane field")
732728
}
733-
replicaCount, found := controlPlane["replicas"].(float64)
729+
cpReplicaCount, found := controlPlane["replicas"].(float64)
734730
if !found {
735731
return "", fmt.Errorf("unrecognized data structure in controlPlane replica field")
736732
}
737733

738734
// Bootstrap in place strategy when bootstrapInPlace root key exists in the install-config
739735
// and controlPlane replicas is 1.
740-
if _, found := installConfig["bootstrapInPlace"]; found && int(replicaCount) == 1 {
736+
if _, found := installConfig["bootstrapInPlace"]; found && int(cpReplicaCount) == 1 {
741737
return ceohelpers.BootstrapInPlaceStrategy, nil
742738
}
743739

740+
// Delayed HA strategy is set if marker file exists on disk.
741+
if _, err := os.Stat(delayedHAMarkerFile); err == nil {
742+
743+
// Handle two-node topologies; if an arbiter node is not present
744+
// then use DelayedTwoNodeScalingStrategy to allow bootstrap to proceed
745+
// without a third etcd member
746+
if int(cpReplicaCount) == 2 {
747+
arbiter, arbiterDefined := installConfig["aribiter"].(map[string]interface{})
748+
if !arbiterDefined {
749+
return ceohelpers.DelayedTwoNodeScalingStrategy, nil
750+
}
751+
752+
arbReplicaCount, arbReplicasDefined := arbiter["replicas"].(float64)
753+
if !arbReplicasDefined || arbReplicaCount < 1 {
754+
return ceohelpers.DelayedTwoNodeScalingStrategy, nil
755+
}
756+
}
757+
758+
// TODO check for SNO here?
759+
760+
// This should handle both delayed two-node with arbiter and delayed HA clusters
761+
return ceohelpers.DelayedHAScalingStrategy, nil
762+
}
763+
744764
// HA "default".
745765
return ceohelpers.HAScalingStrategy, nil
746766
}

pkg/cmd/render/render_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ func TestRenderScalingStrategyBootstrapInPlace(t *testing.T) {
298298
func TestRenderScalingStrategyDelayedHA(t *testing.T) {
299299
want := TemplateData{
300300
BootstrapScalingStrategy: ceohelpers.DelayedHAScalingStrategy,
301-
NamespaceAnnotations: map[string]string{ceohelpers.DelayedHABootstrapScalingStrategyAnnotation: ""},
301+
NamespaceAnnotations: map[string]string{ceohelpers.DelayedBootstrapScalingStrategyAnnotation: ""},
302302
}
303303
config := &testConfig{
304304
t: t,

pkg/operator/bootstrapteardown/bootstrap_teardown_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,11 @@ func (c *BootstrapTeardownController) canRemoveEtcdBootstrap(ctx context.Context
194194
if len(members) < 4 {
195195
return false, hasBootstrap, bootstrapMemberID, nil
196196
}
197-
case ceohelpers.DelayedHAScalingStrategy:
197+
case ceohelpers.DelayedHAScalingStrategy, ceohelpers.TwoNodeScalingStrategy:
198198
if len(members) < 3 {
199199
return false, hasBootstrap, bootstrapMemberID, nil
200200
}
201-
case ceohelpers.UnsafeScalingStrategy:
201+
case ceohelpers.UnsafeScalingStrategy, ceohelpers.DelayedTwoNodeScalingStrategy:
202202
if len(members) < 2 {
203203
return false, hasBootstrap, bootstrapMemberID, nil
204204
}

pkg/operator/ceohelpers/bootstrap.go

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66

7+
configv1 "github.com/openshift/api/config/v1"
78
configv1listers "github.com/openshift/client-go/config/listers/config/v1"
89
"github.com/openshift/library-go/pkg/operator/bootstrap"
910
"github.com/openshift/library-go/pkg/operator/v1helpers"
@@ -35,6 +36,22 @@ const (
3536
// annotation to the openshift-etcd namesapce.
3637
DelayedHAScalingStrategy BootstrapScalingStrategy = "DelayedHAScalingStrategy"
3738

39+
// TwoNodeScalingStrategy means the etcd cluster will only be scaled up when at least
40+
// 2 nodes are available so that quorum is maintained at all times. This rule applies
41+
// during bootstrapping and the steady state.
42+
//
43+
// This strategy is used for deployments of Two Node OpenShift with Fencing.
44+
TwoNodeScalingStrategy BootstrapScalingStrategy = "TwoNodeScalingStrategy"
45+
46+
// DelayedTwoNodeScalingStrategy means that during bootstrapping, the etcd cluster will
47+
// be allowed to scale when at least 1 member is available (which is unsafe),
48+
// but after bootstrapping any further scaling will require 2 nodes in the same
49+
// way as TwoNodeScalingStrategy.
50+
//
51+
// This strategy is intended for deploys of Two Node OpenShift with Fencing via
52+
// the assisted or agent-based installers.
53+
DelayedTwoNodeScalingStrategy BootstrapScalingStrategy = "DelayedTwoNodeScalingStrategy"
54+
3855
// BootstrapInPlaceStrategy means that the bootstrap node will never exist
3956
// during the lifecycle of the cluster. Bootkube will run on a live iso
4057
// afterwards the node will pivot into the manifests generated during that
@@ -54,9 +71,18 @@ const (
5471
)
5572

5673
const (
57-
// DelayedHABootstrapScalingStrategyAnnotation is an annotation on the openshift-etcd
58-
// namespace which, if present indicates the DelayedHAScalingStrategy strategy
59-
// should be used.
74+
// DelayedBootstrapScalingStrategyAnnotation is an annotation on the openshift-etcd
75+
// namespace which, if present, indicates that one of the delayed scaling strategies
76+
// should be used. This is generally used by the assisted installer to ensure that
77+
// the bootstrap node can reboot into a cluster node.
78+
//
79+
// For HA clusters, this will be set to DelayedHAScalingStrategy.
80+
//
81+
// For Two Node OpenShift with Fencing, this is set to DelayedTwoNodeScalingStrategy.
82+
DelayedBootstrapScalingStrategyAnnotation = "openshift.io/delayed-bootstrap"
83+
84+
// DelayedHABootstrapScalingStrategyAnnotation performs the same function as the annotation
85+
// above, and is kept for backwards compatibility.
6086
DelayedHABootstrapScalingStrategyAnnotation = "openshift.io/delayed-ha-bootstrap"
6187
)
6288

@@ -78,17 +104,25 @@ func GetBootstrapScalingStrategy(staticPodClient v1helpers.StaticPodOperatorClie
78104
if err != nil {
79105
return strategy, fmt.Errorf("failed to get %s namespace: %w", operatorclient.TargetNamespace, err)
80106
}
107+
108+
// Check for both the delayed annotation and the legacy DelayedHABootrapScalingStrategyAnnotation
109+
_, hasDelayedAnnotation := etcdNamespace.Annotations[DelayedBootstrapScalingStrategyAnnotation]
81110
_, hasDelayedHAAnnotation := etcdNamespace.Annotations[DelayedHABootstrapScalingStrategyAnnotation]
111+
hasDelayedAnnotation = hasDelayedAnnotation || hasDelayedHAAnnotation
82112

83-
singleNode, err := IsSingleNodeTopology(infraLister)
113+
topology, err := GetControlPlaneTopology(infraLister)
84114
if err != nil {
85115
return strategy, fmt.Errorf("failed to get control plane topology: %w", err)
86116
}
87117

88118
switch {
89-
case isUnsupportedUnsafeEtcd || singleNode:
119+
case isUnsupportedUnsafeEtcd || topology == configv1.SingleReplicaTopologyMode:
90120
strategy = UnsafeScalingStrategy
91-
case hasDelayedHAAnnotation:
121+
case topology == configv1.DualReplicaTopologyMode && hasDelayedAnnotation:
122+
strategy = DelayedTwoNodeScalingStrategy
123+
case topology == configv1.DualReplicaTopologyMode && !hasDelayedAnnotation:
124+
strategy = TwoNodeScalingStrategy
125+
case hasDelayedAnnotation:
92126
strategy = DelayedHAScalingStrategy
93127
default:
94128
strategy = HAScalingStrategy

pkg/operator/ceohelpers/control_plane_topology.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,6 @@ func GetControlPlaneTopology(infraLister configv1listers.InfrastructureLister) (
2626
return infraData.Status.ControlPlaneTopology, nil
2727
}
2828

29-
func IsSingleNodeTopology(infraLister configv1listers.InfrastructureLister) (bool, error) {
30-
topology, err := GetControlPlaneTopology(infraLister)
31-
if err != nil {
32-
return false, err
33-
}
34-
35-
return topology == configv1.SingleReplicaTopologyMode, nil
36-
}
37-
3829
// IsArbiterNodeTopology returns if the cluster infrastructure ControlPlaneTopology is set to HighlyAvailableArbiterMode
3930
// We use the infra interface in this situation instead of the lister because typically you are looking to find out this information
4031
// in order to configure controllers before the informers are running.

pkg/operator/defragcontroller/defragcontroller.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,10 @@ func (c *DefragController) checkDefragEnabled(ctx context.Context, recorder even
110110
}
111111

112112
// Ensure defrag disabled unless HA.
113-
if controlPlaneTopology != configv1.HighlyAvailableTopologyMode {
114-
klog.V(4).Infof("Defrag controller disabled for non HA cluster topology: %s", controlPlaneTopology)
113+
if !(controlPlaneTopology == configv1.HighlyAvailableTopologyMode ||
114+
controlPlaneTopology == configv1.HighlyAvailableArbiterMode ||
115+
controlPlaneTopology == configv1.DualReplicaTopologyMode) {
116+
klog.V(4).Infof("Defrag controller disabled for incompatible cluster topology: %s", controlPlaneTopology)
115117
return false, c.ensureControllerDisabledCondition(ctx, operatorv1.ConditionTrue, recorder)
116118
}
117119

vendor/github.com/openshift/api/Dockerfile.ocp

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/openshift/api/Makefile

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)