diff --git a/examples/44-node-repair.yaml b/examples/44-node-repair.yaml
new file mode 100644
index 0000000000..ddbe861a6f
--- /dev/null
+++ b/examples/44-node-repair.yaml
@@ -0,0 +1,13 @@
+# An example ClusterConfig that uses a managed node group with auto repair.
+
+apiVersion: eksctl.io/v1alpha5
+kind: ClusterConfig
+
+metadata:
+ name: cluster-44
+ region: us-west-2
+
+managedNodeGroups:
+- name: ng-1
+ nodeRepairConfig:
+ enabled: true
diff --git a/go.mod b/go.mod
index 25bdfafb31..984266e1a8 100644
--- a/go.mod
+++ b/go.mod
@@ -16,7 +16,7 @@ require (
github.com/aws/aws-sdk-go-v2/credentials v1.17.11
github.com/aws/aws-sdk-go-v2/service/autoscaling v1.51.1
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.56.1
- github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.46.2
+ github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.46.3
github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.45.0
github.com/aws/aws-sdk-go-v2/service/cognitoidentityprovider v1.36.3
github.com/aws/aws-sdk-go-v2/service/ec2 v1.166.0
diff --git a/go.sum b/go.sum
index 643001319b..e77165d7e5 100644
--- a/go.sum
+++ b/go.sum
@@ -740,6 +740,8 @@ github.com/aws/aws-sdk-go-v2/service/cloudformation v1.56.1 h1:EqRhsrEoXFFyzcNuq
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.56.1/go.mod h1:75rrfzgrN4Ol0m9Xo4+8S09KBoGAd1t6eafFHMt5wDI=
github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.46.2 h1:DrN2vg75JseLCepYjMVav43e+v7+AhArtWlm2F0OJ6Y=
github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.46.2/go.mod h1:WcTfALKgqv+VCMRCLtG4155sAwcfdYhFADc/yDJgSlc=
+github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.46.3 h1:DfrEQMWCfk0wkuv/r0zwcGoykCuYWCLoGolbax6O3sw=
+github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.46.3/go.mod h1:WcTfALKgqv+VCMRCLtG4155sAwcfdYhFADc/yDJgSlc=
github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.45.0 h1:j9rGKWaYglZpf9KbJCQVM/L85Y4UdGMgK80A1OddR24=
github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.45.0/go.mod h1:LZafBHU62ByizrdhNLMnzWGsUX+abAW4q35PN+FOj+A=
github.com/aws/aws-sdk-go-v2/service/cognitoidentityprovider v1.36.3 h1:JNWpkjImTP2e308bv7ihfwgOawf640BY/pyZWrBb9rw=
diff --git a/goformation/cloudformation/eks/aws-eks-nodegroup.go b/goformation/cloudformation/eks/aws-eks-nodegroup.go
index 4e06e8349f..1cb6fd9e1d 100644
--- a/goformation/cloudformation/eks/aws-eks-nodegroup.go
+++ b/goformation/cloudformation/eks/aws-eks-nodegroup.go
@@ -54,6 +54,11 @@ type Nodegroup struct {
// See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-launchtemplate
LaunchTemplate *Nodegroup_LaunchTemplateSpecification `json:"LaunchTemplate,omitempty"`
+ // NodeRepairConfig AWS CloudFormation Property
+ // Required: false
+ // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-noderepairconfig
+ NodeRepairConfig *Nodegroup_NodeRepairConfig `json:"NodeRepairConfig,omitempty"`
+
// NodeRole AWS CloudFormation Property
// Required: true
// See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html#cfn-eks-nodegroup-noderole
diff --git a/goformation/cloudformation/eks/aws-eks-nodegroup_noderepairconfig.go b/goformation/cloudformation/eks/aws-eks-nodegroup_noderepairconfig.go
new file mode 100644
index 0000000000..edf17f899d
--- /dev/null
+++ b/goformation/cloudformation/eks/aws-eks-nodegroup_noderepairconfig.go
@@ -0,0 +1,32 @@
+package eks
+
+import (
+ "goformation/v4/cloudformation/types"
+
+ "goformation/v4/cloudformation/policies"
+)
+
+// Nodegroup_NodeRepairConfig AWS CloudFormation Resource (AWS::EKS::Nodegroup.NodeRepairConfig)
+type Nodegroup_NodeRepairConfig struct {
+ Enabled *types.Value `json:"Enabled,omitempty"`
+
+ // AWSCloudFormationDeletionPolicy represents a CloudFormation DeletionPolicy
+ AWSCloudFormationDeletionPolicy policies.DeletionPolicy `json:"-"`
+
+ // AWSCloudFormationUpdateReplacePolicy represents a CloudFormation UpdateReplacePolicy
+ AWSCloudFormationUpdateReplacePolicy policies.UpdateReplacePolicy `json:"-"`
+
+ // AWSCloudFormationDependsOn stores the logical ID of the resources to be created before this resource
+ AWSCloudFormationDependsOn []string `json:"-"`
+
+ // AWSCloudFormationMetadata stores structured data associated with this resource
+ AWSCloudFormationMetadata map[string]interface{} `json:"-"`
+
+ // AWSCloudFormationCondition stores the logical ID of the condition that must be satisfied for this resource to be created
+ AWSCloudFormationCondition string `json:"-"`
+}
+
+// AWSCloudFormationType returns the AWS CloudFormation resource type
+func (r *Nodegroup_NodeRepairConfig) AWSCloudFormationType() string {
+ return "AWS::EKS::Nodegroup.NodeRepairConfig"
+}
diff --git a/pkg/apis/eksctl.io/v1alpha5/assets/schema.json b/pkg/apis/eksctl.io/v1alpha5/assets/schema.json
index 2e720d60fb..a36295d198 100755
--- a/pkg/apis/eksctl.io/v1alpha5/assets/schema.json
+++ b/pkg/apis/eksctl.io/v1alpha5/assets/schema.json
@@ -1469,6 +1469,11 @@
"name": {
"type": "string"
},
+ "nodeRepairConfig": {
+ "$ref": "#/definitions/NodeGroupNodeRepairConfig",
+ "description": "configures the auto repair feature of the nodegroup",
+ "x-intellij-html-description": "configures the auto repair feature of the nodegroup"
+ },
"outpostARN": {
"type": "string",
"description": "specifies the Outpost ARN in which the nodegroup should be created.",
@@ -1633,7 +1638,8 @@
"taints",
"updateConfig",
"launchTemplate",
- "releaseVersion"
+ "releaseVersion",
+ "nodeRepairConfig"
],
"additionalProperties": false,
"description": "represents an EKS-managed nodegroup",
@@ -2205,6 +2211,21 @@
"description": "holds the configuration for [spot instances](/usage/spot-instances/)",
"x-intellij-html-description": "holds the configuration for spot instances"
},
+ "NodeGroupNodeRepairConfig": {
+ "properties": {
+ "enabled": {
+ "type": "boolean",
+ "description": "Enables the auto repair feature for the nodegroup",
+ "x-intellij-html-description": "Enables the auto repair feature for the nodegroup"
+ }
+ },
+ "preferredOrder": [
+ "enabled"
+ ],
+ "additionalProperties": false,
+ "description": "contains the auto repair configuration for the nodegroup",
+ "x-intellij-html-description": "contains the auto repair configuration for the nodegroup"
+ },
"NodeGroupSGs": {
"properties": {
"attachIDs": {
diff --git a/pkg/apis/eksctl.io/v1alpha5/types.go b/pkg/apis/eksctl.io/v1alpha5/types.go
index 758d3907e0..d88fd7f22d 100644
--- a/pkg/apis/eksctl.io/v1alpha5/types.go
+++ b/pkg/apis/eksctl.io/v1alpha5/types.go
@@ -1596,6 +1596,13 @@ type (
// +optional
MaxUnavailablePercentage *int `json:"maxUnavailablePercentage,omitempty"`
}
+
+ // NodeGroupNodeRepairConfig contains the auto repair configuration for the nodegroup
+ NodeGroupNodeRepairConfig struct {
+ // Enables the auto repair feature for the nodegroup
+ // +optional
+ Enabled *bool `json:"enabled,omitempty"`
+ }
)
// MetricsCollection used by the scaling config,
@@ -1883,6 +1890,10 @@ type ManagedNodeGroup struct {
// ReleaseVersion the AMI version of the EKS optimized AMI to use
ReleaseVersion string `json:"releaseVersion"`
+ // NodeRepairConfig configures the auto repair feature of the nodegroup
+ // +optional
+ NodeRepairConfig *NodeGroupNodeRepairConfig `json:"nodeRepairConfig,omitempty"`
+
// Internal fields
Unowned bool `json:"-"`
diff --git a/pkg/cfn/builder/managed_launch_template_test.go b/pkg/cfn/builder/managed_launch_template_test.go
index da496486b4..acc5e2f30d 100644
--- a/pkg/cfn/builder/managed_launch_template_test.go
+++ b/pkg/cfn/builder/managed_launch_template_test.go
@@ -266,6 +266,19 @@ API_SERVER_URL=https://test.com
resourcesFilename: "spot.json",
}),
+ Entry("With node repair enabled", &mngCase{
+ ng: &api.ManagedNodeGroup{
+ NodeGroupBase: &api.NodeGroupBase{
+ Name: "node-repair-enabled",
+ InstanceType: "m5.xlarge",
+ },
+ NodeRepairConfig: &api.NodeGroupNodeRepairConfig{
+ Enabled: aws.Bool(true),
+ },
+ },
+ resourcesFilename: "node-repair-enabled.json",
+ }),
+
Entry("Without instance type set in the launch template", &mngCase{
ng: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
diff --git a/pkg/cfn/builder/managed_nodegroup.go b/pkg/cfn/builder/managed_nodegroup.go
index 3bbd438246..5b82811601 100644
--- a/pkg/cfn/builder/managed_nodegroup.go
+++ b/pkg/cfn/builder/managed_nodegroup.go
@@ -117,6 +117,14 @@ func (m *ManagedNodeGroupResourceSet) AddAllResources(ctx context.Context) error
managedResource.UpdateConfig = updateConfig
}
+ if m.nodeGroup.NodeRepairConfig != nil {
+ nodeRepairConfig := &gfneks.Nodegroup_NodeRepairConfig{}
+ if m.nodeGroup.NodeRepairConfig.Enabled != nil {
+ nodeRepairConfig.Enabled = gfnt.NewBoolean(*m.nodeGroup.NodeRepairConfig.Enabled)
+ }
+ managedResource.NodeRepairConfig = nodeRepairConfig
+ }
+
if m.nodeGroup.Spot {
// TODO use constant from SDK
managedResource.CapacityType = gfnt.NewString("SPOT")
diff --git a/pkg/cfn/builder/testdata/launch_template/node-repair-enabled.json b/pkg/cfn/builder/testdata/launch_template/node-repair-enabled.json
new file mode 100644
index 0000000000..83578dadd4
--- /dev/null
+++ b/pkg/cfn/builder/testdata/launch_template/node-repair-enabled.json
@@ -0,0 +1,176 @@
+{
+ "LaunchTemplate": {
+ "Type": "AWS::EC2::LaunchTemplate",
+ "Properties": {
+ "LaunchTemplateData": {
+ "BlockDeviceMappings": [
+ {
+ "DeviceName": "/dev/xvda",
+ "Ebs": {
+ "Iops": 3000,
+ "Throughput": 125,
+ "VolumeSize": 80,
+ "VolumeType": "gp3"
+ }
+ }
+ ],
+ "MetadataOptions": {
+ "HttpPutResponseHopLimit": 2,
+ "HttpTokens": "required"
+ },
+ "SecurityGroupIds": [
+ {
+ "Fn::ImportValue": "eksctl-lt::ClusterSecurityGroupId"
+ }
+ ],
+ "TagSpecifications": [
+ {
+ "ResourceType": "instance",
+ "Tags": [
+ {
+ "Key": "Name",
+ "Value": "lt-node-repair-enabled-Node"
+ },
+ {
+ "Key": "alpha.eksctl.io/nodegroup-name",
+ "Value": "node-repair-enabled"
+ },
+ {
+ "Key": "alpha.eksctl.io/nodegroup-type",
+ "Value": "managed"
+ }
+ ]
+ },
+ {
+ "ResourceType": "volume",
+ "Tags": [
+ {
+ "Key": "Name",
+ "Value": "lt-node-repair-enabled-Node"
+ },
+ {
+ "Key": "alpha.eksctl.io/nodegroup-name",
+ "Value": "node-repair-enabled"
+ },
+ {
+ "Key": "alpha.eksctl.io/nodegroup-type",
+ "Value": "managed"
+ }
+ ]
+ },
+ {
+ "ResourceType": "network-interface",
+ "Tags": [
+ {
+ "Key": "Name",
+ "Value": "lt-node-repair-enabled-Node"
+ },
+ {
+ "Key": "alpha.eksctl.io/nodegroup-name",
+ "Value": "node-repair-enabled"
+ },
+ {
+ "Key": "alpha.eksctl.io/nodegroup-type",
+ "Value": "managed"
+ }
+ ]
+ }
+ ]
+ },
+ "LaunchTemplateName": {
+ "Fn::Sub": "${AWS::StackName}"
+ }
+ }
+ },
+ "ManagedNodeGroup": {
+ "Type": "AWS::EKS::Nodegroup",
+ "Properties": {
+ "AmiType": "AL2023_x86_64_STANDARD",
+ "ClusterName": "lt",
+ "Labels": {
+ "alpha.eksctl.io/cluster-name": "lt",
+ "alpha.eksctl.io/nodegroup-name": "node-repair-enabled"
+ },
+ "InstanceTypes": ["m5.xlarge"],
+ "NodeRole": {
+ "Fn::GetAtt": [
+ "NodeInstanceRole",
+ "Arn"
+ ]
+ },
+ "NodegroupName": "node-repair-enabled",
+ "ScalingConfig": {
+ "DesiredSize": 2,
+ "MaxSize": 2,
+ "MinSize": 2
+ },
+ "Subnets": [
+ "subnet-public-us-west-2a"
+ ],
+ "Tags": {
+ "alpha.eksctl.io/nodegroup-name": "node-repair-enabled",
+ "alpha.eksctl.io/nodegroup-type": "managed"
+ },
+ "LaunchTemplate": {
+ "Id": {
+ "Ref": "LaunchTemplate"
+ }
+ },
+ "NodeRepairConfig": {
+ "Enabled": true
+ }
+ }
+ },
+ "NodeInstanceRole": {
+ "Type": "AWS::IAM::Role",
+ "Properties": {
+ "AssumeRolePolicyDocument": {
+ "Statement": [
+ {
+ "Action": [
+ "sts:AssumeRole"
+ ],
+ "Effect": "Allow",
+ "Principal": {
+ "Service": [
+ {
+ "Fn::FindInMap": [
+ "ServicePrincipalPartitionMap",
+ {
+ "Ref": "AWS::Partition"
+ },
+ "EC2"
+ ]
+ }
+ ]
+ }
+ }
+ ],
+ "Version": "2012-10-17"
+ },
+ "ManagedPolicyArns": [
+ {
+ "Fn::Sub": "arn:${AWS::Partition}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
+ },
+ {
+ "Fn::Sub": "arn:${AWS::Partition}:iam::aws:policy/AmazonEKSWorkerNodePolicy"
+ },
+ {
+ "Fn::Sub": "arn:${AWS::Partition}:iam::aws:policy/AmazonEKS_CNI_Policy"
+ },
+ {
+ "Fn::Sub": "arn:${AWS::Partition}:iam::aws:policy/AmazonSSMManagedInstanceCore"
+ }
+ ],
+ "Path": "/",
+ "Tags": [
+ {
+ "Key": "Name",
+ "Value": {
+ "Fn::Sub": "${AWS::StackName}/NodeInstanceRole"
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/pkg/ctl/cmdutils/configfile.go b/pkg/ctl/cmdutils/configfile.go
index 29d7e76294..087313a590 100644
--- a/pkg/ctl/cmdutils/configfile.go
+++ b/pkg/ctl/cmdutils/configfile.go
@@ -71,6 +71,7 @@ var (
commonNGFlagsIncompatibleWithConfigFile = []string{
"managed",
"spot",
+ "enable-node-repair",
"instance-types",
"nodes",
"nodes-min",
@@ -605,11 +606,17 @@ func makeManagedNodegroup(nodeGroup *api.NodeGroup, options CreateManagedNGOptio
AttachIDs: ngBase.SecurityGroups.AttachIDs,
}
}
- return &api.ManagedNodeGroup{
+ mng := &api.ManagedNodeGroup{
NodeGroupBase: &ngBase,
Spot: options.Spot,
InstanceTypes: options.InstanceTypes,
}
+ if options.NodeRepairEnabled {
+ mng.NodeRepairConfig = &api.NodeGroupNodeRepairConfig{
+ Enabled: &options.NodeRepairEnabled,
+ }
+ }
+ return mng
}
func validateUnsupportedCLIFeatures(ng *api.ManagedNodeGroup) error {
@@ -620,7 +627,7 @@ func validateManagedNGFlags(cmd *cobra.Command, managed bool) error {
if managed {
return nil
}
- flagsValidOnlyWithMNG := []string{"spot", "instance-types"}
+ flagsValidOnlyWithMNG := []string{"spot", "enable-node-repair", "instance-types"}
if flagName, found := findChangedFlag(cmd, flagsValidOnlyWithMNG); found {
return errors.Errorf("--%s is only valid with managed nodegroups (--managed)", flagName)
}
diff --git a/pkg/ctl/cmdutils/create_cluster.go b/pkg/ctl/cmdutils/create_cluster.go
index e000bd7de8..31201c0d23 100644
--- a/pkg/ctl/cmdutils/create_cluster.go
+++ b/pkg/ctl/cmdutils/create_cluster.go
@@ -39,9 +39,10 @@ type NodeGroupOptions struct {
// CreateManagedNGOptions holds options for creating a managed nodegroup
type CreateManagedNGOptions struct {
- Managed bool
- Spot bool
- InstanceTypes []string
+ Managed bool
+ Spot bool
+ NodeRepairEnabled bool
+ InstanceTypes []string
}
// CreateNGOptions holds options for creating a nodegroup
diff --git a/pkg/ctl/cmdutils/nodegroup_flags.go b/pkg/ctl/cmdutils/nodegroup_flags.go
index 015765ab86..50f4c751ea 100644
--- a/pkg/ctl/cmdutils/nodegroup_flags.go
+++ b/pkg/ctl/cmdutils/nodegroup_flags.go
@@ -56,6 +56,7 @@ func AddCommonCreateNodeGroupFlags(fs *pflag.FlagSet, cmd *Cmd, ng *api.NodeGrou
fs.BoolVarP(&mngOptions.Managed, "managed", "", true, "Create EKS-managed nodegroup")
fs.BoolVar(&mngOptions.Spot, "spot", false, "Create a spot nodegroup (managed nodegroups only)")
+ fs.BoolVar(&mngOptions.NodeRepairEnabled, "enable-node-repair", false, "Enable automatic node repair (managed nodegroups only)")
fs.StringSliceVar(&mngOptions.InstanceTypes, "instance-types", nil, "Comma-separated list of instance types (e.g., --instance-types=c3.large,c4.large,c5.large")
}
diff --git a/pkg/ctl/create/nodegroup_test.go b/pkg/ctl/create/nodegroup_test.go
index b40c3cd490..23f11a8ba2 100644
--- a/pkg/ctl/create/nodegroup_test.go
+++ b/pkg/ctl/create/nodegroup_test.go
@@ -85,6 +85,10 @@ var _ = Describe("create nodegroup", func() {
args: []string{"--cluster", "foo", "--spot"},
error: "--spot is only valid with managed nodegroups (--managed)",
}),
+ Entry("with enable-node-repair flag", invalidParamsCase{
+ args: []string{"--cluster", "foo", "--enable-node-repair"},
+ error: "--enable-node-repair is only valid with managed nodegroups (--managed)",
+ }),
Entry("with instance-types flag", invalidParamsCase{
args: []string{"--cluster", "foo", "--instance-types", "some-type"},
error: "--instance-types is only valid with managed nodegroups (--managed)",
diff --git a/userdocs/mkdocs.yml b/userdocs/mkdocs.yml
index dd60b40596..85c4e97ac1 100644
--- a/userdocs/mkdocs.yml
+++ b/userdocs/mkdocs.yml
@@ -177,6 +177,7 @@ nav:
- usage/windows-worker-nodes.md
- usage/nodegroup-additional-volume-mappings.md
- usage/hybrid-nodes.md
+ - usage/nodegroup-node-repair-config.md
- usage/eksctl-karpenter.md
- usage/eksctl-anywhere.md
- GitOps:
diff --git a/userdocs/src/usage/nodegroup-node-repair-config.md b/userdocs/src/usage/nodegroup-node-repair-config.md
new file mode 100644
index 0000000000..f42ff4b434
--- /dev/null
+++ b/userdocs/src/usage/nodegroup-node-repair-config.md
@@ -0,0 +1,47 @@
+# Support for Node Repair Config in EKS Managed Nodegroups
+
+EKS Managed Nodegroups now support Node Repair, where the health of managed nodes is monitored,
+and unhealthy worker nodes are replaced or rebooted in response.
+
+## Creating a cluster with a managed nodegroup that has node repair enabled
+
+To create a cluster with a managed nodegroup using node repair, pass the `--enable-node-repair` flag:
+
+```shell
+$ eksctl create cluster --enable-node-repair
+```
+
+To create a managed nodegroup using node repair on an existing cluster:
+
+```shell
+$ eksctl create nodegroup --cluster=<clusterName> --enable-node-repair
+```
+
+To create a cluster with a managed nodegroup using node repair via a config file:
+
+```yaml
+# node-repair-nodegroup-cluster.yaml
+---
+apiVersion: eksctl.io/v1alpha5
+kind: ClusterConfig
+
+metadata:
+ name: cluster-44
+ region: us-west-2
+
+managedNodeGroups:
+- name: ng-1
+ nodeRepairConfig:
+ enabled: true
+
+```
+
+```shell
+$ eksctl create cluster -f node-repair-nodegroup-cluster.yaml
+```
+
+## Further information
+
+- [EKS Managed Nodegroup Node Health][eks-user-guide]
+
+[eks-user-guide]: https://docs.aws.amazon.com/eks/latest/userguide/node-health.html