@@ -273,26 +273,38 @@ func (v *Validation) validateRunInstancesAuthorization(
273273 tags map [string ]string ,
274274 launchTemplate * launchtemplate.LaunchTemplate ,
275275) (result reconcile.Result , err error ) {
276- runInstancesInput := getRunInstancesInput (nodeClass , tags , launchTemplate )
277- // Adding NopRetryer to avoid aggressive retry when rate limited
278- if _ , err = v .ec2api .RunInstances (ctx , runInstancesInput , func (o * ec2.Options ) {
279- o .Retryer = aws.NopRetryer {}
280- }); awserrors .IgnoreDryRunError (err ) != nil {
281- // If we get InstanceProfile NotFound, but we have a resolved instance profile in the status,
282- // this means there is most likely an eventual consistency issue and we just need to requeue
283- if awserrors .IsInstanceProfileNotFound (err ) || awserrors .IsRateLimitedError (err ) || awserrors .IsServerError (err ) {
284- return reconcile.Result {Requeue : true }, nil
285- }
286- if awserrors .IgnoreUnauthorizedOperationError (err ) != nil {
287- // Dry run should only ever return UnauthorizedOperation or DryRunOperation so if we receive any other error
288- // it would be an unexpected state
289- return reconcile.Result {}, fmt .Errorf ("validating ec2:RunInstances authorization, %w" , err )
276+ // We use the first subnet's error to determine the outcome. Mixed-error scenarios across subnets
277+ // are unlikely in practice since authorization policies are not subnet-specific, and transient
278+ // failures on individual subnets are already handled by the early-exit-on-success pattern.
279+ var firstSubnetErr error
280+ for i , subnet := range nodeClass .Status .Subnets {
281+ runInstancesInput := getRunInstancesInput (tags , launchTemplate , & subnet )
282+ if _ , err = v .ec2api .RunInstances (ctx , runInstancesInput , func (o * ec2.Options ) {
283+ // Adding NopRetryer to avoid aggressive retry when rate limited
284+ o .Retryer = aws.NopRetryer {}
285+ }); awserrors .IgnoreDryRunError (err ) != nil {
286+ if i == 0 {
287+ firstSubnetErr = err
288+ }
289+ } else {
290+ // if any of them succeed, we can exit early
291+ return reconcile.Result {}, nil
290292 }
291- log .FromContext (ctx ).Error (err , "unauthorized to call ec2:RunInstances" )
292- v .updateCacheOnFailure (nodeClass , tags , ConditionReasonRunInstancesAuthFailed )
293- return reconcile.Result {RequeueAfter : requeueAfterTime }, nil
294293 }
295- return reconcile.Result {}, nil
294+
295+ // If we get InstanceProfile NotFound, but we have a resolved instance profile in the status,
296+ // this means there is most likely an eventual consistency issue and we just need to requeue
297+ if awserrors .IsInstanceProfileNotFound (firstSubnetErr ) || awserrors .IsRateLimitedError (firstSubnetErr ) || awserrors .IsServerError (firstSubnetErr ) {
298+ return reconcile.Result {Requeue : true }, nil
299+ }
300+ if awserrors .IgnoreUnauthorizedOperationError (firstSubnetErr ) != nil {
301+ // Dry run should only ever return UnauthorizedOperation or DryRunOperation so if we receive any other error
302+ // it would be an unexpected state
303+ return reconcile.Result {}, fmt .Errorf ("validating ec2:RunInstances authorization, %w" , firstSubnetErr )
304+ }
305+ log .FromContext (ctx ).Error (firstSubnetErr , "unauthorized to call ec2:RunInstances" )
306+ v .updateCacheOnFailure (nodeClass , tags , ConditionReasonRunInstancesAuthFailed )
307+ return reconcile.Result {RequeueAfter : requeueAfterTime }, nil
296308}
297309
298310func (* Validation ) requiredConditions () []string {
@@ -336,9 +348,9 @@ func (v *Validation) clearCacheEntries(nodeClass *v1.EC2NodeClass) {
336348}
337349
338350func getRunInstancesInput (
339- nodeClass * v1.EC2NodeClass ,
340351 tags map [string ]string ,
341352 launchTemplate * launchtemplate.LaunchTemplate ,
353+ subnet * v1.Subnet ,
342354) * ec2.RunInstancesInput {
343355 return & ec2.RunInstancesInput {
344356 DryRun : lo .ToPtr (true ),
@@ -352,7 +364,7 @@ func getRunInstancesInput(
352364 NetworkInterfaces : []ec2types.InstanceNetworkInterfaceSpecification {
353365 {
354366 DeviceIndex : lo.ToPtr [int32 ](0 ),
355- SubnetId : lo .ToPtr (nodeClass . Status . Subnets [ 0 ] .ID ),
367+ SubnetId : lo .ToPtr (subnet .ID ),
356368 },
357369 },
358370 TagSpecifications : []ec2types.TagSpecification {
0 commit comments