@@ -24,9 +24,9 @@ import (
2424 "sync"
2525 "time"
2626
27- "github.com/Azure/azure-sdk-for-go/services/ compute/mgmt/2022-08-01/compute "
28- "github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2017-05-10/ resources"
29- azStorage "github.com/Azure/azure-sdk-for-go/storage"
27+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/ compute/armcompute/v5 "
28+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/ resources/armresources "
29+ "github.com/Azure/azure-sdk-for-go/sdk/ storage/azblob "
3030 "k8s.io/utils/ptr"
3131
3232 apiv1 "k8s.io/api/core/v1"
@@ -82,15 +82,21 @@ func (as *AgentPool) initialize() error {
8282 ctx , cancel := getContextWithCancel ()
8383 defer cancel ()
8484
85- template , err := as .manager .azClient .deploymentClient .ExportTemplate (ctx , as .manager .config .ResourceGroup , as .manager .config .Deployment )
85+ // Verify the deployment exists
86+ deployment , err := as .manager .azClient .deploymentClient .Get (ctx , as .manager .config .ResourceGroup , as .manager .config .Deployment )
8687 if err != nil {
87- klog .Errorf ("deploymentClient.ExportTemplate(%s, %s) failed: %v" , as .manager .config .ResourceGroup , as .manager .config .Deployment , err )
88- return err .Error ()
88+ klog .Errorf ("deploymentClient.Get(%s, %s) failed: %v" , as .manager .config .ResourceGroup , as .manager .config .Deployment , err )
89+ return err
90+ }
91+
92+ if deployment .Properties == nil {
93+ return fmt .Errorf ("deployment properties is nil for deployment %s" , as .manager .config .Deployment )
8994 }
9095
91- as .template = template . Template . (map [string ]interface {})
96+ as .template = make (map [string ]interface {})
9297 as .parameters = as .manager .config .DeploymentParameters
93- return normalizeForK8sVMASScalingUp (as .template )
98+
99+ return nil
94100}
95101
96102// MinSize returns minimum size of the node group.
@@ -136,10 +142,10 @@ func (as *AgentPool) Id() string {
136142 return as .Name
137143}
138144
139- func (as * AgentPool ) getVMsFromCache () ([]compute .VirtualMachine , error ) {
145+ func (as * AgentPool ) getVMsFromCache () ([]* armcompute .VirtualMachine , error ) {
140146 allVMs := as .manager .azureCache .getVirtualMachines ()
141147 if _ , exists := allVMs [as .Name ]; ! exists {
142- return []compute. VirtualMachine {} , fmt .Errorf ("could not find VMs with poolName: %s" , as .Name )
148+ return nil , fmt .Errorf ("could not find VMs with poolName: %s" , as .Name )
143149 }
144150 return allVMs [as .Name ], nil
145151}
@@ -157,7 +163,14 @@ func (as *AgentPool) GetVMIndexes() ([]int, map[int]string, error) {
157163 indexes := make ([]int , 0 )
158164 indexToVM := make (map [int ]string )
159165 for _ , instance := range instances {
160- index , err := GetVMNameIndex (instance .StorageProfile .OsDisk .OsType , * instance .Name )
166+ if instance .Properties == nil || instance .Properties .StorageProfile == nil || instance .Properties .StorageProfile .OSDisk == nil {
167+ continue
168+ }
169+ var osType armcompute.OperatingSystemTypes
170+ if instance .Properties .StorageProfile .OSDisk .OSType != nil {
171+ osType = * instance .Properties .StorageProfile .OSDisk .OSType
172+ }
173+ index , err := GetVMNameIndex (osType , * instance .Name )
161174 if err != nil {
162175 return nil , nil , err
163176 }
@@ -211,27 +224,28 @@ func (as *AgentPool) TargetSize() (int, error) {
211224 return int (size ), nil
212225}
213226
214- func (as * AgentPool ) getAllSucceededAndFailedDeployments () ([]resources .DeploymentExtended , error ) {
227+ func (as * AgentPool ) getAllSucceededAndFailedDeployments () ([]* armresources .DeploymentExtended , error ) {
215228 ctx , cancel := getContextWithCancel ()
216229 defer cancel ()
217230
218231 allDeployments , rerr := as .manager .azClient .deploymentClient .List (ctx , as .manager .config .ResourceGroup )
219232 if rerr != nil {
220- klog .Errorf ("getAllSucceededAndFailedDeployments: failed to list deployments with error: %v" , rerr . Error () )
221- return nil , rerr . Error ()
233+ klog .Errorf ("getAllSucceededAndFailedDeployments: failed to list deployments with error: %v" , rerr )
234+ return nil , rerr
222235 }
223236
224- result := make ([]resources .DeploymentExtended , 0 )
237+ result := make ([]* armresources .DeploymentExtended , 0 )
225238 for _ , deployment := range allDeployments {
226239 if deployment .Properties == nil || deployment .Properties .ProvisioningState == nil {
227240 continue
228241 }
229- if * deployment .Properties .ProvisioningState == "Succeeded" || * deployment .Properties .ProvisioningState == "Failed" {
242+ provState := string (* deployment .Properties .ProvisioningState )
243+ if provState == "Succeeded" || provState == "Failed" {
230244 result = append (result , deployment )
231245 }
232246 }
233247
234- return result , rerr . Error ()
248+ return result , nil
235249}
236250
237251// deleteOutdatedDeployments keeps the newest deployments in the resource group and delete others,
@@ -256,7 +270,12 @@ func (as *AgentPool) deleteOutdatedDeployments() (err error) {
256270 }
257271
258272 sort .Slice (deployments , func (i , j int ) bool {
259- return deployments [i ].Properties .Timestamp .Time .After (deployments [j ].Properties .Timestamp .Time )
273+ iTime := deployments [i ].Properties .Timestamp
274+ jTime := deployments [j ].Properties .Timestamp
275+ if iTime == nil || jTime == nil {
276+ return false
277+ }
278+ return iTime .After (* jTime )
260279 })
261280
262281 toBeDeleted := deployments [as .manager .config .MaxDeploymentsCount :]
@@ -269,7 +288,7 @@ func (as *AgentPool) deleteOutdatedDeployments() (err error) {
269288 klog .V (4 ).Infof ("deleteOutdatedDeployments: starts deleting outdated deployment (%s)" , * deployment .Name )
270289 rerr := as .manager .azClient .deploymentClient .Delete (ctx , as .manager .config .ResourceGroup , * deployment .Name )
271290 if rerr != nil {
272- errList = append (errList , rerr . Error () )
291+ errList = append (errList , rerr )
273292 }
274293 }
275294
@@ -317,20 +336,20 @@ func (as *AgentPool) IncreaseSize(delta int) error {
317336 as .parameters [as .Name + "Offset" ] = map [string ]int {"value" : highestUsedIndex + 1 }
318337
319338 newDeploymentName := fmt .Sprintf ("cluster-autoscaler-%d" , rand .New (rand .NewSource (time .Now ().UnixNano ())).Int31 ())
320- newDeployment := resources .Deployment {
321- Properties : & resources .DeploymentProperties {
339+ newDeployment := armresources .Deployment {
340+ Properties : & armresources .DeploymentProperties {
322341 Template : & as .template ,
323342 Parameters : & as .parameters ,
324- Mode : resources . Incremental ,
343+ Mode : ptr . To ( armresources . DeploymentModeIncremental ) ,
325344 },
326345 }
327346 ctx , cancel := getContextWithCancel ()
328347 defer cancel ()
329348 klog .V (3 ).Infof ("Waiting for deploymentClient.CreateOrUpdate(%s, %s, %v)" , as .manager .config .ResourceGroup , newDeploymentName , newDeployment )
330- rerr := as .manager .azClient .deploymentClient .CreateOrUpdate (ctx , as .manager .config .ResourceGroup , newDeploymentName , newDeployment , "" )
349+ _ , rerr := as .manager .azClient .deploymentClient .CreateOrUpdate (ctx , as .manager .config .ResourceGroup , newDeploymentName , newDeployment )
331350 if rerr != nil {
332- klog .Errorf ("deploymentClient.CreateOrUpdate for deployment %q failed: %v" , newDeploymentName , rerr . Error () )
333- return rerr . Error ()
351+ klog .Errorf ("deploymentClient.CreateOrUpdate for deployment %q failed: %v" , newDeploymentName , rerr )
352+ return rerr
334353 }
335354 klog .V (3 ).Infof ("deploymentClient.CreateOrUpdate(%s, %s, %v) success" , as .manager .config .ResourceGroup , newDeploymentName , newDeployment )
336355
@@ -515,58 +534,76 @@ func (as *AgentPool) deleteBlob(accountName, vhdContainer, vhdBlob string) error
515534 ctx , cancel := getContextWithCancel ()
516535 defer cancel ()
517536
518- storageKeysResult , rerr := as .manager .azClient .storageAccountsClient .ListKeys (ctx , as . manager . config . SubscriptionID , as .manager .config .ResourceGroup , accountName )
537+ keys , rerr := as .manager .azClient .storageAccountsClient .ListKeys (ctx , as .manager .config .ResourceGroup , accountName )
519538 if rerr != nil {
520- return rerr .Error ()
539+ return rerr
540+ }
541+
542+ if len (keys ) == 0 {
543+ return fmt .Errorf ("no storage keys found for account %s" , accountName )
521544 }
522545
523- keys := * storageKeysResult .Keys
524- client , err := azStorage .NewBasicClientOnSovereignCloud (accountName , ptr .Deref (keys [0 ].Value , "" ), as .manager .env )
546+ // Build blob URL and create client with shared key credentials
547+ blobURL := fmt .Sprintf ("https://%s.blob.%s/%s/%s" ,
548+ accountName ,
549+ as .manager .env .StorageEndpointSuffix ,
550+ vhdContainer ,
551+ vhdBlob )
552+ credential , err := azblob .NewSharedKeyCredential (accountName , ptr .Deref (keys [0 ].Value , "" ))
525553 if err != nil {
526- return err
554+ return fmt . Errorf ( "failed to create shared key credential: %w" , err )
527555 }
528556
529- bs := client .GetBlobService ()
530- containerRef := bs .GetContainerReference (vhdContainer )
531- blobRef := containerRef .GetBlobReference (vhdBlob )
557+ blobClient , err := azblob .NewClientWithSharedKeyCredential (blobURL , credential , nil )
558+ if err != nil {
559+ return fmt .Errorf ("failed to create blob client: %w" , err )
560+ }
532561
533- return blobRef .Delete (& azStorage.DeleteBlobOptions {})
562+ _ , err = blobClient .DeleteBlob (ctx , vhdContainer , vhdBlob , nil )
563+ return err
534564}
535565
536566// deleteVirtualMachine deletes a VM and any associated OS disk
537567func (as * AgentPool ) deleteVirtualMachine (name string ) error {
538568 ctx , cancel := getContextWithCancel ()
539569 defer cancel ()
540570
541- vm , rerr := as .manager .azClient .virtualMachinesClient .Get (ctx , as .manager .config .ResourceGroup , name , "" )
571+ vm , rerr := as .manager .azClient .virtualMachinesClient .Get (ctx , as .manager .config .ResourceGroup , name , nil )
542572 if rerr != nil {
543- if exists , _ := checkResourceExistsFromRetryError (rerr ); ! exists {
573+ if isNotFoundError (rerr ) {
544574 klog .V (2 ).Infof ("VirtualMachine %s/%s has already been removed" , as .manager .config .ResourceGroup , name )
545575 return nil
546576 }
547577
548- klog .Errorf ("failed to get VM: %s/%s: %s" , as .manager .config .ResourceGroup , name , rerr .Error ())
549- return rerr .Error ()
578+ klog .Errorf ("failed to get VM: %s/%s: %v" , as .manager .config .ResourceGroup , name , rerr )
579+ return rerr
580+ }
581+
582+ if vm .Properties == nil || vm .Properties .StorageProfile == nil || vm .Properties .StorageProfile .OSDisk == nil {
583+ klog .Errorf ("failed to get a valid os disk for VM: %s/%s" , as .manager .config .ResourceGroup , name )
584+ return fmt .Errorf ("os disk information not available" )
550585 }
551586
552- vhd := vm .VirtualMachineProperties .StorageProfile .OsDisk .Vhd
553- managedDisk := vm .VirtualMachineProperties .StorageProfile .OsDisk .ManagedDisk
587+ vhd := vm .Properties .StorageProfile .OSDisk .Vhd
588+ managedDisk := vm .Properties .StorageProfile .OSDisk .ManagedDisk
554589 if vhd == nil && managedDisk == nil {
555590 klog .Errorf ("failed to get a valid os disk URI for VM: %s/%s" , as .manager .config .ResourceGroup , name )
556591 return fmt .Errorf ("os disk does not have a VHD URI" )
557592 }
558593
559- osDiskName := vm .VirtualMachineProperties .StorageProfile .OsDisk .Name
594+ osDiskName := vm .Properties .StorageProfile .OSDisk .Name
560595 var nicName string
561- nicID := (* vm .VirtualMachineProperties .NetworkProfile .NetworkInterfaces )[0 ].ID
562- if nicID == nil {
563- klog .Warningf ("NIC ID is not set for VM (%s/%s)" , as .manager .config .ResourceGroup , name )
564- } else {
565- nicName , err := resourceName (* nicID )
566- if err != nil {
567- return err
596+ if vm .Properties .NetworkProfile != nil && vm .Properties .NetworkProfile .NetworkInterfaces != nil && len (vm .Properties .NetworkProfile .NetworkInterfaces ) > 0 {
597+ nicID := vm .Properties .NetworkProfile .NetworkInterfaces [0 ].ID
598+ if nicID == nil {
599+ klog .Warningf ("NIC ID is not set for VM (%s/%s)" , as .manager .config .ResourceGroup , name )
600+ } else {
601+ nicName , err := resourceName (* nicID )
602+ if err != nil {
603+ return err
604+ }
605+ klog .Infof ("found nic name for VM (%s/%s): %s" , as .manager .config .ResourceGroup , name , nicName )
568606 }
569- klog .Infof ("found nic name for VM (%s/%s): %s" , as .manager .config .ResourceGroup , name , nicName )
570607 }
571608
572609 klog .Infof ("deleting VM: %s/%s" , as .manager .config .ResourceGroup , name )
@@ -575,9 +612,8 @@ func (as *AgentPool) deleteVirtualMachine(name string) error {
575612
576613 klog .Infof ("waiting for VirtualMachine deletion: %s/%s" , as .manager .config .ResourceGroup , name )
577614 rerr = as .manager .azClient .virtualMachinesClient .Delete (deleteCtx , as .manager .config .ResourceGroup , name )
578- _ , realErr := checkResourceExistsFromRetryError (rerr )
579- if realErr != nil {
580- return realErr
615+ if rerr != nil && ! isNotFoundError (rerr ) {
616+ return rerr
581617 }
582618 klog .V (2 ).Infof ("VirtualMachine %s/%s removed" , as .manager .config .ResourceGroup , name )
583619
@@ -586,10 +622,8 @@ func (as *AgentPool) deleteVirtualMachine(name string) error {
586622 interfaceCtx , interfaceCancel := getContextWithCancel ()
587623 defer interfaceCancel ()
588624 rerr := as .manager .azClient .interfacesClient .Delete (interfaceCtx , as .manager .config .ResourceGroup , nicName )
589- klog .Infof ("waiting for nic deletion: %s/%s" , as .manager .config .ResourceGroup , nicName )
590- _ , realErr := checkResourceExistsFromRetryError (rerr )
591- if realErr != nil {
592- return realErr
625+ if rerr != nil && ! isNotFoundError (rerr ) {
626+ return rerr
593627 }
594628 klog .V (2 ).Infof ("interface %s/%s removed" , as .manager .config .ResourceGroup , nicName )
595629 }
@@ -604,23 +638,21 @@ func (as *AgentPool) deleteVirtualMachine(name string) error {
604638
605639 klog .Infof ("deleting blob: %s/%s" , vhdContainer , vhdBlob )
606640 if err = as .deleteBlob (accountName , vhdContainer , vhdBlob ); err != nil {
607- _ , realErr := checkResourceExistsFromError (err )
608- if realErr != nil {
609- return realErr
641+ if ! isNotFoundError (err ) {
642+ return err
610643 }
611- klog .V (2 ).Infof ("Blob %s/%s removed" , as .manager .config .ResourceGroup , vhdBlob )
612644 }
645+ klog .V (2 ).Infof ("Blob %s/%s removed" , as .manager .config .ResourceGroup , vhdBlob )
613646 } else if managedDisk != nil {
614647 if osDiskName == nil {
615648 klog .Warningf ("osDisk is not set for VM %s/%s" , as .manager .config .ResourceGroup , name )
616649 } else {
617650 klog .Infof ("deleting managed disk: %s/%s" , as .manager .config .ResourceGroup , * osDiskName )
618651 disksCtx , disksCancel := getContextWithCancel ()
619652 defer disksCancel ()
620- rerr := as .manager .azClient .disksClient .Delete (disksCtx , as .manager .config .SubscriptionID , as .manager .config .ResourceGroup , * osDiskName )
621- _ , realErr := checkResourceExistsFromRetryError (rerr )
622- if realErr != nil {
623- return realErr
653+ rerr := as .manager .azClient .disksClient .Delete (disksCtx , as .manager .config .ResourceGroup , * osDiskName )
654+ if rerr != nil && ! isNotFoundError (rerr ) {
655+ return rerr
624656 }
625657 klog .V (2 ).Infof ("disk %s/%s removed" , as .manager .config .ResourceGroup , * osDiskName )
626658 }
0 commit comments