@@ -41,8 +41,6 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
4141 defer glog .V (3 ).Infof ("Finished Job <%s/%s> killing" , jobInfo .Job .Namespace , jobInfo .Job .Name )
4242
4343 job := jobInfo .Job
44- // Job version is bumped only when job is killed
45- job .Status .Version = job .Status .Version + 1
4644 glog .Infof ("Current Version is: %d of job: %s/%s" , job .Status .Version , job .Namespace , job .Name )
4745 if job .DeletionTimestamp != nil {
4846 glog .Infof ("Job <%s/%s> is terminating, skip management process." ,
@@ -88,6 +86,10 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
8886 return fmt .Errorf ("failed to kill %d pods of %d" , len (errs ), total )
8987 }
9088
89+ job = job .DeepCopy ()
90+ //Job version is bumped only when job is killed
91+ job .Status .Version = job .Status .Version + 1
92+
9193 job .Status = vkv1.JobStatus {
9294 State : job .Status .State ,
9395
@@ -112,6 +114,8 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
112114 return err
113115 } else {
114116 if e := cc .cache .Update (job ); e != nil {
117+ glog .Errorf ("KillJob - Failed to update Job %v/%v in cache: %v" ,
118+ job .Namespace , job .Name , e )
115119 return e
116120 }
117121 }
@@ -138,21 +142,12 @@ func (cc *Controller) createJob(jobInfo *apis.JobInfo, nextState state.UpdateSta
138142 glog .V (3 ).Infof ("Starting to create Job <%s/%s>" , jobInfo .Job .Namespace , jobInfo .Job .Name )
139143 defer glog .V (3 ).Infof ("Finished Job <%s/%s> create" , jobInfo .Job .Namespace , jobInfo .Job .Name )
140144
141- job := jobInfo .Job
145+ job := jobInfo .Job . DeepCopy ()
142146 glog .Infof ("Current Version is: %d of job: %s/%s" , job .Status .Version , job .Namespace , job .Name )
143147
144- newJob , err := cc .needUpdateForVolumeClaim (job )
145- if err != nil {
148+ if update , err := cc .filljob (job ); err != nil || update {
146149 return err
147150 }
148- if newJob != nil {
149- if job , err := cc .vkClients .BatchV1alpha1 ().Jobs (job .Namespace ).Update (newJob ); err != nil {
150- glog .Errorf ("Failed to update Job %v/%v: %v" ,
151- job .Namespace , job .Name , err )
152- return err
153- }
154- return nil
155- }
156151
157152 if err := cc .pluginOnJobAdd (job ); err != nil {
158153 cc .recorder .Event (job , v1 .EventTypeWarning , string (vkv1 .PluginError ),
@@ -168,14 +163,26 @@ func (cc *Controller) createJob(jobInfo *apis.JobInfo, nextState state.UpdateSta
168163 return err
169164 }
170165
166+ if job , err := cc .vkClients .BatchV1alpha1 ().Jobs (job .Namespace ).UpdateStatus (job ); err != nil {
167+ glog .Errorf ("Failed to update status of Job %v/%v: %v" ,
168+ job .Namespace , job .Name , err )
169+ return err
170+ } else {
171+ if e := cc .cache .Update (job ); e != nil {
172+ glog .Errorf ("CreateJob - Failed to update Job %v/%v in cache: %v" ,
173+ job .Namespace , job .Name , e )
174+ return e
175+ }
176+ }
177+
171178 return nil
172179}
173180
174181func (cc * Controller ) syncJob (jobInfo * apis.JobInfo , updateStatus state.UpdateStatusFn ) error {
175182 glog .V (3 ).Infof ("Starting to sync up Job <%s/%s>" , jobInfo .Job .Namespace , jobInfo .Job .Name )
176183 defer glog .V (3 ).Infof ("Finished Job <%s/%s> sync up" , jobInfo .Job .Namespace , jobInfo .Job .Name )
177184
178- job := jobInfo .Job
185+ job := jobInfo .Job . DeepCopy ()
179186 glog .Infof ("Current Version is: %d of job: %s/%s" , job .Status .Version , job .Namespace , job .Name )
180187
181188 if job .DeletionTimestamp != nil {
@@ -313,6 +320,8 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
313320 return err
314321 } else {
315322 if e := cc .cache .Update (job ); e != nil {
323+ glog .Errorf ("SyncJob - Failed to update Job %v/%v in cache: %v" ,
324+ job .Namespace , job .Name , e )
316325 return e
317326 }
318327 }
@@ -356,10 +365,11 @@ func (cc *Controller) createJobIOIfNotExist(job *vkv1.Job) error {
356365 return nil
357366}
358367
359- func (cc * Controller ) needUpdateForVolumeClaim (job * vkv1.Job ) (* vkv1.Job , error ) {
368+ func (cc * Controller ) needUpdateForVolumeClaim (job * vkv1.Job ) (bool , * vkv1.Job , error ) {
360369 // If VolumeClaimName does not exist, generate them for Job.
361370 var newJob * vkv1.Job
362371 volumes := job .Spec .Volumes
372+ update := false
363373 for index , volume := range volumes {
364374 vcName := volume .VolumeClaimName
365375 if len (vcName ) == 0 {
@@ -368,7 +378,7 @@ func (cc *Controller) needUpdateForVolumeClaim(job *vkv1.Job) (*vkv1.Job, error)
368378 vcName = fmt .Sprintf ("%s-volume-%s" , job .Name , randomStr )
369379 exist , err := cc .checkPVCExist (job , vcName )
370380 if err != nil {
371- return nil , err
381+ return false , nil , err
372382 }
373383 if exist {
374384 continue
@@ -377,11 +387,12 @@ func (cc *Controller) needUpdateForVolumeClaim(job *vkv1.Job) (*vkv1.Job, error)
377387 newJob = job .DeepCopy ()
378388 }
379389 newJob .Spec .Volumes [index ].VolumeClaimName = vcName
390+ update = true
380391 break
381392 }
382393 }
383394 }
384- return newJob , nil
395+ return update , newJob , nil
385396}
386397
387398func (cc * Controller ) checkPVCExist (job * vkv1.Job , vcName string ) (bool , error ) {
@@ -494,3 +505,31 @@ func (cc *Controller) calcPGMinResources(job *vkv1.Job) *v1.ResourceList {
494505
495506 return minAvailableTasksRes .Convert2K8sResource ()
496507}
508+
509+ func (cc * Controller ) filljob (job * vkv1.Job ) (bool , error ) {
510+ update , newJob , err := cc .needUpdateForVolumeClaim (job )
511+ if err != nil {
512+ return false , err
513+ }
514+ if update {
515+ if _ , err := cc .vkClients .BatchV1alpha1 ().Jobs (job .Namespace ).Update (newJob ); err != nil {
516+ glog .Errorf ("Failed to update Job %v/%v: %v" ,
517+ job .Namespace , job .Name , err )
518+ return false , err
519+ }
520+ return true , nil
521+ } else if job .Status .State .Phase == "" {
522+ job .Status .State .Phase = vkv1 .Pending
523+ if j , err := cc .vkClients .BatchV1alpha1 ().Jobs (job .Namespace ).UpdateStatus (job ); err != nil {
524+ glog .Errorf ("Failed to update status of Job %v/%v: %v" ,
525+ job .Namespace , job .Name , err )
526+ } else {
527+ if e := cc .cache .Update (j ); e != nil {
528+ glog .Error ("Failed to update cache status of Job %v/%v: %v" , job .Namespace , job .Name , e )
529+ }
530+ }
531+ return true , nil
532+ }
533+
534+ return false , nil
535+ }
0 commit comments