@@ -319,19 +319,20 @@ function sync_pbm_config_from_storage() {
319319}
320320
321321function wait_for_backup_completion() {
322- retry_interval=5
323- attempt=1
324322 describe_result=" "
323+ local retry_interval=5
324+ local attempt=1
325+ local max_retries=12
325326 set +e
326327 while true ; do
327328 describe_result=$( pbm describe-backup --mongodb-uri " $PBM_MONGODB_URI " " $backup_name " -o json 2>&1 )
328329 if [ $? -eq 0 ] && [ -n " $describe_result " ]; then
329330 backup_status=$( echo " $describe_result " | jq -r ' .status' )
330- if [ " $backup_status " = " " ] || [ " $backup_status " = " starting" ] || [ " $backup_status " = " running" ]; then
331- echo " INFO: Attempt $attempt : Backup status is $backup_status , retrying in ${retry_interval} s..."
332- sleep $retry_interval
333- (( attempt ++ ))
334- continue
331+ if [ " $backup_status " = " starting" ] || [ " $backup_status " = " running" ]; then
332+ echo " INFO: Backup status is $backup_status , retrying in ${retry_interval} s..."
333+ elif [ " $backup_status " = " " ] ; then
334+ echo " INFO: Backup status is $backup_status , retrying in ${retry_interval} s... "
335+ attempt= $(( attempt + 1 ))
335336 elif [ " $backup_status " = " done" ]; then
336337 echo " INFO: Backup status is done."
337338 break
@@ -340,14 +341,17 @@ function wait_for_backup_completion() {
340341 exit 1
341342 fi
342343 elif echo " $describe_result " | grep -q " not found" ; then
343- echo " INFO: Attempt $attempt : Backup metadata not found, retrying in ${retry_interval} s..."
344- sleep $retry_interval
345- (( attempt++ ))
346- continue
344+ echo " INFO: Backup metadata not found, retrying in ${retry_interval} s..."
345+ attempt=$(( attempt+ 1 ))
347346 else
348347 echo " ERROR: Unexpected: $describe_result "
349348 exit 1
350349 fi
350+ sleep $retry_interval
351+ if [ $attempt -gt $max_retries ]; then
352+ echo " ERROR: Failed to get backup status after $max_retries attempts"
353+ exit 1
354+ fi
351355 done
352356 set -e
353357
@@ -441,10 +445,10 @@ function process_restore_end_signal() {
441445}
442446
443447function get_describe_backup_info() {
444- max_retries=60
445- retry_interval=5
446- attempt=1
447448 describe_result=" "
449+ local max_retries=60
450+ local retry_interval=5
451+ local attempt=1
448452 set +e
449453 while [ $attempt -le $max_retries ]; do
450454 describe_result=$( pbm describe-backup --mongodb-uri " $PBM_MONGODB_URI " " $backup_name " -o json 2>&1 )
@@ -487,15 +491,25 @@ storage:
487491 access-key-id: ${S3_ACCESS_KEY}
488492 secret-access-key: ${S3_SECRET_KEY}
489493EOF
490-
494+ local attempt=0
495+ local max_retries=12
496+ local try_interval=5
491497 while true ; do
492498 restore_status=$( pbm describe-restore " $restore_name " -c $cnf_file -o json | jq -r ' .status' )
493- echo " INFO: Restore $restore_name status: $restore_status "
499+ echo " INFO: Restore $restore_name status: $restore_status , retrying in ${try_interval} s... "
494500 if [ " $restore_status " = " done" ]; then
495501 rm $cnf_file
496502 break
497- elif [ " $restore_status " = " " ] || [ " $restore_status " = " starting" ] || [ " $restore_status " = " running" ]; then
498- sleep 5
503+ elif [ " $restore_status " = " starting" ] || [ " $restore_status " = " running" ]; then
504+ sleep $try_interval
505+ elif [ " $restore_status " = " " ]; then
506+ sleep $try_interval
507+ attempt=$(( attempt+ 1 ))
508+ if [ $attempt -gt $max_retries ]; then
509+ echo " ERROR: Restore $restore_name status is still empty after $max_retries retries"
510+ rm $cnf_file
511+ exit 1
512+ fi
499513 else
500514 rm $cnf_file
501515 exit 1
0 commit comments