@@ -187,7 +187,8 @@ def slow_down_downscale(asg_sizes: dict, nodes_by_asg_zone: dict):
187187 return asg_sizes
188188
189189
190- def calculate_required_auto_scaling_group_sizes (nodes_by_asg_zone : dict , usage_by_asg_zone : dict , buffer_percentage : dict , buffer_fixed : dict ):
190+ def calculate_required_auto_scaling_group_sizes (nodes_by_asg_zone : dict , usage_by_asg_zone : dict ,
191+ buffer_percentage : dict , buffer_fixed : dict , buffer_spare_nodes : int = 0 ):
191192 asg_size = collections .defaultdict (int )
192193
193194 dump_info = STATS .get ('last_info_dump' , 0 ) < (time .time () - 600 )
@@ -216,6 +217,8 @@ def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_b
216217 logger .info ('Node {} is marked as unschedulable, compensating.' .format (node ['name' ]))
217218 required_nodes += 1
218219
220+ required_nodes += buffer_spare_nodes
221+
219222 overprovisioned = {resource : 0 for resource in RESOURCES }
220223 for resource , value in allocatable .items ():
221224 overprovisioned [resource ] = value - requested [resource ]
@@ -314,7 +317,7 @@ def get_ready_nodes_by_asg(nodes_by_asg_zone):
314317 return ready_nodes_by_asg
315318
316319
317- def autoscale (buffer_percentage : dict , buffer_fixed : dict , dry_run : bool ):
320+ def autoscale (buffer_percentage : dict , buffer_fixed : dict , buffer_spare_nodes : int = 0 , dry_run : bool = False ):
318321 api = get_kube_api ()
319322
320323 all_nodes = get_nodes (api )
@@ -328,7 +331,7 @@ def autoscale(buffer_percentage: dict, buffer_fixed: dict, dry_run: bool):
328331 pods = pykube .Pod .objects (api , namespace = pykube .all )
329332
330333 usage_by_asg_zone = calculate_usage_by_asg_zone (pods , nodes_by_name )
331- asg_size = calculate_required_auto_scaling_group_sizes (nodes_by_asg_zone , usage_by_asg_zone , buffer_percentage , buffer_fixed )
334+ asg_size = calculate_required_auto_scaling_group_sizes (nodes_by_asg_zone , usage_by_asg_zone , buffer_percentage , buffer_fixed , buffer_spare_nodes )
332335 asg_size = slow_down_downscale (asg_size , nodes_by_asg_zone )
333336 ready_nodes_by_asg = get_ready_nodes_by_asg (nodes_by_asg_zone )
334337 resize_auto_scaling_groups (autoscaling , asg_size , ready_nodes_by_asg , dry_run )
@@ -340,7 +343,9 @@ def main():
340343 action = 'store_true' )
341344 parser .add_argument ('--debug' , '-d' , help = 'Debug mode: print more information' , action = 'store_true' )
342345 parser .add_argument ('--once' , help = 'Run loop only once and exit' , action = 'store_true' )
343- parser .add_argument ('--interval' , type = int , help = 'Loop interval' , default = 60 )
346+ parser .add_argument ('--interval' , type = int , help = 'Loop interval (default: 60s)' , default = 60 )
347+ parser .add_argument ('--buffer-spare-nodes' , type = int ,
348+ help = 'Number of extra "spare" nodes to provision per ASG/AZ (default: 1)' , default = 1 )
344349 for resource in RESOURCES :
345350 parser .add_argument ('--buffer-{}-percentage' .format (resource ), type = float ,
346351 help = '{} buffer %%' .format (resource .capitalize ()), default = DEFAULT_BUFFER_PERCENTAGE [resource ])
@@ -362,7 +367,8 @@ def main():
362367
363368 while True :
364369 try :
365- autoscale (buffer_percentage , buffer_fixed , dry_run = args .dry_run )
370+ autoscale (buffer_percentage , buffer_fixed , buffer_spare_nodes = args .buffer_spare_nodes ,
371+ dry_run = args .dry_run )
366372 except :
367373 logger .exception ('Failed to autoscale' )
368374 if args .once :
0 commit comments