Skip to content
This repository was archived by the owner on Dec 24, 2019. It is now read-only.

Commit 1496c3a

Browse files
authored
Merge pull request #18 from hjacobs/buffer-spare-nodes
#17 provision one extra "spare" node per ASG/AZ
2 parents 7625f37 + 928033a commit 1496c3a

2 files changed

Lines changed: 13 additions & 6 deletions

File tree

kube_aws_autoscaler/main.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,8 @@ def slow_down_downscale(asg_sizes: dict, nodes_by_asg_zone: dict):
187187
return asg_sizes
188188

189189

190-
def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_by_asg_zone: dict, buffer_percentage: dict, buffer_fixed: dict):
190+
def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_by_asg_zone: dict,
191+
buffer_percentage: dict, buffer_fixed: dict, buffer_spare_nodes: int=0):
191192
asg_size = collections.defaultdict(int)
192193

193194
dump_info = STATS.get('last_info_dump', 0) < (time.time() - 600)
@@ -216,6 +217,8 @@ def calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone: dict, usage_b
216217
logger.info('Node {} is marked as unschedulable, compensating.'.format(node['name']))
217218
required_nodes += 1
218219

220+
required_nodes += buffer_spare_nodes
221+
219222
overprovisioned = {resource: 0 for resource in RESOURCES}
220223
for resource, value in allocatable.items():
221224
overprovisioned[resource] = value - requested[resource]
@@ -314,7 +317,7 @@ def get_ready_nodes_by_asg(nodes_by_asg_zone):
314317
return ready_nodes_by_asg
315318

316319

317-
def autoscale(buffer_percentage: dict, buffer_fixed: dict, dry_run: bool):
320+
def autoscale(buffer_percentage: dict, buffer_fixed: dict, buffer_spare_nodes: int=0, dry_run: bool=False):
318321
api = get_kube_api()
319322

320323
all_nodes = get_nodes(api)
@@ -328,7 +331,7 @@ def autoscale(buffer_percentage: dict, buffer_fixed: dict, dry_run: bool):
328331
pods = pykube.Pod.objects(api, namespace=pykube.all)
329332

330333
usage_by_asg_zone = calculate_usage_by_asg_zone(pods, nodes_by_name)
331-
asg_size = calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone, usage_by_asg_zone, buffer_percentage, buffer_fixed)
334+
asg_size = calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone, usage_by_asg_zone, buffer_percentage, buffer_fixed, buffer_spare_nodes)
332335
asg_size = slow_down_downscale(asg_size, nodes_by_asg_zone)
333336
ready_nodes_by_asg = get_ready_nodes_by_asg(nodes_by_asg_zone)
334337
resize_auto_scaling_groups(autoscaling, asg_size, ready_nodes_by_asg, dry_run)
@@ -340,7 +343,9 @@ def main():
340343
action='store_true')
341344
parser.add_argument('--debug', '-d', help='Debug mode: print more information', action='store_true')
342345
parser.add_argument('--once', help='Run loop only once and exit', action='store_true')
343-
parser.add_argument('--interval', type=int, help='Loop interval', default=60)
346+
parser.add_argument('--interval', type=int, help='Loop interval (default: 60s)', default=60)
347+
parser.add_argument('--buffer-spare-nodes', type=int,
348+
help='Number of extra "spare" nodes to provision per ASG/AZ (default: 1)', default=1)
344349
for resource in RESOURCES:
345350
parser.add_argument('--buffer-{}-percentage'.format(resource), type=float,
346351
help='{} buffer %%'.format(resource.capitalize()), default=DEFAULT_BUFFER_PERCENTAGE[resource])
@@ -362,7 +367,8 @@ def main():
362367

363368
while True:
364369
try:
365-
autoscale(buffer_percentage, buffer_fixed, dry_run=args.dry_run)
370+
autoscale(buffer_percentage, buffer_fixed, buffer_spare_nodes=args.buffer_spare_nodes,
371+
dry_run=args.dry_run)
366372
except:
367373
logger.exception('Failed to autoscale')
368374
if args.once:

tests/test_autoscaler.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ def test_calculate_required_auto_scaling_group_sizes():
7070
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {}, {}, {}) == {'a1': 0}
7171
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {('a1', 'z1'): {'cpu': 1, 'memory': 1, 'pods': 1}}, {}, {}) == {'a1': 1}
7272
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {('unknown', 'unknown'): {'cpu': 1, 'memory': 1, 'pods': 1}}, {}, {}) == {'a1': 1}
73+
assert calculate_required_auto_scaling_group_sizes({('a1', 'z1'): [node]}, {}, {}, {}, buffer_spare_nodes=2) == {'a1': 2}
7374

7475

7576
def test_calculate_required_auto_scaling_group_sizes_cordon():
@@ -336,7 +337,7 @@ def test_main(monkeypatch):
336337
monkeypatch.setattr('kube_aws_autoscaler.main.autoscale', autoscale)
337338
monkeypatch.setattr('sys.argv', ['foo', '--once', '--dry-run'])
338339
main()
339-
autoscale.assert_called_once_with({'memory': 10, 'pods': 10, 'cpu': 10}, {'memory': 209715200, 'pods': 10, 'cpu': 0.2}, dry_run=True)
340+
autoscale.assert_called_once_with({'memory': 10, 'pods': 10, 'cpu': 10}, {'memory': 209715200, 'pods': 10, 'cpu': 0.2}, buffer_spare_nodes=1, dry_run=True)
340341

341342
autoscale.side_effect = ValueError
342343

0 commit comments

Comments
 (0)