Skip to content

Commit 18494fe

Browse files
authored
Merge pull request #132 from pierotofy/regionrot
Adds ability to rotate regions on failure
2 parents 96b96ae + 7af1944 commit 18494fe

File tree

6 files changed

+87
-48
lines changed

6 files changed

+87
-48
lines changed

docs/digitalocean.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Example configuration file:
1818
"maxRuntime": -1,
1919
"maxUploadTime": -1,
2020
"dropletsLimit": 30,
21-
"region": "sfo2",
21+
"region": ["sfo2", "sfo1"],
2222

2323
"image": "ubuntu-16-04-x64",
2424
"tags": ["clusterodm"],

libs/asr-providers/aws.js

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ module.exports = class AWSAsrProvider extends AbstractASRProvider{
4141
"instanceLimit": -1,
4242
"createRetries": 1,
4343
"region": "us-west-2",
44-
"zone": "",
44+
"zone": "",
4545
"monitoring": false,
4646
"tags": ["clusterodm"],
4747
"ami": "ami-07b4f3c02c7f83d59",
@@ -123,35 +123,34 @@ module.exports = class AWSAsrProvider extends AbstractASRProvider{
123123
const dockerImage = this.getConfig("dockerImage");
124124
const accessKey = this.getConfig("accessKey");
125125
const secretKey = this.getConfig("secretKey");
126-
const dataDirMountPath = this.getConfig("dataDirMountPath");
126+
const dataDirMountPath = this.getConfig("dataDirMountPath");
127127
const s3 = this.getConfig("s3");
128128
const webhook = netutils.publicAddressPath("/commit", req, token);
129129

130130
const setupCmd = this.getConfig("nodeSetupCmd");
131-
if (setupCmd != null && setupCmd.length > 0)
132-
{
131+
if (setupCmd != null && setupCmd.length > 0){
133132
await dm.ssh(setupCmd);
134133
}
135134

136135
let dockerRunArgs = [`sudo docker run -d -p 3000:3000`];
137136

138-
if(dataDirMountPath.length > 0){
139-
dockerRunArgs.push(`--mount type=bind,source=${dataDirMountPath},target=/var/www/data`);
140-
}
141-
if (this.getConfig("dockerGpu")){
142-
dockerRunArgs.push(`--gpus all`);
143-
}
144-
145-
dockerRunArgs.push(`${dockerImage} -q 1`);
146-
dockerRunArgs.push(`--s3_access_key ${accessKey}`);
147-
dockerRunArgs.push(`--s3_secret_key ${secretKey}`);
148-
dockerRunArgs.push(`--s3_endpoint ${s3.endpoint}`);
149-
dockerRunArgs.push(`--s3_bucket ${s3.bucket}`);
150-
dockerRunArgs.push(`--s3_acl ${s3.acl}`);
151-
dockerRunArgs.push(`--webhook ${webhook}`);
152-
dockerRunArgs.push(`--token ${nodeToken}`);
153-
154-
await dm.ssh(dockerRunArgs.join(" "));
137+
if(dataDirMountPath.length > 0){
138+
dockerRunArgs.push(`--mount type=bind,source=${dataDirMountPath},target=/var/www/data`);
139+
}
140+
if (this.getConfig("dockerGpu")){
141+
dockerRunArgs.push(`--gpus all`);
142+
}
143+
144+
dockerRunArgs.push(`${dockerImage} -q 1`);
145+
dockerRunArgs.push(`--s3_access_key ${accessKey}`);
146+
dockerRunArgs.push(`--s3_secret_key ${secretKey}`);
147+
dockerRunArgs.push(`--s3_endpoint ${s3.endpoint}`);
148+
dockerRunArgs.push(`--s3_bucket ${s3.bucket}`);
149+
dockerRunArgs.push(`--s3_acl ${s3.acl}`);
150+
dockerRunArgs.push(`--webhook ${webhook}`);
151+
dockerRunArgs.push(`--token ${nodeToken}`);
152+
153+
await dm.ssh(dockerRunArgs.join(" "));
155154
}
156155

157156
getImagePropertiesFor(imagesCount){
@@ -177,12 +176,12 @@ module.exports = class AWSAsrProvider extends AbstractASRProvider{
177176
return this.getConfig("maxUploadTime");
178177
}
179178

180-
async getCreateArgs(imagesCount){
179+
async getCreateArgs(imagesCount, attempt){
181180
const image_props = this.getImagePropertiesFor(imagesCount);
182181
const args = [
183182
"--amazonec2-access-key", this.getConfig("accessKey"),
184183
"--amazonec2-secret-key", this.getConfig("secretKey"),
185-
"--amazonec2-region", this.getConfig("region"),
184+
"--amazonec2-region", this.getConfigArrayItem("region", attempt - 1),
186185
"--amazonec2-ami", this.getConfig("ami"),
187186
"--amazonec2-instance-type", image_props["slug"],
188187
"--amazonec2-root-size", image_props["storage"],
@@ -206,37 +205,42 @@ module.exports = class AWSAsrProvider extends AbstractASRProvider{
206205

207206
if (this.getConfig("usePrivateAddress")) {
208207
args.push("--amazonec2-use-private-address");
209-
if(this.getConfig("assignPrivateAddressOnly")) {
210-
args.push("--amazonec2-private-address-only");
211-
}
208+
if(this.getConfig("assignPrivateAddressOnly")) {
209+
args.push("--amazonec2-private-address-only");
210+
}
212211
}
213212

214213
if (this.getConfig("engineInstallUrl")){
215214
args.push("--engine-install-url")
216215
args.push(this.getConfig("engineInstallUrl"));
217216
}
218217

219-
if (this.getConfig("zone").length > 0){
220-
args.push("--amazonec2-zone")
221-
args.push(this.getConfig("zone"));
222-
}
218+
if (this.getConfig("zone").length > 0){
219+
args.push("--amazonec2-zone")
220+
args.push(this.getConfig("zone"));
221+
}
223222

224223
if (this.getConfig("vpc").length > 0){
225224
args.push("--amazonec2-vpc-id")
226225
args.push(this.getConfig("vpc"));
227226
}
228227

229-
if (this.getConfig("subnet").length > 0){
228+
if (this.getConfig("subnet").length > 0){
230229
args.push("--amazonec2-subnet-id")
231230
args.push(this.getConfig("subnet"));
232231
}
233232

234-
235-
if (this.getConfig("iamrole").length > 0){
233+
if (this.getConfig("iamrole").length > 0){
236234
args.push("--amazonec2-iam-instance-profile")
237235
args.push(this.getConfig("iamrole"));
238236
}
239237

240238
return args;
241239
}
240+
241+
getFailureSleepTime(attempt){
242+
const numRegions = this.getConfigArray("region").length;
243+
if (attempt <= numRegions) return 1000;
244+
else return 10000 * (attempt - numRegions);
245+
}
242246
};

libs/asr-providers/digitalocean.js

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ module.exports = class DigitalOceanAsrProvider extends AbstractASRProvider{
165165
return this.getConfig("maxUploadTime");
166166
}
167167

168-
async getImageInfo(){
168+
async getImageInfo(attempt){
169169
let imageName = this.getConfig("image");
170-
let imageRegion = this.getConfig("region");
170+
let imageRegion = this.getConfigArrayItem("region", attempt - 1);
171171

172172
if (this.getConfig("snapshot")){
173173
// We need to fetch the imageID
@@ -209,8 +209,8 @@ module.exports = class DigitalOceanAsrProvider extends AbstractASRProvider{
209209
};
210210
}
211211

212-
async getCreateArgs(imagesCount){
213-
const imageInfo = await this.getImageInfo();
212+
async getCreateArgs(imagesCount, attempt){
213+
const imageInfo = await this.getImageInfo(attempt);
214214

215215
const args = [
216216
"--digitalocean-access-token", this.getConfig("accessToken"),
@@ -245,4 +245,10 @@ module.exports = class DigitalOceanAsrProvider extends AbstractASRProvider{
245245

246246
return args;
247247
}
248+
249+
getFailureSleepTime(attempt){
250+
const numRegions = this.getConfigArray("region").length;
251+
if (attempt <= numRegions) return 1000;
252+
else return 10000 * (attempt - numRegions);
253+
}
248254
};

libs/asr-providers/hetzner.js

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,11 @@ module.exports = class HetznerAsrProvider extends AbstractASRProvider{
217217
return this.getConfig("maxUploadTime");
218218
}
219219

220-
async getCreateArgs(imagesCount){
220+
async getCreateArgs(imagesCount, attempt){
221+
221222
const args = [
222223
"--hetzner-api-token", this.getConfig("apiToken"),
223-
"--hetzner-server-location", this.getConfig("location"),
224+
"--hetzner-server-location", this.getConfigArrayItem("location", attempt - 1),
224225
"--hetzner-server-type", this.getImageSlugFor(imagesCount)
225226
];
226227

@@ -250,4 +251,10 @@ module.exports = class HetznerAsrProvider extends AbstractASRProvider{
250251

251252
return args;
252253
}
254+
255+
getFailureSleepTime(attempt){
256+
const numLocs = this.getConfigArray("location").length;
257+
if (attempt <= numLocs) return 1000;
258+
else return 10000 * (attempt - numLocs);
259+
}
253260
};

libs/asr-providers/scaleway.js

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,11 @@ module.exports = class ScalewayAsrProvider extends AbstractASRProvider{
149149
return this.getConfig("maxUploadTime");
150150
}
151151

152-
async getCreateArgs(imagesCount){
152+
async getCreateArgs(imagesCount, attempt){
153153
const args = [
154154
"--scaleway-organization", this.getConfig("organization"),
155155
"--scaleway-token", this.getConfig("secretToken"),
156-
"--scaleway-region", this.getConfig("region"),
156+
"--scaleway-region", this.getConfigArrayItem("region", attempt - 1),
157157
"--scaleway-image", this.getConfig("image"),
158158
"--scaleway-commercial-type", this.getImageSlugFor(imagesCount)
159159
];
@@ -165,4 +165,10 @@ module.exports = class ScalewayAsrProvider extends AbstractASRProvider{
165165

166166
return args;
167167
}
168+
169+
getFailureSleepTime(attempt){
170+
const numRegions = this.getConfigArray("region").length;
171+
if (attempt <= numRegions) return 1000;
172+
else return 10000 * (attempt - numRegions);
173+
}
168174
};

libs/classes/AbstractASRProvider.js

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,14 @@ module.exports = class AbstractASRProvider{
4646
throw new Error("Not implemented");
4747
}
4848

49-
async getCreateArgs(imagesCount){
49+
async getCreateArgs(imagesCount, attempt){
5050
throw new Error("Not implemented");
5151
}
5252

53+
getFailureSleepTime(attempt){
54+
return 10000 * attempt;
55+
}
56+
5357
canHandle(imagesCount){
5458
throw new Error("Not implemented");
5559
}
@@ -99,7 +103,7 @@ module.exports = class AbstractASRProvider{
99103

100104
// Helper function for debugging
101105
async debugCreateDockerMachineCmd(imagesCount){
102-
const args = await this.getCreateArgs(imagesCount);
106+
const args = await this.getCreateArgs(imagesCount, 1);
103107
return `docker-machine create --driver ${this.getDriverName()} ${args.join(" ")} debug-machine`;
104108
}
105109

@@ -114,8 +118,6 @@ module.exports = class AbstractASRProvider{
114118
if (!this.canHandle(imagesCount)) throw new Error(`Cannot handle ${imagesCount} images.`);
115119

116120
const dm = new DockerMachine(hostname);
117-
const args = ["--driver", this.getDriverName()]
118-
.concat(await this.getCreateArgs(imagesCount));
119121
const nodeToken = short.generate();
120122

121123
try{
@@ -125,20 +127,23 @@ module.exports = class AbstractASRProvider{
125127
for (let i = 1; i <= this.getCreateRetries(); i++){
126128
if (status.aborted) throw new Error("Aborted");
127129

130+
const args = ["--driver", this.getDriverName()]
131+
.concat(await this.getCreateArgs(imagesCount, i));
132+
128133
logger.info(`Trying to create machine... (${i})`);
129134
try{
130135
await dm.create(args);
131136
created = true;
132137
break;
133138
}catch(e){
134-
logger.warn(`Cannot create machine: ${e}`);
139+
logger.warn(`Cannot create machine: ${e} with args ${args.join(" ")}`);
135140
try{
136141
await dm.rm(true); // Make sure to cleanup if something goes wrong!
137142
}catch(e){
138143
// Do nothing
139144
}
140145

141-
await utils.sleep(10000 * i);
146+
await utils.sleep(this.getFailureSleepTime(i));
142147
}
143148
}
144149
if (!created) throw new Error(`Cannot create machine (attempted ${this.getCreateRetries()} times)`);
@@ -197,4 +202,15 @@ module.exports = class AbstractASRProvider{
197202
getConfig(key, defaultValue = ""){
198203
return utils.get(this.config, key, defaultValue);
199204
}
205+
206+
getConfigArray(key, defaultValue = []){
207+
let val = this.getConfig(key, defaultValue);
208+
if (!Array.isArray(val)) val = [val];
209+
return val;
210+
}
211+
212+
getConfigArrayItem(key, idx){
213+
let arr = this.getConfigArray(key, ["invalid"]);
214+
return arr[idx % arr.length];
215+
}
200216
}

0 commit comments

Comments
 (0)