Skip to content

Commit 8d97ef1

Browse files
authored
Merge pull request #50 from pjdarton/bugfix_jenkins_36878
Bugfix jenkins 36878
2 parents 1893c0d + 8cd6569 commit 8d97ef1

File tree

9 files changed

+376
-163
lines changed

9 files changed

+376
-163
lines changed

src/main/java/org/jenkinsci/plugins/vSphereCloud.java

Lines changed: 127 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,7 @@
2424
import java.util.ArrayList;
2525
import java.util.Collection;
2626
import java.util.Collections;
27-
import java.util.IdentityHashMap;
28-
import java.util.Iterator;
2927
import java.util.List;
30-
import java.util.Map;
31-
import java.util.Set;
32-
import java.util.TreeMap;
33-
import java.util.TreeSet;
3428
import java.util.UUID;
3529
import java.util.concurrent.Callable;
3630
import java.util.concurrent.ConcurrentHashMap;
@@ -79,29 +73,59 @@ public class vSphereCloud extends Cloud {
7973
private transient CloudProvisioningState templateState;
8074

8175
private static java.util.logging.Logger VSLOG = java.util.logging.Logger.getLogger("vsphere-cloud");
82-
private static void InternalLog(Slave slave, SlaveComputer slaveComputer, TaskListener listener, String format, Object... args)
76+
private static void InternalLog(Slave slave, SlaveComputer slaveComputer, TaskListener listener, Throwable ex, String format, Object... args)
8377
{
78+
final Level logLevel = Level.INFO;
79+
if (!VSLOG.isLoggable(logLevel) && listener == null)
80+
return;
8481
String s = "";
8582
if (slave != null)
8683
s = String.format("[%s] ", slave.getNodeName());
8784
if (slaveComputer != null)
8885
s = String.format("[%s] ", slaveComputer.getName());
8986
s = s + String.format(format, args);
90-
if (listener != null)
87+
if (listener != null) {
9188
listener.getLogger().print(s + "\n");
92-
VSLOG.log(Level.INFO, s);
89+
if ( ex!=null ) {
90+
listener.getLogger().print(ex.toString() + "\n");
91+
ex.printStackTrace(listener.getLogger());
92+
}
93+
}
94+
if ( ex!=null ) {
95+
VSLOG.log(logLevel, s, ex);
96+
} else {
97+
VSLOG.log(logLevel, s);
98+
}
9399
}
94100
public static void Log(String format, Object... args) {
95-
InternalLog(null, null, null, format, args);
101+
InternalLog(null, null, null, null, format, args);
102+
}
103+
public static void Log(Throwable ex, String format, Object... args) {
104+
InternalLog(null, null, null, ex, format, args);
96105
}
97106
public static void Log(TaskListener listener, String format, Object... args) {
98-
InternalLog(null, null, listener, format, args);
107+
InternalLog(null, null, listener, null, format, args);
108+
}
109+
public static void Log(TaskListener listener, Throwable ex, String format, Object... args) {
110+
InternalLog(null, null, listener, ex, format, args);
111+
}
112+
public static void Log(Slave slave, String format, Object... args) {
113+
InternalLog(slave, null, null, null, format, args);
114+
}
115+
public static void Log(Slave slave, Throwable ex, String format, Object... args) {
116+
InternalLog(slave, null, null, ex, format, args);
99117
}
100118
public static void Log(Slave slave, TaskListener listener, String format, Object... args) {
101-
InternalLog(slave, null, listener, format, args);
119+
InternalLog(slave, null, listener, null, format, args);
120+
}
121+
public static void Log(Slave slave, TaskListener listener, Throwable ex, String format, Object... args) {
122+
InternalLog(slave, null, listener, ex, format, args);
102123
}
103124
public static void Log(SlaveComputer slave, TaskListener listener, String format, Object... args) {
104-
InternalLog(null, slave, listener, format, args);
125+
InternalLog(null, slave, listener, null, format, args);
126+
}
127+
public static void Log(SlaveComputer slave, TaskListener listener, Throwable ex, String format, Object... args) {
128+
InternalLog(null, slave, listener, ex, format, args);
105129
}
106130

107131
@Deprecated
@@ -150,8 +174,23 @@ public Object readResolve() throws IOException {
150174
private void ensureLists() {
151175
if (currentOnline == null)
152176
currentOnline = new ConcurrentHashMap<String, String>();
153-
if (templateState == null)
177+
if (templateState == null) {
178+
/*
179+
* If Jenkins has just restarted, there may be slaves that
180+
* exist but aren't currently recorded in our non-persisted state,
181+
* so we need to discover them.
182+
*/
154183
templateState = new CloudProvisioningState(this);
184+
for (final vSphereCloudProvisionedSlave n : NodeIterator.nodes(vSphereCloudProvisionedSlave.class)) {
185+
final String nodeName = n.getNodeName();
186+
final vSphereCloudSlaveTemplate template = getTemplateForVM(nodeName);
187+
if (template != null) {
188+
final CloudProvisioningRecord provisionable = templateState.getOrCreateRecord(template);
189+
templateState.provisioningStarted(provisionable, nodeName);
190+
templateState.provisionedSlaveNowActive(provisionable, nodeName);
191+
}
192+
}
193+
}
155194
}
156195

157196
public int getMaxOnlineSlaves() {
@@ -166,11 +205,12 @@ public List<? extends vSphereCloudSlaveTemplate> getTemplates() {
166205
return this.templates;
167206
}
168207

169-
public vSphereCloudSlaveTemplate getTemplate(final String template) {
170-
if(this.templates==null)
208+
private vSphereCloudSlaveTemplate getTemplateForVM(final String vmName) {
209+
if (this.templates == null || vmName == null)
171210
return null;
172-
for(vSphereCloudSlaveTemplate t : this.templates) {
173-
if(t.getCloneNamePrefix().equals(template)) {
211+
for (final vSphereCloudSlaveTemplate t : this.templates) {
212+
final String cloneNamePrefix = t.getCloneNamePrefix();
213+
if (cloneNamePrefix != null && vmName.startsWith(cloneNamePrefix)) {
174214
return t;
175215
}
176216
}
@@ -179,7 +219,7 @@ public vSphereCloudSlaveTemplate getTemplate(final String template) {
179219

180220
private List<vSphereCloudSlaveTemplate> getTemplates(final Label label) {
181221
if(this.templates==null)
182-
return Collections.EMPTY_LIST;
222+
return Collections.emptyList();
183223
List<vSphereCloudSlaveTemplate> matchingTemplates = new ArrayList<vSphereCloudSlaveTemplate>();
184224
for(vSphereCloudSlaveTemplate t : this.templates) {
185225
if(t.getMode() == Node.Mode.NORMAL) {
@@ -241,25 +281,30 @@ public boolean canProvision(Label label) {
241281
return !getTemplates(label).isEmpty();
242282
}
243283

244-
private boolean isOkToProvisionAnySlaves(VSphere vSphere) throws VSphereException {
284+
private Integer calculateMaxAdditionalSlavesPermitted() {
245285
if (this.instanceCap == 0 || this.instanceCap == Integer.MAX_VALUE) {
246-
return true;
286+
return null;
247287
}
248-
final int totalVms = vSphere.countVms();
249-
final boolean thereIsNoRoom = totalVms >= this.instanceCap;
288+
final int totalVms = templateState.countNodes();
289+
final int maxSlavesToProvision = this.instanceCap - totalVms;
290+
final boolean thereIsNoRoom = maxSlavesToProvision <= 0;
250291
VSLOG.info("There are " + totalVms + " VMs in this cloud. The instance cap for the cloud is "
251292
+ this.instanceCap + ", so we " + (thereIsNoRoom ? "are full" : "have room for more"));
252-
return !thereIsNoRoom;
293+
return Integer.valueOf(maxSlavesToProvision);
253294
}
254295

255296
@Override
256297
public Collection<PlannedNode> provision(final Label label, int excessWorkload) {
257298
final String methodCallDescription = "provision(" + label + "," + excessWorkload + ")";
258299
try {
259300
int excessWorkloadSoFar = excessWorkload;
301+
// First we see what our static slaves can do for us.
260302
int numberOfvSphereCloudSlaves = 0;
261303
int numberOfvSphereCloudSlaveExecutors = 0;
262304
for(vSphereCloudSlave n : NodeIterator.nodes(vSphereCloudSlave.class)) {
305+
if( n instanceof vSphereCloudProvisionedSlave) {
306+
continue; // ignore cloud slaves
307+
}
263308
if(n.getComputer().isOffline() && label.matches(n.getAssignedLabels())) {
264309
n.getComputer().tryReconnect();
265310
numberOfvSphereCloudSlaves++;
@@ -272,24 +317,33 @@ public Collection<PlannedNode> provision(final Label label, int excessWorkload)
272317
+ numberOfvSphereCloudSlaveExecutors + " executors): Workload is satisifed by bringing those online.");
273318
return Collections.emptySet();
274319
}
275-
final VSphere vSphere = vSphereInstance();
276-
if (!isOkToProvisionAnySlaves(vSphere)) {
277-
return Collections.emptySet();
278-
}
279-
final List<vSphereCloudSlaveTemplate> templates = getTemplates(label);
280-
final List<PlannedNode> plannedNodes = new ArrayList<PlannedNode>();
320+
// If we've got this far then our static slaves are insufficient to meet
321+
// demand and we should consider creating new slaves.
281322
synchronized(this) {
282323
ensureLists();
283324
}
325+
final List<PlannedNode> plannedNodes = new ArrayList<PlannedNode>();
284326
synchronized(templateState) {
285327
templateState.pruneUnwantedRecords();
328+
Integer maxSlavesToProvisionBeforeCloudCapHit = calculateMaxAdditionalSlavesPermitted();
329+
if (maxSlavesToProvisionBeforeCloudCapHit!=null && maxSlavesToProvisionBeforeCloudCapHit<=0) {
330+
return Collections.emptySet(); // no capacity due to cloud instance cap
331+
}
332+
final List<vSphereCloudSlaveTemplate> templates = getTemplates(label);
286333
final List<CloudProvisioningRecord> whatWeCouldUse = templateState.calculateProvisionableTemplates(templates);
287334
VSLOG.log(Level.INFO, methodCallDescription + ": " + numberOfvSphereCloudSlaves + " existing slaves (="
288335
+ numberOfvSphereCloudSlaveExecutors + " executors), templates available are " + whatWeCouldUse);
289-
while(excessWorkloadSoFar > 0) {
336+
while (excessWorkloadSoFar > 0) {
337+
if (maxSlavesToProvisionBeforeCloudCapHit != null) {
338+
final int intValue = maxSlavesToProvisionBeforeCloudCapHit.intValue();
339+
if (intValue <= 0) {
340+
break; // out of capacity due to cloud instance cap
341+
}
342+
maxSlavesToProvisionBeforeCloudCapHit = Integer.valueOf(intValue - 1);
343+
}
290344
final CloudProvisioningRecord whatWeShouldSpinUp = CloudProvisioningAlgorithm.findTemplateWithMostFreeCapacity(whatWeCouldUse);
291345
if (whatWeShouldSpinUp==null) {
292-
break; // out of capacity
346+
break; // out of capacity due to template instance cap
293347
}
294348
final PlannedNode plannedNode = VSpherePlannedNode.createInstance(templateState, whatWeShouldSpinUp);
295349
plannedNodes.add(plannedNode);
@@ -305,6 +359,38 @@ public Collection<PlannedNode> provision(final Label label, int excessWorkload)
305359
}
306360
}
307361

362+
/**
363+
* This is called by {@link vSphereCloudProvisionedSlave} instances once
364+
* they terminate, so we can take note of their passing and then destroy the
365+
* VM itself.
366+
*
367+
* @param cloneName
368+
* The name of the VM that's just terminated.
369+
*/
370+
void provisionedSlaveHasTerminated(final String cloneName) {
371+
synchronized(this) {
372+
ensureLists();
373+
}
374+
VSLOG.log(Level.FINER, "provisionedSlaveHasTerminated({0}): recording in our runtime state...", cloneName);
375+
// once we're done, remove our cached record.
376+
synchronized(templateState) {
377+
templateState.provisionedSlaveNowTerminated(cloneName);
378+
}
379+
VSLOG.log(Level.FINER, "provisionedSlaveHasTerminated({0}): destroying VM...", cloneName);
380+
VSphere vSphere = null;
381+
try {
382+
vSphere = vSphereInstance();
383+
vSphere.destroyVm(cloneName, false);
384+
VSLOG.log(Level.FINER, "provisionedSlaveHasTerminated({0}): VM destroyed.", cloneName);
385+
} catch (VSphereException ex) {
386+
VSLOG.log(Level.SEVERE, "provisionedSlaveHasTerminated(" + cloneName + "): Exception while trying to destroy VM", ex);
387+
} finally {
388+
if (vSphere != null) {
389+
vSphere.disconnect();
390+
}
391+
}
392+
}
393+
308394
static class VSpherePlannedNode extends PlannedNode {
309395
private VSpherePlannedNode(String displayName, Future<Node> future, int numExecutors) {
310396
super(displayName, future, numExecutors);
@@ -463,13 +549,11 @@ public boolean configure(StaplerRequest req, JSONObject o)
463549
* @param vsHost From UI.
464550
* @param vsDescription From UI.
465551
* @param credentialsId From UI.
466-
* @param maxOnlineSlaves From UI.
467552
* @return Result of the validation.
468553
*/
469554
public FormValidation doTestConnection(@QueryParameter String vsHost,
470555
@QueryParameter String vsDescription,
471-
@QueryParameter String credentialsId,
472-
@QueryParameter int maxOnlineSlaves) {
556+
@QueryParameter String credentialsId) {
473557
try {
474558
/* We know that these objects are not null */
475559
if (vsHost.length() == 0) {
@@ -505,5 +589,13 @@ else if (vsHost.endsWith("/")) {
505589
throw new RuntimeException(e);
506590
}
507591
}
592+
593+
public FormValidation doCheckMaxOnlineSlaves(@QueryParameter String maxOnlineSlaves) {
594+
return FormValidation.validateNonNegativeInteger(maxOnlineSlaves);
595+
}
596+
597+
public FormValidation doCheckInstanceCap(@QueryParameter String instanceCap) {
598+
return FormValidation.validateNonNegativeInteger(instanceCap);
599+
}
508600
}
509601
}

src/main/java/org/jenkinsci/plugins/vSphereCloudLauncher.java

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -144,14 +144,14 @@ public void launch(SlaveComputer slaveComputer, TaskListener taskListener)
144144
v = vsC.vSphereInstance();
145145
VirtualMachine vm = v.getVmByName(vmName);
146146
if (vm == null) {
147-
throw new IOException("Virtual Machine could not be found");
147+
throw new IOException("Virtual Machine '" + vmName + "' could not be found");
148148
}
149149

150150
// Revert to a snapshot - always - if one is specified.
151151
if (!snapName.isEmpty()) {
152152
VirtualMachineSnapshot snap = v.getSnapshotInTree(vm, snapName);
153153
if (snap == null) {
154-
throw new IOException("Virtual Machine snapshot cannot be found");
154+
throw new IOException("Virtual Machine '" + vmName + "' snapshot '" + snapName + "' cannot be found");
155155
}
156156

157157
vSphereCloud.Log(slaveComputer, taskListener, "Reverting to snapshot:" + snapName);
@@ -194,13 +194,21 @@ public void launch(SlaveComputer slaveComputer, TaskListener taskListener)
194194
* Now we wait our launch delay amount before trying to connect.
195195
*/
196196
if (delegate.isLaunchSupported()) {
197-
// Delegate is going to do launch.
198-
Thread.sleep(launchDelay * 1000);
197+
if (launchDelay > 0) {
198+
vSphereCloud.Log(slaveComputer, taskListener, "Waiting for " + launchDelay
199+
+ " seconds before asking " + delegate + " to launch slave.");
200+
// Delegate is going to do launch.
201+
Thread.sleep(launchDelay * 1000);
202+
}
203+
vSphereCloud.Log(slaveComputer, taskListener, "Asking " + delegate.getClass().getSimpleName() + " to launch slave.");
199204
delegate.launch(slaveComputer, taskListener);
200205
} else {
206+
vSphereCloud.Log(slaveComputer, taskListener, "Waiting for up to " + launchDelay
207+
+ " seconds for slave to come online.");
201208
for (int i = 0; i <= launchDelay; i++) {
202209
Thread.sleep(1000);
203210
if (slaveComputer.isOnline()) {
211+
vSphereCloud.Log(slaveComputer, taskListener, "Slave has come online.");
204212
break;
205213
}
206214
}
@@ -209,9 +217,9 @@ public void launch(SlaveComputer slaveComputer, TaskListener taskListener)
209217
throw new IOException("Slave did not come online in allowed time");
210218
}
211219
}
220+
vSphereCloud.Log(slaveComputer, taskListener, "Slave online");
212221
} catch (final Exception e) {
213-
vSphereCloud.Log(slaveComputer, taskListener, "EXCEPTION while starting VM");
214-
vSphereCloud.Log(slaveComputer, taskListener, e.getMessage());
222+
vSphereCloud.Log(slaveComputer, taskListener, e, "EXCEPTION while starting VM");
215223
vsC.markVMOffline(slaveComputer.getDisplayName(), vmName);
216224
throw new RuntimeException(e);
217225
} finally {
@@ -220,6 +228,8 @@ public void launch(SlaveComputer slaveComputer, TaskListener taskListener)
220228
if (v != null)
221229
v.disconnect();
222230
}
231+
} catch (final RuntimeException e) {
232+
throw e;
223233
} catch (final Exception e) {
224234
throw new RuntimeException(e);
225235
}
@@ -301,12 +311,11 @@ public synchronized void afterDisconnect(SlaveComputer slaveComputer, TaskListen
301311
// VM is already powered down.
302312
}
303313
}
304-
if (v != null)
314+
if (v != null) {
305315
v.disconnect();
316+
}
306317
} catch (Throwable t) {
307-
vSphereCloud.Log(slaveComputer, taskListener, "Got an exception");
308-
vSphereCloud.Log(slaveComputer, taskListener, t.toString());
309-
vSphereCloud.Log(slaveComputer, taskListener, "Printed exception");
318+
vSphereCloud.Log(slaveComputer, taskListener, t, "Got an exception");
310319
taskListener.fatalError(t.getMessage(), t);
311320
} finally {
312321
vsSlave.slaveIsDisconnecting = Boolean.FALSE;
@@ -391,10 +400,8 @@ private void shutdownVM(VirtualMachine vm, SlaveComputer slaveComputer, TaskList
391400
Thread.sleep(5000);
392401
}
393402
} catch (Throwable t) {
394-
vSphereCloud.Log(slaveComputer, taskListener,
403+
vSphereCloud.Log(slaveComputer, taskListener, t,
395404
"Got an exception while attempting a graceful shutdown");
396-
vSphereCloud.Log(slaveComputer, taskListener, t.toString());
397-
vSphereCloud.Log(slaveComputer, taskListener, "Printed exception");
398405
vSphereCloud.Log(slaveComputer, taskListener, "Will now attempt a hard power down");
399406
}
400407
}
@@ -444,7 +451,7 @@ private void suspendVM(VirtualMachine vm, SlaveComputer slaveComputer, TaskListe
444451
vSphereCloud.Log(slaveComputer, taskListener, "Suspending the VM");
445452
Task task = vm.suspendVM_Task();
446453
if (!task.waitForTask().equals(Task.SUCCESS)) {
447-
vSphereCloud.Log(slaveComputer, taskListener, "Unable to susped the VM");
454+
vSphereCloud.Log(slaveComputer, taskListener, "Unable to suspend the VM");
448455
}
449456
}
450457
}

0 commit comments

Comments
 (0)