Skip to content

Commit cbda0c1

Browse files
easyCZ authored and roboquat committed
[ws-man-bridge] Document when unknown workspace instance occurs
1 parent 1998a98 commit cbda0c1

File tree

1 file changed

+110
-38
lines changed

1 file changed

+110
-38
lines changed

components/ws-manager-bridge/src/bridge.ts

Lines changed: 110 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,29 @@
66

77
import { inject, injectable, interfaces } from "inversify";
88
import { MessageBusIntegration } from "./messagebus-integration";
9-
import { Disposable, WorkspaceInstance, Queue, WorkspaceInstancePort, PortVisibility, RunningWorkspaceInfo, DisposableCollection } from "@gitpod/gitpod-protocol";
10-
import { WorkspaceStatus, WorkspacePhase, GetWorkspacesRequest, WorkspaceConditionBool, PortVisibility as WsManPortVisibility, PromisifiedWorkspaceManagerClient } from "@gitpod/ws-manager/lib";
9+
import {
10+
Disposable,
11+
WorkspaceInstance,
12+
Queue,
13+
WorkspaceInstancePort,
14+
PortVisibility,
15+
RunningWorkspaceInfo,
16+
DisposableCollection,
17+
} from "@gitpod/gitpod-protocol";
18+
import {
19+
WorkspaceStatus,
20+
WorkspacePhase,
21+
GetWorkspacesRequest,
22+
WorkspaceConditionBool,
23+
PortVisibility as WsManPortVisibility,
24+
PromisifiedWorkspaceManagerClient,
25+
} from "@gitpod/ws-manager/lib";
1126
import { WorkspaceDB } from "@gitpod/gitpod-db/lib/workspace-db";
1227
import { UserDB } from "@gitpod/gitpod-db/lib/user-db";
13-
import { log } from '@gitpod/gitpod-protocol/lib/util/logging';
28+
import { log } from "@gitpod/gitpod-protocol/lib/util/logging";
1429
import { TraceContext } from "@gitpod/gitpod-protocol/lib/util/tracing";
1530
import { IAnalyticsWriter } from "@gitpod/gitpod-protocol/lib/analytics";
16-
import { TracedWorkspaceDB, TracedUserDB, DBWithTracing } from '@gitpod/gitpod-db/lib/traced-db';
31+
import { TracedWorkspaceDB, TracedUserDB, DBWithTracing } from "@gitpod/gitpod-db/lib/traced-db";
1732
import { PrometheusMetricsExporter } from "./prometheus-metrics-exporter";
1833
import { ClientProvider, WsmanSubscriber } from "./wsman-subscriber";
1934
import { Timestamp } from "google-protobuf/google/protobuf/timestamp_pb";
@@ -71,7 +86,7 @@ export class WorkspaceManagerBridge implements Disposable {
7186
log.debug(`Starting status update handler: ${cluster.name}`, logPayload);
7287
/* no await */ this.startStatusUpdateHandler(clientProvider, writeToDB, logPayload)
7388
// this is a mere safe-guard: we do not expect the code inside to fail
74-
.catch(err => log.error("Cannot start status update handler", err));
89+
.catch((err) => log.error("Cannot start status update handler", err));
7590
};
7691

7792
if (cluster.govern) {
@@ -102,20 +117,41 @@ export class WorkspaceManagerBridge implements Disposable {
102117
this.dispose();
103118
}
104119

105-
protected async startStatusUpdateHandler(clientProvider: ClientProvider, writeToDB: boolean, logPayload: {}): Promise<void> {
120+
protected async startStatusUpdateHandler(
121+
clientProvider: ClientProvider,
122+
writeToDB: boolean,
123+
logPayload: {},
124+
): Promise<void> {
106125
const subscriber = new WsmanSubscriber(clientProvider);
107126
this.disposables.push(subscriber);
108127

109128
const onReconnect = (ctx: TraceContext, s: WorkspaceStatus[]) => {
110-
s.forEach(sx => this.serializeMessagesByInstanceId<WorkspaceStatus>(ctx, sx, m => m.getId(), (ctx, msg) => this.handleStatusUpdate(ctx, msg, writeToDB)))
129+
s.forEach((sx) =>
130+
this.serializeMessagesByInstanceId<WorkspaceStatus>(
131+
ctx,
132+
sx,
133+
(m) => m.getId(),
134+
(ctx, msg) => this.handleStatusUpdate(ctx, msg, writeToDB),
135+
),
136+
);
111137
};
112138
const onStatusUpdate = (ctx: TraceContext, s: WorkspaceStatus) => {
113-
this.serializeMessagesByInstanceId<WorkspaceStatus>(ctx, s, msg => msg.getId(), (ctx, s) => this.handleStatusUpdate(ctx, s, writeToDB))
139+
this.serializeMessagesByInstanceId<WorkspaceStatus>(
140+
ctx,
141+
s,
142+
(msg) => msg.getId(),
143+
(ctx, s) => this.handleStatusUpdate(ctx, s, writeToDB),
144+
);
114145
};
115146
await subscriber.subscribe({ onReconnect, onStatusUpdate }, logPayload);
116147
}
117148

118-
protected serializeMessagesByInstanceId<M>(ctx: TraceContext, msg: M, getInstanceId: (msg: M) => string, handler: (ctx: TraceContext, msg: M) => Promise<void>) {
149+
protected serializeMessagesByInstanceId<M>(
150+
ctx: TraceContext,
151+
msg: M,
152+
getInstanceId: (msg: M) => string,
153+
handler: (ctx: TraceContext, msg: M) => Promise<void>,
154+
) {
119155
const instanceId = getInstanceId(msg);
120156
if (!instanceId) {
121157
log.warn("Received invalid message, could not read instanceId!", { msg });
@@ -125,7 +161,7 @@ export class WorkspaceManagerBridge implements Disposable {
125161
// We can't just handle the status update directly, but have to "serialize" it to ensure the updates stay in order.
126162
// If we did not do this, the async nature of our code would allow for one message to overtake the other.
127163
let q = this.queues.get(instanceId) || new Queue();
128-
q.enqueue(() => handler(ctx, msg)).catch(e => log.error({instanceId}, e));
164+
q.enqueue(() => handler(ctx, msg)).catch((e) => log.error({ instanceId }, e));
129165
this.queues.set(instanceId, q);
130166
}
131167

@@ -149,17 +185,21 @@ export class WorkspaceManagerBridge implements Disposable {
149185
const userId = status.metadata!.owner!;
150186
const logCtx = { instanceId, workspaceId, userId };
151187

152-
const instance = await this.workspaceDB.trace({span}).findInstanceById(instanceId);
188+
const instance = await this.workspaceDB.trace({ span }).findInstanceById(instanceId);
153189
if (instance) {
154190
this.prometheusExporter.statusUpdateReceived(this.cluster.name, true);
155191
} else {
192+
// This scenario happens when the update for a WorkspaceInstance is picked up by a ws-manager-bridge in a different region,
193+
// before db-sync finished running. This is because all ws-manager-bridge instances receive updates from all WorkspaceClusters.
194+
// We ignore this update because we do not have anything to reconcile this update against, but also because we assume it is handled
195+
// by another instance of ws-manager-bridge that is in the region where the WorkspaceInstance record was created.
156196
this.prometheusExporter.statusUpdateReceived(this.cluster.name, false);
157197
log.warn(logCtx, "Received a status update for an unknown instance", { status });
158198
return;
159199
}
160200

161201
if (!!status.spec.exposedPortsList) {
162-
instance.status.exposedPorts = status.spec.exposedPortsList.map(p => {
202+
instance.status.exposedPorts = status.spec.exposedPortsList.map((p) => {
163203
return <WorkspaceInstancePort>{
164204
port: p.port,
165205
visibility: mapPortVisibility(p.visibility),
@@ -180,7 +220,9 @@ export class WorkspaceManagerBridge implements Disposable {
180220
instance.status.conditions.pullingImages = toBool(status.conditions.pullingImages!);
181221
instance.status.conditions.deployed = toBool(status.conditions.deployed);
182222
instance.status.conditions.timeout = status.conditions.timeout;
183-
instance.status.conditions.firstUserActivity = mapFirstUserActivity(rawStatus.getConditions()!.getFirstUserActivity());
223+
instance.status.conditions.firstUserActivity = mapFirstUserActivity(
224+
rawStatus.getConditions()!.getFirstUserActivity(),
225+
);
184226
instance.status.conditions.headlessTaskFailed = status.conditions.headlessTaskFailed;
185227
instance.status.conditions.stoppedByRequest = toBool(status.conditions.stoppedByRequest);
186228
instance.status.message = status.message;
@@ -191,7 +233,7 @@ export class WorkspaceManagerBridge implements Disposable {
191233

192234
if (status.repo) {
193235
const r = status.repo;
194-
const undefinedIfEmpty = <T>(l: T[]) => l.length > 0 ? l : undefined;
236+
const undefinedIfEmpty = <T>(l: T[]) => (l.length > 0 ? l : undefined);
195237

196238
instance.status.repo = {
197239
branch: r.branch,
@@ -201,8 +243,8 @@ export class WorkspaceManagerBridge implements Disposable {
201243
unpushedCommits: undefinedIfEmpty(r.unpushedCommitsList),
202244
totalUntrackedFiles: r.totalUntrackedFiles,
203245
untrackedFiles: undefinedIfEmpty(r.untrackedFilesList),
204-
totalUnpushedCommits: r.totalUnpushedCommits
205-
}
246+
totalUnpushedCommits: r.totalUnpushedCommits,
247+
};
206248
}
207249

208250
if (instance.status.conditions.deployed && !instance.deployedTime) {
@@ -238,7 +280,7 @@ export class WorkspaceManagerBridge implements Disposable {
238280
instance.status.phase = "interrupted";
239281
break;
240282
case WorkspacePhase.STOPPING:
241-
if (instance.status.phase != 'stopped') {
283+
if (instance.status.phase != "stopped") {
242284
instance.status.phase = "stopping";
243285
if (!instance.stoppingTime) {
244286
// The first time a workspace enters stopping we record that as it's stoppingTime time.
@@ -259,14 +301,14 @@ export class WorkspaceManagerBridge implements Disposable {
259301
// yet. Just for this case we need to set it now.
260302
instance.stoppingTime = now;
261303
}
262-
lifecycleHandler = () => this.onInstanceStopped({span}, userId, instance);
304+
lifecycleHandler = () => this.onInstanceStopped({ span }, userId, instance);
263305
break;
264306
}
265307

266308
span.setTag("after", JSON.stringify(instance));
267309

268310
// now notify all prebuild listeners about updates - and update DB if needed
269-
await this.updatePrebuiltWorkspace({span}, userId, status, writeToDB);
311+
await this.updatePrebuiltWorkspace({ span }, userId, status, writeToDB);
270312

271313
if (writeToDB) {
272314
await this.workspaceDB.trace(ctx).storeInstance(instance);
@@ -280,56 +322,78 @@ export class WorkspaceManagerBridge implements Disposable {
280322
}
281323
}
282324
await this.messagebus.notifyOnInstanceUpdate(ctx, userId, instance);
283-
284325
} catch (e) {
285-
TraceContext.setError({span}, e);
326+
TraceContext.setError({ span }, e);
286327
throw e;
287328
} finally {
288329
span.finish();
289330
}
290331
}
291332

292-
protected startController(clientProvider: ClientProvider, controllerIntervalSeconds: number, controllerMaxDisconnectSeconds: number, maxTimeToRunningPhaseSeconds = 60 * 60) {
333+
protected startController(
334+
clientProvider: ClientProvider,
335+
controllerIntervalSeconds: number,
336+
controllerMaxDisconnectSeconds: number,
337+
maxTimeToRunningPhaseSeconds = 60 * 60,
338+
) {
293339
let disconnectStarted = Number.MAX_SAFE_INTEGER;
294340
this.disposables.push(
295341
repeat(async () => {
296342
try {
297343
const client = await clientProvider();
298344
await this.controlInstallationInstances(client, maxTimeToRunningPhaseSeconds);
299345

300-
disconnectStarted = Number.MAX_SAFE_INTEGER; // Reset disconnect period
346+
disconnectStarted = Number.MAX_SAFE_INTEGER; // Reset disconnect period
301347
} catch (e) {
302348
if (durationLongerThanSeconds(disconnectStarted, controllerMaxDisconnectSeconds)) {
303-
log.warn("Error while controlling installation's workspaces", e, { installation: this.cluster.name });
349+
log.warn("Error while controlling installation's workspaces", e, {
350+
installation: this.cluster.name,
351+
});
304352
} else if (disconnectStarted > Date.now()) {
305353
disconnectStarted = Date.now();
306354
}
307355
}
308-
}, controllerIntervalSeconds * 1000)
356+
}, controllerIntervalSeconds * 1000),
309357
);
310358
}
311359

312-
protected async controlInstallationInstances(client: PromisifiedWorkspaceManagerClient, maxTimeToRunningPhaseSeconds: number) {
360+
protected async controlInstallationInstances(
361+
client: PromisifiedWorkspaceManagerClient,
362+
maxTimeToRunningPhaseSeconds: number,
363+
) {
313364
const installation = this.cluster.name;
314365
log.debug("controlling instances", { installation });
315366
let ctx: TraceContext = {};
316367

317368
const runningInstances = await this.workspaceDB.trace(ctx).findRunningInstancesWithWorkspaces(installation);
318369
const runningInstancesIdx = new Map<string, RunningWorkspaceInfo>();
319-
runningInstances.forEach(i => runningInstancesIdx.set(i.latestInstance.id, i));
370+
runningInstances.forEach((i) => runningInstancesIdx.set(i.latestInstance.id, i));
320371

321372
const actuallyRunningInstances = await client.getWorkspaces(ctx, new GetWorkspacesRequest());
322-
actuallyRunningInstances.getStatusList().forEach(s => runningInstancesIdx.delete(s.getId()));
373+
actuallyRunningInstances.getStatusList().forEach((s) => runningInstancesIdx.delete(s.getId()));
323374

324375
const promises: Promise<any>[] = [];
325376
for (const [instanceId, ri] of runningInstancesIdx.entries()) {
326377
const instance = ri.latestInstance;
327-
if (!(instance.status.phase === 'running' || durationLongerThanSeconds(Date.parse(instance.creationTime), maxTimeToRunningPhaseSeconds))) {
328-
log.debug({ instanceId }, "Skipping instance", { phase: instance.status.phase, creationTime: instance.creationTime, region: instance.region });
378+
if (
379+
!(
380+
instance.status.phase === "running" ||
381+
durationLongerThanSeconds(Date.parse(instance.creationTime), maxTimeToRunningPhaseSeconds)
382+
)
383+
) {
384+
log.debug({ instanceId }, "Skipping instance", {
385+
phase: instance.status.phase,
386+
creationTime: instance.creationTime,
387+
region: instance.region,
388+
});
329389
continue;
330390
}
331391

332-
log.info({instanceId, workspaceId: instance.workspaceId}, "Database says the instance is starting for too long or running, but wsman does not know about it. Marking as stopped in database.", {installation});
392+
log.info(
393+
{ instanceId, workspaceId: instance.workspaceId },
394+
"Database says the instance is starting for too long or running, but wsman does not know about it. Marking as stopped in database.",
395+
{ installation },
396+
);
333397
instance.status.phase = "stopped";
334398
instance.stoppingTime = new Date().toISOString();
335399
instance.stoppedTime = new Date().toISOString();
@@ -344,27 +408,36 @@ export class WorkspaceManagerBridge implements Disposable {
344408
// probes are an EE feature - we just need the hook here
345409
}
346410

347-
protected async updatePrebuiltWorkspace(ctx: TraceContext, userId: string, status: WorkspaceStatus.AsObject, writeToDB: boolean) {
411+
protected async updatePrebuiltWorkspace(
412+
ctx: TraceContext,
413+
userId: string,
414+
status: WorkspaceStatus.AsObject,
415+
writeToDB: boolean,
416+
) {
348417
// prebuilds are an EE feature - we just need the hook here
349418
}
350419

351420
protected async stopPrebuildInstance(ctx: TraceContext, instance: WorkspaceInstance): Promise<void> {
352421
// prebuilds are an EE feature - we just need the hook here
353422
}
354423

355-
protected async onInstanceStopped(ctx: TraceContext, ownerUserID: string, instance: WorkspaceInstance): Promise<void> {
424+
protected async onInstanceStopped(
425+
ctx: TraceContext,
426+
ownerUserID: string,
427+
instance: WorkspaceInstance,
428+
): Promise<void> {
356429
const span = TraceContext.startSpan("onInstanceStopped", ctx);
357430

358431
try {
359-
await this.userDB.trace({span}).deleteGitpodTokensNamedLike(ownerUserID, `${instance.id}-%`);
432+
await this.userDB.trace({ span }).deleteGitpodTokensNamedLike(ownerUserID, `${instance.id}-%`);
360433
this.analytics.track({
361434
userId: ownerUserID,
362435
event: "workspace_stopped",
363436
messageId: `bridge-wsstopped-${instance.id}`,
364-
properties: { "instanceId": instance.id, "workspaceId": instance.workspaceId }
437+
properties: { instanceId: instance.id, workspaceId: instance.workspaceId },
365438
});
366439
} catch (err) {
367-
TraceContext.setError({span}, err);
440+
TraceContext.setError({ span }, err);
368441
throw err;
369442
} finally {
370443
span.finish();
@@ -374,7 +447,6 @@ export class WorkspaceManagerBridge implements Disposable {
374447
public dispose() {
375448
this.disposables.dispose();
376449
}
377-
378450
}
379451

380452
const mapFirstUserActivity = (firstUserActivity: Timestamp | undefined): string | undefined => {
@@ -413,4 +485,4 @@ const filterStatus = (status: WorkspaceStatus.AsObject): Partial<WorkspaceStatus
413485
conditions: status.conditions,
414486
runtime: status.runtime,
415487
};
416-
}
488+
};

0 commit comments

Comments
 (0)