Skip to content

Commit 353da20

Browse files
committed
Fix initialization race conditions and improve error resilience.
- `getServerByName` now propagates errors from the internal `set-name` request instead of silently swallowing them. - `onStart` failures no longer permanently brick the Durable Object. Errors are caught inside `blockConcurrencyWhile` (preserving the input gate) and the status is reset, allowing subsequent requests to retry initialization. - `fetch()` now retries initialization when a previous `onStart` attempt failed, instead of skipping it because the name was already set. - Errors in `fetch()` (including `onStart` failures and malformed props) are now caught and returned as proper 500 responses instead of crashing as unhandled exceptions. - WebSocket handlers (`webSocketMessage`, `webSocketClose`, `webSocketError`) are now wrapped in try/catch so that transient `onStart` failures don't kill the connection — the next message will retry.
1 parent 2c8ab5f commit 353da20

12 files changed

Lines changed: 239 additions & 76 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
"partyserver": patch
3+
---
4+
5+
Fix initialization race conditions and improve error resilience.
6+
7+
- `getServerByName` now propagates errors from the internal `set-name` request instead of silently swallowing them.
8+
- `onStart` failures no longer permanently brick the Durable Object. Errors are caught inside `blockConcurrencyWhile` (preserving the input gate) and the status is reset, allowing subsequent requests to retry initialization.
9+
- `fetch()` now retries initialization when a previous `onStart` attempt failed, instead of skipping it because the name was already set.
10+
- Errors in `fetch()` (including `onStart` failures and malformed props) are now caught and returned as proper 500 responses instead of crashing as unhandled exceptions.
11+
- WebSocket handlers (`webSocketMessage`, `webSocketClose`, `webSocketError`) are now wrapped in try/catch so that transient `onStart` failures don't kill the connection — the next message will retry.

package-lock.json

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/hono-party/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,6 @@
3737
"devDependencies": {
3838
"@cloudflare/workers-types": "^4.20251218.0",
3939
"hono": "^4.11.1",
40-
"partyserver": "^0.1.3"
40+
"partyserver": "^0.1.4"
4141
}
4242
}

packages/partyfn/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
],
2020
"dependencies": {
2121
"nanoid": "^5.1.6",
22-
"partysocket": "^1.1.12"
22+
"partysocket": "^1.1.13"
2323
},
2424
"scripts": {
2525
"build": "tsx scripts/build.ts"

packages/partyserver/src/index.ts

Lines changed: 70 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,7 @@ export async function getServerByName<
6767
}
6868

6969
// unfortunately we have to await this
70-
await stub
71-
.fetch(req)
72-
// drain body
73-
.then((res) => res.text())
74-
.catch((e) => {
75-
console.error("Could not set server name:", e);
76-
});
70+
await stub.fetch(req).then((res) => res.text());
7771

7872
return stub;
7973
}
@@ -366,34 +360,32 @@ export class Server<
366360
* Handle incoming requests to the server.
367361
*/
368362
async fetch(request: Request): Promise<Response> {
369-
// Set the props in-mem if the request included them.
370-
const props = request.headers.get("x-partykit-props");
371-
if (props) {
372-
try {
363+
try {
364+
// Set the props in-mem if the request included them.
365+
const props = request.headers.get("x-partykit-props");
366+
if (props) {
373367
this.#_props = JSON.parse(props);
374-
} catch {
375-
// This should never happen but log it just in case
376-
console.error("Internal error parsing context props.");
377368
}
378-
}
369+
if (!this.#_name) {
370+
// This is temporary while we solve https://github.com/cloudflare/workerd/issues/2240
379371

380-
if (!this.#_name) {
381-
// This is temporary while we solve https://github.com/cloudflare/workerd/issues/2240
382-
383-
// get namespace and room from headers
384-
// const namespace = request.headers.get("x-partykit-namespace");
385-
const room = request.headers.get("x-partykit-room");
386-
if (
387-
// !namespace ||
388-
!room
389-
) {
390-
throw new Error(`Missing namespace or room headers when connecting to ${this.#ParentClass.name}.
372+
// get namespace and room from headers
373+
// const namespace = request.headers.get("x-partykit-namespace");
374+
const room = request.headers.get("x-partykit-room");
375+
if (
376+
// !namespace ||
377+
!room
378+
) {
379+
throw new Error(`Missing namespace or room headers when connecting to ${this.#ParentClass.name}.
391380
Did you try connecting directly to this Durable Object? Try using getServerByName(namespace, id) instead.`);
381+
}
382+
await this.setName(room);
383+
} else if (this.#status !== "started") {
384+
// Name was set by a previous request but initialization failed.
385+
// Retry initialization so the server can recover from transient
386+
// onStart failures.
387+
await this.#initialize();
392388
}
393-
await this.setName(room);
394-
}
395-
396-
try {
397389
const url = new URL(request.url);
398390

399391
// TODO: this is a hack to set the server name,
@@ -450,7 +442,7 @@ Did you try connecting directly to this Durable Object? Try using getServerByNam
450442
}
451443
} catch (err) {
452444
console.error(
453-
`Error in ${this.#ParentClass.name}:${this.name} fetch:`,
445+
`Error in ${this.#ParentClass.name}:${this.#_name ?? "<unnamed>"} fetch:`,
454446
err
455447
);
456448
if (!(err instanceof Error)) throw err;
@@ -477,13 +469,20 @@ Did you try connecting directly to this Durable Object? Try using getServerByNam
477469
return;
478470
}
479471

480-
const connection = createLazyConnection(ws);
472+
try {
473+
const connection = createLazyConnection(ws);
481474

482-
// rehydrate the server name if it's woken up
483-
await this.setName(connection.server);
484-
// TODO: ^ this shouldn't be async
475+
// rehydrate the server name if it's woken up
476+
await this.setName(connection.server);
477+
// TODO: ^ this shouldn't be async
485478

486-
return this.onMessage(connection, message);
479+
return this.onMessage(connection, message);
480+
} catch (e) {
481+
console.error(
482+
`Error in ${this.#ParentClass.name}:${this.#_name ?? "<unnamed>"} webSocketMessage:`,
483+
e
484+
);
485+
}
487486
}
488487

489488
async webSocketClose(
@@ -496,35 +495,58 @@ Did you try connecting directly to this Durable Object? Try using getServerByNam
496495
return;
497496
}
498497

499-
const connection = createLazyConnection(ws);
498+
try {
499+
const connection = createLazyConnection(ws);
500500

501-
// rehydrate the server name if it's woken up
502-
await this.setName(connection.server);
503-
// TODO: ^ this shouldn't be async
501+
// rehydrate the server name if it's woken up
502+
await this.setName(connection.server);
503+
// TODO: ^ this shouldn't be async
504504

505-
return this.onClose(connection, code, reason, wasClean);
505+
return this.onClose(connection, code, reason, wasClean);
506+
} catch (e) {
507+
console.error(
508+
`Error in ${this.#ParentClass.name}:${this.#_name ?? "<unnamed>"} webSocketClose:`,
509+
e
510+
);
511+
}
506512
}
507513

508514
async webSocketError(ws: WebSocket, error: unknown): Promise<void> {
509515
if (!isPartyServerWebSocket(ws)) {
510516
return;
511517
}
512518

513-
const connection = createLazyConnection(ws);
519+
try {
520+
const connection = createLazyConnection(ws);
514521

515-
// rehydrate the server name if it's woken up
516-
await this.setName(connection.server);
517-
// TODO: ^ this shouldn't be async
522+
// rehydrate the server name if it's woken up
523+
await this.setName(connection.server);
524+
// TODO: ^ this shouldn't be async
518525

519-
return this.onError(connection, error);
526+
return this.onError(connection, error);
527+
} catch (e) {
528+
console.error(
529+
`Error in ${this.#ParentClass.name}:${this.#_name ?? "<unnamed>"} webSocketError:`,
530+
e
531+
);
532+
}
520533
}
521534

522535
async #initialize(): Promise<void> {
536+
let error: unknown;
523537
await this.ctx.blockConcurrencyWhile(async () => {
524538
this.#status = "starting";
525-
await this.onStart(this.#_props);
526-
this.#status = "started";
539+
try {
540+
await this.onStart(this.#_props);
541+
this.#status = "started";
542+
} catch (e) {
543+
this.#status = "zero";
544+
error = e;
545+
}
527546
});
547+
// Re-throw outside blockConcurrencyWhile so the DO's input gate
548+
// isn't permanently broken, allowing subsequent requests to retry.
549+
if (error) throw error;
528550
}
529551

530552
#attachSocketEventHandlers(connection: Connection) {

packages/partyserver/src/tests/index.test.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,86 @@ describe("Hibernating Server (setName handles initialization)", () => {
402402
});
403403
});
404404

405+
describe("Error handling", () => {
406+
it("returns 500 with useful message when room header is missing", async () => {
407+
// Send a request directly to a DO stub without the x-partykit-room header.
408+
// This should return a 500 with the "Missing namespace or room headers"
409+
// message, NOT crash with "Attempting to read .name before it was set".
410+
const id = env.Stateful.idFromName("no-header-test");
411+
const stub = env.Stateful.get(id);
412+
const response = await stub.fetch(
413+
new Request("http://example.com/some-path")
414+
);
415+
expect(response.status).toBe(500);
416+
const body = await response.text();
417+
expect(body).toContain("Missing namespace or room headers");
418+
});
419+
});
420+
421+
describe("onStart failure recovery", () => {
422+
it("resets status so subsequent requests can retry initialization", async () => {
423+
const ctx = createExecutionContext();
424+
425+
// First request: onStart throws on first attempt, returns 500
426+
const request1 = new Request(
427+
"http://example.com/parties/failing-on-start-server/recovery-test"
428+
);
429+
const response1 = await worker.fetch(request1, env, ctx);
430+
expect(response1.status).toBe(500);
431+
432+
// Second request: onStart should succeed on the retry because
433+
// the status was reset to "zero" (not stuck at "starting"), and
434+
// the error was caught inside blockConcurrencyWhile so the DO's
435+
// input gate wasn't permanently broken.
436+
const request2 = new Request(
437+
"http://example.com/parties/failing-on-start-server/recovery-test"
438+
);
439+
const response2 = await worker.fetch(request2, env, ctx);
440+
expect(response2.status).toBe(200);
441+
const data = (await response2.json()) as {
442+
counter: number;
443+
failCount: number;
444+
};
445+
// counter is 2 because onStart ran twice (first failed, second succeeded)
446+
expect(data.counter).toEqual(2);
447+
expect(data.failCount).toEqual(1);
448+
});
449+
});
450+
451+
describe("Hibernating server name rehydration", () => {
452+
it("this.name is available in onMessage after wake-up", async () => {
453+
const ctx = createExecutionContext();
454+
const request = new Request(
455+
"http://example.com/parties/hibernating-name-in-message/rehydrate-test",
456+
{
457+
headers: { Upgrade: "websocket" }
458+
}
459+
);
460+
const response = await worker.fetch(request, env, ctx);
461+
const ws = response.webSocket!;
462+
ws.accept();
463+
464+
// Wait for the onConnect message which includes the name
465+
const connectMessage = await new Promise<string>((resolve) => {
466+
ws.addEventListener("message", (e) => resolve(e.data as string), {
467+
once: true
468+
});
469+
});
470+
expect(connectMessage).toEqual("connected:rehydrate-test");
471+
472+
// Send a message to trigger onMessage, which also reads this.name
473+
ws.send("ping");
474+
const nameMessage = await new Promise<string>((resolve) => {
475+
ws.addEventListener("message", (e) => resolve(e.data as string), {
476+
once: true
477+
});
478+
});
479+
expect(nameMessage).toEqual("name:rehydrate-test");
480+
481+
ws.close();
482+
});
483+
});
484+
405485
describe("Alarm (initialize without redundant blockConcurrencyWhile)", () => {
406486
it("properly initializes on alarm and calls onAlarm", async () => {
407487
// Use a single stub for the entire test so runDurableObjectAlarm

0 commit comments

Comments
 (0)