From e9b2a9a4e4e63a8c045c1ac9e5d07fec3d4f2535 Mon Sep 17 00:00:00 2001
From: Sunil Pai <spai@cloudflare.com>
Date: Thu, 23 Apr 2026 08:48:54 -0400
Subject: [PATCH 1/2] fix(workers-ai-provider): forward reasoning_effort and
 chat_template_kwargs (#501)

`modelSettings` passed to the provider were flowing through `getRunOptions()`
into the 3rd arg (options) of `binding.run(model, inputs, options)`, but
Cloudflare Workers AI's `reasoning_effort` and `chat_template_kwargs`
parameters belong on the 2nd arg (inputs). As a result they were silently
dropped, causing reasoning models (GLM-4.7-flash, Kimi K2.5/K2.6, GPT-OSS,
QwQ) to burn the entire output token budget on chain-of-thought.

- Type `reasoning_effort` and `chat_template_kwargs` directly on
  `WorkersAIChatSettings`.
- In `buildRunInputs()`, pull both values from settings and from
  `providerOptions["workers-ai"]` (per-call wins) and place them on the
  inputs object. `reasoning_effort: null` is preserved (`!== undefined`
  check) because it's the explicit "disable reasoning" signal.
- In `getRunOptions()`, strip them from `passthroughOptions` so they don't
  leak into the binding's options arg or the REST URL query string.
- Wire `options.providerOptions` through `doGenerate` and `doStream` so
  per-call overrides work without settings.

Adds 9 tests covering binding inputs placement, REST body placement,
null preservation, no leakage into options/query, per-call override,
streaming, and unrelated settings passthrough (no regression).

Closes #501.

Made-with: Cursor
---
 ...rkers-ai-provider-reasoning-passthrough.md |  28 ++
 packages/workers-ai-provider/README.md        |  29 ++
 .../src/workersai-chat-language-model.ts      |  39 ++-
 .../src/workersai-chat-settings.ts            |  23 ++
 .../test/text-generation.test.ts              | 267 ++++++++++++++++++
 5 files changed, 383 insertions(+), 3 deletions(-)
 create mode 100644 .changeset/workers-ai-provider-reasoning-passthrough.md

diff --git a/.changeset/workers-ai-provider-reasoning-passthrough.md b/.changeset/workers-ai-provider-reasoning-passthrough.md
new file mode 100644
index 000000000..1a7b7f317
--- /dev/null
+++ b/.changeset/workers-ai-provider-reasoning-passthrough.md
@@ -0,0 +1,28 @@
+---
+"workers-ai-provider": minor
+---
+
+Forward `reasoning_effort` and `chat_template_kwargs` onto `binding.run(model, inputs)`'s `inputs` object instead of silently dropping them into the options arg / REST query string. This fixes reasoning models (GLM-4.7-flash, Kimi K2.5/K2.6, GPT-OSS, QwQ) burning the entire output token budget on chain-of-thought with no visible content.
+
+Both settings-level and per-call usage are supported:
+
+```ts
+// Settings-level
+const model = workersai("@cf/zai-org/glm-4.7-flash", {
+  reasoning_effort: "low",
+  chat_template_kwargs: { enable_thinking: false },
+});
+
+// Per-call (overrides settings)
+await generateText({
+  model,
+  prompt,
+  providerOptions: {
+    "workers-ai": { reasoning_effort: "low" },
+  },
+});
+```
+
+`reasoning_effort: null` is preserved as-is (explicit "disable reasoning" signal). The two fields are also typed directly on `WorkersAIChatSettings`.
+
+Closes #501.
diff --git a/packages/workers-ai-provider/README.md b/packages/workers-ai-provider/README.md
index 716f5869a..e0649e142 100644
--- a/packages/workers-ai-provider/README.md
+++ b/packages/workers-ai-provider/README.md
@@ -112,6 +112,35 @@ for await (const chunk of result.textStream) {
 }
 ```
 
+## Reasoning Controls
+
+Reasoning-capable Workers AI models (GLM-4.7-flash, Kimi K2.5/K2.6, GPT-OSS, QwQ) accept `reasoning_effort` and `chat_template_kwargs` on their inputs. Either set them at model creation time as settings, or per-call via `providerOptions["workers-ai"]` (per-call wins):
+
+```ts
+// Settings-level (applies to every request on this model instance)
+const model = workersai("@cf/zai-org/glm-4.7-flash", {
+	reasoning_effort: "low", // "low" | "medium" | "high" | null
+	chat_template_kwargs: { enable_thinking: false },
+});
+
+await generateText({ model, prompt: "Summarize in one sentence." });
+```
+
+```ts
+// Per-call (overrides any settings-level value)
+const model = workersai("@cf/zai-org/glm-4.7-flash");
+
+await generateText({
+	model,
+	prompt: "Summarize in one sentence.",
+	providerOptions: {
+		"workers-ai": { reasoning_effort: "low" },
+	},
+});
+```
+
+`reasoning_effort: null` is meaningful — it's the explicit "disable reasoning" signal for models that support it. Both fields land on the `inputs` object of `binding.run()` (and the JSON body of the REST request), matching the shape expected by Workers AI. See the [model catalog](https://developers.cloudflare.com/workers-ai/models/) for per-model reasoning capabilities.
+
 ## Vision (Image Inputs)
 
 Send images to vision-capable models like Kimi K2.5:
diff --git a/packages/workers-ai-provider/src/workersai-chat-language-model.ts b/packages/workers-ai-provider/src/workersai-chat-language-model.ts
index abc5175f2..8ec6a9188 100644
--- a/packages/workers-ai-provider/src/workersai-chat-language-model.ts
+++ b/packages/workers-ai-provider/src/workersai-chat-language-model.ts
@@ -123,12 +123,30 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 	 * accept this format at runtime.
 	 *
 	 * The binding path additionally normalises null content to empty strings.
+	 *
+	 * Reasoning controls (`reasoning_effort`, `chat_template_kwargs`) are
+	 * forwarded here from settings. These belong on the INPUTS object, not on
+	 * the 3rd-arg options / REST query string — see
+	 * https://github.com/cloudflare/ai/issues/501. Per-call values from
+	 * `providerOptions["workers-ai"]` override settings.
+	 *
+	 * `reasoning_effort: null` is a valid value ("disable reasoning"), so we
+	 * check `!== undefined` rather than truthiness.
 	 */
 	private buildRunInputs(
 		args: ReturnType<typeof this.getArgs>["args"],
 		messages: ReturnType<typeof convertToWorkersAIChatMessages>["messages"],
-		options?: { stream?: boolean },
+		options?: { stream?: boolean; providerOptions?: Record<string, unknown> },
 	) {
+		const perCall =
+			(options?.providerOptions?.["workers-ai"] as Record<string, unknown> | undefined) ?? {};
+		const reasoningEffort =
+			"reasoning_effort" in perCall ? perCall.reasoning_effort : this.settings.reasoning_effort;
+		const chatTemplateKwargs =
+			"chat_template_kwargs" in perCall
+				? perCall.chat_template_kwargs
+				: this.settings.chat_template_kwargs;
+
 		return {
 			max_tokens: args.max_tokens,
 			messages: this.config.isBinding ? normalizeMessagesForBinding(messages) : messages,
@@ -138,11 +156,19 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 			top_p: args.top_p,
 			...(args.response_format ? { response_format: args.response_format } : {}),
 			...(options?.stream ? { stream: true } : {}),
+			...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
+			...(chatTemplateKwargs !== undefined
+				? { chat_template_kwargs: chatTemplateKwargs }
+				: {}),
 		};
 	}
 
 	/**
 	 * Get passthrough options for binding.run() from settings.
+	 *
+	 * `reasoning_effort` and `chat_template_kwargs` are explicitly excluded
+	 * here — they belong on the `inputs` object (see `buildRunInputs`), not on
+	 * the `options` (3rd) arg of binding.run() or the REST query string.
 	 */
 	private getRunOptions() {
 		const {
@@ -150,6 +176,8 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 			safePrompt: _safePrompt,
 			sessionAffinity,
 			extraHeaders,
+			reasoning_effort: _reasoningEffort,
+			chat_template_kwargs: _chatTemplateKwargs,
 			...passthroughOptions
 		} = this.settings;
 
@@ -173,7 +201,9 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 		const { args, warnings } = this.getArgs(options);
 		const { messages } = convertToWorkersAIChatMessages(options.prompt);
 
-		const inputs = this.buildRunInputs(args, messages);
+		const inputs = this.buildRunInputs(args, messages, {
+			providerOptions: options.providerOptions,
+		});
 		const runOptions = this.getRunOptions();
 
 		const output = await this.config.binding.run(
@@ -223,7 +253,10 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 		const { args, warnings } = this.getArgs(options);
 		const { messages } = convertToWorkersAIChatMessages(options.prompt);
 
-		const inputs = this.buildRunInputs(args, messages, { stream: true });
+		const inputs = this.buildRunInputs(args, messages, {
+			stream: true,
+			providerOptions: options.providerOptions,
+		});
 		const runOptions = this.getRunOptions();
 
 		const response = await this.config.binding.run(
diff --git a/packages/workers-ai-provider/src/workersai-chat-settings.ts b/packages/workers-ai-provider/src/workersai-chat-settings.ts
index c99370fc0..9b0732436 100644
--- a/packages/workers-ai-provider/src/workersai-chat-settings.ts
+++ b/packages/workers-ai-provider/src/workersai-chat-settings.ts
@@ -16,6 +16,29 @@ export type WorkersAIChatSettings = {
 	 */
 	sessionAffinity?: string;
 
+	/**
+	 * Controls the reasoning budget for reasoning-capable Workers AI models
+	 * (e.g. `@cf/zai-org/glm-4.7-flash`, `@cf/moonshotai/kimi-k2.5`,
+	 * `@cf/openai/gpt-oss-120b`).
+	 *
+	 * `null` is a valid value and disables reasoning for models that support it.
+	 * Forwarded on the `inputs` object of `binding.run(model, inputs)`.
+	 */
+	reasoning_effort?: "low" | "medium" | "high" | null;
+
+	/**
+	 * Chat-template overrides for reasoning-capable models that expose
+	 * thinking toggles (e.g. GLM, Kimi).
+	 *
+	 * Forwarded on the `inputs` object of `binding.run(model, inputs)`.
+	 */
+	chat_template_kwargs?: {
+		/** Whether to enable reasoning. Enabled by default on reasoning models. */
+		enable_thinking?: boolean;
+		/** If false, preserves reasoning context between turns. */
+		clear_thinking?: boolean;
+	};
+
 	/**
 	 * Passthrough settings that are provided directly to the run function.
 	 * Use this for any provider-specific options not covered by the typed fields.
diff --git a/packages/workers-ai-provider/test/text-generation.test.ts b/packages/workers-ai-provider/test/text-generation.test.ts
index 2f5a27861..ff51b0c1a 100644
--- a/packages/workers-ai-provider/test/text-generation.test.ts
+++ b/packages/workers-ai-provider/test/text-generation.test.ts
@@ -551,4 +551,271 @@ describe("Binding - Text Generation Tests", () => {
 		expect(result.reasoningText).toBe("Let me think step by step");
 		expect(result.text).toBe("The answer is 42");
 	});
+
+	// ---------------------------------------------------------------------
+	// Reasoning passthrough — reasoning_effort + chat_template_kwargs
+	// https://github.com/cloudflare/ai/issues/501
+	// ---------------------------------------------------------------------
+
+	it("should forward settings.reasoning_effort on inputs (2nd arg), not options", async () => {
+		let capturedInputs: any = null;
+		let capturedOptions: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, options?: any) => {
+					capturedInputs = inputs;
+					capturedOptions = options;
+					return { response: "ok" };
+				},
+			},
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			reasoning_effort: "low",
+		});
+
+		await generateText({ model, prompt: "Hi" });
+
+		// Must land on inputs (2nd arg)
+		expect(capturedInputs).toHaveProperty("reasoning_effort", "low");
+		// Must NOT leak into options (3rd arg) — the exact bug in #501
+		expect(capturedOptions).not.toHaveProperty("reasoning_effort");
+	});
+
+	it("should forward settings.chat_template_kwargs on inputs, not options", async () => {
+		let capturedInputs: any = null;
+		let capturedOptions: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, options?: any) => {
+					capturedInputs = inputs;
+					capturedOptions = options;
+					return { response: "ok" };
+				},
+			},
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			chat_template_kwargs: { enable_thinking: false },
+		});
+
+		await generateText({ model, prompt: "Hi" });
+
+		expect(capturedInputs.chat_template_kwargs).toEqual({ enable_thinking: false });
+		expect(capturedOptions).not.toHaveProperty("chat_template_kwargs");
+	});
+
+	it("should preserve reasoning_effort: null (disables reasoning)", async () => {
+		let capturedInputs: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, _options?: any) => {
+					capturedInputs = inputs;
+					return { response: "ok" };
+				},
+			},
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			reasoning_effort: null,
+		});
+
+		await generateText({ model, prompt: "Hi" });
+
+		// null is the explicit "no reasoning" signal — must be preserved on inputs
+		expect(capturedInputs).toHaveProperty("reasoning_effort");
+		expect(capturedInputs.reasoning_effort).toBeNull();
+	});
+
+	it("should not set reasoning fields when omitted", async () => {
+		let capturedInputs: any = null;
+		let capturedOptions: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, options?: any) => {
+					capturedInputs = inputs;
+					capturedOptions = options;
+					return { response: "ok" };
+				},
+			},
+		});
+
+		await generateText({ model: workersai(TEST_MODEL), prompt: "Hi" });
+
+		expect(capturedInputs).not.toHaveProperty("reasoning_effort");
+		expect(capturedInputs).not.toHaveProperty("chat_template_kwargs");
+		expect(capturedOptions).not.toHaveProperty("reasoning_effort");
+		expect(capturedOptions).not.toHaveProperty("chat_template_kwargs");
+	});
+
+	it("should allow per-call providerOptions['workers-ai'] to override settings", async () => {
+		let capturedInputs: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, _options?: any) => {
+					capturedInputs = inputs;
+					return { response: "ok" };
+				},
+			},
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			reasoning_effort: "high",
+		});
+
+		await generateText({
+			model,
+			prompt: "Hi",
+			providerOptions: {
+				"workers-ai": { reasoning_effort: "low" },
+			},
+		});
+
+		// Per-call wins over settings
+		expect(capturedInputs.reasoning_effort).toBe("low");
+	});
+
+	it("should forward reasoning params on streaming requests too", async () => {
+		let capturedInputs: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, _options?: any) => {
+					capturedInputs = inputs;
+					// Return a simple complete (non-streaming) response; the provider
+					// wraps it as a synthetic stream via graceful degradation.
+					return { response: "ok" };
+				},
+			},
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			reasoning_effort: "medium",
+			chat_template_kwargs: { enable_thinking: true },
+		});
+
+		const { streamText } = await import("ai");
+		const { textStream } = streamText({ model, prompt: "Hi" });
+		// Consume the stream so doStream actually runs
+		for await (const _ of textStream) {
+			// drain
+		}
+
+		expect(capturedInputs.stream).toBe(true);
+		expect(capturedInputs.reasoning_effort).toBe("medium");
+		expect(capturedInputs.chat_template_kwargs).toEqual({ enable_thinking: true });
+	});
+});
+
+// ---------------------------------------------------------------------------
+// REST mode — reasoning passthrough lands in JSON body (not URL query)
+// https://github.com/cloudflare/ai/issues/501
+// ---------------------------------------------------------------------------
+
+describe("REST - reasoning passthrough", () => {
+	beforeAll(() => server.listen());
+	afterEach(() => server.resetHandlers());
+	afterAll(() => server.close());
+
+	const REASONING_MODEL = "@cf/zai-org/glm-4.7-flash";
+
+	it("should put reasoning_effort in the JSON body, not the URL query string", async () => {
+		let capturedBody: any = null;
+		let capturedQuery: Record<string, string> = {};
+
+		server.use(
+			http.post(
+				`https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`,
+				async ({ request }) => {
+					const url = new URL(request.url);
+					capturedQuery = Object.fromEntries(url.searchParams.entries());
+					capturedBody = await request.json();
+					return HttpResponse.json({ result: { response: "ok" } });
+				},
+			),
+		);
+
+		const workersai = createWorkersAI({
+			accountId: TEST_ACCOUNT_ID,
+			apiKey: TEST_API_KEY,
+		});
+
+		const model = workersai(REASONING_MODEL, {
+			reasoning_effort: "low",
+			chat_template_kwargs: { enable_thinking: false },
+		});
+
+		await generateText({ model, prompt: "Hi" });
+
+		// Both fields must be on the JSON body (inputs), not the URL query string
+		expect(capturedBody.reasoning_effort).toBe("low");
+		expect(capturedBody.chat_template_kwargs).toEqual({ enable_thinking: false });
+		expect(capturedQuery).not.toHaveProperty("reasoning_effort");
+		expect(capturedQuery).not.toHaveProperty("chat_template_kwargs");
+	});
+
+	it("should preserve reasoning_effort: null in the REST body", async () => {
+		let capturedBody: any = null;
+
+		server.use(
+			http.post(
+				`https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`,
+				async ({ request }) => {
+					capturedBody = await request.json();
+					return HttpResponse.json({ result: { response: "ok" } });
+				},
+			),
+		);
+
+		const workersai = createWorkersAI({
+			accountId: TEST_ACCOUNT_ID,
+			apiKey: TEST_API_KEY,
+		});
+
+		const model = workersai(REASONING_MODEL, {
+			reasoning_effort: null,
+		});
+
+		await generateText({ model, prompt: "Hi" });
+
+		// null is explicitly meaningful — must round-trip
+		expect(capturedBody).toHaveProperty("reasoning_effort");
+		expect(capturedBody.reasoning_effort).toBeNull();
+	});
+
+	it("should still passthrough unrelated settings as URL query (no regression)", async () => {
+		let capturedQuery: Record<string, string> = {};
+
+		server.use(
+			http.post(
+				`https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`,
+				async ({ request }) => {
+					const url = new URL(request.url);
+					capturedQuery = Object.fromEntries(url.searchParams.entries());
+					return HttpResponse.json({ result: { response: "ok" } });
+				},
+			),
+		);
+
+		const workersai = createWorkersAI({
+			accountId: TEST_ACCOUNT_ID,
+			apiKey: TEST_API_KEY,
+		});
+
+		const model = workersai(REASONING_MODEL, {
+			// Other custom settings should continue flowing through as URL query
+			custom_flag: "yes",
+			reasoning_effort: "low",
+		});
+
+		await generateText({ model, prompt: "Hi" });
+
+		expect(capturedQuery).toHaveProperty("custom_flag", "yes");
+		expect(capturedQuery).not.toHaveProperty("reasoning_effort");
+	});
 });

From 4b25307564485a6811fd932708a36e3133e5f25d Mon Sep 17 00:00:00 2001
From: Sunil Pai <spai@cloudflare.com>
Date: Thu, 23 Apr 2026 08:54:56 -0400
Subject: [PATCH 2/2] test(workers-ai-provider): harden reasoning passthrough
 edge cases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Review-driven follow-ups on top of the #501 fix:

- Defensively guard `providerOptions["workers-ai"]` against non-object
  runtime values. `"key" in x` throws a TypeError for primitives, so fall
  back to settings if a user passes a string / number / boolean rather
  than crashing the call. Arrays and `null` are ignored the same way.
- Test: per-call `null` overrides a non-null settings value (confirms the
  `"key" in perCall` precedence logic works when the value is explicitly
  falsy).
- Test: malformed `providerOptions["workers-ai"]` falls back to settings.
- Test: reasoning params + AI Gateway on the binding path — inputs and
  options stay cleanly separated; gateway doesn't see reasoning_effort.
- Test: `reasoning_effort: null` in settings no longer throws on the REST
  path. Before the fix, `createRun` rejected null at the query-string
  coercion step. Now that reasoning_effort lives in the JSON body, this
  round-trips cleanly.

Made-with: Cursor
---
 .../src/workersai-chat-language-model.ts      |  10 +-
 .../test/text-generation.test.ts              | 114 ++++++++++++++++++
 2 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/packages/workers-ai-provider/src/workersai-chat-language-model.ts b/packages/workers-ai-provider/src/workersai-chat-language-model.ts
index 8ec6a9188..9cd039cb9 100644
--- a/packages/workers-ai-provider/src/workersai-chat-language-model.ts
+++ b/packages/workers-ai-provider/src/workersai-chat-language-model.ts
@@ -138,8 +138,14 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 		messages: ReturnType<typeof convertToWorkersAIChatMessages>["messages"],
 		options?: { stream?: boolean; providerOptions?: Record<string, unknown> },
 	) {
-		const perCall =
-			(options?.providerOptions?.["workers-ai"] as Record<string, unknown> | undefined) ?? {};
+		// The AI SDK types this as `Record<string, JSONObject>` but we defensively
+		// accept anything and only treat it as a lookup if it's a plain object.
+		// `"key" in x` throws for primitives, so we can't skip the typeof guard.
+		const rawPerCall = options?.providerOptions?.["workers-ai"];
+		const perCall: Record<string, unknown> =
+			rawPerCall !== null && typeof rawPerCall === "object" && !Array.isArray(rawPerCall)
+				? (rawPerCall as Record<string, unknown>)
+				: {};
 		const reasoningEffort =
 			"reasoning_effort" in perCall ? perCall.reasoning_effort : this.settings.reasoning_effort;
 		const chatTemplateKwargs =
diff --git a/packages/workers-ai-provider/test/text-generation.test.ts b/packages/workers-ai-provider/test/text-generation.test.ts
index ff51b0c1a..a401c23f3 100644
--- a/packages/workers-ai-provider/test/text-generation.test.ts
+++ b/packages/workers-ai-provider/test/text-generation.test.ts
@@ -680,6 +680,97 @@ describe("Binding - Text Generation Tests", () => {
 		expect(capturedInputs.reasoning_effort).toBe("low");
 	});
 
+	it("should allow per-call null to override a non-null settings value", async () => {
+		// The `in` operator is what enables this: an explicit key in per-call
+		// overrides settings even when the per-call value is null.
+		let capturedInputs: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, _options?: any) => {
+					capturedInputs = inputs;
+					return { response: "ok" };
+				},
+			},
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			reasoning_effort: "high",
+		});
+
+		await generateText({
+			model,
+			prompt: "Hi",
+			providerOptions: {
+				"workers-ai": { reasoning_effort: null },
+			},
+		});
+
+		expect(capturedInputs).toHaveProperty("reasoning_effort");
+		expect(capturedInputs.reasoning_effort).toBeNull();
+	});
+
+	it("should ignore providerOptions['workers-ai'] when not a plain object", async () => {
+		// Guard against runtime misuse — AI SDK types say JSONObject, but users
+		// can bypass with `as any`. `"key" in primitive` throws, so we fall back
+		// to settings instead of crashing.
+		let capturedInputs: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, _options?: any) => {
+					capturedInputs = inputs;
+					return { response: "ok" };
+				},
+			},
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			reasoning_effort: "medium",
+		});
+
+		await generateText({
+			model,
+			prompt: "Hi",
+			providerOptions: {
+				// Intentionally wrong shape — string/array/null should be ignored
+				"workers-ai": "not-an-object" as any,
+			},
+		});
+
+		// Falls back to settings
+		expect(capturedInputs.reasoning_effort).toBe("medium");
+	});
+
+	it("should combine reasoning params with AI Gateway on the binding path", async () => {
+		// Reasoning params must land on inputs (2nd arg); gateway config stays on
+		// options (3rd arg). They should not interfere with each other.
+		let capturedInputs: any = null;
+		let capturedOptions: any = null;
+
+		const workersai = createWorkersAI({
+			binding: {
+				run: async (_modelName: string, inputs: any, options?: any) => {
+					capturedInputs = inputs;
+					capturedOptions = options;
+					return { response: "ok" };
+				},
+			},
+			gateway: { id: "my-gw" },
+		});
+
+		const model = workersai("@cf/zai-org/glm-4.7-flash", {
+			reasoning_effort: "low",
+		});
+
+		await generateText({ model, prompt: "Hi" });
+
+		expect(capturedInputs.reasoning_effort).toBe("low");
+		expect(capturedOptions.gateway).toEqual({ id: "my-gw" });
+		// And crucially: the gateway shouldn't pick up reasoning_effort
+		expect(capturedOptions).not.toHaveProperty("reasoning_effort");
+	});
+
 	it("should forward reasoning params on streaming requests too", async () => {
 		let capturedInputs: any = null;
 
@@ -788,6 +879,29 @@ describe("REST - reasoning passthrough", () => {
 		expect(capturedBody.reasoning_effort).toBeNull();
 	});
 
+	it("should NOT throw when reasoning_effort is null in settings (REST regression)", async () => {
+		// Before this fix, `createRun` would throw because it can't coerce
+		// `null` into a URL query-string value. Now that reasoning_effort is
+		// moved to the JSON body, this round-trips cleanly.
+		server.use(
+			http.post(
+				`https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`,
+				async () => HttpResponse.json({ result: { response: "ok" } }),
+			),
+		);
+
+		const workersai = createWorkersAI({
+			accountId: TEST_ACCOUNT_ID,
+			apiKey: TEST_API_KEY,
+		});
+
+		const model = workersai(REASONING_MODEL, {
+			reasoning_effort: null,
+		});
+
+		await expect(generateText({ model, prompt: "Hi" })).resolves.toBeDefined();
+	});
+
 	it("should still passthrough unrelated settings as URL query (no regression)", async () => {
 		let capturedQuery: Record<string, string> = {};