From e9b2a9a4e4e63a8c045c1ac9e5d07fec3d4f2535 Mon Sep 17 00:00:00 2001 From: Sunil Pai Date: Thu, 23 Apr 2026 08:48:54 -0400 Subject: [PATCH 1/2] fix(workers-ai-provider): forward reasoning_effort and chat_template_kwargs (#501) `modelSettings` passed to the provider were flowing through `getRunOptions()` into the 3rd arg (options) of `binding.run(model, inputs, options)`, but Cloudflare Workers AI's `reasoning_effort` and `chat_template_kwargs` parameters belong on the 2nd arg (inputs). As a result they were silently dropped, causing reasoning models (GLM-4.7-flash, Kimi K2.5/K2.6, GPT-OSS, QwQ) to burn the entire output token budget on chain-of-thought. - Type `reasoning_effort` and `chat_template_kwargs` directly on `WorkersAIChatSettings`. - In `buildRunInputs()`, pull both values from settings and from `providerOptions["workers-ai"]` (per-call wins) and place them on the inputs object. `reasoning_effort: null` is preserved (`!== undefined` check) because it's the explicit "disable reasoning" signal. - In `getRunOptions()`, strip them from `passthroughOptions` so they don't leak into the binding's options arg or the REST URL query string. - Wire `options.providerOptions` through `doGenerate` and `doStream` so per-call overrides work without settings. Adds 9 tests covering binding inputs placement, REST body placement, null preservation, no leakage into options/query, per-call override, and unrelated settings passthrough (no regression). Closes #501. 
Made-with: Cursor --- ...rkers-ai-provider-reasoning-passthrough.md | 28 ++ packages/workers-ai-provider/README.md | 29 ++ .../src/workersai-chat-language-model.ts | 39 ++- .../src/workersai-chat-settings.ts | 23 ++ .../test/text-generation.test.ts | 267 ++++++++++++++++++ 5 files changed, 383 insertions(+), 3 deletions(-) create mode 100644 .changeset/workers-ai-provider-reasoning-passthrough.md diff --git a/.changeset/workers-ai-provider-reasoning-passthrough.md b/.changeset/workers-ai-provider-reasoning-passthrough.md new file mode 100644 index 000000000..1a7b7f317 --- /dev/null +++ b/.changeset/workers-ai-provider-reasoning-passthrough.md @@ -0,0 +1,28 @@ +--- +"workers-ai-provider": minor +--- + +Forward `reasoning_effort` and `chat_template_kwargs` onto `binding.run(model, inputs)`'s `inputs` object instead of silently dropping them into the options arg / REST query string. This fixes reasoning models (GLM-4.7-flash, Kimi K2.5/K2.6, GPT-OSS, QwQ) burning the entire output token budget on chain-of-thought with no visible content. + +Both settings-level and per-call usage are supported: + +```ts +// Settings-level +const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: "low", + chat_template_kwargs: { enable_thinking: false }, +}); + +// Per-call (overrides settings) +await generateText({ + model, + prompt, + providerOptions: { + "workers-ai": { reasoning_effort: "low" }, + }, +}); +``` + +`reasoning_effort: null` is preserved as-is (explicit "disable reasoning" signal). The two fields are also typed directly on `WorkersAIChatSettings`. + +Closes #501. 
diff --git a/packages/workers-ai-provider/README.md b/packages/workers-ai-provider/README.md index 716f5869a..e0649e142 100644 --- a/packages/workers-ai-provider/README.md +++ b/packages/workers-ai-provider/README.md @@ -112,6 +112,35 @@ for await (const chunk of result.textStream) { } ``` +## Reasoning Controls + +Reasoning-capable Workers AI models (GLM-4.7-flash, Kimi K2.5/K2.6, GPT-OSS, QwQ) accept `reasoning_effort` and `chat_template_kwargs` on their inputs. Either set them at model creation time as settings, or per-call via `providerOptions["workers-ai"]` (per-call wins): + +```ts +// Settings-level (applies to every request on this model instance) +const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: "low", // "low" | "medium" | "high" | null + chat_template_kwargs: { enable_thinking: false }, +}); + +await generateText({ model, prompt: "Summarize in one sentence." }); +``` + +```ts +// Per-call (overrides any settings-level value) +const model = workersai("@cf/zai-org/glm-4.7-flash"); + +await generateText({ + model, + prompt: "Summarize in one sentence.", + providerOptions: { + "workers-ai": { reasoning_effort: "low" }, + }, +}); +``` + +`reasoning_effort: null` is meaningful — it's the explicit "disable reasoning" signal for models that support it. Both fields land on the `inputs` object of `binding.run()` (and the JSON body of the REST request), matching the shape expected by Workers AI. See the [model catalog](https://developers.cloudflare.com/workers-ai/models/) for per-model reasoning capabilities. 
+ ## Vision (Image Inputs) Send images to vision-capable models like Kimi K2.5: diff --git a/packages/workers-ai-provider/src/workersai-chat-language-model.ts b/packages/workers-ai-provider/src/workersai-chat-language-model.ts index abc5175f2..8ec6a9188 100644 --- a/packages/workers-ai-provider/src/workersai-chat-language-model.ts +++ b/packages/workers-ai-provider/src/workersai-chat-language-model.ts @@ -123,12 +123,30 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 { * accept this format at runtime. * * The binding path additionally normalises null content to empty strings. + * + * Reasoning controls (`reasoning_effort`, `chat_template_kwargs`) are + * forwarded here from settings. These belong on the INPUTS object, not on + * the 3rd-arg options / REST query string — see + * https://github.com/cloudflare/ai/issues/501. Per-call values from + * `providerOptions["workers-ai"]` override settings. + * + * `reasoning_effort: null` is a valid value ("disable reasoning"), so we + * check `!== undefined` rather than truthiness. */ private buildRunInputs( args: ReturnType["args"], messages: ReturnType["messages"], - options?: { stream?: boolean }, + options?: { stream?: boolean; providerOptions?: Record }, ) { + const perCall = + (options?.providerOptions?.["workers-ai"] as Record | undefined) ?? {}; + const reasoningEffort = + "reasoning_effort" in perCall ? perCall.reasoning_effort : this.settings.reasoning_effort; + const chatTemplateKwargs = + "chat_template_kwargs" in perCall + ? perCall.chat_template_kwargs + : this.settings.chat_template_kwargs; + return { max_tokens: args.max_tokens, messages: this.config.isBinding ? normalizeMessagesForBinding(messages) : messages, @@ -138,11 +156,19 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 { top_p: args.top_p, ...(args.response_format ? { response_format: args.response_format } : {}), ...(options?.stream ? { stream: true } : {}), + ...(reasoningEffort !== undefined ? 
{ reasoning_effort: reasoningEffort } : {}), + ...(chatTemplateKwargs !== undefined + ? { chat_template_kwargs: chatTemplateKwargs } + : {}), }; } /** * Get passthrough options for binding.run() from settings. + * + * `reasoning_effort` and `chat_template_kwargs` are explicitly excluded + * here — they belong on the `inputs` object (see `buildRunInputs`), not on + * the `options` (3rd) arg of binding.run() or the REST query string. */ private getRunOptions() { const { @@ -150,6 +176,8 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 { safePrompt: _safePrompt, sessionAffinity, extraHeaders, + reasoning_effort: _reasoningEffort, + chat_template_kwargs: _chatTemplateKwargs, ...passthroughOptions } = this.settings; @@ -173,7 +201,9 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 { const { args, warnings } = this.getArgs(options); const { messages } = convertToWorkersAIChatMessages(options.prompt); - const inputs = this.buildRunInputs(args, messages); + const inputs = this.buildRunInputs(args, messages, { + providerOptions: options.providerOptions, + }); const runOptions = this.getRunOptions(); const output = await this.config.binding.run( @@ -223,7 +253,10 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 { const { args, warnings } = this.getArgs(options); const { messages } = convertToWorkersAIChatMessages(options.prompt); - const inputs = this.buildRunInputs(args, messages, { stream: true }); + const inputs = this.buildRunInputs(args, messages, { + stream: true, + providerOptions: options.providerOptions, + }); const runOptions = this.getRunOptions(); const response = await this.config.binding.run( diff --git a/packages/workers-ai-provider/src/workersai-chat-settings.ts b/packages/workers-ai-provider/src/workersai-chat-settings.ts index c99370fc0..9b0732436 100644 --- a/packages/workers-ai-provider/src/workersai-chat-settings.ts +++ b/packages/workers-ai-provider/src/workersai-chat-settings.ts @@ -16,6 
+16,29 @@ export type WorkersAIChatSettings = { */ sessionAffinity?: string; + /** + * Controls the reasoning budget for reasoning-capable Workers AI models + * (e.g. `@cf/zai-org/glm-4.7-flash`, `@cf/moonshotai/kimi-k2.5`, + * `@cf/openai/gpt-oss-120b`). + * + * `null` is a valid value and disables reasoning for models that support it. + * Forwarded on the `inputs` object of `binding.run(model, inputs)`. + */ + reasoning_effort?: "low" | "medium" | "high" | null; + + /** + * Chat-template overrides for reasoning-capable models that expose + * thinking toggles (e.g. GLM, Kimi). + * + * Forwarded on the `inputs` object of `binding.run(model, inputs)`. + */ + chat_template_kwargs?: { + /** Whether to enable reasoning. Enabled by default on reasoning models. */ + enable_thinking?: boolean; + /** If false, preserves reasoning context between turns. */ + clear_thinking?: boolean; + }; + /** * Passthrough settings that are provided directly to the run function. * Use this for any provider-specific options not covered by the typed fields. 
diff --git a/packages/workers-ai-provider/test/text-generation.test.ts b/packages/workers-ai-provider/test/text-generation.test.ts index 2f5a27861..ff51b0c1a 100644 --- a/packages/workers-ai-provider/test/text-generation.test.ts +++ b/packages/workers-ai-provider/test/text-generation.test.ts @@ -551,4 +551,271 @@ describe("Binding - Text Generation Tests", () => { expect(result.reasoningText).toBe("Let me think step by step"); expect(result.text).toBe("The answer is 42"); }); + + // --------------------------------------------------------------------- + // Reasoning passthrough — reasoning_effort + chat_template_kwargs + // https://github.com/cloudflare/ai/issues/501 + // --------------------------------------------------------------------- + + it("should forward settings.reasoning_effort on inputs (2nd arg), not options", async () => { + let capturedInputs: any = null; + let capturedOptions: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, options?: any) => { + capturedInputs = inputs; + capturedOptions = options; + return { response: "ok" }; + }, + }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: "low", + }); + + await generateText({ model, prompt: "Hi" }); + + // Must land on inputs (2nd arg) + expect(capturedInputs).toHaveProperty("reasoning_effort", "low"); + // Must NOT leak into options (3rd arg) — the exact bug in #501 + expect(capturedOptions).not.toHaveProperty("reasoning_effort"); + }); + + it("should forward settings.chat_template_kwargs on inputs, not options", async () => { + let capturedInputs: any = null; + let capturedOptions: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, options?: any) => { + capturedInputs = inputs; + capturedOptions = options; + return { response: "ok" }; + }, + }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + chat_template_kwargs: { 
enable_thinking: false }, + }); + + await generateText({ model, prompt: "Hi" }); + + expect(capturedInputs.chat_template_kwargs).toEqual({ enable_thinking: false }); + expect(capturedOptions).not.toHaveProperty("chat_template_kwargs"); + }); + + it("should preserve reasoning_effort: null (disables reasoning)", async () => { + let capturedInputs: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, _options?: any) => { + capturedInputs = inputs; + return { response: "ok" }; + }, + }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: null, + }); + + await generateText({ model, prompt: "Hi" }); + + // null is the explicit "no reasoning" signal — must be preserved on inputs + expect(capturedInputs).toHaveProperty("reasoning_effort"); + expect(capturedInputs.reasoning_effort).toBeNull(); + }); + + it("should not set reasoning fields when omitted", async () => { + let capturedInputs: any = null; + let capturedOptions: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, options?: any) => { + capturedInputs = inputs; + capturedOptions = options; + return { response: "ok" }; + }, + }, + }); + + await generateText({ model: workersai(TEST_MODEL), prompt: "Hi" }); + + expect(capturedInputs).not.toHaveProperty("reasoning_effort"); + expect(capturedInputs).not.toHaveProperty("chat_template_kwargs"); + expect(capturedOptions).not.toHaveProperty("reasoning_effort"); + expect(capturedOptions).not.toHaveProperty("chat_template_kwargs"); + }); + + it("should allow per-call providerOptions['workers-ai'] to override settings", async () => { + let capturedInputs: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, _options?: any) => { + capturedInputs = inputs; + return { response: "ok" }; + }, + }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + 
reasoning_effort: "high", + }); + + await generateText({ + model, + prompt: "Hi", + providerOptions: { + "workers-ai": { reasoning_effort: "low" }, + }, + }); + + // Per-call wins over settings + expect(capturedInputs.reasoning_effort).toBe("low"); + }); + + it("should forward reasoning params on streaming requests too", async () => { + let capturedInputs: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, _options?: any) => { + capturedInputs = inputs; + // Return a simple complete (non-streaming) response; the provider + // wraps it as a synthetic stream via graceful degradation. + return { response: "ok" }; + }, + }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: "medium", + chat_template_kwargs: { enable_thinking: true }, + }); + + const { streamText } = await import("ai"); + const { textStream } = streamText({ model, prompt: "Hi" }); + // Consume the stream so doStream actually runs + for await (const _ of textStream) { + // drain + } + + expect(capturedInputs.stream).toBe(true); + expect(capturedInputs.reasoning_effort).toBe("medium"); + expect(capturedInputs.chat_template_kwargs).toEqual({ enable_thinking: true }); + }); +}); + +// --------------------------------------------------------------------------- +// REST mode — reasoning passthrough lands in JSON body (not URL query) +// https://github.com/cloudflare/ai/issues/501 +// --------------------------------------------------------------------------- + +describe("REST - reasoning passthrough", () => { + beforeAll(() => server.listen()); + afterEach(() => server.resetHandlers()); + afterAll(() => server.close()); + + const REASONING_MODEL = "@cf/zai-org/glm-4.7-flash"; + + it("should put reasoning_effort in the JSON body, not the URL query string", async () => { + let capturedBody: any = null; + let capturedQuery: Record = {}; + + server.use( + http.post( + 
`https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`, + async ({ request }) => { + const url = new URL(request.url); + capturedQuery = Object.fromEntries(url.searchParams.entries()); + capturedBody = await request.json(); + return HttpResponse.json({ result: { response: "ok" } }); + }, + ), + ); + + const workersai = createWorkersAI({ + accountId: TEST_ACCOUNT_ID, + apiKey: TEST_API_KEY, + }); + + const model = workersai(REASONING_MODEL, { + reasoning_effort: "low", + chat_template_kwargs: { enable_thinking: false }, + }); + + await generateText({ model, prompt: "Hi" }); + + // Both fields must be on the JSON body (inputs), not the URL query string + expect(capturedBody.reasoning_effort).toBe("low"); + expect(capturedBody.chat_template_kwargs).toEqual({ enable_thinking: false }); + expect(capturedQuery).not.toHaveProperty("reasoning_effort"); + expect(capturedQuery).not.toHaveProperty("chat_template_kwargs"); + }); + + it("should preserve reasoning_effort: null in the REST body", async () => { + let capturedBody: any = null; + + server.use( + http.post( + `https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`, + async ({ request }) => { + capturedBody = await request.json(); + return HttpResponse.json({ result: { response: "ok" } }); + }, + ), + ); + + const workersai = createWorkersAI({ + accountId: TEST_ACCOUNT_ID, + apiKey: TEST_API_KEY, + }); + + const model = workersai(REASONING_MODEL, { + reasoning_effort: null, + }); + + await generateText({ model, prompt: "Hi" }); + + // null is explicitly meaningful — must round-trip + expect(capturedBody).toHaveProperty("reasoning_effort"); + expect(capturedBody.reasoning_effort).toBeNull(); + }); + + it("should still passthrough unrelated settings as URL query (no regression)", async () => { + let capturedQuery: Record = {}; + + server.use( + http.post( + 
`https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`, + async ({ request }) => { + const url = new URL(request.url); + capturedQuery = Object.fromEntries(url.searchParams.entries()); + return HttpResponse.json({ result: { response: "ok" } }); + }, + ), + ); + + const workersai = createWorkersAI({ + accountId: TEST_ACCOUNT_ID, + apiKey: TEST_API_KEY, + }); + + const model = workersai(REASONING_MODEL, { + // Other custom settings should continue flowing through as URL query + custom_flag: "yes", + reasoning_effort: "low", + }); + + await generateText({ model, prompt: "Hi" }); + + expect(capturedQuery).toHaveProperty("custom_flag", "yes"); + expect(capturedQuery).not.toHaveProperty("reasoning_effort"); + }); }); From 4b25307564485a6811fd932708a36e3133e5f25d Mon Sep 17 00:00:00 2001 From: Sunil Pai Date: Thu, 23 Apr 2026 08:54:56 -0400 Subject: [PATCH 2/2] test(workers-ai-provider): harden reasoning passthrough edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review-driven follow-ups on top of the #501 fix: - Defensively guard `providerOptions["workers-ai"]` against non-object runtime values. `"key" in x` throws for primitives, so fall back to settings if a user passes a string / number / boolean / array rather than crashing the call. - Test: per-call `null` overrides a non-null settings value (confirms the `"key" in perCall` precedence logic works when the value is explicitly falsy). - Test: malformed `providerOptions["workers-ai"]` falls back to settings. - Test: reasoning params + AI Gateway on the binding path — inputs and options stay cleanly separated; gateway doesn't see reasoning_effort. - Test: `reasoning_effort: null` in settings no longer throws on the REST path. Before the fix, `createRun` rejected null at the query-string coercion step. Now that reasoning_effort lives in the JSON body, this round-trips cleanly. 
Made-with: Cursor --- .../src/workersai-chat-language-model.ts | 10 +- .../test/text-generation.test.ts | 114 ++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) diff --git a/packages/workers-ai-provider/src/workersai-chat-language-model.ts b/packages/workers-ai-provider/src/workersai-chat-language-model.ts index 8ec6a9188..9cd039cb9 100644 --- a/packages/workers-ai-provider/src/workersai-chat-language-model.ts +++ b/packages/workers-ai-provider/src/workersai-chat-language-model.ts @@ -138,8 +138,14 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 { messages: ReturnType["messages"], options?: { stream?: boolean; providerOptions?: Record }, ) { - const perCall = - (options?.providerOptions?.["workers-ai"] as Record | undefined) ?? {}; + // The AI SDK types this as `Record` but we defensively + // accept anything and only treat it as a lookup if it's a plain object. + // `"key" in x` throws for primitives, so we can't skip the typeof guard. + const rawPerCall = options?.providerOptions?.["workers-ai"]; + const perCall: Record = + rawPerCall !== null && typeof rawPerCall === "object" && !Array.isArray(rawPerCall) + ? (rawPerCall as Record) + : {}; const reasoningEffort = "reasoning_effort" in perCall ? perCall.reasoning_effort : this.settings.reasoning_effort; const chatTemplateKwargs = diff --git a/packages/workers-ai-provider/test/text-generation.test.ts b/packages/workers-ai-provider/test/text-generation.test.ts index ff51b0c1a..a401c23f3 100644 --- a/packages/workers-ai-provider/test/text-generation.test.ts +++ b/packages/workers-ai-provider/test/text-generation.test.ts @@ -680,6 +680,97 @@ describe("Binding - Text Generation Tests", () => { expect(capturedInputs.reasoning_effort).toBe("low"); }); + it("should allow per-call null to override a non-null settings value", async () => { + // The `in` operator is what enables this: an explicit key in per-call + // overrides settings even when the per-call value is null. 
+ let capturedInputs: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, _options?: any) => { + capturedInputs = inputs; + return { response: "ok" }; + }, + }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: "high", + }); + + await generateText({ + model, + prompt: "Hi", + providerOptions: { + "workers-ai": { reasoning_effort: null }, + }, + }); + + expect(capturedInputs).toHaveProperty("reasoning_effort"); + expect(capturedInputs.reasoning_effort).toBeNull(); + }); + + it("should ignore providerOptions['workers-ai'] when not a plain object", async () => { + // Guard against runtime misuse — AI SDK types say JSONObject, but users + // can bypass with `as any`. `"key" in primitive` throws, so we fall back + // to settings instead of crashing. + let capturedInputs: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, _options?: any) => { + capturedInputs = inputs; + return { response: "ok" }; + }, + }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: "medium", + }); + + await generateText({ + model, + prompt: "Hi", + providerOptions: { + // Intentionally wrong shape — string/array/null should be ignored + "workers-ai": "not-an-object" as any, + }, + }); + + // Falls back to settings + expect(capturedInputs.reasoning_effort).toBe("medium"); + }); + + it("should combine reasoning params with AI Gateway on the binding path", async () => { + // Reasoning params must land on inputs (2nd arg); gateway config stays on + // options (3rd arg). They should not interfere with each other. 
+ let capturedInputs: any = null; + let capturedOptions: any = null; + + const workersai = createWorkersAI({ + binding: { + run: async (_modelName: string, inputs: any, options?: any) => { + capturedInputs = inputs; + capturedOptions = options; + return { response: "ok" }; + }, + }, + gateway: { id: "my-gw" }, + }); + + const model = workersai("@cf/zai-org/glm-4.7-flash", { + reasoning_effort: "low", + }); + + await generateText({ model, prompt: "Hi" }); + + expect(capturedInputs.reasoning_effort).toBe("low"); + expect(capturedOptions.gateway).toEqual({ id: "my-gw" }); + // And crucially: the gateway shouldn't pick up reasoning_effort + expect(capturedOptions).not.toHaveProperty("reasoning_effort"); + }); + it("should forward reasoning params on streaming requests too", async () => { let capturedInputs: any = null; @@ -788,6 +879,29 @@ describe("REST - reasoning passthrough", () => { expect(capturedBody.reasoning_effort).toBeNull(); }); + it("should NOT throw when reasoning_effort is null in settings (REST regression)", async () => { + // Before this fix, `createRun` would throw because it can't coerce + // `null` into a URL query-string value. Now that reasoning_effort is + // moved to the JSON body, this round-trips cleanly. + server.use( + http.post( + `https://api.cloudflare.com/client/v4/accounts/${TEST_ACCOUNT_ID}/ai/run/${REASONING_MODEL}`, + async () => HttpResponse.json({ result: { response: "ok" } }), + ), + ); + + const workersai = createWorkersAI({ + accountId: TEST_ACCOUNT_ID, + apiKey: TEST_API_KEY, + }); + + const model = workersai(REASONING_MODEL, { + reasoning_effort: null, + }); + + await expect(generateText({ model, prompt: "Hi" })).resolves.toBeDefined(); + }); + it("should still passthrough unrelated settings as URL query (no regression)", async () => { let capturedQuery: Record = {};