SAP · davidkna-sap · Jan 23, 2026 · Jan 23, 2026 · Jan 23, 2026 · Jan 26, 2026
diff --git a/.changeset/curvy-mice-hear.md b/.changeset/curvy-mice-hear.md
@@ -0,0 +1,5 @@
+---
+'@sap-ai-sdk/orchestration': minor
+---
+
+[feat] Support streaming with orchestration prompt module fallback.
diff --git a/packages/orchestration/src/index.ts b/packages/orchestration/src/index.ts
@@ -11,6 +11,9 @@ export type {
   ChatCompletionRequest,
   RequestOptions,
   StreamOptions,
+  StreamOptionsWithOverrides,
+  BaseStreamOptions,
+  ModuleStreamOptions,
   DocumentGroundingServiceConfig,
   DocumentGroundingServiceFilter,
   DpiMaskingConfig,

diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts
@@ -28,7 +28,9 @@ import type { CompletionPostResponse } from './client/api/schema/index.js';
 import type {
   OrchestrationModuleConfig,
   OrchestrationConfigRef,
-  ChatCompletionRequest
+  ChatCompletionRequest,
+  StreamOptions,
+  StreamOptionsWithOverrides
 } from './orchestration-types.js';
 
 const defaultJsonConfig = `{
@@ -1318,4 +1320,356 @@ describe('orchestration service client', () => {
       spy.mockRestore();
     });
   });
+
+  describe('Orchestration Client with module fallback configs', () => {
+    it('calls chatCompletion with module fallback configuration array', async () => {
+      const primaryConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-4o', timeout: 5, params: { max_tokens: 100 } },
+          prompt: {
+            template: [{ role: 'user', content: 'Try primary model' }]
+          }
+        }
+      };
+
+      const fallbackConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-5-mini', params: { max_tokens: 50 } },
+          prompt: {
+            template: [{ role: 'user', content: 'Fallback model' }]
+          }
+        }
+      };
+
+      const mockResponse = await parseMockResponse<CompletionPostResponse>(
+        'orchestration',
+        'orchestration-chat-completion-success-response.json'
+      );
+
+      mockInference(
+        {
+          data: constructCompletionPostRequest([primaryConfig, fallbackConfig])
+        },
+        {
+          data: mockResponse,
+          status: 200
+        },
+        {
+          url: 'inference/deployments/1234/v2/completion'
+        }
+      );
+
+      const response = await new OrchestrationClient([
+        primaryConfig,
+        fallbackConfig
+      ]).chatCompletion();
+
+      expect(response).toBeInstanceOf(OrchestrationResponse);
+      expect(response.getContent()).toBe('Hello! How can I assist you today?');
+    });
+
+    it('calls streaming with module fallback configuration array', async () => {
+      const primaryConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-4o' },
+          prompt: {
+            template: [{ role: 'user', content: 'Primary model' }]
+          }
+        }
+      };
+
+      const fallbackConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-5-mini' },
+          prompt: {
+            template: [{ role: 'user', content: 'Fallback model' }]
+          }
+        }
+      };
+
+      mockInference(
+        {
+          data: constructCompletionPostRequest(
+            [primaryConfig, fallbackConfig],
+            undefined,
+            true
+          )
+        },
+        {
+          data: streamMockResponse,
+          status: 200
+        },
+        {
+          url: 'inference/deployments/1234/v2/completion'
+        }
+      );
+
+      const response = await new OrchestrationClient([
+        primaryConfig,
+        fallbackConfig
+      ]).stream();
+
+      const chunks: string[] = [];
+      for await (const chunk of response.stream) {
+        const content = chunk.getDeltaContent();
+        if (content) {
+          chunks.push(content);
+        }
+      }
+
+      // Verify that streaming completed successfully and returned content
+      expect(chunks.length).toBeGreaterThan(0);
+      expect(chunks.join('')).toContain('SAP Cloud SDK');
+    });
+
+    it('returns intermediate failures from fallback attempts', async () => {
+      const primaryConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'non-existing-model' },
+          prompt: {
+            template: [{ role: 'user', content: 'Test' }]
+          }
+        }
+      };
+
+      const fallbackConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-5-mini' },
+          prompt: {
+            template: [{ role: 'user', content: 'Test' }]
+          }
+        }
+      };
+
+      const mockResponseWithErrors = {
+        ...(await parseMockResponse<CompletionPostResponse>(
+          'orchestration',
+          'orchestration-chat-completion-success-response.json'
+        )),
+        intermediate_failures: [
+          {
+            message: 'Model non-existing-model not found',
+            code: 'MODEL_NOT_FOUND',
+            location: 'config[0]'
+          }
+        ]
+      };
+
+      mockInference(
+        {
+          data: constructCompletionPostRequest([primaryConfig, fallbackConfig])
+        },
+        {
+          data: mockResponseWithErrors,
+          status: 200
+        },
+        {
+          url: 'inference/deployments/1234/v2/completion'
+        }
+      );
+
+      const response = await new OrchestrationClient([
+        primaryConfig,
+        fallbackConfig
+      ]).chatCompletion();
+
+      const failures = response.getIntermediateFailures();
+      expect(failures).toBeDefined();
+      expect(failures).toHaveLength(1);
+      expect(failures![0].message).toBe('Model non-existing-model not found');
+    });
+
+    it('should throw when provided empty config array', () => {
+      expect(() => new OrchestrationClient([])).toThrow(
+        'Configuration array must not be empty.'
+      );
+    });
+  });
+
+  describe('Orchestration Client with per-config stream options', () => {
+    it('calls streaming with stream options for module fallback configs', async () => {
+      const primaryConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-4o' },
+          prompt: {
+            template: [{ role: 'user', content: 'Primary' }]
+          }
+        }
+      };
+
+      const fallbackConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-5-mini' },
+          prompt: {
+            template: [{ role: 'user', content: 'Fallback' }]
+          }
+        }
+      };
+
+      const streamOptions: StreamOptionsWithOverrides = {
+        promptTemplating: { include_usage: false },
+        overrides: {
+          0: { promptTemplating: { include_usage: true } }
+        }
+      };
+
+      mockInference(
+        {
+          data: constructCompletionPostRequest(
+            [primaryConfig, fallbackConfig],
+            undefined,
+            true,
+            streamOptions
+          )
+        },
+        {
+          data: streamMockResponse,
+          status: 200
+        },
+        {
+          url: 'inference/deployments/1234/v2/completion'
+        }
+      );
+
+      const response = await new OrchestrationClient([
+        primaryConfig,
+        fallbackConfig
+      ]).stream({}, undefined, streamOptions);
+
+      const chunks: string[] = [];
+      for await (const chunk of response.stream) {
+        const content = chunk.getDeltaContent();
+        if (content) {
+          chunks.push(content);
+        }
+      }
+
+      expect(chunks.length).toBeGreaterThan(0);
+      expect(chunks.join('')).toContain('SAP Cloud SDK');
+    });
+
+    it('calls streaming with array-based overrides', async () => {
+      const primaryConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-4o' },
+          prompt: {
+            template: [{ role: 'user', content: 'Primary' }]
+          }
+        }
+      };
+
+      const fallbackConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-5-mini' },
+          prompt: {
+            template: [{ role: 'user', content: 'Fallback' }]
+          }
+        }
+      };
+
+      const streamOptions: StreamOptions = {
+        global: { chunk_size: 100 },
+        overrides: [
+          { promptTemplating: { include_usage: true } },
+          { promptTemplating: { include_usage: false } }
+        ] as any
+      };
+
+      mockInference(
+        {
+          data: constructCompletionPostRequest(
+            [primaryConfig, fallbackConfig],
+            undefined,
+            true,
+            streamOptions
+          )
+        },
+        {
+          data: streamMockResponse,
+          status: 200
+        },
+        {
+          url: 'inference/deployments/1234/v2/completion'
+        }
+      );
+
+      const response = await new OrchestrationClient([
+        primaryConfig,
+        fallbackConfig
+      ]).stream({}, undefined, streamOptions);
+
+      const chunks: string[] = [];
+      for await (const chunk of response.stream) {
+        const content = chunk.getDeltaContent();
+        if (content) {
+          chunks.push(content);
+        }
+      }
+
+      expect(chunks.length).toBeGreaterThan(0);
+      expect(chunks.join('')).toContain('SAP Cloud SDK');
+    });
+
+    it('warns when override index is out of bounds', async () => {
+      const primaryConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-4o' },
+          prompt: {
+            template: [{ role: 'user', content: 'Primary' }]
+          }
+        }
+      };
+
+      const fallbackConfig: OrchestrationModuleConfig = {
+        promptTemplating: {
+          model: { name: 'gpt-5-mini' },
+          prompt: {
+            template: [{ role: 'user', content: 'Fallback' }]
+          }
+        }
+      };
+
+      const streamOptions: StreamOptionsWithOverrides = {
+        overrides: {
+          0: { promptTemplating: { include_usage: true } },
+          2: { promptTemplating: { include_usage: false } }
+        }
+      };
+
+      const logger = createLogger({
+        package: 'orchestration',
+        messageContext: 'orchestration-utils'
+      });
+      const debugSpy = jest.spyOn(logger, 'debug');
+
+      mockInference(
+        {
+          data: constructCompletionPostRequest(
+            [primaryConfig, fallbackConfig],
+            undefined,
+            true,
+            streamOptions
+          )
+        },
+        {
+          data: streamMockResponse,
+          status: 200
+        },
+        {
+          url: 'inference/deployments/1234/v2/completion'
+        }
+      );
+
+      await new OrchestrationClient([primaryConfig, fallbackConfig]).stream(
+        {},
+        undefined,
+        streamOptions
+      );
+
+      expect(debugSpy).toHaveBeenCalledWith(expect.stringContaining('2'));
+      expect(debugSpy).toHaveBeenCalledWith(
+        expect.stringContaining('do not correspond to any module configuration')
+      );
+    });
+  });
 });