
Commit a8c8bd5

samdenty and lgrammel authored
feat(embed-many): respect supportsParallelCalls & concurrency (vercel#6108)
## Background

We didn't actually read the `supportsParallelCalls` field at all, and we did everything serially in the embedding model.

## Summary

This makes the embedding model actually respect `supportsParallelCalls`.

Co-authored-by: Lars Grammel <[email protected]>
1 parent 41fa418 commit a8c8bd5
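
For orientation, here is a minimal usage sketch of the new option; the provider import and model id are illustrative assumptions, not part of this commit:

```ts
import { embedMany } from 'ai';
import { openai } from '@ai-sdk/openai'; // illustrative provider choice

const { embeddings } = await embedMany({
  model: openai.embedding('text-embedding-3-small'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
  // New in this commit: cap on concurrent doEmbed requests. Parallelism only
  // kicks in when the model reports supportsParallelCalls: true; otherwise
  // chunks are processed one at a time.
  maxParallelCalls: 2,
});
```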

5 files changed: +268 -104 lines changed

.changeset/hungry-hotels-hunt.md

Lines changed: 7 additions & 0 deletions
```diff
@@ -0,0 +1,7 @@
+---
+'@ai-sdk/amazon-bedrock': patch
+'@ai-sdk/provider': patch
+'ai': patch
+---
+
+feat(embed-many): respect supportsParallelCalls & concurrency
```

packages/ai/core/embed/embed-many.test.ts

Lines changed: 148 additions & 0 deletions
```diff
@@ -5,6 +5,7 @@ import {
 } from '../test/mock-embedding-model-v2';
 import { MockTracer } from '../test/mock-tracer';
 import { embedMany } from './embed-many';
+import { createResolvablePromise } from '../../util/create-resolvable-promise';
 
 const dummyEmbeddings = [
   [0.1, 0.2, 0.3],
@@ -18,6 +19,153 @@ const testValues = [
   'snowy night in the mountains',
 ];
 
+describe('model.supportsParallelCalls', () => {
+  it('should not parallelize when false', async () => {
+    const events: string[] = [];
+    let callCount = 0;
+
+    const resolvables = [
+      createResolvablePromise<void>(),
+      createResolvablePromise<void>(),
+      createResolvablePromise<void>(),
+    ];
+
+    const embedManyPromise = embedMany({
+      model: new MockEmbeddingModelV2({
+        supportsParallelCalls: false,
+        maxEmbeddingsPerCall: 1,
+        doEmbed: async () => {
+          const index = callCount++;
+          events.push(`start-${index}`);
+
+          await resolvables[index].promise;
+          events.push(`end-${index}`);
+
+          return {
+            embeddings: [dummyEmbeddings[index]],
+            response: { headers: {}, body: {} },
+          };
+        },
+      }),
+      values: testValues,
+    });
+
+    resolvables.forEach(resolvable => {
+      resolvable.resolve();
+    });
+
+    const { embeddings } = await embedManyPromise;
+
+    expect(events).toStrictEqual([
+      'start-0',
+      'end-0',
+      'start-1',
+      'end-1',
+      'start-2',
+      'end-2',
+    ]);
+
+    expect(embeddings).toStrictEqual(dummyEmbeddings);
+  });
+
+  it('should parallelize when true', async () => {
+    const events: string[] = [];
+    let callCount = 0;
+
+    const resolvables = [
+      createResolvablePromise<void>(),
+      createResolvablePromise<void>(),
+      createResolvablePromise<void>(),
+    ];
+
+    const embedManyPromise = embedMany({
+      model: new MockEmbeddingModelV2({
+        supportsParallelCalls: true,
+        maxEmbeddingsPerCall: 1,
+        doEmbed: async () => {
+          const index = callCount++;
+          events.push(`start-${index}`);
+
+          await resolvables[index].promise;
+          events.push(`end-${index}`);
+
+          return {
+            embeddings: [dummyEmbeddings[index]],
+            response: { headers: {}, body: {} },
+          };
+        },
+      }),
+      values: testValues,
+    });
+
+    resolvables.forEach(resolvable => {
+      resolvable.resolve();
+    });
+
+    const { embeddings } = await embedManyPromise;
+
+    expect(events).toStrictEqual([
+      'start-0',
+      'start-1',
+      'start-2',
+      'end-0',
+      'end-1',
+      'end-2',
+    ]);
+
+    expect(embeddings).toStrictEqual(dummyEmbeddings);
+  });
+
+  it('should support maxParallelCalls', async () => {
+    const events: string[] = [];
+    let callCount = 0;
+
+    const resolvables = [
+      createResolvablePromise<void>(),
+      createResolvablePromise<void>(),
+      createResolvablePromise<void>(),
+    ];
+
+    const embedManyPromise = embedMany({
+      maxParallelCalls: 2,
+      model: new MockEmbeddingModelV2({
+        supportsParallelCalls: true,
+        maxEmbeddingsPerCall: 1,
+        doEmbed: async () => {
+          const index = callCount++;
+          events.push(`start-${index}`);
+
+          await resolvables[index].promise;
+          events.push(`end-${index}`);
+
+          return {
+            embeddings: [dummyEmbeddings[index]],
+            response: { headers: {}, body: {} },
+          };
+        },
+      }),
+      values: testValues,
+    });
+
+    resolvables.forEach(resolvable => {
+      resolvable.resolve();
+    });
+
+    const { embeddings } = await embedManyPromise;
+
+    expect(events).toStrictEqual([
+      'start-0',
+      'start-1',
+      'end-0',
+      'end-1',
+      'start-2',
+      'end-2',
+    ]);
+
+    expect(embeddings).toStrictEqual(dummyEmbeddings);
+  });
+});
+
 describe('result.embedding', () => {
   it('should generate embeddings', async () => {
     const result = await embedMany({
```
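
These tests control timing by gating every `doEmbed` call on an externally resolvable promise. `createResolvablePromise` itself is not shown in this diff; judging from how the tests use it (a `.promise` field plus `.resolve()`), a deferred-promise helper of roughly this shape would suffice:

```ts
// Sketch of a deferred-promise helper; an assumption based on test usage,
// not necessarily the SDK's actual implementation.
function createResolvablePromise<T = void>(): {
  promise: Promise<T>;
  resolve: (value: T | PromiseLike<T>) => void;
  reject: (reason?: unknown) => void;
} {
  let resolve!: (value: T | PromiseLike<T>) => void;
  let reject!: (reason?: unknown) => void;
  const promise = new Promise<T>((res, rej) => {
    resolve = res;
    reject = rej;
  });
  return { promise, resolve, reject };
}
```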

packages/ai/core/embed/embed-many.ts

Lines changed: 74 additions & 54 deletions
```diff
@@ -28,6 +28,7 @@ has a limit on how many embeddings can be generated in a single call.
 export async function embedMany<VALUE>({
   model,
   values,
+  maxParallelCalls = Infinity,
   maxRetries: maxRetriesArg,
   abortSignal,
   headers,
@@ -73,6 +74,13 @@ Only applicable for HTTP-based providers.
 functionality that can be fully encapsulated in the provider.
    */
   providerOptions?: ProviderOptions;
+
+  /**
+   * Maximum number of concurrent requests.
+   *
+   * @default Infinity
+   */
+  maxParallelCalls?: number;
 }): Promise<EmbedManyResult<VALUE>> {
   const { maxRetries, retry } = prepareRetries({ maxRetries: maxRetriesArg });
 
@@ -100,7 +108,10 @@ Only applicable for HTTP-based providers.
     }),
     tracer,
     fn: async span => {
-      const maxEmbeddingsPerCall = await model.maxEmbeddingsPerCall;
+      const [maxEmbeddingsPerCall, supportsParallelCalls] = await Promise.all([
+        model.maxEmbeddingsPerCall,
+        model.supportsParallelCalls,
+      ]);
 
       // the model has not specified limits on
       // how many embeddings can be generated in a single call
```
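
The switch from a single `await` to `Promise.all` matters because these model fields may be plain values or promises (this typing is an assumption inferred from the fact that they are awaited). `Promise.all` passes non-promise inputs through unchanged, so one `await` covers both cases:

```ts
// Hypothetical shape of the two model fields; plain values and promises
// both resolve the same way through Promise.all.
async function readModelLimits(model: {
  maxEmbeddingsPerCall: number | undefined | PromiseLike<number | undefined>;
  supportsParallelCalls: boolean | PromiseLike<boolean>;
}) {
  const [maxEmbeddingsPerCall, supportsParallelCalls] = await Promise.all([
    model.maxEmbeddingsPerCall,
    model.supportsParallelCalls,
  ]);
  return { maxEmbeddingsPerCall, supportsParallelCalls };
}
```

The final hunk of this file rewrites the request loop itself: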
```diff
@@ -192,66 +203,75 @@ Only applicable for HTTP-based providers.
       > = [];
       let tokens = 0;
 
-      for (const chunk of valueChunks) {
-        const {
-          embeddings: responseEmbeddings,
-          usage,
-          response,
-        } = await retry(() => {
-          // nested spans to align with the embedMany telemetry data:
-          return recordSpan({
-            name: 'ai.embedMany.doEmbed',
-            attributes: selectTelemetryAttributes({
-              telemetry,
-              attributes: {
-                ...assembleOperationName({
-                  operationId: 'ai.embedMany.doEmbed',
-                  telemetry,
-                }),
-                ...baseTelemetryAttributes,
-                // specific settings that only make sense on the outer level:
-                'ai.values': {
-                  input: () => chunk.map(value => JSON.stringify(value)),
-                },
-              },
-            }),
-            tracer,
-            fn: async doEmbedSpan => {
-              const modelResponse = await model.doEmbed({
-                values: chunk,
-                abortSignal,
-                headers,
-                providerOptions,
-              });
-
-              const embeddings = modelResponse.embeddings;
-              const usage = modelResponse.usage ?? { tokens: NaN };
+      const parallelChunks = splitArray(
+        valueChunks,
+        supportsParallelCalls ? maxParallelCalls : 1,
+      );
 
-              doEmbedSpan.setAttributes(
-                selectTelemetryAttributes({
+      for (const parallelChunk of parallelChunks) {
+        const results = await Promise.all(
+          parallelChunk.map(chunk => {
+            return retry(() => {
+              // nested spans to align with the embedMany telemetry data:
+              return recordSpan({
+                name: 'ai.embedMany.doEmbed',
+                attributes: selectTelemetryAttributes({
                   telemetry,
                   attributes: {
-                    'ai.embeddings': {
-                      output: () =>
-                        embeddings.map(embedding => JSON.stringify(embedding)),
+                    ...assembleOperationName({
+                      operationId: 'ai.embedMany.doEmbed',
+                      telemetry,
+                    }),
+                    ...baseTelemetryAttributes,
+                    // specific settings that only make sense on the outer level:
+                    'ai.values': {
+                      input: () => chunk.map(value => JSON.stringify(value)),
                     },
-                    'ai.usage.tokens': usage.tokens,
                   },
                 }),
-              );
-
-              return {
-                embeddings,
-                usage,
-                response: modelResponse.response,
-              };
-            },
-          });
-        });
+                tracer,
+                fn: async doEmbedSpan => {
+                  const modelResponse = await model.doEmbed({
+                    values: chunk,
+                    abortSignal,
+                    headers,
+                    providerOptions,
+                  });
+
+                  const embeddings = modelResponse.embeddings;
+                  const usage = modelResponse.usage ?? { tokens: NaN };
+
+                  doEmbedSpan.setAttributes(
+                    selectTelemetryAttributes({
+                      telemetry,
+                      attributes: {
+                        'ai.embeddings': {
+                          output: () =>
+                            embeddings.map(embedding =>
+                              JSON.stringify(embedding),
+                            ),
+                        },
+                        'ai.usage.tokens': usage.tokens,
+                      },
+                    }),
+                  );
+
+                  return {
+                    embeddings,
+                    usage,
+                    response: modelResponse.response,
+                  };
+                },
+              });
+            });
+          }),
+        );
 
-        embeddings.push(...responseEmbeddings);
-        responses.push(response);
-        tokens += usage.tokens;
+        for (const result of results) {
+          embeddings.push(...result.embeddings);
+          responses.push(result.response);
+          tokens += result.usage.tokens;
+        }
       }
 
       span.setAttributes(
```
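
The concurrency scheme is batch-based: `valueChunks` is split into groups of at most `maxParallelCalls` (or groups of 1 when the model does not support parallel calls), each group runs under `Promise.all`, and the next group starts only once the whole group has settled. `splitArray` is not shown in this diff, so the helper below is a sketch of the pattern rather than the SDK's internal implementation:

```ts
// Chunk an array into groups of at most `size` elements (assumes size >= 1).
// A non-finite size such as Infinity yields a single group.
function splitArray<T>(array: T[], size: number): T[][] {
  const groups: T[][] = [];
  for (let i = 0; i < array.length; i += size) {
    groups.push(array.slice(i, i + size));
  }
  return groups;
}

// Batch-based limiting: groups run sequentially, group members in parallel.
async function mapInBatches<T, R>(
  items: T[],
  limit: number,
  fn: (item: T) => Promise<R>,
): Promise<R[]> {
  const results: R[] = [];
  for (const batch of splitArray(items, limit)) {
    results.push(...(await Promise.all(batch.map(fn))));
  }
  return results;
}
```

This matches the third test above: with `maxParallelCalls: 2` and three single-value chunks, calls 0 and 1 start together and call 2 starts only after both complete (`start-0, start-1, end-0, end-1, start-2, end-2`). The tradeoff of batching over a sliding-window limiter is that each batch waits for its slowest call before the next batch begins.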

packages/amazon-bedrock/src/bedrock-embedding-model.test.ts

Lines changed: 0 additions & 8 deletions
```diff
@@ -86,14 +86,6 @@ describe('doEmbed', () => {
     expect(usage?.tokens).toStrictEqual(8);
   });
 
-  it('should handle multiple input values and extract usage', async () => {
-    const { usage } = await model.doEmbed({
-      values: testValues,
-    });
-
-    expect(usage?.tokens).toStrictEqual(16);
-  });
-
   it('should properly combine headers from all sources', async () => {
     const optionsHeaders = {
       'options-header': 'options-value',
```
