Skip to content

Commit e614c7e

Browse files
authored
feat(js/plugins/google-genai): Added videoMetadata for vertexai (#3660)
1 parent 57204eb commit e614c7e

File tree

4 files changed

+164
-12
lines changed

4 files changed

+164
-12
lines changed

js/plugins/google-genai/src/common/converters.ts

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import {
3030
Part as GeminiPart,
3131
Schema,
3232
SchemaType,
33+
VideoMetadata,
3334
} from './types.js';
3435

3536
export function toGeminiTool(tool: ToolDefinition): FunctionDeclaration {
@@ -100,28 +101,37 @@ function toGeminiSchemaProperty(property?: ToolDefinition['inputSchema']) {
100101
}
101102

102103
function toGeminiMedia(part: Part): GeminiPart {
104+
let media: GeminiPart;
103105
if (part.media?.url.startsWith('data:')) {
104106
// Inline data
105107
const dataUrl = part.media.url;
106108
const b64Data = dataUrl.substring(dataUrl.indexOf(',')! + 1);
107109
const contentType =
108110
part.media.contentType ||
109111
dataUrl.substring(dataUrl.indexOf(':')! + 1, dataUrl.indexOf(';'));
110-
return { inlineData: { mimeType: contentType, data: b64Data } };
112+
media = { inlineData: { mimeType: contentType, data: b64Data } };
113+
} else {
114+
// File data
115+
if (!part.media?.contentType) {
116+
throw Error(
117+
'Must supply a `contentType` when sending File URIs to Gemini.'
118+
);
119+
}
120+
media = {
121+
fileData: {
122+
mimeType: part.media.contentType,
123+
fileUri: part.media.url,
124+
},
125+
};
111126
}
112127

113-
// File data
114-
if (!part.media?.contentType) {
115-
throw Error(
116-
'Must supply a `contentType` when sending File URIs to Gemini.'
117-
);
128+
// Video metadata
129+
if (part.metadata?.videoMetadata) {
130+
let videoMetadata = part.metadata.videoMetadata as VideoMetadata;
131+
media.videoMetadata = { ...videoMetadata };
118132
}
119-
return {
120-
fileData: {
121-
mimeType: part.media.contentType,
122-
fileUri: part.media.url,
123-
},
124-
};
133+
134+
return media;
125135
}
126136

127137
function toGeminiToolRequest(part: Part): GeminiPart {
@@ -195,6 +205,7 @@ function toGeminiPart(part: Part): GeminiPart {
195205
if (part.custom) {
196206
return toGeminiCustom(part);
197207
}
208+
198209
throw new Error('Unsupported Part type ' + JSON.stringify(part));
199210
}
200211

js/plugins/google-genai/src/common/types.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,27 @@ export declare interface CodeExecutionResult {
637637
output: string;
638638
}
639639

640+
/**
641+
* Can be added in the same part as video media to specify
642+
* which part of the video to consider and how many frames
643+
* per second to analyze. VertexAI only.
644+
*/
645+
export declare interface VideoMetadata {
646+
/**
647+
* The video offset to start at, e.g. '3.5s'.
648+
*/
649+
startOffset?: string;
650+
/**
651+
* The video offset to end at, e.g. '10.5s'.
652+
*/
653+
endOffset?: string;
654+
/**
655+
* The number of frames to consider per second
656+
* (range: 0.0 to 24.0).
657+
*/
658+
fps?: number;
659+
}
660+
640661
/**
641662
* This is a Gemini Part. (Users never see this
642663
* structure, it is just built by the converters.)
@@ -651,6 +672,7 @@ export declare interface Part {
651672
thoughtSignature?: string;
652673
executableCode?: ExecutableCode;
653674
codeExecutionResult?: CodeExecutionResult;
675+
videoMetadata?: VideoMetadata;
654676
}
655677

656678
/**

js/plugins/google-genai/tests/common/converters_test.ts

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,84 @@ describe('toGeminiMessage', () => {
162162
],
163163
},
164164
},
165+
{
166+
should:
167+
'should transform genkit message (fileData video content with metadata) correctly',
168+
inputMessage: {
169+
role: 'user',
170+
content: [
171+
{ text: 'describe the following video:' },
172+
{
173+
media: {
174+
contentType: 'video/mp4',
175+
url: 'gs://bucket/video.mp4',
176+
},
177+
metadata: {
178+
videoMetadata: {
179+
startOffset: '10.0s',
180+
endOffset: '20.5s',
181+
fps: 0.5,
182+
},
183+
},
184+
},
185+
],
186+
},
187+
expectedOutput: {
188+
role: 'user',
189+
parts: [
190+
{ text: 'describe the following video:' },
191+
{
192+
fileData: {
193+
mimeType: 'video/mp4',
194+
fileUri: 'gs://bucket/video.mp4',
195+
},
196+
videoMetadata: {
197+
startOffset: '10.0s',
198+
endOffset: '20.5s',
199+
fps: 0.5,
200+
},
201+
},
202+
],
203+
},
204+
},
205+
{
206+
should:
207+
'should transform genkit message (fileData video content with partial metadata) correctly',
208+
inputMessage: {
209+
role: 'user',
210+
content: [
211+
{ text: 'describe the following video:' },
212+
{
213+
media: {
214+
contentType: 'video/mp4',
215+
url: 'gs://bucket/video.mp4',
216+
},
217+
metadata: {
218+
videoMetadata: {
219+
startOffset: '5.3s',
220+
endOffset: '15.7s',
221+
},
222+
},
223+
},
224+
],
225+
},
226+
expectedOutput: {
227+
role: 'user',
228+
parts: [
229+
{ text: 'describe the following video:' },
230+
{
231+
fileData: {
232+
mimeType: 'video/mp4',
233+
fileUri: 'gs://bucket/video.mp4',
234+
},
235+
videoMetadata: {
236+
startOffset: '5.3s',
237+
endOffset: '15.7s',
238+
},
239+
},
240+
],
241+
},
242+
},
165243
{
166244
should: 'should re-populate thoughtSignature from reasoning metadata',
167245
inputMessage: {

js/testapps/basic-gemini/src/index-vertexai.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,54 @@ ai.defineFlow('youtube-videos', async (_, { sendChunk }) => {
6969
url: 'https://www.youtube.com/watch?v=3p1P5grjXIQ',
7070
contentType: 'video/mp4',
7171
},
72+
// Metadata is optional. You can leave it out if you
73+
// want the whole video at default fps.
74+
metadata: {
75+
videoMetadata: {
76+
fps: 0.5,
77+
startOffset: '3.5s',
78+
endOffset: '10.2s',
79+
},
80+
},
7281
},
7382
],
7483
});
7584

7685
return text;
7786
});
7887

88+
export const videoUnderstanding = ai.defineFlow(
89+
{
90+
name: 'video-understanding-metadata',
91+
inputSchema: z.void(),
92+
outputSchema: z.any(),
93+
},
94+
async () => {
95+
const llmResponse = await ai.generate({
96+
model: vertexAI.model('gemini-2.5-flash'),
97+
prompt: [
98+
{
99+
media: {
100+
url: 'gs://cloud-samples-data/video/animals.mp4',
101+
contentType: 'video/mp4',
102+
},
103+
metadata: {
104+
videoMetadata: {
105+
fps: 0.5,
106+
startOffset: '3.5s',
107+
endOffset: '10.2s',
108+
},
109+
},
110+
},
111+
{
112+
text: 'describe this video',
113+
},
114+
],
115+
});
116+
return llmResponse.text;
117+
}
118+
);
119+
79120
// streaming
80121
ai.defineFlow('streaming', async (_, { sendChunk }) => {
81122
const { stream } = ai.generateStream({

0 commit comments

Comments
 (0)