|
20 | 20 | from rsb.models.field import Field |
21 | 21 |
|
22 | 22 |
|
| 23 | +from agentle.generations.models.generation.generation_config import GenerationConfig |
23 | 24 | from agentle.generations.models.message_parts.file import FilePart |
24 | 25 | from agentle.generations.models.structured_outputs_store.visual_media_description import ( |
25 | 26 | VisualMediaDescription, |
@@ -182,6 +183,9 @@ class DocxFileParser(DocumentParser): |
182 | 183 | Note: When this is enabled, most other configuration options are ignored as the AI handles all processing. |
183 | 184 | """ |
184 | 185 |
|
| 186 | + max_output_tokens: int | None = Field(default=None) |
| 187 | + """Maximum number of tokens to generate in the response.""" |
| 188 | + |
185 | 189 | async def parse_async( |
186 | 190 | self, |
187 | 191 | document_path: str, |
@@ -517,6 +521,9 @@ def _try_convert_docx_to_pdf_headless( |
517 | 521 | "Output clear, concise descriptions suitable for a 'Visual Content' section." |
518 | 522 | ), |
519 | 523 | response_schema=VisualMediaDescription, |
| 524 | + generation_config=GenerationConfig( |
| 525 | + max_output_tokens=self.max_output_tokens |
| 526 | + ), |
520 | 527 | ) |
521 | 528 | page_description = agent_response.parsed.md |
522 | 529 | image_cache[page_hash] = (page_description, "") |
@@ -663,6 +670,7 @@ def _convert_docx_to_pdf(input_path: str, out_dir: str) -> str | None: |
663 | 670 | model=self.model, |
664 | 671 | use_native_pdf_processing=True, |
665 | 672 | strategy=self.strategy, |
| 673 | + max_output_tokens=self.max_output_tokens, |
666 | 674 | ) |
667 | 675 |
|
668 | 676 | logger.debug("Delegating to PDFFileParser with native processing") |
|
0 commit comments