@@ -46,18 +46,35 @@
 
 ### Example of ApiVlmOptions definitions
 
-#### Using LM Studio
+#### Using LM Studio or vLLM (OpenAI-compatible APIs)
+
+
+def openai_compatible_vlm_options(
+    model: str,
+    prompt: str,
+    format: ResponseFormat,
+    hostname_and_port: str,
+    temperature: float = 0.7,
+    max_tokens: int = 4096,
+    api_key: str = "",
+    skip_special_tokens: bool = False,
+):
+    headers = {}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
 
-
-def lms_vlm_options(model: str, prompt: str, format: ResponseFormat):
     options = ApiVlmOptions(
-        url="http://localhost:1234/v1/chat/completions",  # the default LM Studio
+        url=f"http://{hostname_and_port}/v1/chat/completions",  # LM Studio defaults to port 1234, vLLM to 8000
         params=dict(
             model=model,
+            max_tokens=max_tokens,
+            skip_special_tokens=skip_special_tokens,  # needed for vLLM
         ),
+        headers=headers,
         prompt=prompt,
         timeout=90,
-        scale=1.0,
+        scale=2.0,
+        temperature=temperature,
         response_format=format,
     )
     return options
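Editor's note: the new `openai_compatible_vlm_options` helper only builds an `ApiVlmOptions` pointing at an OpenAI-compatible `/v1/chat/completions` endpoint; it does not verify that anything is listening there. Before running the pipeline, the server and the served model name can be checked via the standard `/v1/models` endpoint, which both LM Studio and vLLM expose. A minimal sketch, not part of the diff; the host and port (`localhost:1234`, the LM Studio default) are assumptions:

```python
# Sketch: sanity-check an OpenAI-compatible server before running the pipeline.
# The host/port ("localhost:1234", the LM Studio default) is an assumption;
# a default vLLM deployment listens on port 8000 instead.
import requests

resp = requests.get("http://localhost:1234/v1/models", timeout=10)
resp.raise_for_status()
print([m["id"] for m in resp.json()["data"]])  # ids of the models being served
```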
@@ -207,24 +224,24 @@ def main():
     # The ApiVlmOptions() allows interfacing with APIs supporting
     # the multi-modal chat interface. Here follow a few examples on how to configure those.
 
-    # One possibility is self-hosting the model, e.g., via LM Studio or Ollama.
-
-    # Example using the SmolDocling model with LM Studio:
-    # (uncomment the following lines)
-    pipeline_options.vlm_options = lms_vlm_options(
-        model="smoldocling-256m-preview-mlx-docling-snap",
+    # One possibility is self-hosting the model, e.g., via LM Studio, Ollama, or vLLM.
+    #
+    # For example, with vLLM, serve granite-docling with this command:
+    # > vllm serve ibm-granite/granite-docling-258M --revision untied
+    #
+    # With LM Studio, serve granite-docling with these commands:
+    # > lms server start
+    # > lms load ibm-granite/granite-docling-258M-mlx
+
+    # Example using the Granite-Docling model with LM Studio or vLLM:
+    pipeline_options.vlm_options = openai_compatible_vlm_options(
+        model="granite-docling-258m-mlx",  # for vLLM use "ibm-granite/granite-docling-258M"
+        hostname_and_port="localhost:1234",  # LM Studio defaults to port 1234, vLLM to 8000
         prompt="Convert this page to docling.",
         format=ResponseFormat.DOCTAGS,
+        api_key="",
     )
 
-    # Example using the Granite Vision model with LM Studio:
-    # (uncomment the following lines)
-    # pipeline_options.vlm_options = lms_vlm_options(
-    #     model="granite-vision-3.2-2b",
-    #     prompt="OCR the full page to markdown.",
-    #     format=ResponseFormat.MARKDOWN,
-    # )
-
     # Example using the OlmOcr (dynamic prompt) model with LM Studio:
     # (uncomment the following lines)
     # pipeline_options.vlm_options = lms_olmocr_vlm_options(
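Editor's note: the diff drops the commented-out Granite Vision example that relied on the removed `lms_vlm_options` helper. The same configuration can still be expressed through the new helper. A minimal sketch, not part of the diff, reusing the model name, prompt, and response format from the deleted lines; whether that model is loaded in your LM Studio install is an assumption:

```python
# Sketch: the removed Granite Vision example rewritten against the new helper.
# Model name, prompt, and format come from the deleted lines; availability of
# "granite-vision-3.2-2b" in your LM Studio install is an assumption.
pipeline_options.vlm_options = openai_compatible_vlm_options(
    model="granite-vision-3.2-2b",
    hostname_and_port="localhost:1234",
    prompt="OCR the full page to markdown.",
    format=ResponseFormat.MARKDOWN,
)
```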
@@ -261,3 +278,5 @@ def main():
 
 if __name__ == "__main__":
     main()
+
+# %%
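Editor's note: for context, the helper defined above is consumed by docling's `VlmPipeline` roughly as follows. This is a sketch rather than the file's exact contents: import paths and option names follow recent docling releases and may differ between versions, and the input path is a placeholder.

```python
# End-to-end sketch (not part of the diff). openai_compatible_vlm_options and
# ResponseFormat are the names used earlier in this file; the import paths
# below follow recent docling releases and may differ by version.
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

pipeline_options = VlmPipelineOptions(enable_remote_services=True)  # required for remote APIs
pipeline_options.vlm_options = openai_compatible_vlm_options(
    model="granite-docling-258m-mlx",  # for vLLM use "ibm-granite/granite-docling-258M"
    hostname_and_port="localhost:1234",  # LM Studio default; vLLM defaults to 8000
    prompt="Convert this page to docling.",
    format=ResponseFormat.DOCTAGS,
)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
result = converter.convert("path/to/document.pdf")  # placeholder input path
print(result.document.export_to_markdown())
```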