
Commit 8b7e83a

authored by Christoph Auer

docs: Update API VLM example with granite-docling (#2294)

chore: Update API VLM example with granite-docling

Signed-off-by: Christoph Auer <[email protected]>

1 parent 8322c2e

File tree: 1 file changed, +38 −19

docs/examples/vlm_pipeline_api_model.py

Lines changed: 38 additions & 19 deletions
@@ -46,18 +46,35 @@
 
 ### Example of ApiVlmOptions definitions
 
-#### Using LM Studio
+#### Using LM Studio or VLLM (OpenAI-compatible APIs)
+
+
+def openai_compatible_vlm_options(
+    model: str,
+    prompt: str,
+    format: ResponseFormat,
+    hostname_and_port,
+    temperature: float = 0.7,
+    max_tokens: int = 4096,
+    api_key: str = "",
+    skip_special_tokens=False,
+):
+    headers = {}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
 
-
-def lms_vlm_options(model: str, prompt: str, format: ResponseFormat):
     options = ApiVlmOptions(
-        url="http://localhost:1234/v1/chat/completions",  # the default LM Studio
+        url=f"http://{hostname_and_port}/v1/chat/completions",  # LM studio defaults to port 1234, VLLM to 8000
         params=dict(
             model=model,
+            max_tokens=max_tokens,
+            skip_special_tokens=skip_special_tokens,  # needed for VLLM
         ),
+        headers=headers,
         prompt=prompt,
         timeout=90,
-        scale=1.0,
+        scale=2.0,
+        temperature=temperature,
         response_format=format,
     )
     return options
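
For reference, the new helper can be called for either backend as sketched below. This is a minimal usage sketch, not part of the commit; it assumes the ApiVlmOptions and ResponseFormat imports already present at the top of vlm_pipeline_api_model.py, and the model names and ports simply mirror the comments in the diff.

    # Minimal usage sketch of openai_compatible_vlm_options (assumes the example file's imports).

    # LM Studio: defaults to port 1234, serving the MLX build of granite-docling.
    lm_studio_options = openai_compatible_vlm_options(
        model="granite-docling-258m-mlx",
        prompt="Convert this page to docling.",
        format=ResponseFormat.DOCTAGS,
        hostname_and_port="localhost:1234",
    )

    # VLLM: defaults to port 8000, serving the Hugging Face model id.
    vllm_options = openai_compatible_vlm_options(
        model="ibm-granite/granite-docling-258M",
        prompt="Convert this page to docling.",
        format=ResponseFormat.DOCTAGS,
        hostname_and_port="localhost:8000",
    )
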
@@ -207,24 +224,24 @@ def main():
     # The ApiVlmOptions() allows to interface with APIs supporting
     # the multi-modal chat interface. Here follow a few example on how to configure those.
 
-    # One possibility is self-hosting the model, e.g., via LM Studio or Ollama.
-
-    # Example using the SmolDocling model with LM Studio:
-    # (uncomment the following lines)
-    pipeline_options.vlm_options = lms_vlm_options(
-        model="smoldocling-256m-preview-mlx-docling-snap",
+    # One possibility is self-hosting the model, e.g., via LM Studio, Ollama or VLLM.
+    #
+    # e.g. with VLLM, serve granite-docling with these commands:
+    # > vllm serve ibm-granite/granite-docling-258M --revision untied
+    #
+    # with LM Studio, serve granite-docling with these commands:
+    # > lms server start
+    # > lms load ibm-granite/granite-docling-258M-mlx
+
+    # Example using the Granite-Docling model with LM Studio or VLLM:
+    pipeline_options.vlm_options = openai_compatible_vlm_options(
+        model="granite-docling-258m-mlx",  # For VLLM use "ibm-granite/granite-docling-258M"
+        hostname_and_port="localhost:1234",  # LM studio defaults to port 1234, VLLM to 8000
         prompt="Convert this page to docling.",
         format=ResponseFormat.DOCTAGS,
+        api_key="",
     )
 
-    # Example using the Granite Vision model with LM Studio:
-    # (uncomment the following lines)
-    # pipeline_options.vlm_options = lms_vlm_options(
-    #     model="granite-vision-3.2-2b",
-    #     prompt="OCR the full page to markdown.",
-    #     format=ResponseFormat.MARKDOWN,
-    # )
-
     # Example using the OlmOcr (dynamic prompt) model with LM Studio:
     # (uncomment the following lines)
     # pipeline_options.vlm_options = lms_olmocr_vlm_options(
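
Before pointing the pipeline at the server, a quick reachability check can help. The sketch below is an assumption layered on top of the commit: it relies on the GET /v1/models listing that OpenAI-compatible servers such as LM Studio and VLLM expose, and it uses the requests package, which the example itself does not import.

    import requests  # not imported by the example; used here only for a sanity check

    # Confirm the OpenAI-compatible server is reachable and the model is loaded.
    hostname_and_port = "localhost:1234"  # or "localhost:8000" for VLLM
    resp = requests.get(f"http://{hostname_and_port}/v1/models", timeout=10)
    resp.raise_for_status()
    print("Models served:", [m["id"] for m in resp.json().get("data", [])])
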
@@ -261,3 +278,5 @@ def main():
 
 if __name__ == "__main__":
     main()
+
+# %%
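
For context, the rest of main() (outside the hunks shown) wires these options into docling's VLM pipeline. The snippet below is a sketch assuming docling's public VlmPipelineOptions / VlmPipeline / PdfFormatOption API and a placeholder input path; it is not taken verbatim from the file, and openai_compatible_vlm_options and ResponseFormat are assumed to be in scope as defined above.

    # Sketch of the converter wiring around the vlm_options configured above
    # (assumes docling's public API; the input path is a placeholder).
    from docling.datamodel.base_models import InputFormat
    from docling.datamodel.pipeline_options import VlmPipelineOptions
    from docling.document_converter import DocumentConverter, PdfFormatOption
    from docling.pipeline.vlm_pipeline import VlmPipeline

    pipeline_options = VlmPipelineOptions(enable_remote_services=True)  # remote API calls must be enabled
    pipeline_options.vlm_options = openai_compatible_vlm_options(
        model="granite-docling-258m-mlx",
        prompt="Convert this page to docling.",
        format=ResponseFormat.DOCTAGS,
        hostname_and_port="localhost:1234",
    )

    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_cls=VlmPipeline,
                pipeline_options=pipeline_options,
            )
        }
    )
    result = converter.convert("path/to/document.pdf")  # placeholder input
    print(result.document.export_to_markdown())
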
