stacklok · aponcedeleonch · Feb 12, 2025 · Feb 12, 2025
diff --git a/src/codegate/muxing/adapter.py b/src/codegate/muxing/adapter.py
@@ -158,7 +158,12 @@ def _format_ollama(self, chunk: str) -> str:
             ollama_chunk = ChatResponse(**chunk_dict)
             open_ai_chunk = OLlamaToModel.normalize_chat_chunk(ollama_chunk)
             return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-        except Exception:
+        except Exception as e:
+            # Sometimes we receive an OpenAI-formatted chunk from Ollama, specifically when
+            # talking to Cline or Kodu. If that's the case, we use the _format_openai method.
+            if "data:" in chunk:
+                return self._format_openai(chunk)
+            logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
             return chunk
 
     def _format_antropic(self, chunk: str) -> str:

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
@@ -8,6 +8,7 @@
 
 from codegate.clients.clients import ClientType
 from codegate.providers.base import BaseCompletionHandler
+from codegate.providers.ollama.adapter import OLlamaToModel
 
 logger = structlog.get_logger("codegate")
 
@@ -24,29 +25,9 @@ async def ollama_stream_generator(  # noqa: C901
                 # the correct format and start to handle multiple clients
                 # in a more robust way.
                 if client_type in [ClientType.CLINE, ClientType.KODU]:
-                    # First get the raw dict from the chunk
                     chunk_dict = chunk.model_dump()
-                    # Create response dictionary in OpenAI-like format
-                    response = {
-                        "id": f"chatcmpl-{chunk_dict.get('created_at', '')}",
-                        "object": "chat.completion.chunk",
-                        "created": chunk_dict.get("created_at"),
-                        "model": chunk_dict.get("model"),
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {
-                                    "content": chunk_dict.get("message", {}).get("content", ""),
-                                    "role": chunk_dict.get("message", {}).get("role", "assistant"),
-                                },
-                                "finish_reason": (
-                                    chunk_dict.get("done_reason")
-                                    if chunk_dict.get("done", False)
-                                    else None
-                                ),
-                            }
-                        ],
-                    }
+                    model_response = OLlamaToModel.normalize_chat_chunk(chunk)
+                    response = model_response.model_dump()
                     # Preserve existing type or add default if missing
                     response["type"] = chunk_dict.get("type", "stream")