diff --git a/src/codegate/muxing/adapter.py b/src/codegate/muxing/adapter.py
index e4ac3dc2..e63ded38 100644
--- a/src/codegate/muxing/adapter.py
+++ b/src/codegate/muxing/adapter.py
@@ -158,7 +158,12 @@ def _format_ollama(self, chunk: str) -> str:
             ollama_chunk = ChatResponse(**chunk_dict)
             open_ai_chunk = OLlamaToModel.normalize_chat_chunk(ollama_chunk)
             return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-        except Exception:
+        except Exception as e:
+            # Sometimes we receive an OpenAI formatted chunk from ollama. Specifically when
+            # talking to Cline or Kodu. If that's the case we use the format_openai function.
+            if "data:" in chunk:
+                return self._format_openai(chunk)
+            logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
             return chunk
 
     def _format_antropic(self, chunk: str) -> str:
diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index 73f53037..ea7e56e9 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -8,6 +8,7 @@
 
 from codegate.clients.clients import ClientType
 from codegate.providers.base import BaseCompletionHandler
+from codegate.providers.ollama.adapter import OLlamaToModel
 
 logger = structlog.get_logger("codegate")
 
@@ -24,29 +25,9 @@ async def ollama_stream_generator(  # noqa: C901
                 # the correct format and start to handle multiple clients
                 # in a more robust way.
                 if client_type in [ClientType.CLINE, ClientType.KODU]:
-                    # First get the raw dict from the chunk
                     chunk_dict = chunk.model_dump()
-                    # Create response dictionary in OpenAI-like format
-                    response = {
-                        "id": f"chatcmpl-{chunk_dict.get('created_at', '')}",
-                        "object": "chat.completion.chunk",
-                        "created": chunk_dict.get("created_at"),
-                        "model": chunk_dict.get("model"),
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {
-                                    "content": chunk_dict.get("message", {}).get("content", ""),
-                                    "role": chunk_dict.get("message", {}).get("role", "assistant"),
-                                },
-                                "finish_reason": (
-                                    chunk_dict.get("done_reason")
-                                    if chunk_dict.get("done", False)
-                                    else None
-                                ),
-                            }
-                        ],
-                    }
+                    model_response = OLlamaToModel.normalize_chat_chunk(chunk)
+                    response = model_response.model_dump()
                     # Preserve existing type or add default if missing
                     response["type"] = chunk_dict.get("type", "stream")
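
For context, here is a minimal standalone sketch (not part of the patch) of the situation the `_format_ollama` fallback handles: a chunk is usually a bare Ollama JSON object, but Cline and Kodu sometimes deliver an OpenAI-style SSE line prefixed with `data:`. The function and sample payloads below are illustrative assumptions, not codegate APIs; the real code validates with `ChatResponse` and normalizes via `OLlamaToModel.normalize_chat_chunk`, and delegates the SSE case to `_format_openai`.

```python
import json


def format_chunk(chunk: str) -> str:
    """Illustrative only: mirrors the detection logic added to _format_ollama."""
    try:
        # A plain Ollama chunk is a bare JSON object and parses cleanly.
        payload = json.loads(chunk)
        # The real code normalizes this into an OpenAI-style chunk; here we
        # simply re-serialize to keep the sketch self-contained.
        return json.dumps(payload)
    except json.JSONDecodeError:
        # An OpenAI-style SSE line ('data: {...}') fails the parse above;
        # the real code hands it to _format_openai instead.
        if "data:" in chunk:
            return chunk.removeprefix("data:").strip()
        # Anything else is passed through unchanged, as before.
        return chunk


ollama_chunk = '{"model": "llama3", "message": {"role": "assistant", "content": "Hi"}}'
openai_chunk = 'data: {"choices": [{"delta": {"content": "Hi"}}]}'
print(format_chunk(ollama_chunk))
print(format_chunk(openai_chunk))
```

The same distinction motivates the second file's change: instead of hand-building the OpenAI-like dict for Cline/Kodu, the stream generator now reuses `OLlamaToModel.normalize_chat_chunk`, so both code paths share one conversion.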