 
 
 async def ollama_stream_generator(
-    stream: AsyncIterator[ChatResponse], is_cline_client: bool
+    stream: AsyncIterator[ChatResponse], base_tool: str
 ) -> AsyncIterator[str]:
     """OpenAI-style SSE format"""
     try:
         async for chunk in stream:
             try:
-                yield f"{chunk.model_dump_json()}\n\n"
                 # TODO We should wire in the client info so we can respond with
                 # the correct format and start to handle multiple clients
                 # in a more robust way.
-                if not is_cline_client:
+                if base_tool != "cline":
                     yield f"{chunk.model_dump_json()}\n"
                 else:
                     # First get the raw dict from the chunk
@@ -63,6 +62,9 @@ async def ollama_stream_generator(
                     for field in optional_fields:
                         if field in chunk_dict:
                             response[field] = chunk_dict[field]
+
+                    print("in cline")
+                    print(json.dumps(response))
                     yield f"data: {json.dumps(response)}\n"
             except Exception as e:
                 logger.error(f"Error in stream generator: {str(e)}")
@@ -76,26 +78,18 @@ class OllamaShim(BaseCompletionHandler):
 
     def __init__(self, base_url):
         self.client = AsyncClient(host=base_url, timeout=300)
-        self.is_cline_client = False
+        self.base_tool = ""
 
     async def execute_completion(
         self,
         request: ChatCompletionRequest,
         api_key: Optional[str],
         stream: bool = False,
         is_fim_request: bool = False,
-        is_cline_client: bool = False,
+        base_tool: Optional[str] = "",
     ) -> Union[ChatResponse, GenerateResponse]:
         """Stream response directly from Ollama API."""
-
-        # TODO: I don't like this, but it's a quick fix for now until we start
-        # passing through the client info so we can respond with the correct
-        # format.
-        # Determine if the client is a Cline client
-        self.is_cline_client = any(
-            "Cline" in str(message.get("content", "")) for message in request.get("messages", [])
-        )
-
+        self.base_tool = base_tool
         if is_fim_request:
             prompt = request["messages"][0].get("content", "")
             response = await self.client.generate(
@@ -116,7 +110,7 @@ def _create_streaming_response(self, stream: AsyncIterator[ChatResponse]) -> Str
         is the format that FastAPI expects for streaming responses.
         """
         return StreamingResponse(
-            ollama_stream_generator(stream, self.is_cline_client),
+            ollama_stream_generator(stream, self.base_tool or ""),
             media_type="application/x-ndjson; charset=utf-8",
             headers={
                 "Cache-Control": "no-cache",
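For context on the two wire formats the generator now produces: non-cline clients receive newline-delimited JSON chunks, while `base_tool == "cline"` receives OpenAI-style `data: {...}` SSE lines. A minimal sketch of how a consumer might decode either format; the payload shapes shown here are hypothetical:

```python
import json
from typing import Optional


def parse_stream_line(line: str, base_tool: str) -> Optional[dict]:
    """Decode one line of the shim's streaming output.

    When base_tool == "cline" the handler yields OpenAI-style SSE lines
    ("data: {...}"); for any other client it yields plain NDJSON.
    """
    line = line.strip()
    if not line:
        return None
    if base_tool == "cline" and line.startswith("data: "):
        # Drop the SSE prefix before decoding the JSON payload.
        line = line[len("data: "):]
    return json.loads(line)


# Hypothetical chunks in each format, for illustration only.
print(parse_stream_line('data: {"choices": [{"delta": {"content": "hi"}}]}', "cline"))
print(parse_stream_line('{"message": {"content": "hi"}}', "other"))
```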