diff --git a/src/codegate/llm_utils/llmclient.py b/src/codegate/llm_utils/llmclient.py
index a74c4d00..d364790c 100644
--- a/src/codegate/llm_utils/llmclient.py
+++ b/src/codegate/llm_utils/llmclient.py
@@ -149,4 +149,4 @@ async def _complete_litellm(
         except Exception as e:
             logger.error(f"LiteLLM completion failed {model} ({content}): {e}")
-            return {}
+            raise e
diff --git a/src/codegate/providers/anthropic/provider.py b/src/codegate/providers/anthropic/provider.py
index cc6a8e25..db26e682 100644
--- a/src/codegate/providers/anthropic/provider.py
+++ b/src/codegate/providers/anthropic/provider.py
@@ -1,4 +1,5 @@
 import json
+import structlog
 from typing import Optional
 
 from fastapi import Header, HTTPException, Request
@@ -53,5 +54,16 @@ async def create_message(
             data = json.loads(body)
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, x_api_key, is_fim_request)
+            try:
+                stream = await self.complete(data, x_api_key, is_fim_request)
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    # log the exception
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in AnthropicProvider completion", error=str(e))
+                    raise HTTPException(status_code=e.status_code, detail=str(e)) # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
 
             return self._completion_handler.create_response(stream)
diff --git a/src/codegate/providers/llamacpp/provider.py b/src/codegate/providers/llamacpp/provider.py
index d262eb79..e63f1fbc 100644
--- a/src/codegate/providers/llamacpp/provider.py
+++ b/src/codegate/providers/llamacpp/provider.py
@@ -1,7 +1,8 @@
 import json
+import structlog
 from typing import Optional
 
-from fastapi import Request
+from fastapi import Request, HTTPException
 
 from codegate.pipeline.base import SequentialPipelineProcessor
 from codegate.pipeline.output import OutputPipelineProcessor
@@ -10,7 +11,7 @@
 from codegate.providers.llamacpp.normalizer import LLamaCppInputNormalizer, LLamaCppOutputNormalizer
 
 
-class LlamaCppProvider(BaseProvider):
+class LlamaCppProvider(BaseProvider):
     def __init__(
         self,
         pipeline_processor: Optional[SequentialPipelineProcessor] = None,
@@ -46,7 +47,24 @@ async def create_completion(
         ):
             body = await request.body()
             data = json.loads(body)
+            logger = structlog.get_logger("codegate")
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, None, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, None, is_fim_request=is_fim_request)
+            except RuntimeError as e:
+                # propagate as error 500
+                logger.error("Error in LlamaCppProvider completion", error=str(e))
+                raise HTTPException(status_code=500, detail=str(e))
+            except ValueError as e:
+                # capture well known exceptions
+                logger.error("Error in LlamaCppProvider completion", error=str(e))
+                if str(e).startswith("Model path does not exist") or \
+                   str(e).startswith("No file found"):
+                    raise HTTPException(status_code=404, detail=str(e))
+                elif "exceed" in str(e):
+                    raise HTTPException(status_code=429, detail=str(e))
+                else:
+                    # just continue raising the exception
+                    raise e
 
             return self._completion_handler.create_response(stream)
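As a reading aid for the llama.cpp hunk above, here is a small self-contained sketch, illustrative only and not part of this change, that reproduces the same RuntimeError/ValueError string matching in a toy FastAPI app; the route, the FAILURES table, and the error messages are invented for the example.

from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient

app = FastAPI()

# Hypothetical failures standing in for self.complete() raising inside the provider.
FAILURES = {
    "runtime": RuntimeError("llama.cpp backend crashed"),
    "missing": ValueError("Model path does not exist: /models/foo.gguf"),
    "busy": ValueError("Requests exceed the configured concurrency limit"),
}


@app.get("/fail/{kind}")
async def fail(kind: str):
    try:
        raise FAILURES[kind]
    except RuntimeError as e:
        # propagate as error 500, as in the diff
        raise HTTPException(status_code=500, detail=str(e))
    except ValueError as e:
        # same string matching as the diff: missing model -> 404, "exceed" -> 429
        if str(e).startswith("Model path does not exist") or str(e).startswith("No file found"):
            raise HTTPException(status_code=404, detail=str(e))
        elif "exceed" in str(e):
            raise HTTPException(status_code=429, detail=str(e))
        raise


client = TestClient(app)
assert client.get("/fail/runtime").status_code == 500
assert client.get("/fail/missing").status_code == 404
assert client.get("/fail/busy").status_code == 429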
diff --git a/src/codegate/providers/ollama/provider.py b/src/codegate/providers/ollama/provider.py
index 975d2295..d0517299 100644
--- a/src/codegate/providers/ollama/provider.py
+++ b/src/codegate/providers/ollama/provider.py
@@ -1,7 +1,9 @@
 import json
 from typing import Optional
 
-from fastapi import Request
+from fastapi import Request, HTTPException
+import httpx
+import structlog
 
 from codegate.config import Config
 from codegate.pipeline.base import SequentialPipelineProcessor
@@ -58,5 +60,20 @@ async def create_completion(request: Request):
             data["base_url"] = self.base_url
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, api_key=None, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, api_key=None, is_fim_request=is_fim_request)
+            except httpx.ConnectError as e:
+                logger = structlog.get_logger("codegate")
+                logger.error("Error in OllamaProvider completion", error=str(e))
+                raise HTTPException(status_code=503, detail="Ollama service is unavailable")
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    # log the exception
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in OllamaProvider completion", error=str(e))
+                    raise HTTPException(status_code=e.status_code, detail=str(e)) # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
 
             return self._completion_handler.create_response(stream)
diff --git a/src/codegate/providers/openai/provider.py b/src/codegate/providers/openai/provider.py
index 6935d4b7..e3b78da2 100644
--- a/src/codegate/providers/openai/provider.py
+++ b/src/codegate/providers/openai/provider.py
@@ -1,4 +1,5 @@
 import json
+import structlog
 from typing import Optional
 
 from fastapi import Header, HTTPException, Request
@@ -54,5 +55,16 @@ async def create_completion(
             data = json.loads(body)
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in OpenAIProvider completion", error=str(e))
+
+                    raise HTTPException(status_code=e.status_code, detail=str(e)) # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
 
             return self._completion_handler.create_response(stream)
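The OllamaProvider and OpenAIProvider handlers above (and the AnthropicProvider one earlier) share the same shape: a connection failure becomes a 503, an error that already carries a status_code is surfaced as-is, and anything else keeps propagating. Purely as an illustration of that shared pattern, and not something this diff introduces, a hypothetical helper could read as follows (to_http_exception and its parameters are invented for the sketch).

import httpx
import structlog
from fastapi import HTTPException

logger = structlog.get_logger("codegate")


def to_http_exception(e: Exception, provider: str) -> HTTPException:
    """Translate a completion failure into an HTTPException, or re-raise it."""
    if isinstance(e, httpx.ConnectError):
        # backend unreachable -> 503
        logger.error(f"Error in {provider} completion", error=str(e))
        return HTTPException(status_code=503, detail=f"{provider} service is unavailable")
    if hasattr(e, "status_code"):
        # the upstream error already carries an HTTP status -> pass it through
        logger.error(f"Error in {provider} completion", error=str(e))
        return HTTPException(status_code=e.status_code, detail=str(e))  # type: ignore
    # unknown failure -> keep propagating
    raise e


# Usage sketch inside a provider route (not applied in this diff):
#     try:
#         stream = await self.complete(data, api_key=None, is_fim_request=is_fim_request)
#     except Exception as e:
#         raise to_http_exception(e, "OllamaProvider")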
diff --git a/src/codegate/providers/vllm/provider.py b/src/codegate/providers/vllm/provider.py
index 09b7b98e..b80a3096 100644
--- a/src/codegate/providers/vllm/provider.py
+++ b/src/codegate/providers/vllm/provider.py
@@ -1,4 +1,5 @@
 import json
+import structlog
 from typing import Optional
 
 import httpx
@@ -52,7 +53,10 @@ async def get_models(authorization: str = Header(..., description="Bearer token"
             token = authorization.split(" ")[1]
 
             config = Config.get_config()
-            base_url = config.provider_urls.get("vllm")
+            if config:
+                base_url = config.provider_urls.get("vllm")
+            else:
+                base_url = ""
 
             async with httpx.AsyncClient() as client:
                 response = await client.get(
@@ -76,8 +80,22 @@ async def create_completion(
             # Add the vLLM base URL to the request
             config = Config.get_config()
-            data["base_url"] = config.provider_urls.get("vllm")
+            if config:
+                data["base_url"] = config.provider_urls.get("vllm")
+            else:
+                data["base_url"] = ""
 
             is_fim_request = self._is_fim_request(request, data)
-            stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            try:
+                stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            except Exception as e:
+                # check if we have a status code there
+                if hasattr(e, "status_code"):
+                    logger = structlog.get_logger("codegate")
+                    logger.error("Error in VLLMProvider completion", error=str(e))
+
+                    raise HTTPException(status_code=e.status_code, detail=str(e)) # type: ignore
+                else:
+                    # just continue raising the exception
+                    raise e
 
             return self._completion_handler.create_response(stream)
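One more reading note, not part of the change: the `if config:` guard added to both vLLM routes exists presumably because `Config.get_config()` can return None before a configuration is loaded, in which case the provider now falls back to an empty base URL instead of raising AttributeError. A tiny standalone restatement of the same guard, with FakeConfig, resolve_vllm_base_url, and the URL invented for the example:

from typing import Optional


class FakeConfig:
    """Stand-in for codegate.config.Config, used only for this illustration."""

    provider_urls = {"vllm": "http://localhost:8000"}


def resolve_vllm_base_url(config: Optional[FakeConfig]) -> str:
    # Mirrors the diff: use the configured URL when a config is loaded,
    # otherwise fall back to an empty string instead of raising AttributeError.
    if config:
        return config.provider_urls.get("vllm")
    return ""


assert resolve_vllm_base_url(FakeConfig()) == "http://localhost:8000"
assert resolve_vllm_base_url(None) == ""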