This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 1b2d338

Merge pull request #275 from stacklok/fix-ollama-on-host
Make sure we can speak with Ollama running on the host (localhost) from the container
2 parents 92895f4 + 12ec917 commit 1b2d338

File tree

6 files changed: +24, -19 lines

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ ENV PYTHONPATH=/app/src
 ENV CODEGATE_VLLM_URL=https://inference.codegate.ai
 ENV CODEGATE_OPENAI_URL=
 ENV CODEGATE_ANTHROPIC_URL=
-ENV CODEGATE_OLLAMA_URL=
+ENV CODEGATE_OLLAMA_URL=http://host.docker.internal:11434
 ENV CODEGATE_APP_LOG_LEVEL=WARNING
 ENV CODEGATE_LOG_FORMAT=TEXT

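Note on this default: host.docker.internal resolves to the Docker host out of the box on Docker Desktop; on Linux it generally requires starting the container with --add-host=host.docker.internal:host-gateway. A minimal reachability check, sketched in Python (the environment-variable fallback mirrors the Dockerfile default; the call assumes an Ollama daemon is actually running on the host):

# Hedged sketch: confirm the container can reach a host-local Ollama at the new default URL.
import os

from ollama import Client

base_url = os.environ.get("CODEGATE_OLLAMA_URL", "http://host.docker.internal:11434")
client = Client(host=base_url)
print(client.list())  # lists the models pulled on the host daemon if it is reachable
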
src/codegate/llm_utils/llmclient.py

Lines changed: 9 additions & 7 deletions
@@ -2,7 +2,8 @@
 from typing import Any, Dict, Optional
 
 import structlog
-from litellm import acompletion, completion
+from litellm import acompletion
+from ollama import Client as OllamaClient
 
 from codegate.config import Config
 from codegate.inference import LlamaCppInferenceEngine
@@ -117,13 +118,14 @@ async def _complete_litellm(
 
         try:
             if provider == "ollama":
-                response = completion(
+                model = model.split("/")[-1]
+                response = OllamaClient(host=base_url).chat(
                     model=model,
                     messages=request["messages"],
-                    api_key=api_key,
-                    temperature=request["temperature"],
-                    base_url=base_url,
+                    format="json",
+                    options={"temperature": request["temperature"]},
                 )
+                content = response.message.content
             else:
                 response = await acompletion(
                     model=model,
@@ -133,7 +135,7 @@ async def _complete_litellm(
                     base_url=base_url,
                     response_format=request["response_format"],
                 )
-            content = response["choices"][0]["message"]["content"]
+                content = response["choices"][0]["message"]["content"]
 
             # Clean up code blocks if present
             if content.startswith("```"):
@@ -142,5 +144,5 @@ async def _complete_litellm(
             return json.loads(content)
 
         except Exception as e:
-            logger.error(f"LiteLLM completion failed {provider}/{model} ({content}): {e}")
+            logger.error(f"LiteLLM completion failed {model} ({content}): {e}")
             return {}

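With this change the Ollama provider no longer routes through litellm's completion(); it calls the ollama Python client directly, strips any litellm-style provider prefix from the model name, and requests JSON output. A minimal sketch of the new call path (the host and model name are assumptions, not values from this commit):

# Hedged sketch of the Ollama branch in _complete_litellm; host and model are placeholders.
from ollama import Client as OllamaClient

model = "ollama/llama3".split("/")[-1]  # the provider prefix is stripped before the call
response = OllamaClient(host="http://host.docker.internal:11434").chat(
    model=model,
    messages=[{"role": "user", "content": "Reply with a JSON object containing a 'status' key."}],
    format="json",  # constrain the reply to valid JSON
    options={"temperature": 0},  # temperature now travels inside options, not as a top-level kwarg
)
print(response.message.content)  # JSON string, parsed with json.loads() in the real code
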
src/codegate/pipeline/extract_snippets/output.py

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ def _split_chunk_at_code_end(self, content: str) -> tuple[str, str]:
             if line.strip() == "```":
                 # Return content up to and including ```, and the rest
                 before = "\n".join(lines[: i + 1])
-                after = "\n".join(lines[i + 1:])
+                after = "\n".join(lines[i + 1 :])
                 return before, after
         return content, ""

src/codegate/providers/ollama/completion_handler.py

Lines changed: 1 addition & 8 deletions
@@ -5,7 +5,6 @@
 from litellm import ChatCompletionRequest
 from ollama import AsyncClient, ChatResponse, GenerateResponse
 
-from codegate.config import Config
 from codegate.providers.base import BaseCompletionHandler
 
 logger = structlog.get_logger("codegate")
@@ -27,13 +26,7 @@ async def ollama_stream_generator(
 
 class OllamaShim(BaseCompletionHandler):
 
-    def __init__(self):
-        config = Config.get_config()
-        if config is None:
-            provided_urls = {}
-        else:
-            provided_urls = config.provider_urls
-        base_url = provided_urls.get("ollama", "http://localhost:11434/")
+    def __init__(self, base_url):
         self.client = AsyncClient(host=base_url, timeout=300)
 
     async def execute_completion(

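OllamaShim no longer reads the global Config itself; its caller now supplies the base URL. A hedged sketch of constructing the shim directly (the URL is an assumption; in normal operation OllamaProvider passes the URL it resolved from configuration):

# Hedged sketch: the shim now receives an explicit base URL from its caller.
from codegate.providers.ollama.completion_handler import OllamaShim

shim = OllamaShim("http://host.docker.internal:11434")
# shim.client is an ollama.AsyncClient bound to that host with a 300-second timeout,
# exactly as set in __init__ above.
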
src/codegate/providers/ollama/provider.py

Lines changed: 11 additions & 1 deletion
@@ -3,6 +3,7 @@
 
 from fastapi import Request
 
+from codegate.config import Config
 from codegate.pipeline.base import SequentialPipelineProcessor
 from codegate.pipeline.output import OutputPipelineProcessor
 from codegate.providers.base import BaseProvider
@@ -18,7 +19,13 @@ def __init__(
         output_pipeline_processor: Optional[OutputPipelineProcessor] = None,
         fim_output_pipeline_processor: Optional[OutputPipelineProcessor] = None,
     ):
-        completion_handler = OllamaShim()
+        config = Config.get_config()
+        if config is None:
+            provided_urls = {}
+        else:
+            provided_urls = config.provider_urls
+        self.base_url = provided_urls.get("ollama", "http://localhost:11434/")
+        completion_handler = OllamaShim(self.base_url)
         super().__init__(
             OllamaInputNormalizer(),
             OllamaOutputNormalizer(),
@@ -46,6 +53,9 @@ def _setup_routes(self):
         async def create_completion(request: Request):
             body = await request.body()
             data = json.loads(body)
+            # `base_url` is used in the providers pipeline to do the packages lookup.
+            # Force it to be the one that comes in the configuration.
+            data["base_url"] = self.base_url
 
             is_fim_request = self._is_fim_request(request, data)
             stream = await self.complete(data, api_key=None, is_fim_request=is_fim_request)

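The URL-resolution fallback that previously lived in the shim now sits in the provider, which also injects the resolved URL into every request body so later pipeline steps query the configured Ollama instance. A condensed sketch of that resolution, assuming the same defaults as the diff:

# Hedged sketch of the fallback now performed in OllamaProvider.__init__.
from codegate.config import Config

config = Config.get_config()
provided_urls = {} if config is None else config.provider_urls
base_url = provided_urls.get("ollama", "http://localhost:11434/")
# Each incoming request body later gets data["base_url"] = base_url, so the pipeline's
# package lookup targets the configured Ollama instance rather than a hard-coded one.
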
tests/providers/ollama/test_ollama_completion_handler.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ def mock_client():
 
 @pytest.fixture
 def handler(mock_client):
-    ollama_shim = OllamaShim()
+    ollama_shim = OllamaShim("http://ollama:11434")
     ollama_shim.client = mock_client
     return ollama_shim
