Skip to content

Commit 977d5d8

Browse files
authored
feat: move ollama to new agent loop (#3615)
1 parent 4f1c1ab commit 977d5d8

File tree

6 files changed

+173
-317
lines changed

6 files changed

+173
-317
lines changed

letta/llm_api/llm_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def create(
5858
put_inner_thoughts_first=put_inner_thoughts_first,
5959
actor=actor,
6060
)
61-
case ProviderType.openai | ProviderType.together:
61+
case ProviderType.openai | ProviderType.together | ProviderType.ollama:
6262
from letta.llm_api.openai_client import OpenAIClient
6363

6464
return OpenAIClient(

letta/schemas/providers/ollama.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
logger = get_logger(__name__)
1515

16+
ollama_prefix = "/v1"
17+
1618

1719
class OllamaProvider(OpenAIProvider):
1820
"""Ollama provider that uses the native /api/generate endpoint
@@ -43,13 +45,13 @@ async def list_llm_models_async(self) -> list[LLMConfig]:
4345
for model in response_json["models"]:
4446
context_window = self.get_model_context_window(model["name"])
4547
if context_window is None:
46-
print(f"Ollama model {model['name']} has no context window")
47-
continue
48+
print(f"Ollama model {model['name']} has no context window, using default 32000")
49+
context_window = 32000
4850
configs.append(
4951
LLMConfig(
5052
model=model["name"],
51-
model_endpoint_type="ollama",
52-
model_endpoint=self.base_url,
53+
model_endpoint_type=ProviderType.ollama,
54+
model_endpoint=f"{self.base_url}{ollama_prefix}",
5355
model_wrapper=self.default_prompt_formatter,
5456
context_window=context_window,
5557
handle=self.get_handle(model["name"]),
@@ -75,13 +77,14 @@ async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
7577
for model in response_json["models"]:
7678
embedding_dim = await self._get_model_embedding_dim_async(model["name"])
7779
if not embedding_dim:
78-
print(f"Ollama model {model['name']} has no embedding dimension")
79-
continue
80+
print(f"Ollama model {model['name']} has no embedding dimension, using default 1024")
81+
# continue
82+
embedding_dim = 1024
8083
configs.append(
8184
EmbeddingConfig(
8285
embedding_model=model["name"],
83-
embedding_endpoint_type="ollama",
84-
embedding_endpoint=self.base_url,
86+
embedding_endpoint_type=ProviderType.ollama,
87+
embedding_endpoint=f"{self.base_url}{ollama_prefix}",
8588
embedding_dim=embedding_dim,
8689
embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
8790
handle=self.get_handle(model["name"], is_embedding=True),

letta/server/rest_api/routers/v1/agents.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,15 @@ async def send_message(
865865
# TODO: This is redundant, remove soon
866866
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
867867
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
868-
model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex", "bedrock"]
868+
model_compatible = agent.llm_config.model_endpoint_type in [
869+
"anthropic",
870+
"openai",
871+
"together",
872+
"google_ai",
873+
"google_vertex",
874+
"bedrock",
875+
"ollama",
876+
]
869877

870878
# Create a new run for execution tracking
871879
if settings.track_agent_run:
@@ -999,7 +1007,15 @@ async def send_message_streaming(
9991007
# TODO: This is redundant, remove soon
10001008
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
10011009
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1002-
model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex", "bedrock"]
1010+
model_compatible = agent.llm_config.model_endpoint_type in [
1011+
"anthropic",
1012+
"openai",
1013+
"together",
1014+
"google_ai",
1015+
"google_vertex",
1016+
"bedrock",
1017+
"ollama",
1018+
]
10031019
model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock"]
10041020
not_letta_endpoint = agent.llm_config.model_endpoint != LETTA_MODEL_ENDPOINT
10051021

@@ -1194,6 +1210,7 @@ async def _process_message_background(
11941210
"google_ai",
11951211
"google_vertex",
11961212
"bedrock",
1213+
"ollama",
11971214
]
11981215
if agent_eligible and model_compatible:
11991216
if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1373,7 +1390,15 @@ async def preview_raw_payload(
13731390
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
13741391
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
13751392
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1376-
model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex", "bedrock"]
1393+
model_compatible = agent.llm_config.model_endpoint_type in [
1394+
"anthropic",
1395+
"openai",
1396+
"together",
1397+
"google_ai",
1398+
"google_vertex",
1399+
"bedrock",
1400+
"ollama",
1401+
]
13771402

13781403
if agent_eligible and model_compatible:
13791404
if agent.enable_sleeptime:
@@ -1433,7 +1458,15 @@ async def summarize_agent_conversation(
14331458
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
14341459
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
14351460
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1436-
model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex", "bedrock"]
1461+
model_compatible = agent.llm_config.model_endpoint_type in [
1462+
"anthropic",
1463+
"openai",
1464+
"together",
1465+
"google_ai",
1466+
"google_vertex",
1467+
"bedrock",
1468+
"ollama",
1469+
]
14371470

14381471
if agent_eligible and model_compatible:
14391472
agent = LettaAgent(
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"context_window": 8192,
33
"model_endpoint_type": "ollama",
4-
"model_endpoint": "http://127.0.0.1:11434",
5-
"model": "qwen3:32b",
4+
"model_endpoint": "http://127.0.0.1:11434/v1",
5+
"model": "qwen2.5:7b",
66
"put_inner_thoughts_in_kwargs": true
77
}

0 commit comments

Comments (0)