
Commit a22e55c

chore: bump v0.11.4 (#2767)
2 parents 0c359e1 + f280406


128 files changed: +7561 additions, −3415 deletions


.github/workflows/send-message-integration-tests.yaml

Lines changed: 1 addition & 0 deletions
@@ -148,6 +148,7 @@ jobs:
           DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
           GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
           GOOGLE_CLOUD_LOCATION: ${{ secrets.GOOGLE_CLOUD_LOCATION }}
+          LETTA_GEMINI_FORCE_MINIMUM_THINKING_BUDGET: true
         run: |
           poetry run pytest \
             -s -vv \
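Letta presumably reads this variable as a boolean setting at process start. A minimal sketch of how such an environment flag is commonly parsed; the helper and setting name below are illustrative assumptions, not Letta's actual settings code:

import os


def env_flag(name: str, default: bool = False) -> bool:
    """Treat common truthy strings ("true", "1", "yes") as True."""
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in {"true", "1", "yes"}


# Hypothetical setting derived from the workflow's new env var.
FORCE_MINIMUM_THINKING_BUDGET = env_flag("LETTA_GEMINI_FORCE_MINIMUM_THINKING_BUDGET")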
alembic/versions/ffb17eb241fc_add_api_version_to_byok_providers.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+"""add api version to byok providers
+
+Revision ID: ffb17eb241fc
+Revises: 5fb8bba2c373
+Create Date: 2025-08-12 14:35:26.375985
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "ffb17eb241fc"
+down_revision: Union[str, None] = "5fb8bba2c373"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("providers", sa.Column("api_version", sa.String(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("providers", "api_version")
+    # ### end Alembic commands ###
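Applying the migration (typically via `alembic upgrade head`) adds a nullable `api_version` column to the `providers` table, so existing rows need no backfill. A hedged sketch of what the corresponding ORM attribute could look like; the model below is illustrative, and Letta's actual Provider ORM class may differ:

import sqlalchemy as sa
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Provider(Base):  # hypothetical sketch, not Letta's real model
    __tablename__ = "providers"

    id: Mapped[str] = mapped_column(sa.String, primary_key=True)
    # New in this commit: a nullable String, matching the migration above.
    api_version: Mapped[str | None] = mapped_column(sa.String, nullable=True)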

examples/docs/node/example.ts

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
-import type { LettaClient } from '@letta-ai/letta-client';
 import type {
   AssistantMessage,
   ReasoningMessage,

letta/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
     __version__ = version("letta")
 except PackageNotFoundError:
     # Fallback for development installations
-    __version__ = "0.11.3"
+    __version__ = "0.11.4"

 if os.environ.get("LETTA_VERSION"):
     __version__ = os.environ["LETTA_VERSION"]
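Version resolution here is a three-step fallback: installed package metadata first, then the hard-coded development default bumped in this commit, then a LETTA_VERSION environment override. A self-contained sketch of the pattern, assuming the importlib.metadata imports the file presumably uses:

import os
from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("letta")  # metadata of the installed distribution
except PackageNotFoundError:
    __version__ = "0.11.4"  # fallback for development installations

# An explicit LETTA_VERSION environment variable overrides both.
if os.environ.get("LETTA_VERSION"):
    __version__ = os.environ["LETTA_VERSION"]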

letta/agents/helpers.py

Lines changed: 4 additions & 0 deletions
@@ -4,6 +4,7 @@
 from typing import List, Optional, Tuple

 from letta.helpers import ToolRulesSolver
+from letta.log import get_logger
 from letta.schemas.agent import AgentState
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_response import LettaResponse
@@ -15,6 +16,8 @@
 from letta.server.rest_api.utils import create_input_messages
 from letta.services.message_manager import MessageManager

+logger = get_logger(__name__)
+

 def _create_letta_response(
     new_in_context_messages: list[Message],
@@ -222,6 +225,7 @@ def _safe_load_tool_call_str(tool_call_args_str: str) -> dict:
         # Load it again - this is due to sometimes Anthropic returning weird json @caren
         tool_args = json.loads(tool_args)
     except json.JSONDecodeError:
+        logger.error("Failed to JSON decode tool call argument string: %s", tool_call_args_str)
         tool_args = {}

     return tool_args
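The hunk above shows only the second decode and the new error log. A hedged reconstruction of the whole helper: the initial json.loads call and the isinstance guard are inferred from context rather than shown in the diff, and stdlib logging stands in for letta.log.get_logger to keep the sketch self-contained:

import json
import logging

logger = logging.getLogger(__name__)


def _safe_load_tool_call_str(tool_call_args_str: str) -> dict:
    """Parse tool-call arguments, tolerating doubly-encoded JSON."""
    try:
        tool_args = json.loads(tool_call_args_str)
        if isinstance(tool_args, str):
            # Some providers (the comment names Anthropic) return JSON that
            # decodes to a string containing more JSON, so decode once more.
            tool_args = json.loads(tool_args)
    except json.JSONDecodeError:
        logger.error("Failed to JSON decode tool call argument string: %s", tool_call_args_str)
        tool_args = {}

    return tool_args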

letta/agents/letta_agent.py

Lines changed: 142 additions & 5 deletions
Large diffs are not rendered by default.

letta/constants.py

Lines changed: 10 additions & 7 deletions
@@ -36,6 +36,7 @@
 DEFAULT_ORG_ID = "org-00000000-0000-4000-8000-000000000000"
 DEFAULT_ORG_NAME = "default_org"

+AGENT_ID_PATTERN = re.compile(r"^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", re.IGNORECASE)

 # String in the error message for when the context window is too large
 # Example full message:
@@ -330,15 +331,15 @@ def FUNCTION_RETURN_VALUE_TRUNCATED(return_str, return_char: int, return_char_li
 MESSAGE_SUMMARY_REQUEST_ACK = "Understood, I will respond with a summary of the message (and only the summary, nothing else) once I receive the conversation history. I'm ready."

 # Maximum length of an error message
-MAX_ERROR_MESSAGE_CHAR_LIMIT = 500
+MAX_ERROR_MESSAGE_CHAR_LIMIT = 1000

 # Default memory limits
-CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 5000
-CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 5000
-CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 5000
+CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 20000
+CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 20000
+CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 20000

 # Function return limits
-FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words
+FUNCTION_RETURN_CHAR_LIMIT = 50000  # ~300 words
 BASE_FUNCTION_RETURN_CHAR_LIMIT = 1000000  # very high (we rely on implementation)
 FILE_IS_TRUNCATED_WARNING = "# NOTE: This block is truncated, use functions to view the full content."

@@ -396,5 +397,7 @@ def FUNCTION_RETURN_VALUE_TRUNCATED(return_str, return_char: int, return_char_li
 WEB_SEARCH_MODEL_ENV_VAR_NAME = "LETTA_BUILTIN_WEBSEARCH_OPENAI_MODEL_NAME"
 WEB_SEARCH_MODEL_ENV_VAR_DEFAULT_VALUE = "gpt-4.1-mini-2025-04-14"

-# Excluded providers from base tool rules
-EXCLUDED_PROVIDERS_FROM_BASE_TOOL_RULES = {"anthropic", "openai", "google_ai", "google_vertex"}
+# Excluded model keywords from base tool rules
+EXCLUDE_MODEL_KEYWORDS_FROM_BASE_TOOL_RULES = ["claude-4-sonnet", "claude-3-5-sonnet", "gpt-5", "gemini-2.5-pro"]
+# But include models with these keywords in base tool rules (overrides exclusion)
+INCLUDE_MODEL_KEYWORDS_BASE_TOOL_RULES = ["mini"]
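The provider-level exclusion set becomes substring matching on the model name, with the include list overriding the exclusion, so for instance "gpt-5" skips base tool rules while "gpt-5-mini" keeps them. A minimal sketch of how these constants might be consumed; the function is illustrative, not Letta's actual code:

EXCLUDE_MODEL_KEYWORDS_FROM_BASE_TOOL_RULES = ["claude-4-sonnet", "claude-3-5-sonnet", "gpt-5", "gemini-2.5-pro"]
INCLUDE_MODEL_KEYWORDS_BASE_TOOL_RULES = ["mini"]


def uses_base_tool_rules(model_name: str) -> bool:
    # Hypothetical check: exclusion by keyword, overridden by an include keyword.
    name = model_name.lower()
    excluded = any(kw in name for kw in EXCLUDE_MODEL_KEYWORDS_FROM_BASE_TOOL_RULES)
    included = any(kw in name for kw in INCLUDE_MODEL_KEYWORDS_BASE_TOOL_RULES)
    return not excluded or included


assert uses_base_tool_rules("gpt-5-mini") and not uses_base_tool_rules("gpt-5")

The new AGENT_ID_PATTERN in the same file matches version-4 UUID agent IDs, e.g. "agent-123e4567-e89b-42d3-a456-426614174000" (case-insensitive; the third group must start with 4 and the fourth with 8, 9, a, or b).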

letta/data_sources/connectors.py

Lines changed: 70 additions & 53 deletions
@@ -4,7 +4,6 @@

 from letta.constants import EMBEDDING_BATCH_SIZE
 from letta.data_sources.connectors_helper import assert_all_files_exist_locally, extract_metadata_from_files, get_filenames_in_dir
-from letta.embeddings import embedding_model
 from letta.schemas.file import FileMetadata
 from letta.schemas.passage import Passage
 from letta.schemas.source import Source
@@ -40,61 +39,29 @@ def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Itera

 async def load_data(connector: DataConnector, source: Source, passage_manager: PassageManager, file_manager: FileManager, actor: "User"):
     from letta.llm_api.llm_client import LLMClient
-    from letta.schemas.embedding_config import EmbeddingConfig

     """Load data from a connector (generates file and passages) into a specified source_id, associated with a user_id."""
     embedding_config = source.embedding_config

     # insert passages/file
-    texts = []
     embedding_to_document_name = {}
     passage_count = 0
     file_count = 0

-    async def generate_embeddings(texts: List[str], embedding_config: EmbeddingConfig) -> List[Passage]:
-        passages = []
-        if embedding_config.embedding_endpoint_type == "openai":
-            texts.append(passage_text)
-
-            client = LLMClient.create(
-                provider_type=embedding_config.embedding_endpoint_type,
-                actor=actor,
-            )
-            embeddings = await client.request_embeddings(texts, embedding_config)
-
-        else:
-            embed_model = embedding_model(embedding_config)
-            embeddings = [embed_model.get_text_embedding(text) for text in texts]
-
-        # collate passage and embedding
-        for text, embedding in zip(texts, embeddings):
-            passage = Passage(
-                text=text,
-                file_id=file_metadata.id,
-                source_id=source.id,
-                metadata=passage_metadata,
-                organization_id=source.organization_id,
-                embedding_config=source.embedding_config,
-                embedding=embedding,
-            )
-            hashable_embedding = tuple(passage.embedding)
-            file_name = file_metadata.file_name
-            if hashable_embedding in embedding_to_document_name:
-                typer.secho(
-                    f"Warning: Duplicate embedding found for passage in {file_name} (already exists in {embedding_to_document_name[hashable_embedding]}), skipping insert into VectorDB.",
-                    fg=typer.colors.YELLOW,
-                )
-                continue
-
-            passages.append(passage)
-            embedding_to_document_name[hashable_embedding] = file_name
-        return passages
+    # Use the new LLMClient for all embedding requests
+    client = LLMClient.create(
+        provider_type=embedding_config.embedding_endpoint_type,
+        actor=actor,
+    )

     for file_metadata in connector.find_files(source):
         file_count += 1
         await file_manager.create_file(file_metadata, actor)

-        # generate passages
+        # generate passages for this file
+        texts = []
+        metadatas = []
+
         for passage_text, passage_metadata in connector.generate_passages(file_metadata, chunk_size=embedding_config.embedding_chunk_size):
             # for some reason, llama index parsers sometimes return empty strings
             if len(passage_text) == 0:
@@ -104,24 +71,74 @@ async def generate_embeddings(texts: List[str], embedding_config: EmbeddingConfi
                 )
                 continue

-            # get embedding
             texts.append(passage_text)
+            metadatas.append(passage_metadata)
+
             if len(texts) >= EMBEDDING_BATCH_SIZE:
-                passages = await generate_embeddings(texts, embedding_config)
+                # Process the batch
+                embeddings = await client.request_embeddings(texts, embedding_config)
+                passages = []
+
+                for text, embedding, passage_metadata in zip(texts, embeddings, metadatas):
+                    passage = Passage(
+                        text=text,
+                        file_id=file_metadata.id,
+                        source_id=source.id,
+                        metadata=passage_metadata,
+                        organization_id=source.organization_id,
+                        embedding_config=source.embedding_config,
+                        embedding=embedding,
+                    )
+                    hashable_embedding = tuple(passage.embedding)
+                    file_name = file_metadata.file_name
+                    if hashable_embedding in embedding_to_document_name:
+                        typer.secho(
+                            f"Warning: Duplicate embedding found for passage in {file_name} (already exists in {embedding_to_document_name[hashable_embedding]}), skipping insert into VectorDB.",
+                            fg=typer.colors.YELLOW,
+                        )
+                        continue
+
+                    passages.append(passage)
+                    embedding_to_document_name[hashable_embedding] = file_name
+
+                # insert passages into passage store
+                await passage_manager.create_many_passages_async(passages, actor)
+                passage_count += len(passages)
+
+                # Reset for next batch
                 texts = []
-            else:
-                continue
+                metadatas = []
+
+        # Process final remaining texts for this file
+        if len(texts) > 0:
+            embeddings = await client.request_embeddings(texts, embedding_config)
+            passages = []
+
+            for text, embedding, passage_metadata in zip(texts, embeddings, metadatas):
+                passage = Passage(
+                    text=text,
+                    file_id=file_metadata.id,
+                    source_id=source.id,
+                    metadata=passage_metadata,
+                    organization_id=source.organization_id,
+                    embedding_config=source.embedding_config,
+                    embedding=embedding,
+                )
+                hashable_embedding = tuple(passage.embedding)
+                file_name = file_metadata.file_name
+                if hashable_embedding in embedding_to_document_name:
+                    typer.secho(
+                        f"Warning: Duplicate embedding found for passage in {file_name} (already exists in {embedding_to_document_name[hashable_embedding]}), skipping insert into VectorDB.",
+                        fg=typer.colors.YELLOW,
+                    )
+                    continue
+
+                passages.append(passage)
+                embedding_to_document_name[hashable_embedding] = file_name

-            # insert passages into passage store
             await passage_manager.create_many_passages_async(passages, actor)
             passage_count += len(passages)

-    # final remaining
-    if len(texts) > 0:
-        passages = await generate_embeddings(texts, embedding_config)
-        await passage_manager.create_many_passages_async(passages, actor)
-        passage_count += len(passages)
-
     return passage_count, file_count

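Both the in-loop flush and the end-of-file flush now share the same shape: accumulate texts, embed a batch, collate into Passage objects, dedupe by embedding, insert. A minimal, self-contained sketch of that accumulate-and-flush control flow; fake_embed stands in for client.request_embeddings, and everything here is illustrative rather than code from the commit:

import asyncio

EMBEDDING_BATCH_SIZE = 3  # stand-in; the real constant lives in letta/constants.py


async def fake_embed(texts: list[str]) -> list[list[float]]:
    # Stand-in for `await client.request_embeddings(texts, embedding_config)`.
    return [[float(len(t))] for t in texts]


async def embed_in_batches(texts: list[str]) -> list[list[float]]:
    """Flush once per EMBEDDING_BATCH_SIZE and once for the remainder,
    the same control flow load_data() now uses per file."""
    embeddings: list[list[float]] = []
    batch: list[str] = []
    for text in texts:
        batch.append(text)
        if len(batch) >= EMBEDDING_BATCH_SIZE:
            embeddings.extend(await fake_embed(batch))
            batch = []
    if batch:  # final remaining texts
        embeddings.extend(await fake_embed(batch))
    return embeddings


print(asyncio.run(embed_in_batches(["a", "bb", "ccc", "dddd"])))
# -> [[1.0], [2.0], [3.0], [4.0]]

Since the two flush sites duplicate the embed-collate-dedupe-insert sequence verbatim, a follow-up refactor could factor that sequence into one shared helper without changing behavior.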