letta-ai
diff --git a/‎letta/agent.py‎
Lines changed: 1 addition & 0 deletions b/‎letta/agent.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎letta/agents/base_agent.py‎
Lines changed: 1 addition & 0 deletions b/‎letta/agents/base_agent.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎letta/agents/helpers.py‎
Lines changed: 28 additions & 0 deletions b/‎letta/agents/helpers.py‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎letta/agents/letta_agent.py‎
Lines changed: 88 additions & 100 deletions b/‎letta/agents/letta_agent.py‎
Lines changed: 88 additions & 100 deletions
diff --git a/‎letta/agents/letta_agent_batch.py‎
Lines changed: 1 addition & 1 deletion b/‎letta/agents/letta_agent_batch.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎letta/agents/voice_agent.py‎
Lines changed: 2 additions & 1 deletion b/‎letta/agents/voice_agent.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎letta/constants.py‎
Lines changed: 1 addition & 0 deletions b/‎letta/constants.py‎
Lines changed: 1 addition & 0 deletions
@@ -1246,6 +1246,7 @@ def get_context_window(self) -> ContextWindowOverview:
         message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
         external_memory_summary = compile_memory_metadata_block(
             memory_edit_timestamp=get_utc_time(),
+            timezone=self.agent_state.timezone,
             previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
             archival_memory_size=self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id),
         )
 
@@ -120,6 +120,7 @@ async def _rebuild_memory_async(
                 system_prompt=agent_state.system,
                 in_context_memory=agent_state.memory,
                 in_context_memory_last_edit=memory_edit_timestamp,
+                timezone=agent_state.timezone,
                 previous_message_count=num_messages - len(in_context_messages),
                 archival_memory_size=num_archival_memories,
                 tool_rules_solver=tool_rules_solver,
 
@@ -1,12 +1,15 @@
+import json
 import uuid
 import xml.etree.ElementTree as ET
 from typing import List, Optional, Tuple
 
+from letta.helpers import ToolRulesSolver
 from letta.schemas.agent import AgentState
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message, MessageCreate
+from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_input_messages
@@ -205,3 +208,28 @@ def deserialize_message_history(xml_str: str) -> Tuple[List[str], str]:
 
 def generate_step_id():
     return f"step-{uuid.uuid4()}"
+
+
+def _safe_load_dict(raw: str) -> dict:
+    """Lenient JSON → dict with fallback to eval on assertion failure."""
+    if "}{" in raw:  # strip accidental parallel calls
+        raw = raw.split("}{", 1)[0] + "}"
+    try:
+        data = json.loads(raw)
+        if not isinstance(data, dict):
+            raise AssertionError
+        return data
+    except (json.JSONDecodeError, AssertionError):
+        return json.loads(raw) if raw else {}
+
+
+def _pop_heartbeat(tool_args: dict) -> bool:
+    hb = tool_args.pop("request_heartbeat", False)
+    return str(hb).lower() == "true" if isinstance(hb, str) else bool(hb)
+
+
+def _build_rule_violation_result(tool_name: str, valid: list[str], solver: ToolRulesSolver) -> ToolExecutionResult:
+    hint_lines = solver.guess_rule_violation(tool_name)
+    hint_txt = ("\n** Hint: Possible rules that were violated:\n" + "\n".join(f"\t- {h}" for h in hint_lines)) if hint_lines else ""
+    msg = f"[ToolConstraintError] Cannot call {tool_name}, " f"valid tools include: {valid}.{hint_txt}"
+    return ToolExecutionResult(status="error", func_return=msg)
@@ -10,7 +10,14 @@
 
 from letta.agents.base_agent import BaseAgent
 from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
-from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_no_persist_async, generate_step_id
+from letta.agents.helpers import (
+    _build_rule_violation_result,
+    _create_letta_response,
+    _pop_heartbeat,
+    _prepare_in_context_messages_no_persist_async,
+    _safe_load_dict,
+    generate_step_id,
+)
 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
 from letta.errors import ContextWindowExceededError
 from letta.helpers import ToolRulesSolver
@@ -931,45 +938,15 @@ async def _handle_ai_response(
         run_id: Optional[str] = None,
     ) -> Tuple[List[Message], bool, Optional[LettaStopReason]]:
         """
-        Now that streaming is done, handle the final AI response.
-        This might yield additional SSE tokens if we do stalling.
-        At the end, set self._continue_execution accordingly.
+        Handle the final AI response once streaming completes, execute / validate the
+        tool call, decide whether we should keep stepping, and persist state.
         """
-        stop_reason = None
-        # Check if the called tool is allowed by tool name:
-        tool_call_name = tool_call.function.name
-        tool_call_args_str = tool_call.function.arguments
-
-        # Temp hack to gracefully handle parallel tool calling attempt, only take first one
-        if "}{" in tool_call_args_str:
-            tool_call_args_str = tool_call_args_str.split("}{", 1)[0] + "}"
-
-        try:
-            tool_args = json.loads(tool_call_args_str)
-            assert isinstance(tool_args, dict), "tool_args must be a dict"
-        except json.JSONDecodeError:
-            tool_args = {}
-        except AssertionError:
-            tool_args = json.loads(tool_args)
-
-        # Get request heartbeats and coerce to bool
-        request_heartbeat = tool_args.pop("request_heartbeat", False)
-        if is_final_step:
-            stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
-            self.logger.info("Agent has reached max steps.")
-            request_heartbeat = False
-        else:
-            # Pre-emptively pop out inner_thoughts
-            tool_args.pop(INNER_THOUGHTS_KWARG, "")
-
-            # So this is necessary, because sometimes non-structured outputs makes mistakes
-            if not isinstance(request_heartbeat, bool):
-                if isinstance(request_heartbeat, str):
-                    request_heartbeat = request_heartbeat.lower() == "true"
-                else:
-                    request_heartbeat = bool(request_heartbeat)
-
-        tool_call_id = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+        # 1.  Parse and validate the tool-call envelope
+        tool_call_name: str = tool_call.function.name
+        tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+        tool_args = _safe_load_dict(tool_call.function.arguments)
+        request_heartbeat: bool = _pop_heartbeat(tool_args)
+        tool_args.pop(INNER_THOUGHTS_KWARG, None)
 
         log_telemetry(
             self.logger,
@@ -979,16 +956,11 @@ async def _handle_ai_response(
             tool_call_id=tool_call_id,
             request_heartbeat=request_heartbeat,
         )
-        # Check if tool rule is violated - if so, we'll force continuation
-        tool_rule_violated = tool_call_name not in valid_tool_names
 
+        # 2.  Execute the tool (or synthesize an error result if disallowed)
+        tool_rule_violated = tool_call_name not in valid_tool_names
         if tool_rule_violated:
-            base_error_message = f"[ToolConstraintError] Cannot call {tool_call_name}, valid tools to call include: {valid_tool_names}."
-            violated_rule_messages = tool_rules_solver.guess_rule_violation(tool_call_name)
-            if violated_rule_messages:
-                bullet_points = "\n".join(f"\t- {msg}" for msg in violated_rule_messages)
-                base_error_message += f"\n** Hint: Possible rules that were violated:\n{bullet_points}"
-            tool_execution_result = ToolExecutionResult(status="error", func_return=base_error_message)
+            tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
         else:
             tool_execution_result = await self._execute_tool(
                 tool_name=tool_call_name,
@@ -997,66 +969,38 @@ async def _handle_ai_response(
                 agent_step_span=agent_step_span,
                 step_id=step_id,
             )
+
         log_telemetry(
             self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
         )
 
-        if tool_call_name in ["conversation_search", "conversation_search_date", "archival_memory_search"]:
-            # with certain functions we rely on the paging mechanism to handle overflow
-            truncate = False
-        else:
-            # but by default, we add a truncation safeguard to prevent bad functions from
-            # overflow the agent context window
-            truncate = True
-
-        # get the function response limit
-        target_tool = next((x for x in agent_state.tools if x.name == tool_call_name), None)
-        return_char_limit = target_tool.return_char_limit if target_tool else None
+        # 3.  Prepare the function-response payload
+        truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
+        return_char_limit = next(
+            (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
+            None,
+        )
         function_response_string = validate_function_response(
-            tool_execution_result.func_return, return_char_limit=return_char_limit, truncate=truncate
+            tool_execution_result.func_return,
+            return_char_limit=return_char_limit,
+            truncate=truncate,
         )
-        function_response = package_function_response(
+        self.last_function_response = package_function_response(
             was_success=tool_execution_result.success_flag,
             response_string=function_response_string,
             timezone=agent_state.timezone,
         )
 
-        # 4. Register tool call with tool rule solver
-        # Resolve whether or not to continue stepping
-        continue_stepping = request_heartbeat
-
-        # Force continuation if tool rule was violated to give the model another chance
-        if tool_rule_violated:
-            continue_stepping = True
-        else:
-            tool_rules_solver.register_tool_call(tool_name=tool_call_name)
-            if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
-                if continue_stepping:
-                    stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
-                continue_stepping = False
-            elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
-                continue_stepping = True
-            elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
-                continue_stepping = True
-
-        # Check if required-before-exit tools have been called before allowing exit
-        heartbeat_reason = None  # Default
-        uncalled_required_tools = tool_rules_solver.get_uncalled_required_tools()
-        if not continue_stepping and uncalled_required_tools:
-            continue_stepping = True
-            heartbeat_reason = (
-                f"{NON_USER_MSG_PREFIX}Cannot finish, still need to call the following required tools: {', '.join(uncalled_required_tools)}"
-            )
-
-            # TODO: @caren is this right?
-            # reset stop reason since we ain't stopping!
-            stop_reason = None
-            self.logger.info(f"RequiredBeforeExitToolRule: Forcing agent continuation. Missing required tools: {uncalled_required_tools}")
+        # 4.  Decide whether to keep stepping  (<<< focal section simplified)
+        continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+            request_heartbeat=request_heartbeat,
+            tool_call_name=tool_call_name,
+            tool_rule_violated=tool_rule_violated,
+            tool_rules_solver=tool_rules_solver,
+            is_final_step=is_final_step,
+        )
 
-        # 5a. Persist Steps to DB
-        # Following agent loop to persist this before messages
-        # TODO (cliandy): determine what should match old loop w/provider_id
-        # TODO (cliandy): UsageStatistics and LettaUsageStatistics are used in many places, but are not the same.
+        # 5.  Persist step + messages and propagate to jobs
         logged_step = await self.step_manager.log_step_async(
             actor=self.actor,
             agent_id=agent_state.id,
@@ -1071,7 +1015,6 @@ async def _handle_ai_response(
             step_id=step_id,
         )
 
-        # 5b. Persist Messages to DB
         tool_call_messages = create_letta_messages_from_llm_response(
             agent_id=agent_state.id,
             model=agent_state.llm_config.model,
@@ -1083,27 +1026,72 @@ async def _handle_ai_response(
             function_response=function_response_string,
             timezone=agent_state.timezone,
             actor=self.actor,
-            add_heartbeat_request_system_message=continue_stepping,
+            continue_stepping=continue_stepping,
             heartbeat_reason=heartbeat_reason,
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-            step_id=logged_step.id if logged_step else None,  # TODO (cliandy): eventually move over other agent loops
+            step_id=logged_step.id if logged_step else None,
         )
 
         persisted_messages = await self.message_manager.create_many_messages_async(
             (initial_messages or []) + tool_call_messages, actor=self.actor
         )
-        self.last_function_response = function_response
 
         if run_id:
             await self.job_manager.add_messages_to_job_async(
                 job_id=run_id,
-                message_ids=[message.id for message in persisted_messages if message.role != "user"],
+                message_ids=[m.id for m in persisted_messages if m.role != "user"],
                 actor=self.actor,
             )
 
         return persisted_messages, continue_stepping, stop_reason
 
+    def _decide_continuation(
+        self,
+        request_heartbeat: bool,
+        tool_call_name: str,
+        tool_rule_violated: bool,
+        tool_rules_solver: ToolRulesSolver,
+        is_final_step: bool | None,
+    ) -> tuple[bool, str | None, LettaStopReason | None]:
+
+        continue_stepping = request_heartbeat
+        heartbeat_reason: str | None = None
+        stop_reason: LettaStopReason | None = None
+
+        if tool_rule_violated:
+            continue_stepping = True
+            heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: tool rule violation."
+        else:
+            tool_rules_solver.register_tool_call(tool_call_name)
+
+            if tool_rules_solver.is_terminal_tool(tool_call_name):
+                if continue_stepping:
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
+                continue_stepping = False
+
+            elif tool_rules_solver.has_children_tools(tool_call_name):
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: child tool rule."
+
+            elif tool_rules_solver.is_continue_tool(tool_call_name):
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: continue tool rule."
+
+        # – hard stop overrides –
+        if is_final_step:
+            continue_stepping = False
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
+        else:
+            uncalled = tool_rules_solver.get_uncalled_required_tools()
+            if not continue_stepping and uncalled:
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Missing required tools: " f"{', '.join(uncalled)}"
+
+                stop_reason = None  # reset – we’re still going
+
+        return continue_stepping, heartbeat_reason, stop_reason
+
     @trace_method
     async def _execute_tool(
         self,
 
@@ -550,7 +550,7 @@ def _create_tool_call_messages(
             tool_execution_result=tool_exec_result_obj,
             timezone=agent_state.timezone,
             actor=self.actor,
-            add_heartbeat_request_system_message=False,
+            continue_stepping=False,
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=None,
             llm_batch_item_id=llm_batch_item_id,
 
@@ -150,6 +150,7 @@ async def step_stream(self, input_messages: List[MessageCreate], max_steps: int
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
             in_context_memory_last_edit=memory_edit_timestamp,
+            timezone=agent_state.timezone,
             previous_message_count=self.num_messages,
             archival_memory_size=self.num_archival_memories,
         )
@@ -277,7 +278,7 @@ async def _handle_ai_response(
                 tool_execution_result=tool_execution_result,
                 timezone=agent_state.timezone,
                 actor=self.actor,
-                add_heartbeat_request_system_message=True,
+                continue_stepping=True,
             )
             letta_message_db_queue.extend(tool_call_messages)
 
 
@@ -6,6 +6,7 @@
 LETTA_TOOL_EXECUTION_DIR = os.path.join(LETTA_DIR, "tool_execution_dir")
 
 LETTA_MODEL_ENDPOINT = "https://inference.letta.com"
+DEFAULT_TIMEZONE = "UTC"
 
 ADMIN_PREFIX = "/v1/admin"
 API_PREFIX = "/v1"
Original file line number	Diff line number	Diff line change
`@@ -1246,6 +1246,7 @@ def get_context_window(self) -> ContextWindowOverview:`
`1246`	`1246`	`message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)`
`1247`	`1247`	`external_memory_summary = compile_memory_metadata_block(`
`1248`	`1248`	`memory_edit_timestamp=get_utc_time(),`
	`1249`	`+ timezone=self.agent_state.timezone,`
`1249`	`1250`	`previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),`
`1250`	`1251`	`archival_memory_size=self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id),`
`1251`	`1252`	`)`