1919
2020from letta .errors import BedrockError , BedrockPermissionError
2121from letta .llm_api .aws_bedrock import get_bedrock_client
22+ from letta .llm_api .helpers import add_inner_thoughts_to_functions
23+ from letta .local_llm .constants import INNER_THOUGHTS_KWARG , INNER_THOUGHTS_KWARG_DESCRIPTION
2224from letta .local_llm .utils import num_tokens_from_functions , num_tokens_from_messages
2325from letta .schemas .message import Message as _Message
2426from letta .schemas .message import MessageRole as _MessageRole
@@ -513,9 +515,23 @@ def convert_anthropic_stream_event_to_chatcompletion(
513515def _prepare_anthropic_request (
514516 data : ChatCompletionRequest ,
515517 inner_thoughts_xml_tag : Optional [str ] = "thinking" ,
518+ # if true, prefix fill the generation with the thinking tag
519+ prefix_fill : bool = True ,
520+ # if true, put COT inside the tool calls instead of inside the content
521+ put_inner_thoughts_in_kwargs : bool = False ,
516522) -> dict :
517523 """Prepare the request data for Anthropic API format."""
518- # convert the tools
524+
525+ # if needed, put inner thoughts as a kwarg for all tools
526+ if data .tools and put_inner_thoughts_in_kwargs :
527+ functions = add_inner_thoughts_to_functions (
528+ functions = [t .function .model_dump () for t in data .tools ],
529+ inner_thoughts_key = INNER_THOUGHTS_KWARG ,
530+ inner_thoughts_description = INNER_THOUGHTS_KWARG_DESCRIPTION ,
531+ )
532+ data .tools = [Tool (function = f ) for f in functions ]
533+
534+ # convert the tools to Anthropic's payload format
519535 anthropic_tools = None if data .tools is None else convert_tools_to_anthropic_format (data .tools )
520536
521537 # pydantic -> dict
@@ -529,11 +545,25 @@ def _prepare_anthropic_request(
529545 data .pop ("tools" )
530546 data .pop ("tool_choice" , None )
531547 elif anthropic_tools is not None :
548+ # TODO eventually enable parallel tool use
532549 data ["tools" ] = anthropic_tools
533- if len (anthropic_tools ) == 1 :
550+
551+ # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
552+ if put_inner_thoughts_in_kwargs :
553+ if len (anthropic_tools ) == 1 :
554+ data ["tool_choice" ] = {
555+ "type" : "tool" ,
556+ "name" : anthropic_tools [0 ]["name" ],
557+ "disable_parallel_tool_use" : True ,
558+ }
559+ else :
560+ data ["tool_choice" ] = {
561+ "type" : "any" ,
562+ "disable_parallel_tool_use" : True ,
563+ }
564+ else :
534565 data ["tool_choice" ] = {
535- "type" : "tool" ,
536- "name" : anthropic_tools [0 ]["name" ],
566+ "type" : "auto" ,
537567 "disable_parallel_tool_use" : True ,
538568 }
539569
@@ -548,8 +578,21 @@ def _prepare_anthropic_request(
548578 message ["content" ] = None
549579
550580 # Convert to Anthropic format
551- msg_objs = [_Message .dict_to_message (user_id = None , agent_id = None , openai_message_dict = m ) for m in data ["messages" ]]
552- data ["messages" ] = [m .to_anthropic_dict (inner_thoughts_xml_tag = inner_thoughts_xml_tag ) for m in msg_objs ]
581+ msg_objs = [
582+ _Message .dict_to_message (
583+ user_id = None ,
584+ agent_id = None ,
585+ openai_message_dict = m ,
586+ )
587+ for m in data ["messages" ]
588+ ]
589+ data ["messages" ] = [
590+ m .to_anthropic_dict (
591+ inner_thoughts_xml_tag = inner_thoughts_xml_tag ,
592+ put_inner_thoughts_in_kwargs = put_inner_thoughts_in_kwargs ,
593+ )
594+ for m in msg_objs
595+ ]
553596
554597 # Ensure first message is user
555598 if data ["messages" ][0 ]["role" ] != "user" :
@@ -558,6 +601,16 @@ def _prepare_anthropic_request(
558601 # Handle alternating messages
559602 data ["messages" ] = merge_tool_results_into_user_messages (data ["messages" ])
560603
604+ # Handle prefix fill (not compatible with inner-thoughts-in-kwargs)
605+ # https://docs.anthropic.com/en/api/messages#body-messages
606+ # NOTE: cannot prefill with tools for opus:
607+ # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229" ... (error message truncated)
608+ if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data ["model" ]:
609+ data ["messages" ].append (
610+ # Start the thinking process for the assistant
611+ {"role" : "assistant" , "content" : f"<{ inner_thoughts_xml_tag } >" },
612+ )
613+
561614 # Validate max_tokens
562615 assert "max_tokens" in data , data
563616
@@ -571,6 +624,7 @@ def _prepare_anthropic_request(
571624def anthropic_chat_completions_request (
572625 data : ChatCompletionRequest ,
573626 inner_thoughts_xml_tag : Optional [str ] = "thinking" ,
627+ put_inner_thoughts_in_kwargs : bool = False ,
574628 betas : List [str ] = ["tools-2024-04-04" ],
575629) -> ChatCompletionResponse :
576630 """https://docs.anthropic.com/claude/docs/tool-use"""
@@ -580,7 +634,11 @@ def anthropic_chat_completions_request(
580634 anthropic_client = anthropic .Anthropic (api_key = anthropic_override_key )
581635 elif model_settings .anthropic_api_key :
582636 anthropic_client = anthropic .Anthropic ()
583- data = _prepare_anthropic_request (data , inner_thoughts_xml_tag )
637+ data = _prepare_anthropic_request (
638+ data = data ,
639+ inner_thoughts_xml_tag = inner_thoughts_xml_tag ,
640+ put_inner_thoughts_in_kwargs = put_inner_thoughts_in_kwargs ,
641+ )
584642 response = anthropic_client .beta .messages .create (
585643 ** data ,
586644 betas = betas ,
@@ -611,14 +669,19 @@ def anthropic_bedrock_chat_completions_request(
611669def anthropic_chat_completions_request_stream (
612670 data : ChatCompletionRequest ,
613671 inner_thoughts_xml_tag : Optional [str ] = "thinking" ,
672+ put_inner_thoughts_in_kwargs : bool = False ,
614673 betas : List [str ] = ["tools-2024-04-04" ],
615674) -> Generator [ChatCompletionChunkResponse , None , None ]:
616675 """Stream chat completions from Anthropic API.
617676
618677 Similar to OpenAI's streaming, but using Anthropic's native streaming support.
619678 See: https://docs.anthropic.com/claude/reference/messages-streaming
620679 """
621- data = _prepare_anthropic_request (data , inner_thoughts_xml_tag )
680+ data = _prepare_anthropic_request (
681+ data = data ,
682+ inner_thoughts_xml_tag = inner_thoughts_xml_tag ,
683+ put_inner_thoughts_in_kwargs = put_inner_thoughts_in_kwargs ,
684+ )
622685
623686 anthropic_override_key = ProviderManager ().get_anthropic_override_key ()
624687 if anthropic_override_key :
@@ -666,6 +729,7 @@ def anthropic_chat_completions_process_stream(
666729 chat_completion_request : ChatCompletionRequest ,
667730 stream_interface : Optional [Union [AgentChunkStreamingInterface , AgentRefreshStreamingInterface ]] = None ,
668731 inner_thoughts_xml_tag : Optional [str ] = "thinking" ,
732+ put_inner_thoughts_in_kwargs : bool = False ,
669733 create_message_id : bool = True ,
670734 create_message_datetime : bool = True ,
671735 betas : List [str ] = ["tools-2024-04-04" ],
@@ -743,6 +807,7 @@ def anthropic_chat_completions_process_stream(
743807 anthropic_chat_completions_request_stream (
744808 data = chat_completion_request ,
745809 inner_thoughts_xml_tag = inner_thoughts_xml_tag ,
810+ put_inner_thoughts_in_kwargs = put_inner_thoughts_in_kwargs ,
746811 betas = betas ,
747812 )
748813 ):
0 commit comments