feat: Simpler generation spans, use Haystack's to_openai_dict_format (#2044)

vblagoje · sjrl · web-flow · commit a6c9103da654 · 2025-07-03T14:44:07.000+02:00
* DRY: remove custom to_openai_dict_format function and use Haystack's

* Lint

* Update integrations/langfuse/pyproject.toml

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

---------

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml
@@ -22,7 +22,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.13.0", "langfuse>=2.9.0, <3.0.0"]
+dependencies = ["haystack-ai>=2.15.1", "langfuse>=2.9.0, <3.0.0"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langfuse#readme"
diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import contextlib
-import json
 import os
 from abc import ABC, abstractmethod
 from collections import Counter
@@ -66,61 +65,6 @@
 tracing_context_var: ContextVar[Dict[Any, Any]] = ContextVar("tracing_context")
 
 
-def _to_openai_dict_format(chat_message: ChatMessage) -> Dict[str, Any]:
-    """
-    Convert a ChatMessage to the dictionary format expected by OpenAI's chat completion API.
-
-    Note: We already have such a method in Haystack's ChatMessage class.
-    However, the original method doesn't tolerate None values for ids of ToolCall and ToolCallResult.
-    Some generators, like GoogleGenAIChatGenerator, return None values for ids of ToolCall and ToolCallResult.
-    To seamlessly support these generators, we use this, Langfuse local, version of the method.
-
-    :param chat_message: The ChatMessage instance to convert.
-    :return: Dictionary in OpenAI Chat API format.
-    """
-    text_contents = chat_message.texts
-    tool_calls = chat_message.tool_calls
-    tool_call_results = chat_message.tool_call_results
-
-    if not text_contents and not tool_calls and not tool_call_results:
-        message = "A `ChatMessage` must contain at least one `TextContent`, `ToolCall`, or `ToolCallResult`."
-        logger.error(message)
-        raise ValueError(message)
-    if len(text_contents) + len(tool_call_results) > 1:
-        message = "A `ChatMessage` can only contain one `TextContent` or one `ToolCallResult`."
-        logger.error(message)
-        raise ValueError(message)
-
-    openai_msg: Dict[str, Any] = {"role": chat_message._role.value}
-
-    # Add name field if present
-    if chat_message._name is not None:
-        openai_msg["name"] = chat_message._name
-
-    if tool_call_results:
-        result = tool_call_results[0]
-        openai_msg["content"] = result.result
-        openai_msg["tool_call_id"] = result.origin.id
-        # OpenAI does not provide a way to communicate errors in tool invocations, so we ignore the error field
-        return openai_msg
-
-    if text_contents:
-        openai_msg["content"] = text_contents[0]
-    if tool_calls:
-        openai_tool_calls = []
-        for tc in tool_calls:
-            openai_tool_calls.append(
-                {
-                    "id": tc.id,
-                    "type": "function",
-                    # We disable ensure_ascii so special chars like emojis are not converted
-                    "function": {"name": tc.tool_name, "arguments": json.dumps(tc.arguments, ensure_ascii=False)},
-                }
-            )
-        openai_msg["tool_calls"] = openai_tool_calls
-    return openai_msg
-
-
 class LangfuseSpan(Span):
     """
     Internal class representing a bridge between the Haystack span tracing API and Langfuse.
@@ -158,15 +102,15 @@ def set_content_tag(self, key: str, value: Any) -> None:
             return
         if key.endswith(".input"):
             if "messages" in value:
-                messages = [_to_openai_dict_format(m) for m in value["messages"]]
+                messages = [m.to_openai_dict_format(require_tool_call_ids=False) for m in value["messages"]]
                 self._span.update(input=messages)
             else:
                 coerced_value = tracing_utils.coerce_tag_value(value)
                 self._span.update(input=coerced_value)
         elif key.endswith(".output"):
             if "replies" in value:
                 if all(isinstance(r, ChatMessage) for r in value["replies"]):
-                    replies = [_to_openai_dict_format(m) for m in value["replies"]]
+                    replies = [m.to_openai_dict_format(require_tool_call_ids=False) for m in value["replies"]]
                 else:
                     replies = value["replies"]
                 self._span.update(output=replies)

Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ classifiers = [`
`22`	`22`	`"Programming Language :: Python :: Implementation :: CPython",`
`23`	`23`	`"Programming Language :: Python :: Implementation :: PyPy",`
`24`	`24`	`]`
`25`		`-dependencies = ["haystack-ai>=2.13.0", "langfuse>=2.9.0, <3.0.0"]`
	`25`	`+dependencies = ["haystack-ai>=2.15.1", "langfuse>=2.9.0, <3.0.0"]`
`26`	`26`
`27`	`27`	`[project.urls]`
`28`	`28`	`Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langfuse#readme"`