Skip to content

feat: Simpler generation spans, use Haystack's to_openai_dict_format #2044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/langfuse/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.13.0", "langfuse>=2.9.0, <3.0.0"]
dependencies = ["haystack-ai>=2.15.1", "langfuse>=2.9.0, <3.0.0"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langfuse#readme"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# SPDX-License-Identifier: Apache-2.0

import contextlib
import json
import os
from abc import ABC, abstractmethod
from collections import Counter
Expand Down Expand Up @@ -66,61 +65,6 @@
tracing_context_var: ContextVar[Dict[Any, Any]] = ContextVar("tracing_context")


def _to_openai_dict_format(chat_message: ChatMessage) -> Dict[str, Any]:
    """
    Serialize a ChatMessage into the dictionary layout used by OpenAI's chat completion API.

    Note: Haystack's ChatMessage class already offers an equivalent method, but that one
    rejects None ids on ToolCall and ToolCallResult. Some generators (for example
    GoogleGenAIChatGenerator) produce None ids, so this Langfuse-local variant passes the
    ids through unchanged in order to support them.

    :param chat_message: The ChatMessage instance to convert.
    :return: Dictionary in OpenAI Chat API format.
    :raises ValueError: If the message has no text, tool call or tool call result, or if it
        holds more than one text / tool call result in total.
    """
    texts = chat_message.texts
    calls = chat_message.tool_calls
    call_results = chat_message.tool_call_results

    # Validate the message shape before building anything.
    if not (texts or calls or call_results):
        message = "A `ChatMessage` must contain at least one `TextContent`, `ToolCall`, or `ToolCallResult`."
        logger.error(message)
        raise ValueError(message)
    if len(texts) + len(call_results) > 1:
        message = "A `ChatMessage` can only contain one `TextContent` or one `ToolCallResult`."
        logger.error(message)
        raise ValueError(message)

    payload: Dict[str, Any] = {"role": chat_message._role.value}

    # The name field is optional and only emitted when it is set.
    sender_name = chat_message._name
    if sender_name is not None:
        payload["name"] = sender_name

    if call_results:
        first_result = call_results[0]
        payload["content"] = first_result.result
        payload["tool_call_id"] = first_result.origin.id
        # The error field is dropped on purpose: OpenAI has no channel for reporting
        # tool invocation errors.
        return payload

    if texts:
        payload["content"] = texts[0]
    if calls:
        payload["tool_calls"] = [
            {
                "id": call.id,
                "type": "function",
                # ensure_ascii is off so special characters such as emojis stay unescaped
                "function": {
                    "name": call.tool_name,
                    "arguments": json.dumps(call.arguments, ensure_ascii=False),
                },
            }
            for call in calls
        ]
    return payload


class LangfuseSpan(Span):
"""
Internal class representing a bridge between the Haystack span tracing API and Langfuse.
Expand Down Expand Up @@ -158,15 +102,15 @@ def set_content_tag(self, key: str, value: Any) -> None:
return
if key.endswith(".input"):
if "messages" in value:
messages = [_to_openai_dict_format(m) for m in value["messages"]]
messages = [m.to_openai_dict_format(require_tool_call_ids=False) for m in value["messages"]]
self._span.update(input=messages)
else:
coerced_value = tracing_utils.coerce_tag_value(value)
self._span.update(input=coerced_value)
elif key.endswith(".output"):
if "replies" in value:
if all(isinstance(r, ChatMessage) for r in value["replies"]):
replies = [_to_openai_dict_format(m) for m in value["replies"]]
replies = [m.to_openai_dict_format(require_tool_call_ids=False) for m in value["replies"]]
else:
replies = value["replies"]
self._span.update(output=replies)
Expand Down