Skip to content

Commit 390118c

Browse files
authored
feat(llmobs): add prompt tracking telemetry tags (#15637)
## Description Adds prompt tracking telemetry tags for OpenAI integration: - Adds `prompt_tracking_instrumentation_method:auto` tag for auto-instrumented prompts (OpenAI, LangChain) - Adds `prompt_tracking_instrumentation_method:annotated` tag for annotated prompts (manual) - Adds `prompt_multimodal:true` tag for OpenAI prompts containing image/file inputs ### Motivation Enable dd-go to emit statsd metrics tagged by prompt tracking source and multimodal content for internal dashboard analytics. ## Testing - Updated unit tests for OpenAI and LangChain integrations - All existing tests pass with new tag assertions - System tests ## Risks <!-- Note any risks associated with this change, or "None" if no risks --> 🤷 ## Related PRs - dd-trace-js: DataDog/dd-trace-js#7106 - dd-source: DataDog/dd-source#323345 - system-tests: DataDog/system-tests#5876
1 parent 2484164 commit 390118c

File tree

9 files changed

+112
-8
lines changed

9 files changed

+112
-8
lines changed

ddtrace/llmobs/_constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@
9494
# Prompt constants
9595
DEFAULT_PROMPT_NAME = "unnamed-prompt"
9696

97+
# Prompt tracking tags
98+
PROMPT_TRACKING_INSTRUMENTATION_METHOD = "prompt_tracking_instrumentation_method"
99+
PROMPT_MULTIMODAL = "prompt_multimodal"
100+
INSTRUMENTATION_METHOD_AUTO = "auto"
101+
INSTRUMENTATION_METHOD_ANNOTATED = "annotated"
102+
97103
DECORATOR = "_ml_obs.decorator"
98104
INTEGRATION = "_ml_obs.integration"
99105

ddtrace/llmobs/_integrations/langchain.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from ddtrace.llmobs._integrations.utils import LANGCHAIN_ROLE_MAPPING
3939
from ddtrace.llmobs._integrations.utils import extract_instance_metadata_from_stack
4040
from ddtrace.llmobs._integrations.utils import format_langchain_io
41+
from ddtrace.llmobs._integrations.utils import set_prompt_tracking_tags
4142
from ddtrace.llmobs._integrations.utils import update_proxy_workflow_input_output_value
4243
from ddtrace.llmobs._utils import _get_attr
4344
from ddtrace.llmobs._utils import _get_nearest_llmobs_ancestor
@@ -957,5 +958,6 @@ def llmobs_set_prompt_tag(self, instance, span: Span):
957958
try:
958959
prompt = _validate_prompt(prompt, strict_validation=True)
959960
span._set_ctx_item(INPUT_PROMPT, prompt)
961+
set_prompt_tracking_tags(span)
960962
except Exception as e:
961963
log.debug("Failed to validate langchain prompt", e)

ddtrace/llmobs/_integrations/utils.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,15 @@
2424
from ddtrace.llmobs._constants import INPUT_TYPE_IMAGE
2525
from ddtrace.llmobs._constants import INPUT_TYPE_TEXT
2626
from ddtrace.llmobs._constants import INPUT_VALUE
27+
from ddtrace.llmobs._constants import INSTRUMENTATION_METHOD_AUTO
2728
from ddtrace.llmobs._constants import METADATA
2829
from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG
2930
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
3031
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
3132
from ddtrace.llmobs._constants import OUTPUT_VALUE
33+
from ddtrace.llmobs._constants import PROMPT_MULTIMODAL
34+
from ddtrace.llmobs._constants import PROMPT_TRACKING_INSTRUMENTATION_METHOD
35+
from ddtrace.llmobs._constants import TAGS
3236
from ddtrace.llmobs._constants import TOOL_DEFINITIONS
3337
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
3438
from ddtrace.llmobs._utils import _get_attr
@@ -880,6 +884,35 @@ def _extract_chat_template_from_instructions(
880884
return chat_template
881885

882886

887+
def _has_multimodal_inputs(variables: Dict[str, Any]) -> bool:
    """Report whether any prompt variable is an image or file input.

    Args:
        variables: Prompt variables keyed by name. Each value's ``type`` is
            read through ``_get_attr`` with a default of ``None``.

    Returns:
        True if at least one value's ``type`` equals ``INPUT_TYPE_IMAGE`` or
        ``INPUT_TYPE_FILE``. False for an empty/falsy argument, for anything
        that is not a ``dict``, or when no value matches.
    """
    # Falsy values and non-dict containers cannot carry typed inputs.
    if not (variables and isinstance(variables, dict)):
        return False

    multimodal_types = (INPUT_TYPE_IMAGE, INPUT_TYPE_FILE)
    # any() stops at the first match, like an early return from a loop.
    return any(_get_attr(entry, "type", None) in multimodal_types for entry in variables.values())
896+
897+
898+
def set_prompt_tracking_tags(span: Span, *, is_multimodal: bool = False) -> None:
    """Attach prompt tracking telemetry tags to a span's ``TAGS`` context item.

    The instrumentation-method tag is always written with the "auto" value;
    the multimodal tag is only written (as the string "true") when requested.

    Args:
        span: The span whose ``TAGS`` context item receives the tags.
        is_multimodal: Whether the prompt contains image/file inputs.
    """
    tracking_tags = {
        PROMPT_TRACKING_INSTRUMENTATION_METHOD: INSTRUMENTATION_METHOD_AUTO,
        **({PROMPT_MULTIMODAL: "true"} if is_multimodal else {}),
    }

    current_tags = span._get_ctx_item(TAGS)
    if not current_tags:
        # Nothing stored yet (or an empty value): store a fresh mapping.
        span._set_ctx_item(TAGS, tracking_tags)
        return

    # Merge into the stored mapping in place so tags already set are kept.
    current_tags.update(tracking_tags)
914+
915+
883916
def openai_set_meta_tags_from_response(
884917
span: Span, kwargs: Dict[str, Any], response: Optional[Any], integration: Any = None
885918
) -> None:
@@ -908,12 +941,13 @@ def openai_set_meta_tags_from_response(
908941
if prompt_data:
909942
try:
910943
prompt_data = dict(prompt_data) # Make a copy to avoid modifying the original
944+
variables = prompt_data.get("variables", {})
945+
has_multimodal = _has_multimodal_inputs(variables)
911946

912947
# Extract chat_template from response instructions if not already provided
913948
if response and not prompt_data.get("chat_template") and not prompt_data.get("template"):
914949
instructions = _get_attr(response, "instructions", None)
915950
if instructions:
916-
variables = prompt_data.get("variables", {})
917951
normalized_variables = _normalize_prompt_variables(variables)
918952
chat_template = _extract_chat_template_from_instructions(instructions, normalized_variables)
919953
if chat_template:
@@ -922,6 +956,8 @@ def openai_set_meta_tags_from_response(
922956

923957
validated_prompt = _validate_prompt(prompt_data, strict_validation=False)
924958
span._set_ctx_item(INPUT_PROMPT, validated_prompt)
959+
960+
set_prompt_tracking_tags(span, is_multimodal=has_multimodal)
925961
except (TypeError, ValueError, AttributeError) as e:
926962
logger.debug("Failed to validate prompt for OpenAI response: %s", e)
927963

ddtrace/llmobs/_llmobs.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
from ddtrace.llmobs._constants import INPUT_MESSAGES
7171
from ddtrace.llmobs._constants import INPUT_PROMPT
7272
from ddtrace.llmobs._constants import INPUT_VALUE
73+
from ddtrace.llmobs._constants import INSTRUMENTATION_METHOD_ANNOTATED
7374
from ddtrace.llmobs._constants import INTEGRATION
7475
from ddtrace.llmobs._constants import LLMOBS_TRACE_ID
7576
from ddtrace.llmobs._constants import METADATA
@@ -81,6 +82,7 @@
8182
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
8283
from ddtrace.llmobs._constants import OUTPUT_VALUE
8384
from ddtrace.llmobs._constants import PARENT_ID_KEY
85+
from ddtrace.llmobs._constants import PROMPT_TRACKING_INSTRUMENTATION_METHOD
8486
from ddtrace.llmobs._constants import PROPAGATED_LLMOBS_TRACE_ID_KEY
8587
from ddtrace.llmobs._constants import PROPAGATED_ML_APP_KEY
8688
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
@@ -1655,6 +1657,9 @@ def annotate(
16551657
try:
16561658
validated_prompt = _validate_prompt(prompt, strict_validation=False)
16571659
cls._set_dict_attribute(span, INPUT_PROMPT, validated_prompt)
1660+
cls._set_dict_attribute(
1661+
span, TAGS, {PROMPT_TRACKING_INSTRUMENTATION_METHOD: INSTRUMENTATION_METHOD_ANNOTATED}
1662+
)
16581663
except (ValueError, TypeError) as e:
16591664
error = "invalid_prompt"
16601665
raise LLMObsAnnotateSpanError("Failed to validate prompt with error:", str(e))

tests/contrib/langchain/test_langchain_llmobs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def _expected_langchain_llmobs_llm_span(
9393
tags={"ml_app": "langchain_test", "service": "tests.contrib.langchain"},
9494
span_links=span_links,
9595
prompt=prompt,
96+
prompt_tracking_instrumentation_method="auto" if prompt else None,
9697
)
9798

9899

@@ -212,9 +213,9 @@ def test_llmobs_string_prompt_template_invoke(langchain_core, langchain_openai,
212213
assert actual_prompt["id"] == "test_langchain_llmobs.prompt_template"
213214
assert actual_prompt["template"] == template_string
214215
assert actual_prompt["variables"] == variable_dict
215-
# Check that metadata from the prompt template is preserved
216216
assert "tags" in actual_prompt
217217
assert actual_prompt["tags"] == {"test_type": "basic_invoke", "author": "test_suite"}
218+
assert "prompt_tracking_instrumentation_method:auto" in llmobs_events[1]["tags"]
218219

219220

220221
def test_llmobs_string_prompt_template_direct_invoke(
@@ -237,14 +238,13 @@ def test_llmobs_string_prompt_template_direct_invoke(
237238
llmobs_events.sort(key=lambda span: span["start_ns"])
238239
assert len(llmobs_events) == 1 # Only LLM span, prompt template invoke doesn't create LLMObs event by itself
239240

240-
# The prompt should be attached to the LLM span
241241
actual_prompt = llmobs_events[0]["meta"]["input"]["prompt"]
242242
assert actual_prompt["id"] == "test_langchain_llmobs.greeting_template"
243243
assert actual_prompt["template"] == template_string
244244
assert actual_prompt["variables"] == variable_dict
245-
# Check that metadata from the prompt template is preserved
246245
assert "tags" in actual_prompt
247246
assert actual_prompt["tags"] == {"test_type": "direct_invoke", "interaction": "greeting"}
247+
assert "prompt_tracking_instrumentation_method:auto" in llmobs_events[0]["tags"]
248248

249249

250250
def test_llmobs_string_prompt_template_invoke_chat_model(

tests/contrib/openai/test_openai_llmobs.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2407,6 +2407,7 @@ def test_response_with_mixed_input_prompt_tracking_url_stripped(self, openai, mo
24072407
),
24082408
}
24092409
],
2410+
prompt_multimodal=True,
24102411
)
24112412

24122413
@pytest.mark.skipif(
@@ -2485,6 +2486,7 @@ def test_response_with_mixed_input_prompt_tracking_url_preserved(self, openai, m
24852486
),
24862487
}
24872488
],
2489+
prompt_multimodal=True,
24882490
)
24892491

24902492
@pytest.mark.skipif(

tests/contrib/openai/utils.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,17 @@ def get_openai_vcr(subdirectory_name=""):
267267
)
268268

269269

270-
def assert_prompt_tracking(span_event, prompt_id, prompt_version, variables, expected_chat_template, expected_messages):
270+
def assert_prompt_tracking(
271+
span_event,
272+
prompt_id,
273+
prompt_version,
274+
variables,
275+
expected_chat_template,
276+
expected_messages,
277+
*,
278+
prompt_tracking_instrumentation_method="auto",
279+
prompt_multimodal=False,
280+
):
271281
"""Helper to assert prompt tracking metadata and template extraction."""
272282
assert "prompt" in span_event["meta"]["input"]
273283
actual_prompt = span_event["meta"]["input"]["prompt"]
@@ -277,3 +287,6 @@ def assert_prompt_tracking(span_event, prompt_id, prompt_version, variables, exp
277287
assert "chat_template" in actual_prompt
278288
assert actual_prompt["chat_template"] == expected_chat_template
279289
assert span_event["meta"]["input"]["messages"] == expected_messages
290+
assert f"prompt_tracking_instrumentation_method:{prompt_tracking_instrumentation_method}" in span_event["tags"]
291+
if prompt_multimodal:
292+
assert "prompt_multimodal:true" in span_event["tags"]

tests/llmobs/_utils.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def _expected_llmobs_llm_span_event(
7575
span,
7676
span_kind="llm",
7777
prompt=None,
78+
prompt_tracking_instrumentation_method=None,
79+
prompt_multimodal=None,
7880
input_messages=None,
7981
input_documents=None,
8082
output_messages=None,
@@ -94,6 +96,9 @@ def _expected_llmobs_llm_span_event(
9496
"""
9597
Helper function to create an expected LLM span event.
9698
span_kind: either "llm" or "agent" or "embedding"
99+
prompt: prompt metadata dict (id, version, variables, template)
100+
prompt_tracking_instrumentation_method: prompt tracking source tag ('auto' for auto-instrumented)
101+
prompt_multimodal: whether prompt contains multimodal inputs (True if present)
97102
input_messages: list of input messages in format {"content": "...", "optional_role", "..."}
98103
output_messages: list of output messages in format {"content": "...", "optional_role", "..."}
99104
metadata: dict of metadata key value pairs
@@ -109,7 +114,16 @@ def _expected_llmobs_llm_span_event(
109114
tool_definitions: list of tool definitions that were available to the LLM
110115
"""
111116
span_event = _llmobs_base_span_event(
112-
span, span_kind, tags, session_id, error, error_message, error_stack, span_links
117+
span,
118+
span_kind,
119+
tags,
120+
session_id,
121+
error,
122+
error_message,
123+
error_stack,
124+
span_links,
125+
prompt_tracking_instrumentation_method,
126+
prompt_multimodal,
113127
)
114128
meta_dict = {"input": {}, "output": {}}
115129
if span_kind == "llm":
@@ -171,6 +185,8 @@ def _expected_llmobs_non_llm_span_event(
171185
error_message=None,
172186
error_stack=None,
173187
span_links=False,
188+
prompt_tracking_instrumentation_method=None,
189+
prompt_multimodal=None,
174190
):
175191
"""
176192
Helper function to create an expected span event of type (workflow, task, tool, retrieval).
@@ -185,9 +201,20 @@ def _expected_llmobs_non_llm_span_event(
185201
error_message: error message
186202
error_stack: error stack
187203
span_links: whether there are span links present on this span.
204+
prompt_tracking_instrumentation_method: prompt tracking source tag ('auto' for auto-instrumented)
205+
prompt_multimodal: whether prompt contains multimodal inputs (True if present)
188206
"""
189207
span_event = _llmobs_base_span_event(
190-
span, span_kind, tags, session_id, error, error_message, error_stack, span_links
208+
span,
209+
span_kind,
210+
tags,
211+
session_id,
212+
error,
213+
error_message,
214+
error_stack,
215+
span_links,
216+
prompt_tracking_instrumentation_method,
217+
prompt_multimodal,
191218
)
192219
meta_dict = {"input": {}, "output": {}}
193220
if span_kind == "retrieval":
@@ -221,7 +248,14 @@ def _llmobs_base_span_event(
221248
error_message=None,
222249
error_stack=None,
223250
span_links=False,
251+
prompt_tracking_instrumentation_method=None,
252+
prompt_multimodal=None,
224253
):
254+
expected_tags = _expected_llmobs_tags(span, tags=tags, error=error, session_id=session_id)
255+
if prompt_tracking_instrumentation_method:
256+
expected_tags.append(f"prompt_tracking_instrumentation_method:{prompt_tracking_instrumentation_method}")
257+
if prompt_multimodal:
258+
expected_tags.append(f"prompt_multimodal:{prompt_multimodal}")
225259
span_event = {
226260
"trace_id": mock.ANY,
227261
"span_id": str(span.span_id),
@@ -232,7 +266,7 @@ def _llmobs_base_span_event(
232266
"status": "error" if error else "ok",
233267
"meta": _Meta(span=_SpanField(kind=span_kind)),
234268
"metrics": {},
235-
"tags": _expected_llmobs_tags(span, tags=tags, error=error, session_id=session_id),
269+
"tags": expected_tags,
236270
"_dd": {
237271
"span_id": str(span.span_id),
238272
"trace_id": format_trace_id(span.trace_id),

tests/llmobs/test_llmobs_service.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from ddtrace.llmobs._constants import OUTPUT_DOCUMENTS
2626
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
2727
from ddtrace.llmobs._constants import OUTPUT_VALUE
28+
from ddtrace.llmobs._constants import PROMPT_TRACKING_INSTRUMENTATION_METHOD
2829
from ddtrace.llmobs._constants import PROPAGATED_ML_APP_KEY
2930
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
3031
from ddtrace.llmobs._constants import SESSION_ID
@@ -791,6 +792,7 @@ def test_annotate_prompt_dict(llmobs):
791792
"_dd_context_variable_keys": ["context"],
792793
"_dd_query_variable_keys": ["question"],
793794
}
795+
assert span._get_ctx_item(TAGS) == {PROMPT_TRACKING_INSTRUMENTATION_METHOD: "annotated"}
794796

795797

796798
def test_annotate_prompt_dict_with_context_var_keys(llmobs):
@@ -814,6 +816,7 @@ def test_annotate_prompt_dict_with_context_var_keys(llmobs):
814816
"_dd_context_variable_keys": ["var1", "var2"],
815817
"_dd_query_variable_keys": ["user_input"],
816818
}
819+
assert span._get_ctx_item(TAGS) == {PROMPT_TRACKING_INSTRUMENTATION_METHOD: "annotated"}
817820

818821

819822
def test_annotate_prompt_typed_dict(llmobs):
@@ -837,6 +840,7 @@ def test_annotate_prompt_typed_dict(llmobs):
837840
"_dd_context_variable_keys": ["var1", "var2"],
838841
"_dd_query_variable_keys": ["user_input"],
839842
}
843+
assert span._get_ctx_item(TAGS) == {PROMPT_TRACKING_INSTRUMENTATION_METHOD: "annotated"}
840844

841845

842846
def test_annotate_prompt_wrong_type(llmobs):
@@ -1322,6 +1326,7 @@ def test_annotation_context_modifies_prompt(llmobs):
13221326
"_dd_context_variable_keys": ["context"],
13231327
"_dd_query_variable_keys": ["question"],
13241328
}
1329+
assert span._get_ctx_item(TAGS) == {PROMPT_TRACKING_INSTRUMENTATION_METHOD: "annotated"}
13251330

13261331

13271332
def test_annotation_context_modifies_name(llmobs):
@@ -1507,6 +1512,7 @@ async def test_annotation_context_async_modifies_prompt(llmobs):
15071512
"_dd_context_variable_keys": ["context"],
15081513
"_dd_query_variable_keys": ["question"],
15091514
}
1515+
assert span._get_ctx_item(TAGS) == {PROMPT_TRACKING_INSTRUMENTATION_METHOD: "annotated"}
15101516

15111517

15121518
async def test_annotation_context_async_modifies_name(llmobs):

0 commit comments

Comments
 (0)