
dac strip io from vertex #13693

Draft · wants to merge 3 commits into base: main
154 changes: 0 additions & 154 deletions ddtrace/contrib/internal/vertexai/_utils.py
@@ -1,13 +1,5 @@
import sys

from vertexai.generative_models import GenerativeModel
from vertexai.generative_models import Part

from ddtrace.internal.utils import get_argument_value
from ddtrace.llmobs._integrations.utils import get_generation_config_google
from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
from ddtrace.llmobs._integrations.utils import tag_request_content_part_google
from ddtrace.llmobs._integrations.utils import tag_response_part_google
from ddtrace.llmobs._utils import _get_attr


@@ -44,7 +36,6 @@ def __iter__(self):
self._dd_span.set_exc_info(*sys.exc_info())
raise
finally:
tag_stream_response(self._dd_span, self._chunks, self._dd_integration)
if self._dd_integration.is_pc_sampled_llmobs(self._dd_span):
self._kwargs["instance"] = self._model_instance
self._kwargs["history"] = self._history
@@ -74,7 +65,6 @@ async def __aiter__(self):
self._dd_span.set_exc_info(*sys.exc_info())
raise
finally:
tag_stream_response(self._dd_span, self._chunks, self._dd_integration)
if self._dd_integration.is_pc_sampled_llmobs(self._dd_span):
self._kwargs["instance"] = self._model_instance
self._kwargs["history"] = self._history
@@ -97,156 +87,12 @@ def extract_info_from_parts(parts):
return concatenated_text, function_calls


def _tag_response_parts(span, integration, parts):
text, function_calls = extract_info_from_parts(parts)
span.set_tag_str(
"vertexai.response.candidates.%d.content.parts.%d.text" % (0, 0),
integration.trunc(str(text)),
)
for idx, function_call in enumerate(function_calls):
span.set_tag_str(
"vertexai.response.candidates.%d.content.parts.%d.function_calls.%d.function_call.name" % (0, 0, idx),
_get_attr(function_call, "name", ""),
)
span.set_tag_str(
"vertexai.response.candidates.%d.content.parts.%d.function_calls.%d.function_call.args" % (0, 0, idx),
integration.trunc(str(_get_attr(function_call, "args", ""))),
)
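For illustration (not part of the diff): a hypothetical shape for the parts list that the removed _tag_response_parts walked. SimpleNamespace stands in for the real Vertex AI part objects, and all names and values below are invented.

from types import SimpleNamespace

# Invented part carrying both text and a function call (illustration only).
call = SimpleNamespace(name="get_weather", args={"city": "Paris"})
parts = [SimpleNamespace(text="Checking the forecast.", function_call=call)]
# The helper above would then set, for example:
#   vertexai.response.candidates.0.content.parts.0.text
#   vertexai.response.candidates.0.content.parts.0.function_calls.0.function_call.name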


def tag_stream_response(span, chunks, integration):
all_parts = []
role = ""
for chunk in chunks:
candidates = _get_attr(chunk, "candidates", [])
for candidate_idx, candidate in enumerate(candidates):
finish_reason = _get_attr(candidate, "finish_reason", None)
if finish_reason:
span.set_tag_str(
"vertexai.response.candidates.%d.finish_reason" % (candidate_idx),
_get_attr(finish_reason, "name", ""),
)
candidate_content = _get_attr(candidate, "content", {})
role = role or _get_attr(candidate_content, "role", "")
if not integration.is_pc_sampled_span(span):
continue
parts = _get_attr(candidate_content, "parts", [])
all_parts.extend(parts)
token_counts = _get_attr(chunk, "usage_metadata", None)
if not token_counts:
continue
span.set_metric("vertexai.response.usage.prompt_tokens", _get_attr(token_counts, "prompt_token_count", 0))
span.set_metric(
"vertexai.response.usage.completion_tokens", _get_attr(token_counts, "candidates_token_count", 0)
)
span.set_metric("vertexai.response.usage.total_tokens", _get_attr(token_counts, "total_token_count", 0))
# streamed responses have only a single candidate, so there is only one role to be tagged
span.set_tag_str("vertexai.response.candidates.0.content.role", str(role))
_tag_response_parts(span, integration, all_parts)
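A minimal, self-contained sketch (not part of the diff) of the chunk stream the removed tag_stream_response aggregated; the SimpleNamespace chunks and all values are invented for illustration.

from types import SimpleNamespace

# Two invented streamed chunks: text arrives split across chunks, and the
# final chunk carries the usage metadata that became span metrics.
chunks = [
    SimpleNamespace(
        candidates=[
            SimpleNamespace(
                finish_reason=None,
                content=SimpleNamespace(role="model", parts=[SimpleNamespace(text="Hel")]),
            )
        ],
        usage_metadata=None,
    ),
    SimpleNamespace(
        candidates=[
            SimpleNamespace(
                finish_reason=SimpleNamespace(name="STOP"),
                content=SimpleNamespace(role="model", parts=[SimpleNamespace(text="lo!")]),
            )
        ],
        usage_metadata=SimpleNamespace(
            prompt_token_count=5, candidates_token_count=2, total_token_count=7
        ),
    ),
]

# Aggregation as in the removed helper: parts concatenate across chunks,
# the single streamed role is kept, and the last usage_metadata wins.
all_parts = [p for c in chunks for cand in c.candidates for p in cand.content.parts]
role = next(cand.content.role for c in chunks for cand in c.candidates)
assert "".join(p.text for p in all_parts) == "Hello!" and role == "model"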


def _tag_request_content(span, integration, content, content_idx):
"""Tag the generation span with request contents."""
if isinstance(content, str):
span.set_tag_str("vertexai.request.contents.%d.text" % content_idx, integration.trunc(content))
return
if isinstance(content, dict):
role = content.get("role", "")
if role:
span.set_tag_str("vertexai.request.contents.%d.role" % content_idx, role)
parts = content.get("parts", [])
for part_idx, part in enumerate(parts):
tag_request_content_part_google("vertexai", span, integration, part, part_idx, content_idx)
return
if isinstance(content, Part):
tag_request_content_part_google("vertexai", span, integration, content, 0, content_idx)
return
role = _get_attr(content, "role", "")
if role:
span.set_tag_str("vertexai.request.contents.%d.role" % content_idx, str(role))
parts = _get_attr(content, "parts", [])
if not parts:
span.set_tag_str(
"vertexai.request.contents.%d.text" % content_idx,
integration.trunc("[Non-text content object: {}]".format(repr(content))),
)
return
for part_idx, part in enumerate(parts):
tag_request_content_part_google("vertexai", span, integration, part, part_idx, content_idx)


def tag_request(span, integration, instance, args, kwargs, is_chat):
"""Tag the generation span with request details.
Includes capturing generation configuration, system prompt, and messages.
"""
# instance is either a chat session or a model itself
model_instance = instance if isinstance(instance, GenerativeModel) else instance._model
contents = get_argument_value(args, kwargs, 0, "content" if is_chat else "contents")
history = _get_attr(instance, "_history", [])
if history:
if isinstance(contents, list):
contents = history + contents
if isinstance(contents, Part) or isinstance(contents, str) or isinstance(contents, dict):
contents = history + [contents]
generation_config = get_generation_config_google(model_instance, kwargs)
generation_config_dict = None
if generation_config is not None:
generation_config_dict = (
generation_config if isinstance(generation_config, dict) else generation_config.to_dict()
)
system_instructions = get_system_instructions_from_google_model(model_instance)
stream = kwargs.get("stream", None)

if generation_config_dict is not None:
for k, v in generation_config_dict.items():
span.set_tag_str("vertexai.request.generation_config.%s" % k, str(v))

if stream:
span.set_tag("vertexai.request.stream", True)

if not integration.is_pc_sampled_span(span):
return

for idx, text in enumerate(system_instructions):
span.set_tag_str(
"vertexai.request.system_instruction.%d.text" % idx,
integration.trunc(str(text)),
)

if isinstance(contents, str):
span.set_tag_str("vertexai.request.contents.0.text", integration.trunc(str(contents)))
return
elif isinstance(contents, Part):
tag_request_content_part_google("vertexai", span, integration, contents, 0, 0)
return
elif not isinstance(contents, list):
return
for content_idx, content in enumerate(contents):
_tag_request_content(span, integration, content, content_idx)
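For reference (not part of the diff), the flat span tags the removed tag_request produced for a plain-string prompt; the tag names come from the code above, the values are invented.

# Invented example values; tag names match the removed tag_request above.
expected_request_tags = {
    "vertexai.request.generation_config.temperature": "0.2",
    "vertexai.request.stream": True,
    "vertexai.request.system_instruction.0.text": "You are a helpful assistant.",
    "vertexai.request.contents.0.text": "What is the weather in Paris?",
}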


def tag_response(span, generations, integration):
"""Tag the generation span with response details.
Includes capturing generation text, roles, finish reasons, and token counts.
"""
generations_dict = generations.to_dict()
candidates = generations_dict.get("candidates", [])
for candidate_idx, candidate in enumerate(candidates):
finish_reason = _get_attr(candidate, "finish_reason", None)
if finish_reason:
span.set_tag_str("vertexai.response.candidates.%d.finish_reason" % candidate_idx, finish_reason)
candidate_content = _get_attr(candidate, "content", None)
role = _get_attr(candidate_content, "role", "")
span.set_tag_str("vertexai.response.candidates.%d.content.role" % candidate_idx, str(role))
if not integration.is_pc_sampled_span(span):
continue
parts = _get_attr(candidate_content, "parts", [])
for part_idx, part in enumerate(parts):
tag_response_part_google("vertexai", span, integration, part, part_idx, candidate_idx)

token_counts = generations_dict.get("usage_metadata", None)
if not token_counts:
return
span.set_metric("vertexai.response.usage.prompt_tokens", _get_attr(token_counts, "prompt_token_count", 0))
span.set_metric("vertexai.response.usage.completion_tokens", _get_attr(token_counts, "candidates_token_count", 0))
span.set_metric("vertexai.response.usage.total_tokens", _get_attr(token_counts, "total_token_count", 0))
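Similarly (not part of the diff), a hypothetical generations.to_dict() payload of the shape the removed tag_response consumed; field names follow the code above, values are invented.

# Hypothetical response dict (values invented) of the shape tag_response walked.
generations_dict = {
    "candidates": [
        {
            "finish_reason": "STOP",
            "content": {"role": "model", "parts": [{"text": "It is sunny in Paris."}]},
        }
    ],
    "usage_metadata": {
        "prompt_token_count": 12,
        "candidates_token_count": 6,
        "total_token_count": 18,
    },
}
# tag_response set e.g. vertexai.response.usage.total_tokens -> 18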
3 changes: 0 additions & 3 deletions ddtrace/contrib/internal/vertexai/patch.py
@@ -11,7 +11,6 @@
from ddtrace.contrib.internal.vertexai._utils import TracedAsyncVertexAIStreamResponse
from ddtrace.contrib.internal.vertexai._utils import TracedVertexAIStreamResponse
from ddtrace.contrib.internal.vertexai._utils import tag_request
from ddtrace.contrib.internal.vertexai._utils import tag_response
from ddtrace.llmobs._integrations import VertexAIIntegration
from ddtrace.llmobs._integrations.utils import extract_model_name_google
from ddtrace.trace import Pin
@@ -75,7 +74,6 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
return TracedVertexAIStreamResponse(
generations, model_instance, integration, span, args, kwargs, is_chat, history
)
tag_response(span, generations, integration)
except Exception:
span.set_exc_info(*sys.exc_info())
raise
@@ -110,7 +108,6 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
return TracedAsyncVertexAIStreamResponse(
generations, model_instance, integration, span, args, kwargs, is_chat, history
)
tag_response(span, generations, integration)
except Exception:
span.set_exc_info(*sys.exc_info())
raise
14 changes: 14 additions & 0 deletions ddtrace/llmobs/_integrations/utils.py
@@ -184,6 +184,20 @@ def get_llmobs_metrics_tags(integration_name, span):
return usage


def parse_llmobs_metric_args(metrics):
usage = {}
input_tokens = _get_attr(metrics, "prompt_tokens", None)
output_tokens = _get_attr(metrics, "completion_tokens", None)
total_tokens = _get_attr(metrics, "total_tokens", None)
if input_tokens is not None:
usage[INPUT_TOKENS_METRIC_KEY] = input_tokens
if output_tokens is not None:
usage[OUTPUT_TOKENS_METRIC_KEY] = output_tokens
if total_tokens is not None:
usage[TOTAL_TOKENS_METRIC_KEY] = total_tokens
return usage
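A usage sketch (not part of the diff) for the new helper, assuming the ddtrace metric-key constants resolve to "input_tokens", "output_tokens", and "total_tokens", and that _get_attr falls back to dict.get for dict inputs:

# Usage sketch (assumptions noted above); the input keys mirror the
# vertexai.response.usage.* names tagged elsewhere in this PR.
usage = parse_llmobs_metric_args(
    {"prompt_tokens": 12, "completion_tokens": 6, "total_tokens": 18}
)
# -> {"input_tokens": 12, "output_tokens": 6, "total_tokens": 18}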


def get_system_instructions_from_google_model(model_instance):
"""
Extract system instructions from model and convert to []str for tagging.
31 changes: 29 additions & 2 deletions ddtrace/llmobs/_integrations/vertexai.py
@@ -7,15 +7,17 @@
from ddtrace.internal.utils import ArgumentError
from ddtrace.internal.utils import get_argument_value
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.llmobs._integrations.utils import extract_message_from_part_google
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google
from ddtrace.llmobs._utils import _get_attr
@@ -43,6 +45,7 @@ def _llmobs_set_tags(
) -> None:
instance = kwargs.get("instance", None)
history = kwargs.get("history", [])
metrics = kwargs.get("metrics", {})
metadata = llmobs_get_metadata_google(kwargs, instance)

system_instruction = get_system_instructions_from_google_model(instance)
@@ -57,6 +60,8 @@ if response is not None:
if response is not None:
output_messages = self._extract_output_message(response)

metrics = self._extract_metrics_from_response(response)

span._set_ctx_items(
{
SPAN_KIND: "llm",
@@ -65,10 +70,32 @@
METADATA: metadata,
INPUT_MESSAGES: input_messages,
OUTPUT_MESSAGES: output_messages,
METRICS: get_llmobs_metrics_tags("vertexai", span),
METRICS: metrics,
}
)

def _extract_metrics_from_response(self, response):
"""Extract metrics from the response."""
generations_dict = response.to_dict()

token_counts = generations_dict.get("usage_metadata", None)
if not token_counts:
return

input_tokens = _get_attr(token_counts, "prompt_token_count", 0)
output_tokens = _get_attr(token_counts, "candidates_token_count", 0)
total_tokens = _get_attr(token_counts, "total_token_count", 0)

metrics = {}
if input_tokens is not None:
metrics[INPUT_TOKENS_METRIC_KEY] = input_tokens
if output_tokens is not None:
metrics[OUTPUT_TOKENS_METRIC_KEY] = output_tokens
if total_tokens is not None:
metrics[TOTAL_TOKENS_METRIC_KEY] = total_tokens

return metrics
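A hedged sketch (not part of the diff) of the new helper in isolation, with integration an instance of VertexAIIntegration; FakeResponse is an illustrative stand-in for a Vertex AI GenerationResponse, and the expected output assumes the same metric-key constants as above. Note that when usage_metadata is absent the early return yields None rather than an empty dict.

class FakeResponse:
    """Illustrative stand-in for a Vertex AI GenerationResponse."""

    def to_dict(self):
        return {
            "usage_metadata": {
                "prompt_token_count": 12,
                "candidates_token_count": 6,
                "total_token_count": 18,
            }
        }


metrics = integration._extract_metrics_from_response(FakeResponse())
# -> {"input_tokens": 12, "output_tokens": 6, "total_tokens": 18}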

def _extract_input_message(self, contents, history, system_instruction=None):
from vertexai.generative_models._generative_models import Part
