
Commit 700dd72

jsondai authored and copybara-github committed
chore: GenAI SDK client - Improve rendering of structured content in evaluation results
PiperOrigin-RevId: 775432969
1 parent d4ede02 commit 700dd72

File tree

1 file changed (+193 -35)


vertexai/_genai/_evals_visualization.py

Lines changed: 193 additions & 35 deletions
@@ -16,7 +16,7 @@
 
 import json
 import logging
-from typing import Optional
+from typing import Any, Optional
 
 from pydantic import errors
 import pandas as pd
@@ -89,6 +89,9 @@ def _get_evaluation_html(eval_result_json: str) -> str:
 .reference-container {{ background-color: #e6f4ea; padding: 16px; margin: 12px 0; border-radius: 8px; white-space: pre-wrap; word-wrap: break-word; }}
 .response-container {{ background-color: #f9f9f9; padding: 12px; margin-top: 8px; border-radius: 8px; border: 1px solid #eee; }}
 .explanation {{ color: #5f6368; font-style: italic; font-size: 0.9em; padding-top: 6px; }}
+.raw-json-details {{ margin-top: 12px; border: 1px solid #eee; border-radius: 4px; padding: 8px; background-color: #f9f9f9; }}
+.raw-json-details summary {{ font-size: 0.9em; cursor: pointer; color: #5f6368;}}
+.raw-json-container {{ white-space: pre-wrap; word-wrap: break-word; max-height: 300px; overflow-y: auto; background-color: #f1f1f1; padding: 10px; border-radius: 4px; margin-top: 8px; }}
 </style>
 </head>
 <body>
@@ -114,23 +117,36 @@ def _get_evaluation_html(eval_result_json: str) -> str:
 container.innerHTML = '<h2>Detailed Results</h2>';
 if (!caseResults || caseResults.length === 0) {{ container.innerHTML += '<p>No detailed results.</p>'; return; }}
 const datasetRows = metadata && metadata.dataset ? metadata.dataset : [];
+
 caseResults.forEach((caseResult, i) => {{
     const original_case = datasetRows[caseResult.eval_case_index] || {{}};
-    const prompt = original_case.prompt || '(prompt not found)';
+    const promptText = original_case.prompt_display_text || '(prompt not found)';
+    const promptJson = original_case.prompt_raw_json;
     const reference = original_case.reference || '';
+    const responseText = original_case.response_display_text || '(response not found)';
+    const responseJson = original_case.response_raw_json;
+
     let card = `<details><summary>Case #${{caseResult.eval_case_index != null ? caseResult.eval_case_index : i}}</summary>`;
-    card += `<div class="prompt-container"><strong>Prompt:</strong><br>${{DOMPurify.sanitize(marked.parse(String(prompt)))}}</div>`;
+
+    card += `<div class="prompt-container"><strong>Prompt:</strong><br>${{DOMPurify.sanitize(marked.parse(String(promptText)))}}</div>`;
+    if (promptJson) {{
+        card += `<details class="raw-json-details"><summary>View Raw Prompt JSON</summary><pre class="raw-json-container">${{DOMPurify.sanitize(promptJson)}}</pre></details>`;
+    }}
+
     if (reference) {{ card += `<div class="reference-container"><strong>Reference:</strong><br>${{DOMPurify.sanitize(marked.parse(String(reference)))}}</div>`; }}
-    (caseResult.response_candidate_results || []).forEach(candidate => {{
-        const candidateResponse = candidate.response_text || '(response not found)';
-        card += `<div class="response-container"><h4>Candidate Response</h4>${{DOMPurify.sanitize(marked.parse(String(candidateResponse)))}}</div>`;
-        let metricTable = '<h4>Metrics</h4><table><tbody>';
-        Object.entries(candidate.metric_results || {{}}).forEach(([name, val]) => {{
-            metricTable += `<tr><td>${{name}}</td><td><b>${{val.score != null ? val.score.toFixed(2) : 'N/A'}}</b></td></tr>`;
-            if (val.explanation) {{ metricTable += `<tr><td colspan="2"><div class="explanation">${{DOMPurify.sanitize(marked.parse(String(val.explanation)))}}</div></td></tr>`; }}
-        }});
-        card += metricTable + '</tbody></table>';
+
+    card += `<div class="response-container"><h4>Candidate Response</h4>${{DOMPurify.sanitize(marked.parse(String(responseText)))}}</div>`;
+    if (responseJson) {{
+        card += `<details class="raw-json-details"><summary>View Raw Response JSON</summary><pre class="raw-json-container">${{DOMPurify.sanitize(responseJson)}}</pre></details>`;
+    }}
+
+    let metricTable = '<h4>Metrics</h4><table><tbody>';
+    const candidateMetrics = (caseResult.response_candidate_results && caseResult.response_candidate_results[0] && caseResult.response_candidate_results[0].metric_results) || {{}};
+    Object.entries(candidateMetrics).forEach(([name, val]) => {{
+        metricTable += `<tr><td>${{name}}</td><td><b>${{val.score != null ? val.score.toFixed(2) : 'N/A'}}</b></td></tr>`;
+        if (val.explanation) {{ metricTable += `<tr><td colspan="2"><div class="explanation">${{DOMPurify.sanitize(marked.parse(String(val.explanation)))}}</div></td></tr>`; }}
     }});
+    card += metricTable + '</tbody></table>';
     container.innerHTML += card + '</details>';
 }});
 }}
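
For orientation (an annotation, not part of the diff): the original_case fields read above come from the metadata.dataset rows that display_evaluation_result now builds (see the later hunks in this commit). A sketch of one such row, with placeholder values:

    # Hypothetical metadata.dataset row consumed by the detailed-results
    # renderer above; all values here are placeholders.
    example_case = {
        "prompt_display_text": "Why is the sky blue?",  # rendered as Markdown
        "prompt_raw_json": '{\n  "contents": [ ... ]\n}',  # fills the raw-JSON expander
        "reference": "Rayleigh scattering of sunlight.",
        "response_display_text": "Because of Rayleigh scattering.",
        "response_raw_json": "",  # a falsy value suppresses the expander
    }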
@@ -168,6 +184,9 @@ def _get_comparison_html(eval_result_json: str) -> str:
 .response-column {{ border: 1px solid #e0e0e0; padding: 16px; border-radius: 8px; background: #f9f9f9; }}
 .response-text-container {{ background-color: #fff; padding: 12px; margin-top: 8px; border-radius: 4px; border: 1px solid #eee; white-space: pre-wrap; word-wrap: break-word; max-height: 400px; overflow-y: auto; }}
 .explanation {{ color: #5f6368; font-style: italic; font-size: 0.9em; padding-top: 8px; }}
+.raw-json-details {{ margin-top: 12px; border: 1px solid #eee; border-radius: 4px; padding: 8px; background-color: #f9f9f9; }}
+.raw-json-details summary {{ font-size: 0.9em; cursor: pointer; color: #5f6368;}}
+.raw-json-container {{ white-space: pre-wrap; word-wrap: break-word; max-height: 300px; overflow-y: auto; background-color: #f1f1f1; padding: 10px; border-radius: 4px; margin-top: 8px; }}
 </style>
 </head>
 <body>
@@ -202,13 +221,31 @@ def _get_comparison_html(eval_result_json: str) -> str:
 
 caseResults.forEach((caseResult, i) => {{
     const original_case = datasetRows[caseResult.eval_case_index] || {{}};
-    let card = `<details open><summary>Case #${{caseResult.eval_case_index}}</summary><div class="prompt-container">${{DOMPurify.sanitize(marked.parse(String(original_case.prompt || '')))}}</div><div class="responses-grid">`;
+    const promptText = original_case.prompt_display_text || '(prompt not found)';
+    const promptJson = original_case.prompt_raw_json;
+
+    let card = `<details open><summary>Case #${{caseResult.eval_case_index}}</summary>`;
+    card += `<div class="prompt-container">${{DOMPurify.sanitize(marked.parse(String(promptText)))}}</div>`;
+    if (promptJson) {{
+        card += `<details class="raw-json-details"><summary>View Raw Prompt JSON</summary><pre class="raw-json-container">${{DOMPurify.sanitize(promptJson)}}</pre></details>`;
+    }}
+
+    card += `<div class="responses-grid">`;
+
     (caseResult.response_candidate_results || []).forEach((candidate, j) => {{
         const candidateName = candidateNames ? candidateNames[j] : `Candidate #${{j + 1}}`;
-        card += `<div class="response-column"><h4>${{candidateName}}</h4><div class="response-text-container">${{DOMPurify.sanitize(marked.parse(String(candidate.response_text || '')))}}</div><h5>Metrics</h5><table><tbody>`;
+        const displayText = candidate.display_text || '(response not found)';
+        const rawJsonResponse = candidate.raw_json;
+
+        card += `<div class="response-column"><h4>${{candidateName}}</h4><div class="response-text-container">${{DOMPurify.sanitize(marked.parse(String(displayText)))}}</div>`;
+        if (rawJsonResponse) {{
+            card += `<details class="raw-json-details"><summary>View Raw Response JSON</summary><pre class="raw-json-container">${{DOMPurify.sanitize(rawJsonResponse)}}</pre></details>`;
+        }}
+
+        card += `<h5>Metrics</h5><table><tbody>`;
         Object.entries(candidate.metric_results || {{}}).forEach(([name, val]) => {{
-            card += `<tr><td>${{name}}</td><td><b>${{val.score.toFixed(2)}}</b></td></tr>`;
-            if(val.explanation) card += `<tr><td colspan="2" class="explanation">${{DOMPurify.sanitize(marked.parse(String(val.explanation)))}}</td></tr>`;
+            card += `<tr><td>${{name}}</td><td><b>${{val.score != null ? val.score.toFixed(2) : 'N/A'}}</b></td></tr>`;
+            if(val.explanation) card += `<tr class="explanation-row"><td colspan="2" class="explanation">${{DOMPurify.sanitize(marked.parse(String(val.explanation)))}}</td></tr>`;
         }});
         card += '</tbody></table></div>';
     }});
@@ -241,6 +278,9 @@ def _get_inference_html(dataframe_json: str) -> str:
 th, td {{ border: 1px solid #dadce0; padding: 12px; text-align: left; vertical-align: top; }}
 th {{ background-color: #f2f2f2; font-weight: 500;}}
 td > div {{ white-space: pre-wrap; word-wrap: break-word; max-height: 400px; overflow-y: auto; }}
+.raw-json-details {{ margin-top: 8px; border-top: 1px solid #eee; padding-top: 8px; }}
+.raw-json-details summary {{ font-size: 0.9em; cursor: pointer; color: #5f6368; }}
+.raw-json-container {{ white-space: pre-wrap; word-wrap: break-word; max-height: 300px; overflow-y: auto; background-color: #f1f1f1; padding: 10px; border-radius: 4px; margin-top: 8px; }}
 </style>
 </head>
 <body>
@@ -249,8 +289,23 @@ def _get_inference_html(dataframe_json: str) -> str:
 <div id="results-table"></div>
 </div>
 <script>
-const data = JSON.parse({dataframe_json});
+const data = {dataframe_json};
 const container = document.getElementById('results-table');
+
+function renderCell(cellValue) {{
+    let cellContent = '';
+    if (cellValue && typeof cellValue === 'object' && cellValue.display_text !== undefined) {{
+        cellContent += `<div>${{DOMPurify.sanitize(marked.parse(String(cellValue.display_text)))}}</div>`;
+        if (cellValue.raw_json) {{
+            cellContent += `<details class="raw-json-details"><summary>View Raw JSON</summary><pre class="raw-json-container">${{DOMPurify.sanitize(cellValue.raw_json)}}</pre></details>`;
+        }}
+    }} else {{
+        const cellDisplay = cellValue === null || cellValue === undefined ? '' : String(cellValue);
+        cellContent = `<div>${{DOMPurify.sanitize(marked.parse(cellDisplay))}}</div>`;
+    }}
+    return `<td>${{cellContent}}</td>`;
+}}
+
 if (!data || data.length === 0) {{ container.innerHTML = "<p>No data.</p>"; }}
 else {{
     let table = '<table><thead><tr>';
@@ -260,9 +315,7 @@ def _get_inference_html(dataframe_json: str) -> str:
 data.forEach(row => {{
     table += '<tr>';
     headers.forEach(header => {{
-        const cellValue = row[header];
-        const cellDisplay = cellValue === null || cellValue === undefined ? '' : String(cellValue);
-        table += `<td><div>${{DOMPurify.sanitize(marked.parse(cellDisplay))}}</div></td>`;
+        table += renderCell(row[header]);
     }});
     table += '</tr>';
 }});
@@ -274,6 +327,71 @@ def _get_inference_html(dataframe_json: str) -> str:
 """
 
 
+def _extract_text_and_raw_json(content: Any) -> dict[str, str]:
+    """Extracts display text and raw JSON from a content object.
+
+    This function handles raw strings, Gemini's `contents` format, and
+    OpenAI's `messages` format.
+
+    Args:
+        content: The content from a 'prompt', 'request', or 'response' column.
+
+    Returns:
+        A dictionary with 'display_text' for direct rendering and 'raw_json'
+        for an expandable view.
+    """
+    if not isinstance(content, (str, dict)):
+        return {"display_text": str(content or ""), "raw_json": ""}
+
+    try:
+        data = json.loads(content) if isinstance(content, str) else content
+
+        if not isinstance(data, dict):
+            return {"display_text": str(content), "raw_json": ""}
+
+        pretty_json = json.dumps(data, indent=2, ensure_ascii=False)
+
+        # Gemini format check.
+        if (
+            "contents" in data
+            and isinstance(data.get("contents"), list)
+            and data["contents"]
+        ):
+            first_part = data["contents"][0].get("parts", [{}])[0]
+            display_text = first_part.get("text", str(data))
+            return {"display_text": display_text, "raw_json": pretty_json}
+
+        # OpenAI response format check.
+        elif (
+            "choices" in data
+            and isinstance(data.get("choices"), list)
+            and data["choices"]
+        ):
+            message = data["choices"][0].get("message", {})
+            display_text = message.get("content", str(data))
+            return {"display_text": display_text, "raw_json": pretty_json}
+
+        # OpenAI request format check.
+        elif (
+            "messages" in data
+            and isinstance(data.get("messages"), list)
+            and data["messages"]
+        ):
+            user_messages = [
+                message.get("content", "")
+                for message in data["messages"]
+                if message.get("role") == "user"
+            ]
+            display_text = user_messages[-1] if user_messages else str(data)
+            return {"display_text": display_text, "raw_json": pretty_json}
+        else:
+            # Not a recognized format.
+            return {"display_text": str(content), "raw_json": pretty_json}
+
+    except (json.JSONDecodeError, TypeError, IndexError):
+        return {"display_text": str(content), "raw_json": ""}
+
+
 def display_evaluation_result(
     eval_result_obj: types.EvaluationResult,
     candidate_names: Optional[list[str]] = None,
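
A minimal usage sketch of the new helper (it imports a private module directly, so this is illustrative only, not a supported API):

    # Illustrative only: _extract_text_and_raw_json is a private helper.
    from vertexai._genai._evals_visualization import _extract_text_and_raw_json

    # Gemini `contents` format: the first part's text becomes the display text.
    gemini_request = {
        "contents": [{"role": "user", "parts": [{"text": "Why is the sky blue?"}]}]
    }
    info = _extract_text_and_raw_json(gemini_request)
    assert info["display_text"] == "Why is the sky blue?"
    assert info["raw_json"].startswith("{")  # pretty-printed JSON for the expander

    # OpenAI response format: the first choice's message content is used.
    openai_response = {
        "choices": [{"message": {"role": "assistant", "content": "Rayleigh scattering."}}]
    }
    assert _extract_text_and_raw_json(openai_response)["display_text"] == "Rayleigh scattering."

    # Non-JSON strings hit the JSONDecodeError branch: text only, no raw view.
    assert _extract_text_and_raw_json("plain text") == {"display_text": "plain text", "raw_json": ""}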
@@ -315,9 +433,18 @@ def display_evaluation_result(
         and input_dataset_list[0]
         and input_dataset_list[0].eval_dataset_df is not None
     ):
-        metadata_payload["dataset"] = _preprocess_df_for_json(
-            input_dataset_list[0].eval_dataset_df
-        ).to_dict(orient="records")
+        base_df = _preprocess_df_for_json(input_dataset_list[0].eval_dataset_df)
+        processed_rows = []
+        for _, row in base_df.iterrows():
+            prompt_key = "request" if "request" in row else "prompt"
+            prompt_info = _extract_text_and_raw_json(row.get(prompt_key))
+            processed_row = {
+                "prompt_display_text": prompt_info["display_text"],
+                "prompt_raw_json": prompt_info["raw_json"],
+                "reference": row.get("reference", ""),
+            }
+            processed_rows.append(processed_row)
+        metadata_payload["dataset"] = processed_rows
 
     if "eval_case_results" in result_dump:
         for case_res in result_dump["eval_case_results"]:
@@ -337,9 +464,10 @@ def display_evaluation_result(
         and case_idx is not None
         and case_idx < len(df)
     ):
-        cand_res["response_text"] = df.iloc[case_idx].get(
-            "response"
-        )
+        response_content = df.iloc[case_idx].get("response")
+        display_info = _extract_text_and_raw_json(response_content)
+        cand_res["display_text"] = display_info["display_text"]
+        cand_res["raw_json"] = display_info["raw_json"]
 
 win_rates = eval_result_obj.win_rates if eval_result_obj.win_rates else {}
 if "summary_metrics" in result_dump:
@@ -351,25 +479,41 @@ def display_evaluation_result(
     html_content = _get_comparison_html(json.dumps(result_dump))
 else:
     single_dataset = input_dataset_list[0] if input_dataset_list else None
-
+    processed_rows = []
     if (
         single_dataset is not None
         and isinstance(single_dataset, types.EvaluationDataset)
         and single_dataset.eval_dataset_df is not None
     ):
         processed_df = _preprocess_df_for_json(single_dataset.eval_dataset_df)
-        metadata_payload["dataset"] = processed_df.to_dict(orient="records")
-    if "eval_case_results" in result_dump and processed_df is not None:
+        for _, row in processed_df.iterrows():
+            prompt_key = "request" if "request" in row else "prompt"
+            prompt_info = _extract_text_and_raw_json(row.get(prompt_key))
+            response_info = _extract_text_and_raw_json(row.get("response"))
+            processed_row = {
+                "prompt_display_text": prompt_info["display_text"],
+                "prompt_raw_json": prompt_info["raw_json"],
+                "reference": row.get("reference", ""),
+                "response_display_text": response_info["display_text"],
+                "response_raw_json": response_info["raw_json"],
+            }
+            processed_rows.append(processed_row)
+        metadata_payload["dataset"] = processed_rows
+
+    if "eval_case_results" in result_dump and processed_rows:
         for case_res in result_dump["eval_case_results"]:
             case_idx = case_res.get("eval_case_index")
             if (
                 case_idx is not None
-                and case_idx < len(processed_df)
+                and case_idx < len(processed_rows)
                 and case_res.get("response_candidate_results")
             ):
-                case_res["response_candidate_results"][0][
-                    "response_text"
-                ] = processed_df.iloc[case_idx].get("response")
+                original_case = processed_rows[case_idx]
+                cand_res = case_res["response_candidate_results"][0]
+                cand_res["display_text"] = original_case[
+                    "response_display_text"
+                ]
+                cand_res["raw_json"] = original_case["response_raw_json"]
 
 result_dump["metadata"] = metadata_payload
 html_content = _get_evaluation_html(json.dumps(result_dump))
@@ -392,7 +536,21 @@ def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> Non
         logger.warning("No inference data to display.")
         return
 
-    processed_df = _preprocess_df_for_json(eval_dataset_obj.eval_dataset_df)
-    dataframe_json_string = json.dumps(processed_df.to_json(orient="records"))
+    processed_rows = []
+    df = eval_dataset_obj.eval_dataset_df
+
+    for _, row in df.iterrows():
+        processed_row = {}
+        for col_name, cell_value in row.items():
+            if col_name in ["prompt", "request", "response"]:
+                processed_row[col_name] = _extract_text_and_raw_json(cell_value)
+            else:
+                if isinstance(cell_value, (dict, list)):
+                    processed_row[col_name] = json.dumps(cell_value, ensure_ascii=False)
+                else:
+                    processed_row[col_name] = cell_value
+        processed_rows.append(processed_row)
+
+    dataframe_json_string = json.dumps(processed_rows, ensure_ascii=False, default=str)
     html_content = _get_inference_html(dataframe_json_string)
     display.display(display.HTML(html_content))
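
For orientation: each dataframe row serialized by the updated display_evaluation_dataset looks roughly like the sketch below; renderCell() in the inference template branches on the presence of display_text. The latency_ms column is a made-up example, not part of the commit.

    # Hypothetical processed row, shaped like the payload passed to
    # _get_inference_html() after this change.
    example_row = {
        # Structured columns become {display_text, raw_json} dicts, which
        # renderCell() detects via `cellValue.display_text !== undefined`.
        "prompt": {
            "display_text": "Why is the sky blue?",
            "raw_json": '{\n  "contents": [ ... ]\n}',
        },
        "response": {
            "display_text": "Rayleigh scattering.",
            "raw_json": '{\n  "choices": [ ... ]\n}',
        },
        # Scalar columns pass through unchanged (dicts/lists are JSON-encoded).
        "latency_ms": 432,
    }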

Comments (0)