Skip to content

Commit 2861bd4

Browse files
noahlwest authored and ShubyM committed
Add support for JSONL format, simplify task output directory paths, update run-eval-loop.sh (GoogleCloudPlatform#614)
1 parent 46ce18b commit 2861bd4

3 files changed

Lines changed: 49 additions & 25 deletions

File tree

dev/ci/periodics/run-eval-loop.sh

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,9 @@ echo "Task Pattern: ${TASK_PATTERN:-"All Tasks"}"
9696
# Loop from 1 to the specified number of iterations
9797
for i in $(seq 1 $ITERATIONS)
9898
do
99-
OUTPUT_DIR="${REPO_ROOT}/.build/k8s-ai-bench-${MODEL}-${i}"
99+
# Create a sanitized version of model name: replace all '/' with '-'
100+
SAFE_MODEL="${MODEL//\//-}"
101+
OUTPUT_DIR="${REPO_ROOT}/.build/k8s-ai-bench-${SAFE_MODEL}-${i}"
100102

101103
echo "Running iteration $i of $ITERATIONS..."
102104

@@ -124,6 +126,7 @@ do
124126
# Paths for analysis files
125127
MARKDOWN_FILE="${OUTPUT_DIR}/k8s-ai-bench.md"
126128
JSON_FILE="${OUTPUT_DIR}/k8s-ai-bench.json"
129+
JSONL_FILE="${OUTPUT_DIR}/k8s-ai-bench.jsonl"
127130

128131
# Run for markdown format
129132
"${K8S_AI_BENCH_BIN}" analyze --input-dir="${OUTPUT_DIR}" --results-filepath="${MARKDOWN_FILE}" --output-format=markdown --show-failures
@@ -139,6 +142,13 @@ do
139142
exit 1
140143
fi
141144

145+
# Run for jsonl format
146+
"${K8S_AI_BENCH_BIN}" analyze --input-dir="${OUTPUT_DIR}" --results-filepath="${JSONL_FILE}" --output-format=jsonl --show-failures
147+
if [ $? -ne 0 ]; then
148+
echo "Error on iteration $i during JSONL analysis. Aborting loop."
149+
exit 1
150+
fi
151+
142152
# Extract the time value and append it to the markdown file
143153
if [ -n "$run_time_line" ]; then
144154
time_value=$(echo $run_time_line | awk '{print $4}')

k8s-ai-bench/eval.go

Lines changed: 4 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ func runEvaluation(ctx context.Context, config EvalConfig) error {
130130
for _, llmConfig := range config.LLMConfigs {
131131
taskOutputDir := ""
132132
if config.OutputDir != "" {
133-
taskOutputDir = filepath.Join(config.OutputDir, job.taskID, llmConfig.ID)
133+
taskOutputDir = filepath.Join(config.OutputDir, job.taskID)
134134
if err := os.MkdirAll(taskOutputDir, 0755); err != nil {
135135
errorsCh <- fmt.Errorf("creating directory %q: %w", taskOutputDir, err)
136136
return
@@ -288,7 +288,7 @@ func evaluateTask(ctx context.Context, config EvalConfig, taskID string, task Ta
288288
taskCtx, cancel := context.WithTimeout(ctx, timeout)
289289
defer cancel()
290290

291-
taskOutputDir := filepath.Join(config.OutputDir, taskID, llmConfig.ID)
291+
taskOutputDir := filepath.Join(config.OutputDir, taskID)
292292

293293
var logBuffer bytes.Buffer
294294
multiWriter := io.MultiWriter(&logBuffer)
@@ -343,15 +343,7 @@ func evaluateTask(ctx context.Context, config EvalConfig, taskID string, task Ta
343343
logString := logBuffer.String()
344344
logTail, truncated := getLastNLines(logString, maxErrLogLines)
345345
// build log file path
346-
shimSegment := "shim_disabled"
347-
if x.llmConfig.EnableToolUseShim {
348-
shimSegment = "shim_enabled"
349-
}
350-
logPath := filepath.Join(
351-
config.OutputDir,
352-
taskID,
353-
shimSegment+"-"+x.llmConfig.ProviderID+"-"+x.llmConfig.ModelID,
354-
)
346+
logPath := taskOutputDir
355347
errorMessage := fmt.Sprintf("agent encountered error: %v\n---LOG---\n%s", err, logTail)
356348
if truncated {
357349
errorMessage += fmt.Sprintf("\n... (log truncated, full log at %s)", logPath)
@@ -416,15 +408,7 @@ func evaluateTask(ctx context.Context, config EvalConfig, taskID string, task Ta
416408
logString := logBuffer.String()
417409
logTail, truncated := getLastNLines(logString, maxLogLines)
418410
// build log file path
419-
shimSegment := "shim_disabled"
420-
if x.llmConfig.EnableToolUseShim {
421-
shimSegment = "shim_enabled"
422-
}
423-
logPath := filepath.Join(
424-
config.OutputDir,
425-
taskID,
426-
shimSegment+"-"+x.llmConfig.ProviderID+"-"+x.llmConfig.ModelID,
427-
)
411+
logPath := taskOutputDir
428412
failureMessage := fmt.Sprintf("verifier script failed: %v\n---LOG---\n%s", err, logTail)
429413
if truncated {
430414
failureMessage += fmt.Sprintf("\n... (log truncated, full log at %s)", logPath)

k8s-ai-bench/main.go

Lines changed: 34 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -360,8 +360,8 @@ func runAnalyze() error {
360360
}
361361

362362
// Check if output format is valid
363-
if config.OutputFormat != "markdown" && config.OutputFormat != "json" {
364-
return fmt.Errorf("invalid output format: %s, valid options are 'markdown' or 'json'", config.OutputFormat)
363+
if config.OutputFormat != "markdown" && config.OutputFormat != "json" && config.OutputFormat != "jsonl" {
364+
return fmt.Errorf("invalid output format: %s, valid options are 'markdown', 'json' or 'jsonl'", config.OutputFormat)
365365
}
366366

367367
// Check if input directory exists
@@ -375,14 +375,19 @@ func runAnalyze() error {
375375
}
376376

377377
// Format and output results
378-
if config.OutputFormat == "markdown" {
378+
switch config.OutputFormat {
379+
case "markdown":
379380
if err := printMarkdownResults(config, allResults, resultsFilePath); err != nil {
380381
return fmt.Errorf("printing markdown results: %w", err)
381382
}
382-
} else {
383+
case "json":
383384
if err := printJSONResults(allResults, resultsFilePath); err != nil {
384385
return fmt.Errorf("printing JSON results: %w", err)
385386
}
387+
case "jsonl":
388+
if err := printJSONLResults(allResults, resultsFilePath); err != nil {
389+
return fmt.Errorf("printing JSONL results: %w", err)
390+
}
386391
}
387392

388393
return nil
@@ -756,3 +761,28 @@ func printJSONResults(results []model.TaskResult, resultsFilePath string) error
756761

757762
return nil
758763
}
764+
765+
func printJSONLResults(results []model.TaskResult, resultsFilePath string) error {
766+
var buffer strings.Builder
767+
for _, result := range results {
768+
jsonData, err := json.Marshal(result)
769+
if err != nil {
770+
return fmt.Errorf("marshaling result to JSON: %w", err)
771+
}
772+
buffer.Write(jsonData)
773+
buffer.WriteString("\n")
774+
}
775+
776+
// Write to file if path is provided, otherwise print to stdout
777+
if resultsFilePath != "" {
778+
if err := os.WriteFile(resultsFilePath, []byte(buffer.String()), 0644); err != nil {
779+
return fmt.Errorf("writing to file %q: %w", resultsFilePath, err)
780+
}
781+
fmt.Printf("Results written to %s\n", resultsFilePath)
782+
} else {
783+
// Print to stdout only if no file path is specified
784+
fmt.Print(buffer.String())
785+
}
786+
787+
return nil
788+
}

0 commit comments

Comments
 (0)