/**
 * Outcome of a single Mistral agent eval run, including the follow-up
 * build / lint / test validation.
 */
export interface MistralAgentResult {
  /** True when the agent process itself exited with code 0. */
  success: boolean;
  /** Captured stdout of the agent process ("" when the run threw early). */
  output: string;
  /** Agent stderr (or a generic message) on failure, or the thrown error's message. */
  error?: string;
  /** Wall-clock duration of the run in milliseconds. */
  duration: number;
  buildSuccess?: boolean;
  lintSuccess?: boolean;
  testSuccess?: boolean;
  buildOutput?: string;
  lintOutput?: string;
  testOutput?: string;
  /** JSON values parsed line-by-line from stdout; only set for the "stream-json" format. */
  streamData?: any[];
  evalPath?: string;
  /** ISO-8601 timestamp taken when the result object was assembled. */
  timestamp?: string;
}

/** Options accepted by `runMistralAgentEval`. */
interface MistralAgentOptions {
  verbose?: boolean;
  /** When set, the temporary workspace is kept on disk instead of being removed. */
  debug?: boolean;
  /** Overall timeout for the agent process, in milliseconds. */
  timeout?: number;
  /** Exported to the agent process as MISTRAL_API_KEY. */
  apiKey: string;
  /** Adds `--auto-approve` to the vibe command line. */
  force?: boolean;
  outputFormat?: string;
  /** When set, the result object is written to this path as JSON. */
  outputFile?: string;
  /** When set, the final workspace is copied next to the eval as `output-dry-<model>`. */
  saveOutput?: boolean;
  modelName?: string;
}

/**
 * Runs `command` through the system shell and collects its output.
 *
 * The returned promise never rejects: spawn errors resolve with `exitCode: 1`
 * and the error message appended to `stderr`. Output is always echoed to this
 * process's stdout/stderr with a `[stdout]` / `[stderr]` prefix (the `verbose`
 * option is accepted for call-site compatibility but not consulted).
 *
 * @param command Shell command line to execute.
 * @param options.cwd Working directory for the child.
 * @param options.timeout Hard timeout in ms, forwarded to `spawn`.
 * @param options.env Extra environment variables layered over `process.env`.
 * @param options.idleTimeout Kill the child after this many ms without any
 *   output (default 30000; 0 also selects the default).
 * @returns Captured stdout/stderr and the exit code. A child that was
 *   terminated by a signal (no exit code) is reported with `exitCode: 1`.
 */
async function execAsync(
  command: string,
  options: {
    cwd?: string;
    timeout?: number;
    env?: Record<string, string>;
    verbose?: boolean;
    idleTimeout?: number;
  } = {}
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  return new Promise((resolve) => {
    const startTime = Date.now();
    // `||` rather than `??` on purpose: an idle timeout of 0 falls back to the default.
    const idleTimeoutMs = options.idleTimeout || 30000;

    console.log(`[execAsync] Spawning command: ${command}`);
    console.log(`[execAsync] CWD: ${options.cwd}`);
    console.log(`[execAsync] Timeout: ${options.timeout}ms`);
    console.log(`[execAsync] Idle timeout: ${idleTimeoutMs}ms`);

    const child = spawn(command, {
      shell: true,
      cwd: options.cwd,
      env: { ...process.env, ...options.env },
      timeout: options.timeout,
    });

    let stdout = "";
    let stderr = "";
    let lastOutputTime = startTime;
    let resolved = false;
    let idleTimer: ReturnType<typeof setTimeout> | null = null;
    let killTimer: ReturnType<typeof setTimeout> | null = null;
    let flushTimer: ReturnType<typeof setTimeout> | null = null;

    // Periodic progress line so long-running commands do not look hung.
    const heartbeat = setInterval(() => {
      const elapsed = Date.now() - startTime;
      const sinceLastOutput = Date.now() - lastOutputTime;
      console.log(
        `[execAsync] Still running... (${(elapsed / 1000).toFixed(1)}s elapsed, ${(sinceLastOutput / 1000).toFixed(1)}s since last output)`
      );
    }, 5000);

    function resolveOnce(result: { stdout: string; stderr: string; exitCode: number }) {
      if (resolved) return;
      resolved = true;
      clearInterval(heartbeat);
      // Clear every pending timer so nothing keeps the event loop alive
      // (or signals a recycled PID) after the promise has settled.
      for (const timer of [idleTimer, killTimer, flushTimer]) {
        if (timer) clearTimeout(timer);
      }
      resolve(result);
    }

    function settle(code: number | null, signal: string | null) {
      if (!resolved && signal) {
        console.log(`[execAsync] Process was terminated by signal ${signal}`);
      }
      // `code` is null when the child was killed by a signal (timeout, idle
      // kill, external kill). That must never be reported as success.
      resolveOnce({ stdout, stderr, exitCode: code ?? 1 });
    }

    function resetIdleTimeout() {
      if (resolved) return;
      if (idleTimer) clearTimeout(idleTimer);

      idleTimer = setTimeout(() => {
        const sinceLastOutput = Date.now() - lastOutputTime;
        console.log(
          `[execAsync] Idle timeout reached (${(sinceLastOutput / 1000).toFixed(1)}s since last output)`
        );
        console.log(`[execAsync] Forcefully terminating process ${child.pid}...`);
        child.kill("SIGTERM");

        // If SIGTERM doesn't work after 5 seconds, escalate to SIGKILL.
        killTimer = setTimeout(() => {
          if (!resolved) {
            console.log(`[execAsync] Process didn't respond to SIGTERM, using SIGKILL...`);
            child.kill("SIGKILL");
          }
        }, 5000);
      }, idleTimeoutMs);
    }

    resetIdleTimeout();

    child.stdout?.on("data", (data) => {
      const text = data.toString();
      stdout += text;
      lastOutputTime = Date.now();
      resetIdleTimeout();
      // Always echo in real time to help debugging.
      process.stdout.write(`[stdout] ${text}`);
    });

    child.stderr?.on("data", (data) => {
      const text = data.toString();
      stderr += text;
      lastOutputTime = Date.now();
      resetIdleTimeout();
      process.stderr.write(`[stderr] ${text}`);
    });

    child.on("exit", (code, signal) => {
      const elapsed = Date.now() - startTime;
      console.log(
        `[execAsync] Process exited with code: ${code} after ${(elapsed / 1000).toFixed(1)}s`
      );
      // The stdio pipes may still hold unread data at "exit"; prefer "close".
      // Fall back after a short grace period in case a grandchild process
      // inherited the pipes and keeps them open.
      flushTimer = setTimeout(() => settle(code, signal), 1000);
    });

    child.on("close", (code, signal) => settle(code, signal));

    child.on("error", (error) => {
      console.log(`[execAsync] Process error: ${error.message}`);
      stderr += error.message;
      resolveOnce({ stdout, stderr, exitCode: 1 });
    });

    console.log(`[execAsync] Process spawned with PID: ${child.pid}`);

    // Some programs block waiting for stdin; close it so they see EOF.
    if (child.stdin) {
      console.log(`[execAsync] Closing stdin to prevent process from waiting for input...`);
      child.stdin.end();
    }
  });
}
/** File-name suffixes that mark a file as a test/spec file. */
const TEST_FILE_SUFFIXES = [
  ".test.tsx",
  ".test.ts",
  ".spec.tsx",
  ".spec.ts",
  ".test.jsx",
  ".test.js",
  ".spec.jsx",
  ".spec.js",
];

/** Directory names that are treated as test directories. */
const TEST_DIR_NAMES = new Set(["__tests__", "test", "tests"]);

/** ESLint configuration file names that are withheld from / restored to the workspace. */
const ESLINT_CONFIG_NAMES = new Set([
  ".eslintrc.json",
  ".eslintrc.js",
  ".eslintrc.cjs",
  ".eslintrc.yml",
  ".eslintrc.yaml",
  "eslint.config.js",
  "eslint.config.mjs",
  "eslint.config.cjs",
]);

/** True when `name` ends with one of the test/spec suffixes. */
function isTestFileName(name: string): boolean {
  return TEST_FILE_SUFFIXES.some((suffix) => name.endsWith(suffix));
}

/**
 * Recursively copies `src` into `dest`, always skipping `node_modules`.
 *
 * @param src Directory to copy from.
 * @param dest Directory to copy into (created if missing).
 * @param excludeTestFiles When true, test/spec files, test directories and
 *   ESLint config files are left out at every level.
 */
async function copyDirectory(
  src: string,
  dest: string,
  excludeTestFiles: boolean = false
): Promise<void> {
  await fs.mkdir(dest, { recursive: true });
  const entries = await fs.readdir(src, { withFileTypes: true });

  for (const entry of entries) {
    if (entry.name === "node_modules") {
      continue;
    }

    if (excludeTestFiles) {
      const withheld =
        isTestFileName(entry.name) ||
        (entry.isDirectory() && TEST_DIR_NAMES.has(entry.name)) ||
        ESLINT_CONFIG_NAMES.has(entry.name);
      if (withheld) {
        continue;
      }
    }

    const srcPath = path.join(src, entry.name);
    const destPath = path.join(dest, entry.name);

    if (entry.isDirectory()) {
      await copyDirectory(srcPath, destPath, excludeTestFiles);
    } else {
      await fs.copyFile(srcPath, destPath);
    }
  }
}

/**
 * Copies only what `copyDirectory(..., true)` withheld (test/spec files, test
 * directories and ESLint configs) from `inputDir` into the matching places
 * under `outputDir`. `node_modules` is skipped.
 *
 * Best effort per entry: a failure is logged and the walk continues. The
 * destination directory is created on demand, so a test file is still
 * restored when its directory no longer exists in the output tree.
 */
async function copyTestFilesBack(inputDir: string, outputDir: string): Promise<void> {
  const entries = await fs.readdir(inputDir, { withFileTypes: true });

  for (const entry of entries) {
    if (entry.name === "node_modules") {
      continue;
    }

    const srcPath = path.join(inputDir, entry.name);
    const destPath = path.join(outputDir, entry.name);

    try {
      if (isTestFileName(entry.name) || ESLINT_CONFIG_NAMES.has(entry.name)) {
        // Only create the directory when there is actually something to put in it.
        await fs.mkdir(outputDir, { recursive: true });
        await fs.copyFile(srcPath, destPath);
      } else if (entry.isDirectory()) {
        if (TEST_DIR_NAMES.has(entry.name)) {
          // Whole test directory: copy everything, nothing excluded.
          await copyDirectory(srcPath, destPath, false);
        } else {
          // Ordinary directory: look for test files further down.
          await copyTestFilesBack(srcPath, destPath);
        }
      }
    } catch (error) {
      console.warn(
        `[copyTestFilesBack] Skipped ${srcPath}: ${
          error instanceof Error ? error.message : String(error)
        }`
      );
    }
  }
}
/**
 * Runs `pnpm install --prefer-offline` in `workspaceDir` when it contains a
 * package.json.
 *
 * @param workspaceDir Directory to install into.
 * @param verbose When true, prints progress lines.
 * @returns `success: true` when there is nothing to install or pnpm exited
 *   with code 0; otherwise `success: false` with stderr (or stdout when stderr
 *   is empty) as `output`. Never throws: unexpected errors are returned as a
 *   failed result carrying the error message.
 */
async function installWorkspaceDependencies(
  workspaceDir: string,
  verbose: boolean = false
): Promise<{ success: boolean; output: string }> {
  try {
    const packageJsonPath = path.join(workspaceDir, "package.json");

    let hasPackageJson = true;
    try {
      await fs.stat(packageJsonPath);
    } catch {
      hasPackageJson = false;
    }

    if (!hasPackageJson) {
      return { success: true, output: "No package.json found, skipping install" };
    }

    if (verbose) {
      console.log("  📦 Running pnpm install...");
    }

    const { stdout, stderr, exitCode } = await execAsync("pnpm install --prefer-offline", {
      cwd: workspaceDir,
      timeout: 600000, // 10 minute timeout
      idleTimeout: 60000, // 60 second idle timeout for downloads
    });

    if (exitCode !== 0) {
      return { success: false, output: stderr || stdout };
    }

    if (verbose) {
      console.log("  ✓ Dependencies installed");
    }
    return { success: true, output: stdout };
  } catch (error) {
    return {
      success: false,
      output: error instanceof Error ? error.message : String(error),
    };
  }
}
/** Writes `result` as pretty-printed JSON. Failures are logged, never thrown. */
async function writeEvalResultFile(outputFile: string, result: MistralAgentResult): Promise<void> {
  try {
    await fs.writeFile(outputFile, JSON.stringify(result, null, 2), "utf-8");
    console.log(`\n📝 Results written to: ${outputFile}`);
  } catch (error) {
    console.error(
      `⚠️ Failed to write results to file: ${
        error instanceof Error ? error.message : String(error)
      }`
    );
  }
}

/** Removes the temporary directory; returns false (after logging) when removal fails. */
async function removeEvalTempDir(dir: string): Promise<boolean> {
  try {
    await fs.rm(dir, { recursive: true, force: true });
    return true;
  } catch (cleanupError) {
    console.error(`⚠️ Failed to cleanup: ${cleanupError}`);
    return false;
  }
}

/** Prints the debug-mode banner that tells the user where the workspace was kept. */
function logPreservedWorkspace(workspaceDir: string, suffix: string): void {
  console.log(`\n${"=".repeat(80)}`);
  console.log(`🐛 DEBUG MODE: Workspace preserved at${suffix}:`);
  console.log(`   ${workspaceDir}`);
  console.log(`${"=".repeat(80)}\n`);
}

/** Wraps `value` in single quotes for a POSIX shell, escaping embedded single quotes. */
function quoteForPosixShell(value: string): string {
  return `'${value.replace(/'/g, "'\\''")}'`;
}

/**
 * Runs one eval end to end:
 *  1. copies `evals/<evalPath>/input` (without tests and ESLint configs) into
 *     a fresh temporary workspace and installs dependencies,
 *  2. runs the `vibe` agent on `evals/<evalPath>/prompt.md` inside it,
 *  3. restores the withheld test files and runs build, lint and tests,
 *  4. optionally saves the workspace and/or writes the result as JSON.
 *
 * Never throws: any error is converted into a failed `MistralAgentResult`.
 * The temporary directory is removed on success and on failure unless
 * `options.debug` is set.
 *
 * @param evalPath Eval directory name, relative to `<cwd>/evals`.
 * @param options Run options (API key, timeout, debug, output settings).
 */
export async function runMistralAgentEval(
  evalPath: string,
  options: MistralAgentOptions
): Promise<MistralAgentResult> {
  const startTime = Date.now();
  const verbose = options.verbose || false;

  // Declared outside `try` so the error path can see them. Block-scoped
  // declarations inside `try` are not visible in `catch`, which would leave
  // the temp directory behind on every failure.
  let tempDir: string | undefined;
  let workspaceDir: string | undefined;

  try {
    console.log(`[1/9] Setting up paths...`);
    const evalsDir = path.join(process.cwd(), "evals");
    const evalDir = path.join(evalsDir, evalPath);

    console.log(`[2/9] Verifying eval exists at: ${evalDir}`);
    const evalExists = await fs
      .stat(evalDir)
      .then(() => true)
      .catch(() => false);

    if (!evalExists) {
      throw new Error(`Eval directory not found: ${evalPath}`);
    }

    console.log(`[3/9] Reading prompt...`);
    const promptPath = path.join(evalDir, "prompt.md");
    const prompt = await fs.readFile(promptPath, "utf-8");

    console.log(`[4/9] Creating temporary workspace...`);
    // mkdtemp appends a random suffix, so two runs started in the same
    // millisecond can never share a directory.
    const tempRoot = await fs.mkdtemp(path.join(tmpdir(), "mistral-agent-eval-"));
    tempDir = tempRoot;
    const workspace = path.join(tempRoot, "workspace");
    workspaceDir = workspace;
    await fs.mkdir(workspace, { recursive: true });

    console.log(`[5/9] Copying input files to workspace...`);
    // Exclude test files so the agent doesn't see them.
    const inputDir = path.join(evalDir, "input");
    await copyDirectory(inputDir, workspace, true);

    if (verbose) {
      console.log(`📁 Workspace created at: ${workspace}`);
      console.log(`📝 Prompt: ${prompt.slice(0, 200)}...`);
    }

    console.log(`[6/9] Installing dependencies in workspace...`);
    const installResult = await installWorkspaceDependencies(workspace, verbose);
    if (!installResult.success) {
      // Not fatal: the run continues and build/test will surface real problems.
      console.warn(`⚠️ Warning: Dependency installation failed: ${installResult.output}`);
    }

    console.log(`[7/9] Building vibe command...`);
    const vibeCommand = buildVibeCommand(workspace, prompt, options);

    console.log(`🔧 Running command: ${vibeCommand}`);

    console.log(`[8/9] Executing vibe (timeout: ${options.timeout}ms)...`);
    const { stdout, stderr, exitCode } = await execAsync(vibeCommand, {
      cwd: workspace,
      timeout: options.timeout,
      env: {
        MISTRAL_API_KEY: options.apiKey,
      },
      verbose,
      idleTimeout: 90000, // Kill process if no output for 90 seconds
    });

    console.log(`✅ Mistral agent execution completed (exit code: ${exitCode})`);

    let streamData: any[] = [];
    if (options.outputFormat === "stream-json") {
      streamData = parseStreamJson(stdout);
    }

    if (verbose) {
      console.log(`📤 Mistral Agent output (${stdout.length} chars)`);
      if (stderr) {
        console.log(`⚠️ Stderr: ${stderr}`);
      }
    }

    console.log(`[8.5/9] Copying test files and eslint config back for validation...`);
    await copyTestFilesBack(inputDir, workspace);

    console.log(`[9/9] Running validation commands...`);
    console.log(`  → Running build...`);
    const buildResult = await runBuildCommand(workspace, verbose);
    console.log(`  → Build: ${buildResult.success ? "✅" : "❌"}`);

    console.log(`  → Running lint...`);
    const lintResult = await runLintCommand(workspace, verbose);
    console.log(`  → Lint: ${lintResult.success ? "✅" : "❌"}`);

    console.log(`  → Running tests...`);
    const testResult = await runTestCommand(workspace, verbose);
    console.log(`  → Tests: ${testResult.success ? "✅" : "❌"}`);

    if (options.saveOutput) {
      const modelName = options.modelName || "mistral-large-latest";
      const outputDryDir = path.join(
        evalDir,
        `output-dry-${modelName.replace(/\s+/g, "-").toLowerCase()}`
      );

      console.log(`📦 Saving output to: ${outputDryDir}`);

      try {
        // Start from a clean folder so stale files from an earlier run don't linger.
        await fs.rm(outputDryDir, { recursive: true, force: true });
        await copyDirectory(workspace, outputDryDir, false);
        console.log(`✅ Output saved successfully`);
      } catch (error) {
        console.error(
          `⚠️ Failed to save output: ${
            error instanceof Error ? error.message : String(error)
          }`
        );
      }
    }

    if (!options.debug) {
      console.log(`🧹 Cleaning up workspace...`);
      // Best effort: a cleanup failure must not turn a finished eval into an error.
      await removeEvalTempDir(tempRoot);
    } else {
      logPreservedWorkspace(workspace, "");
    }

    const duration = Date.now() - startTime;
    console.log(`⏱️ Total duration: ${duration}ms`);

    const result: MistralAgentResult = {
      success: exitCode === 0,
      output: stdout,
      error: exitCode !== 0 ? stderr || "Mistral Agent execution failed" : undefined,
      duration,
      buildSuccess: buildResult.success,
      lintSuccess: lintResult.success,
      testSuccess: testResult.success,
      buildOutput: buildResult.output,
      lintOutput: lintResult.output,
      testOutput: testResult.output,
      streamData: streamData.length > 0 ? streamData : undefined,
      evalPath,
      timestamp: new Date().toISOString(),
    };

    if (options.outputFile) {
      await writeEvalResultFile(options.outputFile, result);
    }

    return result;
  } catch (error) {
    const duration = Date.now() - startTime;

    // tempDir / workspaceDir are still undefined when the failure happened
    // before the workspace was created.
    if (tempDir && !options.debug) {
      if (await removeEvalTempDir(tempDir)) {
        console.log(`🧹 Cleaned up workspace after error`);
      }
    } else if (workspaceDir && options.debug) {
      logPreservedWorkspace(workspaceDir, " (error occurred)");
    }

    const result: MistralAgentResult = {
      success: false,
      output: "",
      error: error instanceof Error ? error.message : String(error),
      duration,
      buildSuccess: false,
      lintSuccess: false,
      testSuccess: false,
      evalPath,
      timestamp: new Date().toISOString(),
    };

    if (options.outputFile) {
      await writeEvalResultFile(options.outputFile, result);
    }

    return result;
  }
}

/**
 * Builds the shell command line that starts `vibe` in programmatic mode.
 *
 * @param workspaceDir Unused here; the caller runs the command with this
 *   directory as the working directory.
 * @param prompt Eval prompt. A fixed instruction block is appended and the
 *   whole text is single-quoted for the shell.
 * @param options `force` adds `--auto-approve`; `outputFormat` adds
 *   `--output <format>` with "stream-json" mapped to vibe's "streaming".
 * @returns The command as a single string, for use with a POSIX shell.
 */
function buildVibeCommand(
  workspaceDir: string,
  prompt: string,
  options: MistralAgentOptions
): string {
  const args = ["vibe"];

  // Tell the agent not to run package-manager / build / test commands itself.
  const enhancedPrompt = `${prompt}

IMPORTANT: Do not run npm, pnpm, yarn, or any package manager commands. Dependencies have already been installed. Do not run build, test, or dev server commands. Just write the code files. DO Not ask any followup questions either.`;

  // -p selects programmatic mode.
  args.push("-p", quoteForPosixShell(enhancedPrompt));

  if (options.force) {
    args.push("--auto-approve");
  }

  // vibe supports: text, json, streaming
  if (options.outputFormat) {
    const format = options.outputFormat === "stream-json" ? "streaming" : options.outputFormat;
    // Plain words are passed through unchanged; anything else is quoted so a
    // user-supplied value can neither break nor extend the command line.
    args.push("--output", /^[\w.-]+$/.test(format) ? format : quoteForPosixShell(format));
  }

  return args.join(" ");
}

/**
 * Parses newline-delimited JSON.
 *
 * @param output Raw text, one JSON value per line.
 * @returns The parsed values in order. Blank lines and lines that are not
 *   valid JSON are skipped.
 */
function parseStreamJson(output: string): any[] {
  const streamData: any[] = [];

  for (const line of output.split("\n")) {
    if (!line.trim()) {
      continue;
    }
    try {
      streamData.push(JSON.parse(line));
    } catch {
      // Skip non-JSON lines
    }
  }

  return streamData;
}
/** Describes one validation step executed inside the workspace. */
interface WorkspaceValidationStep {
  /** Key looked up under `scripts` in package.json; the step is skipped when absent. */
  script: "build" | "lint" | "test";
  /** Word used in the "No package.json found, skipping ..." message. */
  skipLabel: string;
  /** Progress line printed in verbose mode right before the command starts. */
  startMessage: string;
  /** Executable name under `node_modules/.bin`. */
  binary: string;
  /** Arguments appended after the quoted executable path. */
  args: string;
  /** Hard timeout for the command in milliseconds. */
  timeout: number;
}

/**
 * Shared implementation of the build / lint / test steps.
 *
 * The step counts as successful without running anything when the workspace
 * has no package.json or package.json has no matching script. Otherwise the
 * workspace-local binary is executed and success means exit code 0. On
 * failure `output` is stderr, or stdout when stderr is empty. Never throws:
 * unexpected errors (e.g. malformed package.json) become a failed result.
 */
async function runWorkspaceValidationStep(
  workspaceDir: string,
  verbose: boolean,
  step: WorkspaceValidationStep
): Promise<{ success: boolean; output: string }> {
  try {
    const packageJsonPath = path.join(workspaceDir, "package.json");
    const hasPackageJson = await fs
      .stat(packageJsonPath)
      .then(() => true)
      .catch(() => false);

    if (!hasPackageJson) {
      const message = `No package.json found, skipping ${step.skipLabel}`;
      if (verbose) console.log(`  ${message}`);
      return { success: true, output: message };
    }

    const packageJson: { scripts?: Record<string, unknown> } = JSON.parse(
      await fs.readFile(packageJsonPath, "utf-8")
    );

    if (!packageJson.scripts?.[step.script]) {
      const message = `No ${step.script} script found in package.json`;
      if (verbose) console.log(`  ${message}`);
      return { success: true, output: message };
    }

    if (verbose) {
      console.log(step.startMessage);
    }

    // Use the workspace's own node_modules binary, not a global one.
    const binPath = path.join(workspaceDir, "node_modules", ".bin", step.binary);

    const { stdout, stderr, exitCode } = await execAsync(`"${binPath}" ${step.args}`, {
      cwd: workspaceDir,
      timeout: step.timeout,
      verbose,
      idleTimeout: 30000, // 30 second idle timeout
    });

    return {
      success: exitCode === 0,
      output: exitCode === 0 ? stdout : stderr || stdout,
    };
  } catch (error) {
    return {
      success: false,
      output: error instanceof Error ? error.message : String(error),
    };
  }
}

/**
 * Runs `next build` from the workspace's node_modules (2 minute timeout) when
 * package.json declares a `build` script.
 */
async function runBuildCommand(
  workspaceDir: string,
  verbose: boolean
): Promise<{ success: boolean; output: string }> {
  return runWorkspaceValidationStep(workspaceDir, verbose, {
    script: "build",
    skipLabel: "build",
    startMessage: "  🔨 Running build command...",
    binary: "next",
    args: "build",
    timeout: 120000,
  });
}

/**
 * Runs `next lint` from the workspace's node_modules (1 minute timeout) when
 * package.json declares a `lint` script.
 */
async function runLintCommand(
  workspaceDir: string,
  verbose: boolean
): Promise<{ success: boolean; output: string }> {
  return runWorkspaceValidationStep(workspaceDir, verbose, {
    script: "lint",
    skipLabel: "lint",
    startMessage: "  🔍 Running lint command...",
    binary: "next",
    args: "lint",
    timeout: 60000,
  });
}

/**
 * Runs `vitest run` from the workspace's node_modules (3 minute timeout) when
 * package.json declares a `test` script.
 */
async function runTestCommand(
  workspaceDir: string,
  verbose: boolean
): Promise<{ success: boolean; output: string }> {
  return runWorkspaceValidationStep(workspaceDir, verbose, {
    script: "test",
    skipLabel: "tests",
    startMessage: "  🧪 Running test command...",
    binary: "vitest",
    args: "run",
    timeout: 180000,
  });
}
/** Prints CLI usage, the option list and examples to stdout. */
function showHelp() {
  console.log(`
Mistral Agent Evals CLI (vibe)

Usage:
  vibe [options] [eval-path]

Options:
  -h, --help                  Show this help message
  -e, --eval PATH             Run a specific eval by path
  -a, --all                   Run all evals with Mistral Agent
  -v, --verbose               Show detailed logs during eval execution
      --debug                 Persist output folders for debugging (don't clean up)
  -t, --timeout MS            Timeout in milliseconds (default: 600000 = 10 minutes)
      --api-key KEY           Mistral API key (or use MISTRAL_API_KEY env var)
  -f, --force                 Auto-approve all tool executions (default: enabled)
  -o, --output-format FORMAT  Output format: text, json, or streaming (default: text)
      --output-file FILE      Write results to JSON file
  -s, --save-output           Save output to evals/{eval-name}/output-dry-{model} folder
  -m, --model-name NAME       Model name for output folder (default: mistral-large-latest)

Examples:
  # Run a specific eval
  bun vibe --eval 001-server-component

  # Run eval by positional argument
  bun vibe 001-server-component

  # Run with verbose output and custom timeout
  bun vibe --eval 001-server-component --verbose --timeout 600000

  # Run all evals
  bun vibe --all

  # Run with file modifications enabled
  bun vibe --eval 001-server-component --force

  # Debug mode - keep output folders for inspection
  bun vibe --eval 001-server-component --debug

  # Write results to JSON file
  bun vibe --eval 001-server-component --output-file results-mistral.json

  # Save output to output-dry folder for comparison
  bun vibe --eval 001-server-component --save-output
`);
}

/**
 * Lists the runnable evals under `<cwd>/evals`.
 *
 * An entry qualifies when it is a directory whose name starts with a digit
 * and which contains both an `input/` directory and a `prompt.md` file.
 *
 * @returns Qualifying directory names, sorted.
 */
async function getAllEvals(): Promise<string[]> {
  const evalsDir = path.join(process.cwd(), "evals");
  const entries = await fs.readdir(evalsDir, { withFileTypes: true });

  const evals: string[] = [];

  for (const entry of entries) {
    if (!entry.isDirectory() || !/^\d+/.test(entry.name)) {
      continue;
    }

    const evalPath = path.join(evalsDir, entry.name);
    const hasInput = await fs
      .stat(path.join(evalPath, "input"))
      .then((s) => s.isDirectory())
      .catch(() => false);
    const hasPrompt = await fs
      .stat(path.join(evalPath, "prompt.md"))
      .then((s) => s.isFile())
      .catch(() => false);

    if (hasInput && hasPrompt) {
      evals.push(entry.name);
    }
  }

  return evals.sort();
}

/**
 * Formats a millisecond duration for display.
 *
 * @returns Rounded whole milliseconds (`"250ms"`) below one second, otherwise
 *   seconds with one decimal (`"1.5s"`).
 */
function formatDuration(ms: number): string {
  if (ms < 1000) {
    return `${Math.round(ms)}ms`;
  }
  return `${(ms / 1000).toFixed(1)}s`;
}
/**
 * Overall verdict for one eval: the agent run AND build, lint and tests must
 * all have succeeded. This is the same rule the CLI uses for its exit code.
 */
function didEvalPass(result: MistralAgentResult): boolean {
  return Boolean(
    result.success && result.buildSuccess && result.lintSuccess && result.testSuccess
  );
}

/** Header and separator lines of the results table for the given name-column width. */
function renderResultsHeader(nameWidth: number): string[] {
  return [
    `| ${"Eval".padEnd(nameWidth)} | ${"Result".padEnd(10)} | Build | Lint  | Tests | ${"Duration".padEnd(8)} |`,
    `|${"-".repeat(nameWidth + 2)}|------------|-------|-------|-------|----------|`,
  ];
}

/** One table row for `evalPath`. */
function renderResultRow(evalPath: string, result: MistralAgentResult, nameWidth: number): string {
  // Padded to 4 characters: the glyphs usually render two columns wide, which
  // fills the 5-column cells of the header (terminal dependent).
  const mark = (ok: boolean | undefined) => (ok ? "✅" : "❌").padEnd(4);
  const status = didEvalPass(result) ? "✅ PASS" : "❌ FAIL";
  const duration = formatDuration(result.duration);

  return `| ${evalPath.padEnd(nameWidth)} | ${status.padEnd(10)} | ${mark(
    result.buildSuccess
  )} | ${mark(result.lintSuccess)} | ${mark(result.testSuccess)} | ${duration.padEnd(8)} |`;
}

/**
 * Prints the result of a single eval as a one-row table, followed by error
 * details (agent error and the last 1000 characters of each failing step's
 * output) when the eval did not pass.
 */
function displayResult(evalPath: string, result: MistralAgentResult) {
  console.log("\n📊 Mistral Agent Results:");
  console.log("═".repeat(80));

  const nameWidth = Math.max(25, evalPath.length);
  for (const line of renderResultsHeader(nameWidth)) {
    console.log(line);
  }
  console.log(renderResultRow(evalPath, result, nameWidth));

  console.log("═".repeat(80));

  if (!didEvalPass(result)) {
    console.log("\n❌ Error Details:");
    console.log("─".repeat(80));

    if (result.error) {
      console.log(`Mistral Agent Error: ${result.error}`);
    }

    if (!result.buildSuccess && result.buildOutput) {
      console.log(`Build Error:\n${result.buildOutput.slice(-1000)}`);
    }

    if (!result.lintSuccess && result.lintOutput) {
      console.log(`Lint Error:\n${result.lintOutput.slice(-1000)}`);
    }

    if (!result.testSuccess && result.testOutput) {
      console.log(`Test Error:\n${result.testOutput.slice(-1000)}`);
    }
  }

  console.log("═".repeat(80));
}

/**
 * Prints a summary table for several evals, the pass count, and for every
 * failed eval a short error summary (agent error plus the last 500 characters
 * of each failing step's output).
 */
function displayResultsTable(results: { evalPath: string; result: MistralAgentResult }[]) {
  const totalTests = results.length;
  console.log(`\n📊 Mistral Agent Results Summary (${totalTests} Tests):`);
  console.log("═".repeat(120));

  // Widen the name column for long eval names so rows stay aligned.
  const nameWidth = results.reduce((width, { evalPath }) => Math.max(width, evalPath.length), 25);
  for (const line of renderResultsHeader(nameWidth)) {
    console.log(line);
  }

  const failedEvals: Array<{
    evalPath: string;
    buildError?: string;
    lintError?: string;
    testError?: string;
    mistralError?: string;
  }> = [];

  let passedEvals = 0;

  for (const { evalPath, result } of results) {
    console.log(renderResultRow(evalPath, result, nameWidth));

    if (didEvalPass(result)) {
      passedEvals++;
      continue;
    }

    failedEvals.push({
      evalPath,
      mistralError: result.error || undefined,
      buildError:
        !result.buildSuccess && result.buildOutput ? result.buildOutput.slice(-500) : undefined,
      lintError:
        !result.lintSuccess && result.lintOutput ? result.lintOutput.slice(-500) : undefined,
      testError:
        !result.testSuccess && result.testOutput ? result.testOutput.slice(-500) : undefined,
    });
  }

  console.log("═".repeat(120));

  console.log(`\n📈 Summary: ${passedEvals}/${totalTests} evals passed`);

  if (failedEvals.length > 0) {
    console.log("\n❌ Error Summaries:");
    console.log("─".repeat(120));

    for (const failed of failedEvals) {
      console.log(`\n${failed.evalPath}:`);

      if (failed.mistralError) {
        console.log(`  Mistral Agent: ${failed.mistralError}`);
      }

      if (failed.buildError) {
        console.log(`  Build: ${failed.buildError}`);
      }

      if (failed.lintError) {
        console.log(`  Lint: ${failed.lintError}`);
      }

      if (failed.testError) {
        console.log(`  Tests: ${failed.testError}`);
      }
    }
  }
}
/**
 * CLI entry point.
 *
 * Modes:
 *  - `--help`: print usage and return.
 *  - `--all`: run every eval found by `getAllEvals`, print the summary table
 *    and optionally write all results to `--output-file`.
 *  - otherwise: run the eval named by `--eval` or the first positional.
 *
 * Exit status is 1 when the API key or eval name is missing, when `--timeout`
 * is not a non-negative integer, or when any eval that was run did not pass
 * (agent run, build, lint and tests all successful); otherwise 0.
 */
async function main() {
  if (values.help) {
    showHelp();
    return;
  }

  const apiKey = values["api-key"] || process.env.MISTRAL_API_KEY;
  if (!apiKey) {
    console.error("❌ Error: Mistral API key is required.");
    console.error("Set MISTRAL_API_KEY environment variable or use --api-key option.");
    process.exit(1);
  }

  // Explicit radix plus validation: a non-numeric value would otherwise reach
  // the process spawner as NaN.
  const timeout = values.timeout ? Number.parseInt(values.timeout, 10) : 600000; // 10 minutes default
  if (!Number.isFinite(timeout) || timeout < 0) {
    console.error(`❌ Error: Invalid --timeout value: ${values.timeout}`);
    process.exit(1);
  }

  const evalOptions = {
    verbose: values.verbose || false,
    debug: values.debug || false,
    timeout,
    apiKey,
    force: values.force ?? true, // Default to true for auto-approval
    outputFormat: values["output-format"] || "text",
    outputFile: values["output-file"],
    saveOutput: values["save-output"] || false,
    modelName: values["model-name"] || "mistral-large-latest",
  };

  // Single definition of "passed", shared by both modes.
  const isPassing = (result: MistralAgentResult): boolean =>
    Boolean(result.success && result.buildSuccess && result.lintSuccess && result.testSuccess);

  if (values.all) {
    const allEvals = await getAllEvals();
    console.log(`Running ${allEvals.length} evals with Mistral Agent...\n`);

    const results: { evalPath: string; result: MistralAgentResult }[] = [];

    // Don't pass outputFile to individual runs - all results are written at the end.
    const individualEvalOptions = { ...evalOptions, outputFile: undefined };

    for (const evalPath of allEvals) {
      try {
        console.log(`🚀 Running ${evalPath}...`);
        const result = await runMistralAgentEval(evalPath, individualEvalOptions);
        results.push({ evalPath, result });

        const status = isPassing(result) ? "✅ PASS" : "❌ FAIL";
        console.log(`${status} ${evalPath} (${formatDuration(result.duration)})`);
      } catch (error) {
        const errorResult: MistralAgentResult = {
          success: false,
          output: "",
          error: error instanceof Error ? error.message : String(error),
          duration: 0,
          buildSuccess: false,
          lintSuccess: false,
          testSuccess: false,
        };
        results.push({ evalPath, result: errorResult });
        console.log(`❌ FAIL ${evalPath} - ${errorResult.error}`);
      }
    }

    displayResultsTable(results);

    if (evalOptions.outputFile) {
      try {
        await fs.writeFile(evalOptions.outputFile, JSON.stringify(results, null, 2), "utf-8");
        console.log(`\n📝 All results written to: ${evalOptions.outputFile}`);
      } catch (error) {
        console.error(
          `⚠️ Failed to write results to file: ${
            error instanceof Error ? error.message : String(error)
          }`
        );
      }
    }

    // Mirror single-eval mode: a failing eval makes the process exit non-zero
    // so scripts and CI can detect it.
    if (results.some(({ result }) => !isPassing(result))) {
      process.exitCode = 1;
    }

    return;
  }

  const evalPath = values.eval || positionals[0];
  if (!evalPath) {
    console.error(
      "❌ Error: No eval specified. Use --eval PATH, provide a positional argument, or use --all"
    );
    console.log("\nAvailable evals:");
    const allEvals = await getAllEvals();
    allEvals.forEach((evalName) => console.log(`  ${evalName}`));
    process.exit(1);
  }

  console.log(`🚀 Running Mistral Agent eval: ${evalPath}`);

  try {
    const result = await runMistralAgentEval(evalPath, evalOptions);
    displayResult(evalPath, result);

    process.exit(isPassing(result) ? 0 : 1);
  } catch (error) {
    console.error(`❌ Error: ${error instanceof Error ? error.message : String(error)}`);
    process.exit(1);
  }
}

// Run only when executed directly, not when imported.
// @ts-ignore -- `import.meta.main` is provided by Bun (see the shebang) and is not in the standard ImportMeta type.
if (import.meta.main) {
  main().catch((error) => {
    console.error("Unexpected error:", error);
    process.exit(1);
  });
}