Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions experiments/claude-sonnet-4.6--web-docs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import fs from 'fs';
import path from 'path';
import type { ExperimentConfig } from '@vercel/agent-eval';

/**
 * Experiment definition: the `claude-code` agent on `claude-sonnet-4-6`,
 * with AGENTS.md steering the agent toward the online Next.js docs
 * (`https://nextjs.org/docs`) rather than the docs bundled in the package.
 */
const config: ExperimentConfig = {
  agent: 'claude-code',
  model: 'claude-sonnet-4-6',
  scripts: ['build'],
  runs: 4,
  earlyExit: true,
  timeout: 720,
  sandbox: 'vercel',

  /**
   * Prepares the sandbox before the agent starts. Steps run strictly in
   * sequence: install the pinned Next.js canary, write the agent instruction
   * files, then delete the docs bundled inside the installed `next` package.
   *
   * NOTE(review): the results of `runCommand` are not inspected here —
   * confirm whether it rejects on a non-zero exit code; if it does not, a
   * failed install would go unnoticed.
   */
  async setup(sandbox) {
    // Pin Next.js to a specific canary build.
    const pinnedNext = 'next@16.2.0-canary.41';
    await sandbox.runCommand('npm', ['install', pinnedNext]);

    // AGENTS.md payload: tells the agent to consult the web docs first.
    const agentRules = [
      '<!-- BEGIN:nextjs-agent-rules -->',
      '# This is NOT the Next.js you know',
      '',
      'This version has breaking changes — APIs, conventions, and file structure may all differ from your training data. Read the relevant guide at `https://nextjs.org/docs` before writing any code. Heed deprecation notices.',
      '<!-- END:nextjs-agent-rules -->',
      '',
    ].join('\n');

    // CLAUDE.md and GEMINI.md each contain only a reference to AGENTS.md.
    const agentsImport = '@AGENTS.md\n';
    await sandbox.writeFiles({
      'AGENTS.md': agentRules,
      'CLAUDE.md': agentsImport,
      'GEMINI.md': agentsImport,
    });

    // Delete the locally bundled docs so the agent cannot fall back to them.
    const bundledDocsDir = 'node_modules/next/dist/docs/';
    await sandbox.runCommand('rm', ['-rf', bundledDocsDir]);
  },
};

export default config;
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

RUN v3.2.4 /vercel/sandbox

✓ EVAL.ts (6 tests) 4ms

Test Files 1 passed (1)
Tests 6 passed (6)
Start at 19:46:14
Duration 187ms (transform 27ms, setup 0ms, collect 24ms, tests 4ms, environment 0ms, prepare 53ms)

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

> build
> next build

▲ Next.js 16.2.0-canary.41 (Turbopack)

Creating an optimized production build ...
✓ Compiled successfully in 2.6s
Running TypeScript ...
Finished TypeScript in 1629ms ...
Collecting page data using 1 worker ...
Generating static pages using 1 worker (0/4) ...
Generating static pages using 1 worker (1/4)
Generating static pages using 1 worker (2/4)
Generating static pages using 1 worker (3/4)
✓ Generating static pages using 1 worker (4/4) in 85ms
Finalizing page optimization ...

Route (app)
┌ ○ /
├ ○ /_not-found
└ ○ /about


○ (Static) prerendered as static content

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"status": "passed",
"duration": 180.233,
"model": "claude-sonnet-4-6",
"transcriptPath": "./transcript.json",
"transcriptRawPath": "./transcript-raw.jsonl",
"o11y": {
"totalTurns": 4,
"toolCalls": {
"file_read": 0,
"file_write": 3,
"file_edit": 0,
"shell": 3,
"web_fetch": 2,
"web_search": 0,
"glob": 0,
"grep": 0,
"list_dir": 0,
"agent_task": 1,
"unknown": 7
},
"totalToolCalls": 16,
"webFetches": [
{
"url": "https://nextjs.org/docs/app/building-your-application/routing",
"success": true
},
{
"url": "https://nextjs.org/docs/app/api-reference/functions/generate-metadata",
"success": true
}
],
"filesRead": [],
"filesModified": [
"/vercel/sandbox/app/layout.tsx",
"/vercel/sandbox/app/page.tsx",
"/vercel/sandbox/app/about/page.tsx"
],
"shellCommands": [
{
"command": "mkdir -p /vercel/sandbox/app/about",
"success": true
},
{
"command": "rm /vercel/sandbox/pages/_app.tsx /vercel/sandbox/pages/_document.tsx /vercel/sandbox/pages/index.tsx /vercel/sandbox/pages/about.tsx && rmdir /vercel/sandbox/pages",
"success": true
},
{
"command": "npm run build 2>&1",
"success": true
}
],
"errors": [],
"thinkingBlocks": 4
},
"outputPaths": {
"eval": "./outputs/eval.txt",
"scripts": {
"build": "./outputs/scripts/build.txt"
}
}
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"totalRuns": 1,
"passedRuns": 1,
"passRate": "100%",
"meanDuration": 180.233,
"fingerprint": "e18823443818c4cf9f1a711be974eed71e70fde042c279fe70aff46e7c4591ec"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

RUN v3.2.4 /vercel/sandbox

✓ EVAL.ts (5 tests) 3ms

Test Files 1 passed (1)
Tests 5 passed (5)
Start at 19:46:00
Duration 183ms (transform 25ms, setup 0ms, collect 19ms, tests 3ms, environment 0ms, prepare 54ms)

Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

> build
> next build

Attention: Next.js now collects completely anonymous telemetry regarding usage.
This information is used to shape Next.js' roadmap and prioritize features.
You can learn more, including how to opt-out if you'd not like to participate in this anonymous program, by visiting the following URL:
https://nextjs.org/telemetry

▲ Next.js 16.2.0-canary.41 (Turbopack)
- Cache Components enabled

Creating an optimized production build ...
✓ Compiled successfully in 2.8s
Running TypeScript ...

We detected TypeScript in your project and reconfigured your tsconfig.json file for you.
The following suggested values were added to your tsconfig.json. These values can be changed to fit your project's needs:

- include was updated to add '.next/dev/types/**/*.ts'

Finished TypeScript in 1829ms ...
Collecting page data using 1 worker ...
Generating static pages using 1 worker (0/3) ...
✓ Generating static pages using 1 worker (3/3) in 99ms
Finalizing page optimization ...

Route (app)
┌ ◐ /
└ ○ /_not-found


○ (Static) prerendered as static content
◐ (Partial Prerender) prerendered as static HTML with dynamic server-streamed content

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"status": "passed",
"duration": 151.653,
"model": "claude-sonnet-4-6",
"transcriptPath": "./transcript.json",
"transcriptRawPath": "./transcript-raw.jsonl",
"o11y": {
"totalTurns": 5,
"toolCalls": {
"file_read": 4,
"file_write": 0,
"file_edit": 1,
"shell": 0,
"web_fetch": 1,
"web_search": 0,
"glob": 0,
"grep": 0,
"list_dir": 0,
"agent_task": 1,
"unknown": 0
},
"totalToolCalls": 7,
"webFetches": [
{
"url": "https://nextjs.org/docs/app/api-reference/functions/fetch",
"success": true
}
],
"filesRead": [
"/vercel/sandbox/app/UserProfile.tsx",
"/vercel/sandbox/app/page.tsx",
"/vercel/sandbox/app/ProductList.tsx"
],
"filesModified": [
"/vercel/sandbox/app/UserProfile.tsx"
],
"shellCommands": [],
"errors": [],
"thinkingBlocks": 4
},
"outputPaths": {
"eval": "./outputs/eval.txt",
"scripts": {
"build": "./outputs/scripts/build.txt"
}
}
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"totalRuns": 1,
"passedRuns": 1,
"passRate": "100%",
"meanDuration": 151.653,
"fingerprint": "d437f6be80353d96fec47a3e81b97b78f46aea50b82ca5788d0b858c74aaed41"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

RUN v3.2.4 /vercel/sandbox

✓ EVAL.ts (6 tests) 4ms

Test Files 1 passed (1)
Tests 6 passed (6)
Start at 19:46:05
Duration 203ms (transform 37ms, setup 0ms, collect 26ms, tests 4ms, environment 0ms, prepare 63ms)

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

> build
> next build

Attention: Next.js now collects completely anonymous telemetry regarding usage.
This information is used to shape Next.js' roadmap and prioritize features.
You can learn more, including how to opt-out if you'd not like to participate in this anonymous program, by visiting the following URL:
https://nextjs.org/telemetry

▲ Next.js 16.2.0-canary.41 (Turbopack)

Creating an optimized production build ...
✓ Compiled successfully in 2.7s
Running TypeScript ...

We detected TypeScript in your project and reconfigured your tsconfig.json file for you.
The following suggested values were added to your tsconfig.json. These values can be changed to fit your project's needs:

- include was updated to add '.next/dev/types/**/*.ts'

Finished TypeScript in 1844ms ...
Collecting page data using 1 worker ...
Generating static pages using 1 worker (0/3) ...
✓ Generating static pages using 1 worker (3/3) in 71ms
Finalizing page optimization ...

Route (app)
┌ ○ /
└ ○ /_not-found


○ (Static) prerendered as static content

Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"status": "passed",
"duration": 154.044,
"model": "claude-sonnet-4-6",
"transcriptPath": "./transcript.json",
"transcriptRawPath": "./transcript-raw.jsonl",
"o11y": {
"totalTurns": 4,
"toolCalls": {
"file_read": 4,
"file_write": 1,
"file_edit": 0,
"shell": 0,
"web_fetch": 1,
"web_search": 0,
"glob": 1,
"grep": 0,
"list_dir": 0,
"agent_task": 0,
"unknown": 0
},
"totalToolCalls": 7,
"webFetches": [
{
"url": "https://nextjs.org/docs/app/building-your-application/data-fetching/server-actions-and-mutations",
"success": true
}
],
"filesRead": [
"/vercel/sandbox/package.json",
"/vercel/sandbox/app/ContactForm.tsx",
"/vercel/sandbox/app/page.tsx",
"/vercel/sandbox/next.config.ts"
],
"filesModified": [
"/vercel/sandbox/app/ContactForm.tsx"
],
"shellCommands": [],
"errors": [],
"thinkingBlocks": 4
},
"outputPaths": {
"eval": "./outputs/eval.txt",
"scripts": {
"build": "./outputs/scripts/build.txt"
}
}
}

Large diffs are not rendered by default.

Loading