Skip to content

Commit c5094ab

Browse files
authored
Merge branch 'main' into tru-upload-more-file-types
2 parents 6aabb41 + ea525df commit c5094ab

File tree

332 files changed

+6939
-3071
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

332 files changed

+6939
-3071
lines changed

.changeset/bright-radios-kiss.md

Lines changed: 0 additions & 6 deletions
This file was deleted.

.github/workflows/evals.yml

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Workflow that builds the evals runner image and runs the evals suite with Docker Compose.
name: Evals
2+
3+
on:
4+
pull_request:
5+
types: [labeled]
6+
workflow_dispatch:
7+
8+
env:
9+
DOCKER_BUILDKIT: 1
10+
COMPOSE_DOCKER_CLI_BUILD: 1
11+
12+
jobs:
13+
evals:
14+
# Run if triggered manually or if PR has 'evals' label.
# NOTE(review): contains() on a string performs a substring match, so any label whose
# name merely contains "evals" also triggers this job; compare with == 'evals' if an
# exact label match is intended.
15+
if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'evals')
16+
runs-on: blacksmith-16vcpu-ubuntu-2404
17+
timeout-minutes: 45
18+
19+
defaults:
20+
run:
21+
working-directory: packages/evals
22+
23+
steps:
24+
- name: Checkout repository
25+
uses: actions/checkout@v4
26+
27+
- name: Set up Docker Buildx
28+
uses: docker/setup-buildx-action@v3
29+
30+
# Writes .env.local and .env.development into the default working directory (packages/evals).
# The API key falls back to a placeholder value when the OPENROUTER_API_KEY secret is empty.
- name: Create environment
31+
run: |
32+
cat > .env.local << EOF
33+
OPENROUTER_API_KEY=${{ secrets.OPENROUTER_API_KEY || 'test-key-for-build' }}
34+
EOF
35+
36+
cat > .env.development << EOF
37+
NODE_ENV=development
38+
DATABASE_URL=postgresql://postgres:password@db:5432/evals_development
39+
REDIS_URL=redis://redis:6379
40+
HOST_EXECUTION_METHOD=docker
41+
EOF
42+
43+
# Builds the runner image from the repository root context; the image is not pushed
# (push: false) and is requested to be loaded locally (load: true), using the GitHub
# Actions cache for layers.
- name: Build image
44+
uses: docker/build-push-action@v5
45+
with:
46+
context: .
47+
file: packages/evals/Dockerfile.runner
48+
tags: evals-runner:latest
49+
cache-from: type=gha
50+
cache-to: type=gha,mode=max
51+
push: false
52+
load: true
53+
54+
- name: Tag image
55+
run: docker tag evals-runner:latest evals-runner
56+
57+
# Starts db and redis, waits up to 60 seconds for each to respond, then checks that the
# runner container can reach both services and can run `docker ps`.
- name: Start containers
58+
run: |
59+
docker compose up -d db redis
60+
timeout 60 bash -c 'until docker compose exec -T db pg_isready -U postgres; do sleep 2; done'
61+
timeout 60 bash -c 'until docker compose exec -T redis redis-cli ping | grep -q PONG; do sleep 2; done'
62+
docker compose run --rm runner sh -c 'nc -z db 5432 && echo "✓ Runner -> Database connection successful"'
63+
docker compose run --rm runner sh -c 'nc -z redis 6379 && echo "✓ Runner -> Redis connection successful"'
64+
docker compose run --rm runner docker ps
65+
66+
- name: Run database migrations
67+
run: docker compose run --rm runner pnpm --filter @roo-code/evals db:migrate
68+
69+
- name: Run evals
70+
run: docker compose run --rm runner pnpm --filter @roo-code/evals cli --ci
71+
72+
# Runs even when earlier steps fail (always()); tears down the compose project including
# volumes (-v) and orphaned containers.
- name: Cleanup
73+
if: always()
74+
run: docker compose down -v --remove-orphans

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,4 @@ logs
4343
.idea/
4444
.qodo/
4545
.vercel
46+
.roo/mcp.json

CHANGELOG.md

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,78 @@
11
# kilo-code
22

3+
## [v4.36.0]
4+
5+
- [#690](https://github.com/Kilo-Org/kilocode/pull/690) [`9b1451a`](https://github.com/Kilo-Org/kilocode/commit/9b1451a47bd2bc567646a4a0c2a12b42826ab9d1) Thanks [@kevinvandijk](https://github.com/kevinvandijk)! - Include changes from Roo Code v3.19.7:
6+
7+
- Fix McpHub sidebar focus behavior to prevent unwanted focus grabbing
8+
- Disable checkpoint functionality when nested git repositories are detected to prevent conflicts
9+
- Remove unused Storybook components and dependencies to reduce bundle size
10+
- Add data-testid ESLint rule for improved testing standards (thanks @elianiva!)
11+
- Update development dependencies including eslint, knip, @types/node, i18next, fast-xml-parser, and @google/genai
12+
- Improve CI infrastructure with GitHub Actions and Blacksmith runner migrations
13+
- Replace explicit caching with implicit caching to reduce latency for Gemini models
14+
- Clarify that the default concurrent file read limit is 15 files (thanks @olearycrew!)
15+
- Fix copy button logic (thanks @samhvw8!)
16+
- Fade buttons on history preview if no interaction in progress (thanks @sachasayan!)
17+
- Allow MCP server refreshing, fix state changes in MCP server management UI view (thanks @taylorwilsdon!)
18+
- Remove unnecessary npx usage in some npm scripts (thanks @user202729!)
19+
- Bug fix for trailing slash error when using LiteLLM provider (thanks @kcwhite!)
20+
- Fix Gemini 2.5 Pro Preview thinking budget bug
21+
- Add Gemini Pro 06-05 model support (thanks @daniel-lxs and @shariqriazz!)
22+
- Fix reading PDF, DOCX, and IPYNB files in read_file tool (thanks @samhvw8!)
23+
- Fix Mermaid CSP errors with enhanced bundling strategy (thanks @KJ7LNW!)
24+
- Improve model info detection for custom Bedrock ARNs (thanks @adamhill!)
25+
- Add OpenAI Compatible embedder for codebase indexing (thanks @SannidhyaSah!)
26+
- Fix multiple memory leaks in ChatView component (thanks @kiwina!)
27+
- Fix WorkspaceTracker resource leaks by disposing FileSystemWatcher (thanks @kiwina!)
28+
- Fix RooTips setTimeout cleanup to prevent state updates on unmounted components (thanks @kiwina!)
29+
- Fix FileSystemWatcher leak in RooIgnoreController (thanks @kiwina!)
30+
- Fix clipboard memory leak by clearing setTimeout in useCopyToClipboard (thanks @kiwina!)
31+
- Fix ClineProvider instance cleanup (thanks @xyOz-dev!)
32+
- Enforce codebase_search as primary tool for code understanding tasks (thanks @hannesrudolph!)
33+
- Improve Docker setup for evals
34+
- Move evals into pnpm workspace, switch from SQLite to Postgres
35+
- Refactor MCP to use getDefaultEnvironment for stdio client transport (thanks @samhvw8!)
36+
- Remove the "partial" component from names that refer to messages which are not necessarily partial (thanks @wkordalski!)
37+
- Improve feature request template (thanks @elianiva!)
38+
39+
- [#592](https://github.com/Kilo-Org/kilocode/pull/592) [`68c3d6e`](https://github.com/Kilo-Org/kilocode/commit/68c3d6e7a1250e08e2bd2b9cbbbd6b4312bad045) Thanks [@chrarnoldus](https://github.com/chrarnoldus)! - Workflow and rules configuration screen added
40+
41+
### Patch Changes
42+
43+
- [#697](https://github.com/Kilo-Org/kilocode/pull/697) [`9514f22`](https://github.com/Kilo-Org/kilocode/commit/9514f22a9d77b2d838ddcb97b5f2c5909aaea68a) Thanks [@kevinvandijk](https://github.com/kevinvandijk)! - Add correct path to walkthrough files to show walkthrough on first load (thanks for the report @adamhill!)
44+
45+
## [v4.35.1]
46+
47+
- [#695](https://github.com/Kilo-Org/kilocode/pull/695) [`a7910eb`](https://github.com/Kilo-Org/kilocode/commit/a7910eba54a4ede296bfa82beddae71a1d9f77c5) Thanks [@kevinvandijk](https://github.com/kevinvandijk)! - Fix: Feedback button overlaps new mode creation dialog
48+
49+
- [#693](https://github.com/Kilo-Org/kilocode/pull/693) [`2a9edf8`](https://github.com/Kilo-Org/kilocode/commit/2a9edf85ca2062d0b296430348ebac967f28febb) Thanks [@hassoncs](https://github.com/hassoncs)! - Temporarily remove .kilocode/rule loading for commit message generation until it works better
50+
51+
## [v4.35.0]
52+
53+
- [#633](https://github.com/Kilo-Org/kilocode/pull/633) [`347cf9e`](https://github.com/Kilo-Org/kilocode/commit/347cf9e6dc10d5b8706af5e111ccc854f7742566) Thanks [@hassoncs](https://github.com/hassoncs)! - # AI-Powered Git Commit Message Generation
54+
55+
Automatically generate meaningful Git commit messages using AI
56+
57+
## How It Works
58+
59+
1. Stage your changes in Git as usual
60+
2. Click the [KILO] square icon in the Source Control panel
61+
3. The AI analyzes your staged changes and generates an appropriate commit message
62+
4. The generated message is automatically populated in the commit input box
63+
64+
- [#638](https://github.com/Kilo-Org/kilocode/pull/638) [`3d2e749`](https://github.com/Kilo-Org/kilocode/commit/3d2e749d51797681c018bc390757fdabefd60620) Thanks [@tru-kilo](https://github.com/tru-kilo)! - Added ability to favorite tasks
65+
66+
## [v4.34.1]
67+
68+
### Patch Changes
69+
70+
- [#612](https://github.com/Kilo-Org/kilocode/pull/612) [`793cfdd`](https://github.com/Kilo-Org/kilocode/commit/793cfdd4fc1411c63c818e14b0b6ca8c5225a859) Thanks [@HadesArchitect](https://github.com/HadesArchitect)! - #611 Customer Support Visibility (Added links to contact customer support)
71+
72+
- [#672](https://github.com/Kilo-Org/kilocode/pull/672) [`c3d955c`](https://github.com/Kilo-Org/kilocode/commit/c3d955c2280258601d5f4b05101710e34d540075) Thanks [@chrarnoldus](https://github.com/chrarnoldus)! - Fixed response times for gemini-2.5-pro-preview being very slow (minutes instead of seconds)
73+
74+
- [#671](https://github.com/Kilo-Org/kilocode/pull/671) [`e0a3740`](https://github.com/Kilo-Org/kilocode/commit/e0a37406fe8102b1acd4f8e9005652e828a14e36) Thanks [@chrarnoldus](https://github.com/chrarnoldus)! - OpenRouter bring-your-own-key models now have much more accurate cost estimates.
75+
376
## [v4.34.0]
477

578
### Minor Changes

apps/web-evals/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"scripts": {
66
"lint": "next lint",
77
"check-types": "tsc -b",
8-
"dev": "scripts/check-services.sh && next dev --turbopack",
8+
"dev": "scripts/check-services.sh && next dev",
99
"format": "prettier --write src",
1010
"build": "next build",
1111
"start": "next start"
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"use server"
2+
3+
import * as path from "path"
4+
import { fileURLToPath } from "url"
5+
6+
import { exerciseLanguages, listDirectories } from "@roo-code/evals"
7+
8+
// Directory containing this module, derived from the module's own URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url)) // <repo>/apps/web-evals/src/actions
9+
10+
// Evals directory, resolved five levels above this module's directory; given the path
// noted above, that places it next to the repository root rather than inside it.
const EVALS_REPO_PATH = path.resolve(__dirname, "../../../../../evals")
11+
12+
/**
 * Lists the exercises for every language in `exerciseLanguages`.
 *
 * For each language, the directories found under `<EVALS_REPO_PATH>/<language>` are
 * obtained from `listDirectories`; the per-language lookups are started together and
 * awaited with `Promise.all`.
 *
 * @returns A flat array of `"<language>/<exercise>"` strings.
 *
 * NOTE(review): `languagePath` is already absolute (it is joined onto a `path.resolve`
 * result), yet `__dirname` is also passed as the first argument — confirm how
 * `listDirectories` combines its two arguments.
 */
export const getExercises = async () => {
13+
const result = await Promise.all(
14+
exerciseLanguages.map(async (language) => {
15+
const languagePath = path.join(EVALS_REPO_PATH, language)
16+
const exercises = await listDirectories(__dirname, languagePath)
17+
// Prefix each exercise with its language so entries stay unique across languages.
return exercises.map((exercise) => `${language}/${exercise}`)
18+
}),
19+
)
20+
21+
// `result` holds one array per language; flatten it into a single list.
return result.flat()
22+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"use server"
2+
3+
import { redisClient } from "@/lib/server/redis"
4+
5+
/**
 * Reads the value stored under the Redis key `heartbeat:<runId>`.
 *
 * @param runId - Numeric run identifier interpolated into the key.
 * @returns Whatever `redis.get` resolves to for that key.
 */
export const getHeartbeat = async (runId: number) => {
6+
const redis = await redisClient()
7+
return redis.get(`heartbeat:${runId}`)
8+
}

apps/web-evals/src/lib/server/runners.ts renamed to apps/web-evals/src/actions/runners.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"use server"
22

3-
import { redisClient } from "./redis"
3+
import { redisClient } from "@/lib/server/redis"
44

55
export const getRunners = async (runId: number) => {
66
const redis = await redisClient()

apps/web-evals/src/lib/server/runs.ts renamed to apps/web-evals/src/actions/runs.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
"use server"
22

3-
import { spawn } from "child_process"
3+
import * as path from "path"
44
import fs from "fs"
5+
import { fileURLToPath } from "url"
6+
import { spawn } from "child_process"
57

68
import { revalidatePath } from "next/cache"
79
import pMap from "p-map"
@@ -12,11 +14,12 @@ import {
1214
createRun as _createRun,
1315
deleteRun as _deleteRun,
1416
createTask,
17+
getExercisesForLanguage,
1518
} from "@roo-code/evals"
1619

1720
import { CreateRun } from "@/lib/schemas"
1821

19-
import { getExercisesForLanguage } from "./exercises"
22+
const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
2023

2124
// eslint-disable-next-line @typescript-eslint/no-unused-vars
2225
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
@@ -37,9 +40,9 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
3740
}
3841
} else {
3942
for (const language of exerciseLanguages) {
40-
const exercises = await getExercisesForLanguage(language)
43+
const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
4144

42-
await pMap(exercises, (exercise) => createTask({ ...values, runId: run.id, language, exercise }), {
45+
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
4346
concurrency: 10,
4447
})
4548
}
@@ -52,9 +55,10 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
5255

5356
const dockerArgs = [
5457
`--name evals-controller-${run.id}`,
55-
"--rm",
58+
// "--rm",
5659
"--network evals_default",
5760
"-v /var/run/docker.sock:/var/run/docker.sock",
61+
"-v /tmp/evals:/var/log/evals",
5862
"-e HOST_EXECUTION_METHOD=docker",
5963
]
6064

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { NextResponse } from "next/server"
2+
3+
/**
 * Health-check route handler.
 *
 * On success, responds with HTTP 200 and a JSON body containing `status: "healthy"`,
 * the current ISO timestamp, the process uptime, and `NODE_ENV` (reported as
 * "production" when the variable is unset or empty).
 *
 * If building that response throws, responds with HTTP 503 and a JSON body containing
 * `status: "unhealthy"`, the current ISO timestamp, and the error message
 * ("Unknown error" when the thrown value is not an `Error`).
 *
 * NOTE(review): nothing in the `try` body performs visible I/O, so the 503 branch
 * looks defensive only — confirm whether dependency checks were meant to go here.
 */
export async function GET() {
4+
try {
5+
return NextResponse.json(
6+
{
7+
status: "healthy",
8+
timestamp: new Date().toISOString(),
9+
uptime: process.uptime(),
10+
environment: process.env.NODE_ENV || "production",
11+
},
12+
{ status: 200 },
13+
)
14+
} catch (error) {
15+
return NextResponse.json(
16+
{
17+
status: "unhealthy",
18+
timestamp: new Date().toISOString(),
19+
error: error instanceof Error ? error.message : "Unknown error",
20+
},
21+
{ status: 503 },
22+
)
23+
}
24+
}

apps/web-evals/src/app/home.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { Ellipsis, Rocket } from "lucide-react"
77

88
import type { Run, TaskMetrics } from "@roo-code/evals"
99

10-
import { deleteRun } from "@/lib/server/runs"
10+
import { deleteRun } from "@/actions/runs"
1111
import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
1212
import {
1313
Button,

apps/web-evals/src/app/runs/[id]/connection-status.tsx

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments
 (0)