Skip to content

Commit e6b5e75

Browse files
gundermanc authored and liamhelmer committed
feat(evals): add overall pass rate row to eval nightly summary table (google-gemini#20905)
1 parent 6d69d82 commit e6b5e75

File tree

1 file changed

+14
-7
lines changed

1 file changed

+14
-7
lines changed

scripts/aggregate_evals.js

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -155,40 +155,47 @@ function generateMarkdown(currentStatsByModel, history) {
155155

156156
const models = Object.keys(currentStatsByModel).sort();
157157

158-
for (const model of models) {
159-
const currentStats = currentStatsByModel[model];
160-
const totalStats = Object.values(currentStats).reduce(
158+
const getPassRate = (statsForModel) => {
159+
if (!statsForModel) return '-';
160+
const totalStats = Object.values(statsForModel).reduce(
161161
(acc, stats) => {
162162
acc.passed += stats.passed;
163163
acc.total += stats.total;
164164
return acc;
165165
},
166166
{ passed: 0, total: 0 },
167167
);
168+
return totalStats.total > 0
169+
? ((totalStats.passed / totalStats.total) * 100).toFixed(1) + '%'
170+
: '-';
171+
};
168172

169-
const totalPassRate =
170-
totalStats.total > 0
171-
? ((totalStats.passed / totalStats.total) * 100).toFixed(1) + '%'
172-
: 'N/A';
173+
for (const model of models) {
174+
const currentStats = currentStatsByModel[model];
175+
const totalPassRate = getPassRate(currentStats);
173176

174177
console.log(`#### Model: ${model}`);
175178
console.log(`**Total Pass Rate: ${totalPassRate}**\n`);
176179

177180
// Header
178181
let header = '| Test Name |';
179182
let separator = '| :--- |';
183+
let passRateRow = '| **Overall Pass Rate** |';
180184

181185
for (const item of reversedHistory) {
182186
header += ` [${item.run.databaseId}](${item.run.url}) |`;
183187
separator += ' :---: |';
188+
passRateRow += ` **${getPassRate(item.stats[model])}** |`;
184189
}
185190

186191
// Add Current column last
187192
header += ' Current |';
188193
separator += ' :---: |';
194+
passRateRow += ` **${totalPassRate}** |`;
189195

190196
console.log(header);
191197
console.log(separator);
198+
console.log(passRateRow);
192199

193200
// Collect all test names for this model
194201
const allTestNames = new Set(Object.keys(currentStats));

0 commit comments

Comments
 (0)