|
253 | 253 | <td class="has-text-centered"><a href="https://files.sri.inf.ethz.ch/swt-bench/assertflip/">🔗</a></td> |
254 | 254 | </tr> |
255 | 255 | <tr data-mode="unittest"> |
256 | | - <td><a href="https://docs.all-hands.dev/">OpenHands</a> <small>GPT-4, CI setup</small></td> |
| 256 | + <td><a href="https://docs.all-hands.dev/">OpenHands</a> <small>Cl. Sonnet 3.5, CI setup</small></td> |
257 | 257 | <td class="has-text-centered"><a href="https://all-hands.dev/"><img alt="All Hands AI" title="All Hands AI" src="./static/images/logos/allhands.svg" class="org-icon"></a></td> |
258 | 258 | <td>28.3%</td> |
259 | 259 | <td>52.4%</td> |
260 | 260 | <td><time>2025-02-18</time></td> |
261 | 261 | <td class="has-text-centered"><a href="https://github.com/logic-star-ai/swt-bench?tab=readme-ov-file#evaluation-results">🔗</a></td> |
262 | 262 | </tr> |
263 | 263 | <tr> |
264 | | - <td><a href="https://docs.all-hands.dev/">OpenHands</a> <small>GPT-4, vanilla</small></td> |
| 264 | + <td><a href="https://docs.all-hands.dev/">OpenHands</a> <small>Cl. Sonnet 3.5, vanilla</small></td> |
265 | 265 | <td class="has-text-centered"><a href="https://all-hands.dev/"><img alt="All Hands AI" title="All Hands AI" src="./static/images/logos/allhands.svg" class="org-icon"></a></td> |
266 | 266 | <td>22.8%</td> |
267 | 267 | <td>43.6%</td> |
268 | 268 | <td><time>2025-02-18</time></td> |
269 | 269 | <td class="has-text-centered"><a href="https://github.com/logic-star-ai/swt-bench?tab=readme-ov-file#evaluation-results">🔗</a></td> |
270 | 270 | </tr> |
271 | 271 | <tr> |
272 | | - <td><a href="https://arxiv.org/abs/2406.12952">SWE-Agent+</a></td> |
| 272 | + <td><a href="https://arxiv.org/abs/2406.12952">SWE-Agent+</a> <small>GPT-4</small></td> |
273 | 273 | <td class="has-text-centered"><a href="https://logicstar.ai/"><img alt="LogicStar" title="LogicStar" src="./static/images/logos/logicstar.png" class="org-icon"></a></td> |
274 | 274 | <td>18.5%</td> |
275 | 275 | <td>27.6%</td> |
|
285 | 285 | <td class="has-text-centered"><a href="https://github.com/logic-star-ai/swt-bench?tab=readme-ov-file#evaluation-results">🔗</a></td> |
286 | 286 | </tr> |
287 | 287 | <tr> |
288 | | - <td><a href="https://swe-agent.com/latest/">SWE-Agent</a> <small>Claude 3.5 Sonnet</small></td> |
| 288 | + <td><a href="https://swe-agent.com/latest/">SWE-Agent</a> <small>Cl. 3.5 Sonnet</small></td> |
289 | 289 | <td class="has-text-centered"><a href="https://swe-agent.com/"><img alt="SWE-agent" title="SWE-agent" src="./static/images/logos/swe-agent.svg" class="org-icon"></a></td> |
290 | 290 | <td>12.3%</td> |
291 | 291 | <td>30.3%</td> |
|
341 | 341 | <td class="has-text-centered"><a href="https://github.com/logic-star-ai/swt-bench?tab=readme-ov-file#evaluation-results">🔗</a></td> |
342 | 342 | </tr> |
343 | 343 | <tr> |
344 | | - <td><a href="https://arxiv.org/abs/2209.11515">LIBRO</a></td> |
| 344 | + <td><a href="https://arxiv.org/abs/2209.11515">LIBRO</a> <small>GPT-4</small></td> |
345 | 345 | <td class="has-text-centered"><a href="https://github.com/coinse/libro"><img alt="KAIST" title="KAIST" src="./static/images/logos/KAIST.svg" class="org-icon"></a></td> |
346 | 346 | <td>14.1%</td> |
347 | 347 | <td>23.8%</td> |
|
437 | 437 | <td class="has-text-centered"><a href="https://files.sri.inf.ethz.ch/swt-bench/otter/">🔗</a></td> |
438 | 438 | </tr> |
439 | 439 | <tr> |
440 | | - <td><a href="https://docs.all-hands.dev/">OpenHands</a> <small>GPT-4o</small></td> |
| 440 | + <td><a href="https://docs.all-hands.dev/">OpenHands</a> <small>Cl. Sonnet 3.5</small></td> |
441 | 441 | <td class="has-text-centered"><a href="https://all-hands.dev/"><img alt="All Hands AI" title="All Hands AI" src="./static/images/logos/allhands.svg" class="org-icon"></a></td> |
442 | 442 | <td>27.7%</td> |
443 | 443 | <td>52.9%</td> |
|
0 commit comments