1010 description : Endpoints for core API functionality (version 1)
1111 - name : v2
1212 description : Endpoints for core API functionality (version 2)
13+ - name : v3
14+ description : Endpoints for core API functionality (version 3)
1315 - name : Feedback
1416 description : Endpoints for capturing user feedback for runs
1517
@@ -279,6 +281,10 @@ paths:
279281 additionalProperties :
280282 $ref : " #/components/schemas/MetadataValueObject"
281283 nullable : true
284+ triggerRemoteEvals :
285+ type : boolean
286+ description : Optional flag to trigger remote evaluations
287+ default : true
282288 testRuns :
283289 type : array
284290 items :
@@ -303,6 +309,11 @@ paths:
303309 description : Use outputs.steps instead.
304310 items :
305311 $ref : " #/components/schemas/StepRun"
312+ evaluations :
313+ type : array
314+ items :
315+ $ref : " #/components/schemas/LocalEvaluation"
316+ description : Optional array of local evaluations
306317 required :
307318 - caseId
308319 - stepRuns
@@ -520,6 +531,10 @@ paths:
520531 additionalProperties :
521532 $ref : " #/components/schemas/MetadataValueObject"
522533 nullable : true
534+ triggerRemoteEvals :
535+ type : boolean
536+ description : Optional flag to trigger remote evaluations
537+ default : true
523538 testRuns :
524539 type : array
525540 items :
@@ -542,6 +557,11 @@ paths:
542557 type : object
543558 additionalProperties : true
544559 description : The returned outputs for the test case
560+ evaluations :
561+ type : array
562+ items :
563+ $ref : " #/components/schemas/LocalEvaluation"
564+ description : Optional array of local evaluations
545565 required :
546566 - caseId
547567 - inputs
@@ -1442,6 +1462,46 @@ paths:
14421462 " 500 " :
14431463 description : Server error
14441464
1465+ /v3/evaluations :
1466+ get :
1467+ tags :
1468+ - v3
1469+ summary : Get evaluations
1470+ parameters :
1471+ - in : query
1472+ name : resultId
1473+ required : true
1474+ schema :
1475+ type : string
1476+ format : uuid
1477+ description : The ID of the result to get evaluations for
1478+ responses :
1479+ " 200 " :
1480+ description : Evaluations retrieved successfully
1481+ content :
1482+ application/json :
1483+ schema :
1484+ type : object
1485+ properties :
1486+ data :
1487+ type : array
1488+ items :
1489+ $ref : " #/components/schemas/EvaluationV3"
1490+ application/json; charset=utf-8 :
1491+ schema :
1492+ type : object
1493+ properties :
1494+ data :
1495+ type : array
1496+ items :
1497+ $ref : " #/components/schemas/EvaluationV3"
1498+ " 400 " :
1499+ description : Bad request
1500+ " 404 " :
1501+ description : Result not found
1502+ " 500 " :
1503+ description : Server error
1504+
14451505components :
14461506 securitySchemes :
14471507 bearerAuth :
@@ -2219,6 +2279,61 @@ components:
22192279 - pipelineId
22202280 - datasetId
22212281
2282+ LocalEvaluation :
2283+ type : object
2284+ properties :
2285+ name :
2286+ type : string
2287+ description : The name of the local evaluation
2288+ value :
2289+ type : number
2290+ description : The numeric value of the evaluation
2291+ label :
2292+ type : string
2293+ nullable : true
2294+ description : Optional label for the evaluation
2295+ debug :
2296+ $ref : " #/components/schemas/LocalEvaluationDebug"
2297+ nullable : true
2298+ required :
2299+ - name
2300+ - value
2301+
2302+ LocalEvaluationDebug :
2303+ type : object
2304+ properties :
2305+ resolvedPrompt :
2306+ type : string
2307+ description : The resolved prompt used for the evaluation
2308+ response :
2309+ type : string
2310+ description : The response received from the evaluation
2311+ finalClassification :
2312+ type : string
2313+ description : The final classification of the evaluation
2314+ processorLogs :
2315+ type : array
2316+ items :
2317+ type : array
2318+ items : {}
2319+ description : Processor logs
2320+ logs :
2321+ type : array
2322+ items :
2323+ type : array
2324+ items : {}
2325+ description : Evaluator logs
2326+ error :
2327+ type : object
2328+ properties :
2329+ message :
2330+ type : string
2331+ description : Error message
2332+ date :
2333+ type : string
2334+ format : date-time
2335+ description : Date and time of the error
2336+
22222337 CreateSingleTestCase :
22232338 type : object
22242339 properties :
@@ -2794,3 +2909,71 @@ components:
27942909 - evalValue
27952910 - note
27962911 - name
2912+
2913+ EvaluationV3 :
2914+ type : object
2915+ properties :
2916+ id :
2917+ type : string
2918+ format : uuid
2919+ description : The ID of the evaluation
2920+ createdAt :
2921+ $ref : " #/components/schemas/UnixSeconds"
2922+ updatedAt :
2923+ $ref : " #/components/schemas/UnixSeconds"
2924+ isPending :
2925+ type : boolean
2926+ description : Indicates if the evaluation is pending
2927+ isFiltered :
2928+ type : boolean
2929+ description : Indicates if the evaluation is filtered
2930+ debug :
2931+ type : object
2932+ additionalProperties : true
2933+ nullable : true
2934+ description : Debug information for the evaluation
2935+ evaluatorId :
2936+ type : string
2937+ format : uuid
2938+ description : The ID of the evaluator
2939+ nullable : true
2940+ runId :
2941+ type : string
2942+ format : uuid
2943+ description : The ID of the run
2944+ comparisonRunId :
2945+ type : string
2946+ format : uuid
2947+ nullable : true
2948+ description : The ID of the comparison run, if applicable
2949+ name :
2950+ type : string
2951+ nullable : true
2952+ description : The name of the evaluation
2953+ evalLabel :
2954+ type : string
2955+ nullable : true
2956+ description : The label of the evaluation
2957+ evalValue :
2958+ type : number
2959+ nullable : true
2960+ description : The value of the evaluation
2961+ manualCreatedByEmail :
2962+ type : string
2963+ nullable : true
2964+ description : The email of the user who manually created the evaluation, if applicable
2965+ note :
2966+ type : string
2967+ description : Additional notes for the evaluation
2968+ required :
2969+ - id
2970+ - createdAt
2971+ - updatedAt
2972+ - isPending
2973+ - isFiltered
2974+ - evaluatorId
2975+ - runId
2976+ - evalLabel
2977+ - evalValue
2978+ - note
2979+ - name
0 commit comments