Skip to content

Commit 1682022

Browse files
author
Vivek Nair
authored
feat: Add in local evaluations alpha (#180)
1 parent e1084e8 commit 1682022

3 files changed

Lines changed: 191 additions & 2 deletions

File tree

scripts/create-custom-specs.ts

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,12 +19,15 @@ const coreSpec = yaml.load(
1919

2020
const v1Tag = "v1";
2121
const v2Tag = "v2";
22+
const v3Tag = "v3";
2223

23-
const coreTags = [v1Tag, v2Tag];
24+
const coreTags = [v1Tag, v2Tag, v3Tag];
2425

2526
const corePaths = Object.fromEntries(
2627
Object.entries(coreSpec.paths).filter(([, data]) =>
27-
Object.values(data).some((op) => op.tags && coreTags.some(tag => op.tags.includes(tag)))
28+
Object.values(data).some(
29+
(op) => op.tags && coreTags.some((tag) => op.tags.includes(tag))
30+
)
2831
)
2932
);
3033

spec.yaml

Lines changed: 183 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ tags:
1010
description: Endpoints for core API functionality (version 1)
1111
- name: v2
1212
description: Endpoints for core API functionality (version 2)
13+
- name: v3
14+
description: Endpoints for core API functionality (version 3)
1315
- name: Feedback
1416
description: Endpoints for capturing user feedback for runs
1517

@@ -279,6 +281,10 @@ paths:
279281
additionalProperties:
280282
$ref: "#/components/schemas/MetadataValueObject"
281283
nullable: true
284+
triggerRemoteEvals:
285+
type: boolean
286+
description: Optional flag to trigger remote evaluations
287+
default: true
282288
testRuns:
283289
type: array
284290
items:
@@ -303,6 +309,11 @@ paths:
303309
description: Use outputs.steps instead.
304310
items:
305311
$ref: "#/components/schemas/StepRun"
312+
evaluations:
313+
type: array
314+
items:
315+
$ref: "#/components/schemas/LocalEvaluation"
316+
description: Optional array of local evaluations
306317
required:
307318
- caseId
308319
- stepRuns
@@ -520,6 +531,10 @@ paths:
520531
additionalProperties:
521532
$ref: "#/components/schemas/MetadataValueObject"
522533
nullable: true
534+
triggerRemoteEvals:
535+
type: boolean
536+
description: Optional flag to trigger remote evaluations
537+
default: true
523538
testRuns:
524539
type: array
525540
items:
@@ -542,6 +557,11 @@ paths:
542557
type: object
543558
additionalProperties: true
544559
description: The returned outputs for the test case
560+
evaluations:
561+
type: array
562+
items:
563+
$ref: "#/components/schemas/LocalEvaluation"
564+
description: Optional array of local evaluations
545565
required:
546566
- caseId
547567
- inputs
@@ -1442,6 +1462,46 @@ paths:
14421462
"500":
14431463
description: Server error
14441464

1465+
/v3/evaluations:
1466+
get:
1467+
tags:
1468+
- v3
1469+
summary: Get evaluations
1470+
parameters:
1471+
- in: query
1472+
name: resultId
1473+
required: true
1474+
schema:
1475+
type: string
1476+
format: uuid
1477+
description: The ID of the result to get evaluations for
1478+
responses:
1479+
"200":
1480+
description: Evaluations retrieved successfully
1481+
content:
1482+
application/json:
1483+
schema:
1484+
type: object
1485+
properties:
1486+
data:
1487+
type: array
1488+
items:
1489+
$ref: "#/components/schemas/EvaluationV3"
1490+
application/json; charset=utf-8:
1491+
schema:
1492+
type: object
1493+
properties:
1494+
data:
1495+
type: array
1496+
items:
1497+
$ref: "#/components/schemas/EvaluationV3"
1498+
"400":
1499+
description: Bad request
1500+
"404":
1501+
description: Result not found
1502+
"500":
1503+
description: Server error
1504+
14451505
components:
14461506
securitySchemes:
14471507
bearerAuth:
@@ -2219,6 +2279,61 @@ components:
22192279
- pipelineId
22202280
- datasetId
22212281

2282+
LocalEvaluation:
2283+
type: object
2284+
properties:
2285+
name:
2286+
type: string
2287+
description: The name of the local evaluation
2288+
value:
2289+
type: number
2290+
description: The numeric value of the evaluation
2291+
label:
2292+
type: string
2293+
nullable: true
2294+
description: Optional label for the evaluation
2295+
debug:
2296+
$ref: "#/components/schemas/LocalEvaluationDebug"
2297+
nullable: true
2298+
required:
2299+
- name
2300+
- value
2301+
2302+
LocalEvaluationDebug:
2303+
type: object
2304+
properties:
2305+
resolvedPrompt:
2306+
type: string
2307+
description: The resolved prompt used for the evaluation
2308+
response:
2309+
type: string
2310+
description: The response received from the evaluation
2311+
finalClassification:
2312+
type: string
2313+
description: The final classification of the evaluation
2314+
processorLogs:
2315+
type: array
2316+
items:
2317+
type: array
2318+
items: {}
2319+
description: Processor logs
2320+
logs:
2321+
type: array
2322+
items:
2323+
type: array
2324+
items: {}
2325+
description: Evaluator logs
2326+
error:
2327+
type: object
2328+
properties:
2329+
message:
2330+
type: string
2331+
description: Error message
2332+
date:
2333+
type: string
2334+
format: date-time
2335+
description: Date and time of the error
2336+
22222337
CreateSingleTestCase:
22232338
type: object
22242339
properties:
@@ -2794,3 +2909,71 @@ components:
27942909
- evalValue
27952910
- note
27962911
- name
2912+
2913+
EvaluationV3:
2914+
type: object
2915+
properties:
2916+
id:
2917+
type: string
2918+
format: uuid
2919+
description: The ID of the evaluation
2920+
createdAt:
2921+
$ref: "#/components/schemas/UnixSeconds"
2922+
updatedAt:
2923+
$ref: "#/components/schemas/UnixSeconds"
2924+
isPending:
2925+
type: boolean
2926+
description: Indicates if the evaluation is pending
2927+
isFiltered:
2928+
type: boolean
2929+
description: Indicates if the evaluation is filtered
2930+
debug:
2931+
type: object
2932+
additionalProperties: true
2933+
nullable: true
2934+
description: Debug information for the evaluation
2935+
evaluatorId:
2936+
type: string
2937+
format: uuid
2938+
description: The ID of the evaluator
2939+
nullable: true
2940+
runId:
2941+
type: string
2942+
format: uuid
2943+
description: The ID of the run
2944+
comparisonRunId:
2945+
type: string
2946+
format: uuid
2947+
nullable: true
2948+
description: The ID of the comparison run, if applicable
2949+
name:
2950+
type: string
2951+
nullable: true
2952+
description: The name of the evaluation
2953+
evalLabel:
2954+
type: string
2955+
nullable: true
2956+
description: The label of the evaluation
2957+
evalValue:
2958+
type: number
2959+
nullable: true
2960+
description: The value of the evaluation
2961+
manualCreatedByEmail:
2962+
type: string
2963+
nullable: true
2964+
description: The email of the user who manually created the evaluation, if applicable
2965+
note:
2966+
type: string
2967+
description: Additional notes for the evaluation
2968+
required:
2969+
- id
2970+
- createdAt
2971+
- updatedAt
2972+
- isPending
2973+
- isFiltered
2974+
- evaluatorId
2975+
- runId
2976+
- evalLabel
2977+
- evalValue
2978+
- note
2979+
- name

templates/node/index.mustache

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,9 @@ export {
1919
V1TestResultPost200Response as TestResultPost200Response,
2020
V1TestResultPostRequest as TestResultPostRequest,
2121
V1TestResultPostRequestTestRunsInner as TestResultPostRequestTestRunsInner,
22+
LocalEvaluation,
23+
LocalEvaluationDebug,
24+
LocalEvaluationDebugError
2225
} from "./{{tsModelPackage}}";
2326
{{/withSeparateModelsAndApi}}
2427

0 commit comments

Comments
 (0)