# Patch summary (free text before the first "diff --git" header).
#
# scripts/create-custom-specs.ts
#   Adds v3Tag ("v3") and includes it in coreTags, so any path with at least
#   one operation tagged "v3" is kept in corePaths. The filter callback is
#   only re-wrapped; its logic is unchanged.
#
# spec.yaml
#   * Declares the "v3" tag.
#   * Adds boolean `triggerRemoteEvals` (default: true) in two places, each
#     immediately before a `testRuns` array property.
#   * Adds an `evaluations` array of LocalEvaluation to the two item schemas
#     whose required lists are (caseId, stepRuns) and (caseId, inputs).
#   * Adds GET /v3/evaluations: required query parameter `resultId` (uuid);
#     the 200 response is `{ data: EvaluationV3[] }` for both
#     "application/json" and "application/json; charset=utf-8"; 400/404/500
#     carry descriptions only.
#   * Adds component schemas LocalEvaluation, LocalEvaluationDebug and
#     EvaluationV3.
#
# templates/node/index.mustache
#   Re-exports LocalEvaluation, LocalEvaluationDebug and
#   LocalEvaluationDebugError from the models package.
#
# NOTE(review): this patch was rebuilt from a copy whose newlines and
#   indentation had been collapsed. Line breaks and blank lines were derived
#   from the hunk line counts; indentation depth, and the position of the
#   blank context line in the first spec.yaml hunk and in the mustache hunk,
#   are best-effort guesses. Verify against the target files before applying.
# NOTE(review): LocalEvaluation.debug places `nullable: true` next to `$ref`.
#   OpenAPI 3.0 tooling ignores siblings of `$ref`, so `debug` may not be
#   treated as nullable. Confirm generator output, or wrap the reference in
#   `allOf` in a follow-up change.
# NOTE(review): the context line "Use outputs.steps insteads." contains a
#   typo that already exists in spec.yaml. It is left untouched because
#   context lines must match the target file; fix it in a separate change.
#
diff --git a/scripts/create-custom-specs.ts b/scripts/create-custom-specs.ts
index 705e58f..694ffaa 100644
--- a/scripts/create-custom-specs.ts
+++ b/scripts/create-custom-specs.ts
@@ -19,12 +19,15 @@ const coreSpec = yaml.load(
 
 const v1Tag = "v1";
 const v2Tag = "v2";
+const v3Tag = "v3";
 
-const coreTags = [v1Tag, v2Tag];
+const coreTags = [v1Tag, v2Tag, v3Tag];
 
 const corePaths = Object.fromEntries(
   Object.entries(coreSpec.paths).filter(([, data]) =>
-    Object.values(data).some((op) => op.tags && coreTags.some(tag => op.tags.includes(tag)))
+    Object.values(data).some(
+      (op) => op.tags && coreTags.some((tag) => op.tags.includes(tag))
+    )
   )
 );
 
diff --git a/spec.yaml b/spec.yaml
index 69e8409..d78461e 100644
--- a/spec.yaml
+++ b/spec.yaml
@@ -10,6 +10,8 @@ tags:
     description: Endpoints for core API functionality (version 1)
   - name: v2
     description: Endpoints for core API functionality (version 2)
+  - name: v3
+    description: Endpoints for core API functionality (version 3)
 
   - name: Feedback
     description: Endpoints for capturing user feedback for runs
@@ -279,6 +281,10 @@ paths:
                   additionalProperties:
                     $ref: "#/components/schemas/MetadataValueObject"
                   nullable: true
+                triggerRemoteEvals:
+                  type: boolean
+                  description: Optional flag to trigger remote evaluations
+                  default: true
                 testRuns:
                   type: array
                   items:
@@ -303,6 +309,11 @@ paths:
                         description: Use outputs.steps insteads.
                         items:
                           $ref: "#/components/schemas/StepRun"
+                      evaluations:
+                        type: array
+                        items:
+                          $ref: "#/components/schemas/LocalEvaluation"
+                        description: Optional array of local evaluations
                     required:
                       - caseId
                       - stepRuns
@@ -520,6 +531,10 @@ paths:
                   additionalProperties:
                     $ref: "#/components/schemas/MetadataValueObject"
                   nullable: true
+                triggerRemoteEvals:
+                  type: boolean
+                  description: Optional flag to trigger remote evaluations
+                  default: true
                 testRuns:
                   type: array
                   items:
@@ -542,6 +557,11 @@ paths:
                         type: object
                         additionalProperties: true
                         description: The returned outputs for the test case
+                      evaluations:
+                        type: array
+                        items:
+                          $ref: "#/components/schemas/LocalEvaluation"
+                        description: Optional array of local evaluations
                     required:
                       - caseId
                       - inputs
@@ -1442,6 +1462,46 @@ paths:
         "500":
           description: Server error
 
+  /v3/evaluations:
+    get:
+      tags:
+        - v3
+      summary: Get evaluations
+      parameters:
+        - in: query
+          name: resultId
+          required: true
+          schema:
+            type: string
+            format: uuid
+          description: The ID of the result to get evaluations for
+      responses:
+        "200":
+          description: Evaluations retrieved successfully
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      $ref: "#/components/schemas/EvaluationV3"
+            application/json; charset=utf-8:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      $ref: "#/components/schemas/EvaluationV3"
+        "400":
+          description: Bad request
+        "404":
+          description: Result not found
+        "500":
+          description: Server error
+
 components:
   securitySchemes:
     bearerAuth:
@@ -2219,6 +2279,61 @@ components:
         - pipelineId
         - datasetId
 
+    LocalEvaluation:
+      type: object
+      properties:
+        name:
+          type: string
+          description: The name of the local evaluation
+        value:
+          type: number
+          description: The numeric value of the evaluation
+        label:
+          type: string
+          nullable: true
+          description: Optional label for the evaluation
+        debug:
+          $ref: "#/components/schemas/LocalEvaluationDebug"
+          nullable: true
+      required:
+        - name
+        - value
+
+    LocalEvaluationDebug:
+      type: object
+      properties:
+        resolvedPrompt:
+          type: string
+          description: The resolved prompt used for the evaluation
+        response:
+          type: string
+          description: The response received from the evaluation
+        finalClassification:
+          type: string
+          description: The final classification of the evaluation
+        processorLogs:
+          type: array
+          items:
+            type: array
+            items: {}
+          description: Processor logs
+        logs:
+          type: array
+          items:
+            type: array
+            items: {}
+          description: Evaluator logs
+        error:
+          type: object
+          properties:
+            message:
+              type: string
+              description: Error message
+            date:
+              type: string
+              format: date-time
+              description: Date and time of the error
+
     CreateSingleTestCase:
       type: object
       properties:
@@ -2794,3 +2909,71 @@ components:
         - evalValue
         - note
         - name
+
+    EvaluationV3:
+      type: object
+      properties:
+        id:
+          type: string
+          format: uuid
+          description: The ID of the evaluation
+        createdAt:
+          $ref: "#/components/schemas/UnixSeconds"
+        updatedAt:
+          $ref: "#/components/schemas/UnixSeconds"
+        isPending:
+          type: boolean
+          description: Indicates if the evaluation is pending
+        isFiltered:
+          type: boolean
+          description: Indicates if the evaluation is filtered
+        debug:
+          type: object
+          additionalProperties: true
+          nullable: true
+          description: Debug information for the evaluation
+        evaluatorId:
+          type: string
+          format: uuid
+          description: The ID of the evaluator
+          nullable: true
+        runId:
+          type: string
+          format: uuid
+          description: The ID of the run
+        comparisonRunId:
+          type: string
+          format: uuid
+          nullable: true
+          description: The ID of the comparison run, if applicable
+        name:
+          type: string
+          nullable: true
+          description: The name of the evaluation
+        evalLabel:
+          type: string
+          nullable: true
+          description: The label of the evaluation
+        evalValue:
+          type: number
+          nullable: true
+          description: The value of the evaluation
+        manualCreatedByEmail:
+          type: string
+          nullable: true
+          description: The email of the user who manually created the evaluation, if applicable
+        note:
+          type: string
+          description: Additional notes for the evaluation
+      required:
+        - id
+        - createdAt
+        - updatedAt
+        - isPending
+        - isFiltered
+        - evaluatorId
+        - runId
+        - evalLabel
+        - evalValue
+        - note
+        - name
diff --git a/templates/node/index.mustache b/templates/node/index.mustache
index 57540fc..b6a6a27 100644
--- a/templates/node/index.mustache
+++ b/templates/node/index.mustache
@@ -19,6 +19,9 @@ export {
   V1TestResultPost200Response as TestResultPost200Response,
   V1TestResultPostRequest as TestResultPostRequest,
   V1TestResultPostRequestTestRunsInner as TestResultPostRequestTestRunsInner,
+  LocalEvaluation,
+  LocalEvaluationDebug,
+  LocalEvaluationDebugError
 } from "./{{tsModelPackage}}";
 {{/withSeparateModelsAndApi}}
 