From 2a8ed1880f9eff7c38b778771f71f0f1860f0cdf Mon Sep 17 00:00:00 2001 From: Vivek Nair Date: Fri, 20 Sep 2024 11:18:55 -0400 Subject: [PATCH 1/5] fix: wip --- spec.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/spec.yaml b/spec.yaml index 69e8409..b812bfd 100644 --- a/spec.yaml +++ b/spec.yaml @@ -279,6 +279,10 @@ paths: additionalProperties: $ref: "#/components/schemas/MetadataValueObject" nullable: true + triggerRemoteEvals: + type: boolean + description: Optional flag to trigger remote evaluations + default: true testRuns: type: array items: @@ -520,6 +524,10 @@ paths: additionalProperties: $ref: "#/components/schemas/MetadataValueObject" nullable: true + triggerRemoteEvals: + type: boolean + description: Optional flag to trigger remote evaluations + default: true testRuns: type: array items: From 6171efaf6ed4d67d6d5498a6c2902fec3d624b79 Mon Sep 17 00:00:00 2001 From: Vivek Nair Date: Fri, 20 Sep 2024 11:54:42 -0400 Subject: [PATCH 2/5] fix: wip --- spec.yaml | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/spec.yaml b/spec.yaml index b812bfd..c2733eb 100644 --- a/spec.yaml +++ b/spec.yaml @@ -283,6 +283,11 @@ paths: type: boolean description: Optional flag to trigger remote evaluations default: true + evaluations: + type: array + items: + $ref: "#/components/schemas/LocalEvaluation" + description: Optional array of local evaluations testRuns: type: array items: @@ -528,6 +533,11 @@ paths: type: boolean description: Optional flag to trigger remote evaluations default: true + localEvaluations: + type: array + items: + $ref: "#/components/schemas/LocalEvaluation" + description: Optional array of local evaluations testRuns: type: array items: @@ -2227,6 +2237,61 @@ components: - pipelineId - datasetId + LocalEvaluation: + type: object + properties: + name: + type: string + description: The name of the local evaluation + value: + type: number + description: The numeric value of the evaluation + label: + type: string + nullable: true + description: Optional label for the evaluation + debug: + $ref: "#/components/schemas/LocalEvaluationDebug" + nullable: true + required: + - name + - value + + LocalEvaluationDebug: + type: object + properties: + resolvedPrompt: + type: string + description: The resolved prompt used for the evaluation + response: + type: string + description: The response received from the evaluation + finalClassification: + type: string + description: The final classification of the evaluation + processorLogs: + type: array + items: + type: array + items: {} + description: Processor logs + logs: + type: array + items: + type: array + items: {} + description: Evaluator logs + error: + type: object + properties: + message: + type: string + description: Error message + date: + type: string + format: date-time + description: Date and time of the error + CreateSingleTestCase: type: object properties: From e168f90f8fd1c9b433b299150e365c5d4d7ed766 Mon Sep 17 00:00:00 2001 From: Vivek Nair Date: Fri, 20 Sep 2024 15:00:09 -0400 Subject: [PATCH 3/5] fix: wip --- spec.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spec.yaml b/spec.yaml index c2733eb..92b075f 100644 --- a/spec.yaml +++ b/spec.yaml @@ -283,11 +283,6 @@ paths: type: boolean description: Optional flag to trigger remote evaluations default: true - evaluations: - type: array - items: - $ref: "#/components/schemas/LocalEvaluation" - description: Optional array of local evaluations testRuns: type: array items: @@ -312,6 +307,11 @@ paths: description: Use outputs.steps insteads. items: $ref: "#/components/schemas/StepRun" + evaluations: + type: array + items: + $ref: "#/components/schemas/LocalEvaluation" + description: Optional array of local evaluations required: - caseId - stepRuns @@ -533,11 +533,6 @@ paths: type: boolean description: Optional flag to trigger remote evaluations default: true - localEvaluations: - type: array - items: - $ref: "#/components/schemas/LocalEvaluation" - description: Optional array of local evaluations testRuns: type: array items: @@ -560,6 +555,11 @@ paths: type: object additionalProperties: true description: The returned outputs for the test case + evaluations: + type: array + items: + $ref: "#/components/schemas/LocalEvaluation" + description: Optional array of local evaluations required: - caseId - inputs From f3c83af933c917177ef0a2bb39b1b5a31c5f9a8e Mon Sep 17 00:00:00 2001 From: Vivek Nair Date: Fri, 4 Oct 2024 15:18:07 -0400 Subject: [PATCH 4/5] fix: wip --- spec.yaml | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/spec.yaml b/spec.yaml index 92b075f..3f95619 100644 --- a/spec.yaml +++ b/spec.yaml @@ -1460,6 +1460,46 @@ paths: "500": description: Server error + /v3/evaluations: + get: + tags: + - v2 + summary: Get evaluations + parameters: + - in: query + name: resultId + required: true + schema: + type: string + format: uuid + description: The ID of the result to get evaluations for + responses: + "200": + description: Evaluations retrieved successfully + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/EvaluationV3" + application/json; charset=utf-8: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/EvaluationV3" + "400": + description: Bad request + "404": + description: Result not found + "500": + description: Server error + components: securitySchemes: bearerAuth: @@ -2867,3 +2907,71 @@ components: - evalValue - note - name + + EvaluationV3: + type: object + properties: + id: + type: string + format: uuid + description: The ID of the evaluation + createdAt: + $ref: "#/components/schemas/UnixSeconds" + updatedAt: + $ref: "#/components/schemas/UnixSeconds" + isPending: + type: boolean + description: Indicates if the evaluation is pending + isFiltered: + type: boolean + description: Indicates if the evaluation is filtered + debug: + type: object + additionalProperties: true + nullable: true + description: Debug information for the evaluation + evaluatorId: + type: string + format: uuid + description: The ID of the evaluator + nullable: true + runId: + type: string + format: uuid + description: The ID of the run + comparisonRunId: + type: string + format: uuid + nullable: true + description: The ID of the comparison run, if applicable + name: + type: string + nullable: true + description: The name of the evaluation + evalLabel: + type: string + nullable: true + description: The label of the evaluation + evalValue: + type: number + nullable: true + description: The value of the evaluation + manualCreatedByEmail: + type: string + nullable: true + description: The email of the user who manually created the evaluation, if applicable + note: + type: string + description: Additional notes for the evaluation + required: + - id + - createdAt + - updatedAt + - isPending + - isFiltered + - evaluatorId + - runId + - evalLabel + - evalValue + - note + - name From dcdf352f647db5e3290cfb106c045f5bcf437f88 Mon Sep 17 00:00:00 2001 From: Vivek Nair Date: Fri, 4 Oct 2024 17:25:08 -0400 Subject: [PATCH 5/5] fix: wip --- scripts/create-custom-specs.ts | 7 +++++-- spec.yaml | 4 +++- templates/node/index.mustache | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/create-custom-specs.ts b/scripts/create-custom-specs.ts index 705e58f..694ffaa 100644 --- a/scripts/create-custom-specs.ts +++ b/scripts/create-custom-specs.ts @@ -19,12 +19,15 @@ const coreSpec = yaml.load( const v1Tag = "v1"; const v2Tag = "v2"; +const v3Tag = "v3"; -const coreTags = [v1Tag, v2Tag]; +const coreTags = [v1Tag, v2Tag, v3Tag]; const corePaths = Object.fromEntries( Object.entries(coreSpec.paths).filter(([, data]) => - Object.values(data).some((op) => op.tags && coreTags.some(tag => op.tags.includes(tag))) + Object.values(data).some( + (op) => op.tags && coreTags.some((tag) => op.tags.includes(tag)) + ) ) ); diff --git a/spec.yaml b/spec.yaml index 3f95619..d78461e 100644 --- a/spec.yaml +++ b/spec.yaml @@ -10,6 +10,8 @@ tags: description: Endpoints for core API functionality (version 1) - name: v2 description: Endpoints for core API functionality (version 2) + - name: v3 + description: Endpoints for core API functionality (version 3) - name: Feedback description: Endpoints for capturing user feedback for runs @@ -1463,7 +1465,7 @@ paths: /v3/evaluations: get: tags: - - v2 + - v3 summary: Get evaluations parameters: - in: query diff --git a/templates/node/index.mustache b/templates/node/index.mustache index 57540fc..b6a6a27 100644 --- a/templates/node/index.mustache +++ b/templates/node/index.mustache @@ -19,6 +19,9 @@ export { V1TestResultPost200Response as TestResultPost200Response, V1TestResultPostRequest as TestResultPostRequest, V1TestResultPostRequestTestRunsInner as TestResultPostRequestTestRunsInner, + LocalEvaluation, + LocalEvaluationDebug, + LocalEvaluationDebugError } from "./{{tsModelPackage}}"; {{/withSeparateModelsAndApi}}