From d12ca4b11cc6a0f022f2c3b8ab8752a6f600f504 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 15 May 2025 18:16:44 +0000
Subject: [PATCH 01/12] codegen metadata

---
 .stats.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.stats.yml b/.stats.yml
index 12a0365..76c12f5 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
 configured_endpoints: 44
-openapi_spec_hash: 9d81a4b0eca6d3629ba9d5432a65655c
+openapi_spec_hash: 19d3afd940d8ed57b76401ef026e5f47
 config_hash: 659f65b6ccf5612986f920f7f9abbcb5

From 3aa98843e0f042734eb5b74ea86c8dcca8636954 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 16 May 2025 02:39:50 +0000
Subject: [PATCH 02/12] chore(ci): fix installation instructions

---
 scripts/utils/upload-artifact.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
index ebb0478..8f922b5 100755
--- a/scripts/utils/upload-artifact.sh
+++ b/scripts/utils/upload-artifact.sh
@@ -18,7 +18,7 @@ UPLOAD_RESPONSE=$(tar -cz . | curl -v -X PUT \
 
 if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then
   echo -e "\033[32mUploaded build to Stainless storage.\033[0m"
-  echo -e "\033[32mInstallation: npm install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
+  echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
 else
   echo -e "\033[31mFailed to upload artifact.\033[0m"
   exit 1

From 18f661d21b849f15cbe85ce5063ef0dea877d89f Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Sat, 17 May 2025 02:50:02 +0000
Subject: [PATCH 03/12] chore(internal): codegen related update

---
 scripts/utils/upload-artifact.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
index 8f922b5..62d150a 100755
--- a/scripts/utils/upload-artifact.sh
+++ b/scripts/utils/upload-artifact.sh
@@ -18,7 +18,7 @@ UPLOAD_RESPONSE=$(tar -cz .
| curl -v -X PUT \ if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then echo -e "\033[32mUploaded build to Stainless storage.\033[0m" - echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m" + echo -e "\033[32mInstallation: pip install --pre 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m" else echo -e "\033[31mFailed to upload artifact.\033[0m" exit 1 From 40ae04a279ba1e2573d17a17e097f71d1347a3d3 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 18:16:58 +0000 Subject: [PATCH 04/12] feat(api): api update --- .stats.yml | 2 +- .../types/projects/cluster_list_response.py | 20 +++++++++++++++++++ src/codex/types/projects/entry.py | 20 +++++++++++++++++++ .../types/projects/entry_query_response.py | 20 +++++++++++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 76c12f5..aac346a 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: 19d3afd940d8ed57b76401ef026e5f47 +openapi_spec_hash: f25ca671adcc0b224451c721048d9220 config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/src/codex/types/projects/cluster_list_response.py b/src/codex/types/projects/cluster_list_response.py index 2e8b542..1fc8bd5 100644 --- a/src/codex/types/projects/cluster_list_response.py +++ b/src/codex/types/projects/cluster_list_response.py @@ -13,6 +13,7 @@ "ManagedMetadataContextSufficiency", "ManagedMetadataHTMLFormatScores", "ManagedMetadataQueryEaseCustomized", + "ManagedMetadataResponseGroundedness", "ManagedMetadataResponseHelpfulness", "ManagedMetadataTrustworthiness", ] @@ -82,6 +83,22 @@ class ManagedMetadataQueryEaseCustomized(BaseModel): scores: Optional[List[float]] = None +class ManagedMetadataResponseGroundedness(BaseModel): + average: Optional[float] = None + """The average of all scores.""" + + latest: Optional[float] = None + """The most recent score.""" + + max: Optional[float] = None + """The maximum score.""" + + min: Optional[float] = None + """The minimum score.""" + + scores: Optional[List[float]] = None + + class ManagedMetadataResponseHelpfulness(BaseModel): average: Optional[float] = None """The average of all scores.""" @@ -147,6 +164,9 @@ class ManagedMetadata(BaseModel): query_ease_customized: Optional[ManagedMetadataQueryEaseCustomized] = None """Holds a list of scores and computes aggregate statistics.""" + response_groundedness: Optional[ManagedMetadataResponseGroundedness] = None + """Holds a list of scores and computes aggregate statistics.""" + response_helpfulness: Optional[ManagedMetadataResponseHelpfulness] = None """Holds a list of scores and computes aggregate statistics.""" diff --git a/src/codex/types/projects/entry.py b/src/codex/types/projects/entry.py index eb2a221..3f7a86d 100644 --- a/src/codex/types/projects/entry.py +++ b/src/codex/types/projects/entry.py @@ -13,6 +13,7 @@ "ManagedMetadataContextSufficiency", "ManagedMetadataHTMLFormatScores", "ManagedMetadataQueryEaseCustomized", + "ManagedMetadataResponseGroundedness", "ManagedMetadataResponseHelpfulness", "ManagedMetadataTrustworthiness", ] @@ -82,6 +83,22 @@ class ManagedMetadataQueryEaseCustomized(BaseModel): scores: Optional[List[float]] = None +class ManagedMetadataResponseGroundedness(BaseModel): + average: Optional[float] = None + """The average of all scores.""" + + latest: Optional[float] = None + """The most recent score.""" + + max: Optional[float] = None + """The maximum 
score.""" + + min: Optional[float] = None + """The minimum score.""" + + scores: Optional[List[float]] = None + + class ManagedMetadataResponseHelpfulness(BaseModel): average: Optional[float] = None """The average of all scores.""" @@ -147,6 +164,9 @@ class ManagedMetadata(BaseModel): query_ease_customized: Optional[ManagedMetadataQueryEaseCustomized] = None """Holds a list of scores and computes aggregate statistics.""" + response_groundedness: Optional[ManagedMetadataResponseGroundedness] = None + """Holds a list of scores and computes aggregate statistics.""" + response_helpfulness: Optional[ManagedMetadataResponseHelpfulness] = None """Holds a list of scores and computes aggregate statistics.""" diff --git a/src/codex/types/projects/entry_query_response.py b/src/codex/types/projects/entry_query_response.py index 318636b..cd5a4c9 100644 --- a/src/codex/types/projects/entry_query_response.py +++ b/src/codex/types/projects/entry_query_response.py @@ -12,6 +12,7 @@ "EntryManagedMetadataContextSufficiency", "EntryManagedMetadataHTMLFormatScores", "EntryManagedMetadataQueryEaseCustomized", + "EntryManagedMetadataResponseGroundedness", "EntryManagedMetadataResponseHelpfulness", "EntryManagedMetadataTrustworthiness", ] @@ -81,6 +82,22 @@ class EntryManagedMetadataQueryEaseCustomized(BaseModel): scores: Optional[List[float]] = None +class EntryManagedMetadataResponseGroundedness(BaseModel): + average: Optional[float] = None + """The average of all scores.""" + + latest: Optional[float] = None + """The most recent score.""" + + max: Optional[float] = None + """The maximum score.""" + + min: Optional[float] = None + """The minimum score.""" + + scores: Optional[List[float]] = None + + class EntryManagedMetadataResponseHelpfulness(BaseModel): average: Optional[float] = None """The average of all scores.""" @@ -146,6 +163,9 @@ class EntryManagedMetadata(BaseModel): query_ease_customized: Optional[EntryManagedMetadataQueryEaseCustomized] = None """Holds a list of scores and computes aggregate statistics.""" + response_groundedness: Optional[EntryManagedMetadataResponseGroundedness] = None + """Holds a list of scores and computes aggregate statistics.""" + response_helpfulness: Optional[EntryManagedMetadataResponseHelpfulness] = None """Holds a list of scores and computes aggregate statistics.""" From 7bbf57ae5327ddd85e6729997a4f85b427758258 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 22:16:41 +0000 Subject: [PATCH 05/12] feat(api): api update --- .stats.yml | 2 +- tests/api_resources/test_projects.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.stats.yml b/.stats.yml index aac346a..374e672 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: f25ca671adcc0b224451c721048d9220 +openapi_spec_hash: 67d5aeebff72f48ee4730227ca0b47c2 config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 19e41a0..5c29fdd 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -204,7 +204,7 @@ def test_method_list(self, client: Codex) -> None: def test_method_list_with_all_params(self, client: Codex) -> None: project = client.projects.list( include_entry_counts=True, - limit=0, + limit=1, offset=0, order="asc", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", @@ -699,7 +699,7 @@ async def test_method_list(self, async_client: 
AsyncCodex) -> None: async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.list( include_entry_counts=True, - limit=0, + limit=1, offset=0, order="asc", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", From 428e5001b6b5576f5383c0f2ffd3ad5fe085128a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 02:29:17 +0000 Subject: [PATCH 06/12] chore(docs): grammar improvements --- SECURITY.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 9fc6ee2..0780828 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,11 +16,11 @@ before making any information public. ## Reporting Non-SDK Related Security Issues If you encounter security issues that are not directly related to SDKs but pertain to the services -or products provided by Codex please follow the respective company's security reporting guidelines. +or products provided by Codex, please follow the respective company's security reporting guidelines. ### Codex Terms and Policies -Please contact team@cleanlab.ai for any questions or concerns regarding security of our services. +Please contact team@cleanlab.ai for any questions or concerns regarding the security of our services. --- From 3a5293161e7313d7c18ec61be1b8e7ee56bad8c9 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 21:53:29 +0000 Subject: [PATCH 07/12] feat(api): api update --- .stats.yml | 2 +- src/codex/types/project_create_params.py | 285 ++++++++++++++++++- src/codex/types/project_list_response.py | 285 ++++++++++++++++++- src/codex/types/project_retrieve_response.py | 284 +++++++++++++++++- src/codex/types/project_return_schema.py | 284 +++++++++++++++++- src/codex/types/project_update_params.py | 285 ++++++++++++++++++- tests/api_resources/test_projects.py | 264 +++++++++++++++++ 7 files changed, 1676 insertions(+), 13 deletions(-) diff --git a/.stats.yml b/.stats.yml index 374e672..e80f0e1 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: 67d5aeebff72f48ee4730227ca0b47c2 +openapi_spec_hash: 0f1841fad65926e7ddfb22dd7a642b46 config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py index ecdd194..75892e0 100644 --- a/src/codex/types/project_create_params.py +++ b/src/codex/types/project_create_params.py @@ -2,10 +2,22 @@ from __future__ import annotations -from typing import Optional -from typing_extensions import Required, TypedDict +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict -__all__ = ["ProjectCreateParams", "Config"] +__all__ = [ + "ProjectCreateParams", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] class ProjectCreateParams(TypedDict, total=False): @@ -18,9 +30,276 @@ class ProjectCreateParams(TypedDict, total=False): description: Optional[str] +class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): + criteria: Required[str] + """ + The evaluation criteria text that 
describes what aspect is being evaluated and + how + """ + + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(TypedDict, total=False): + evals: Dict[str, ConfigEvalConfigCustomEvalsEvals] + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + 
name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(TypedDict, total=False): + context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: ConfigEvalConfigDefaultEvalsQueryEase + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + +class ConfigEvalConfig(TypedDict, total=False): + custom_evals: ConfigEvalConfigCustomEvals + """Configuration for custom evaluation metrics.""" + + default_evals: ConfigEvalConfigDefaultEvals + """Configuration for default evaluation metrics.""" + + class Config(TypedDict, total=False): clustering_use_llm_matching: bool + eval_config: ConfigEvalConfig + """Configuration for project-specific evaluation metrics""" + llm_matching_model: str llm_matching_quality_preset: str diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py index 2b4fec4..59d3bf8 100644 --- a/src/codex/types/project_list_response.py +++ b/src/codex/types/project_list_response.py @@ -1,16 +1,297 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Optional +from typing import Dict, List, Optional from datetime import datetime +from typing_extensions import Literal from .._models import BaseModel -__all__ = ["ProjectListResponse", "Project", "ProjectConfig"] +__all__ = [ + "ProjectListResponse", + "Project", + "ProjectConfig", + "ProjectConfigEvalConfig", + "ProjectConfigEvalConfigCustomEvals", + "ProjectConfigEvalConfigCustomEvalsEvals", + "ProjectConfigEvalConfigDefaultEvals", + "ProjectConfigEvalConfigDefaultEvalsContextSufficiency", + "ProjectConfigEvalConfigDefaultEvalsQueryEase", + "ProjectConfigEvalConfigDefaultEvalsResponseGroundedness", + "ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ProjectConfigEvalConfigDefaultEvalsTrustworthiness", +] + + +class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel): + criteria: str + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigCustomEvals(BaseModel): + evals: Optional[Dict[str, ProjectConfigEvalConfigCustomEvalsEvals]] = None + + +class ProjectConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class 
ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvals(BaseModel): + context_sufficiency: Optional[ProjectConfigEvalConfigDefaultEvalsContextSufficiency] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: Optional[ProjectConfigEvalConfigDefaultEvalsQueryEase] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: Optional[ProjectConfigEvalConfigDefaultEvalsResponseGroundedness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: Optional[ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: Optional[ProjectConfigEvalConfigDefaultEvalsTrustworthiness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. 
+ + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + +class ProjectConfigEvalConfig(BaseModel): + custom_evals: Optional[ProjectConfigEvalConfigCustomEvals] = None + """Configuration for custom evaluation metrics.""" + + default_evals: Optional[ProjectConfigEvalConfigDefaultEvals] = None + """Configuration for default evaluation metrics.""" class ProjectConfig(BaseModel): clustering_use_llm_matching: Optional[bool] = None + eval_config: Optional[ProjectConfigEvalConfig] = None + """Configuration for project-specific evaluation metrics""" + llm_matching_model: Optional[str] = None llm_matching_quality_preset: Optional[str] = None diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index 62209d3..a631f0c 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -1,16 +1,296 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Dict, Optional from datetime import datetime +from typing_extensions import Literal from .._models import BaseModel -__all__ = ["ProjectRetrieveResponse", "Config"] +__all__ = [ + "ProjectRetrieveResponse", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] + + +class ConfigEvalConfigCustomEvalsEvals(BaseModel): + criteria: str + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(BaseModel): + evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + eval_key: str + """ + Unique key 
for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(BaseModel): + context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: Optional[ConfigEvalConfigDefaultEvalsQueryEase] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: Optional[ConfigEvalConfigDefaultEvalsResponseGroundedness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: Optional[ConfigEvalConfigDefaultEvalsResponseHelpfulness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: Optional[ConfigEvalConfigDefaultEvalsTrustworthiness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + +class ConfigEvalConfig(BaseModel): + custom_evals: Optional[ConfigEvalConfigCustomEvals] = None + """Configuration for custom evaluation metrics.""" + + default_evals: Optional[ConfigEvalConfigDefaultEvals] = None + """Configuration for default evaluation metrics.""" class Config(BaseModel): clustering_use_llm_matching: Optional[bool] = None + eval_config: Optional[ConfigEvalConfig] = None + """Configuration for project-specific evaluation metrics""" + llm_matching_model: Optional[str] = None llm_matching_quality_preset: Optional[str] = None diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py index 51a6c1a..7da2e61 100644 --- a/src/codex/types/project_return_schema.py +++ b/src/codex/types/project_return_schema.py @@ -1,16 +1,296 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Dict, Optional from datetime import datetime +from typing_extensions import Literal from .._models import BaseModel -__all__ = ["ProjectReturnSchema", "Config"] +__all__ = [ + "ProjectReturnSchema", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] + + +class ConfigEvalConfigCustomEvalsEvals(BaseModel): + criteria: str + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(BaseModel): + evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + eval_key: str + """ + Unique key 
for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(BaseModel): + context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: Optional[ConfigEvalConfigDefaultEvalsQueryEase] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: Optional[ConfigEvalConfigDefaultEvalsResponseGroundedness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: Optional[ConfigEvalConfigDefaultEvalsResponseHelpfulness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: Optional[ConfigEvalConfigDefaultEvalsTrustworthiness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + +class ConfigEvalConfig(BaseModel): + custom_evals: Optional[ConfigEvalConfigCustomEvals] = None + """Configuration for custom evaluation metrics.""" + + default_evals: Optional[ConfigEvalConfigDefaultEvals] = None + """Configuration for default evaluation metrics.""" class Config(BaseModel): clustering_use_llm_matching: Optional[bool] = None + eval_config: Optional[ConfigEvalConfig] = None + """Configuration for project-specific evaluation metrics""" + llm_matching_model: Optional[str] = None llm_matching_quality_preset: Optional[str] = None diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index 0a5aa54..d58dd59 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -2,10 +2,22 @@ from __future__ import annotations -from typing import Optional -from typing_extensions import Required, TypedDict +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict -__all__ = ["ProjectUpdateParams", "Config"] +__all__ = [ + "ProjectUpdateParams", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] class ProjectUpdateParams(TypedDict, total=False): @@ -16,9 +28,276 @@ class ProjectUpdateParams(TypedDict, total=False): description: Optional[str] +class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): + criteria: Required[str] + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(TypedDict, total=False): + evals: Dict[str, ConfigEvalConfigCustomEvalsEvals] + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: 
Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(TypedDict, total=False): + context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: ConfigEvalConfigDefaultEvalsQueryEase + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + +class ConfigEvalConfig(TypedDict, total=False): + custom_evals: ConfigEvalConfigCustomEvals + """Configuration for custom evaluation metrics.""" + + default_evals: ConfigEvalConfigDefaultEvals + """Configuration for default evaluation metrics.""" + + class Config(TypedDict, total=False): clustering_use_llm_matching: bool + eval_config: ConfigEvalConfig + """Configuration for project-specific evaluation metrics""" + llm_matching_model: str llm_matching_quality_preset: str diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 5c29fdd..d5e0e1c 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -39,6 +39,72 @@ def test_method_create_with_all_params(self, client: Codex) -> None: project = client.projects.create( config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, @@ -141,6 +207,72 @@ def test_method_update_with_all_params(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + 
"response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, @@ -534,6 +666,72 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> project = await async_client.projects.create( config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, @@ -636,6 +834,72 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + 
"should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, From 7ba3858c1c968c093c676478a1c7e5e13b92c12a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 28 May 2025 03:26:20 +0000 Subject: [PATCH 08/12] fix(docs/api): remove references to nonexistent types --- api.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/api.md b/api.md index f3a2ea1..8aac76f 100644 --- a/api.md +++ b/api.md @@ -139,8 +139,6 @@ from codex.types import ( ProjectReturnSchema, ProjectRetrieveResponse, ProjectListResponse, - ProjectExportResponse, - ProjectIncrementQueriesResponse, ProjectRetrieveAnalyticsResponse, ProjectValidateResponse, ) @@ -153,8 +151,8 @@ Methods: - client.projects.update(project_id, \*\*params) -> ProjectReturnSchema - client.projects.list(\*\*params) -> ProjectListResponse - client.projects.delete(project_id) -> None -- client.projects.export(project_id) -> object -- client.projects.increment_queries(project_id, \*\*params) -> object +- client.projects.export(project_id) -> object +- client.projects.increment_queries(project_id, \*\*params) -> object - client.projects.retrieve_analytics(project_id, \*\*params) -> ProjectRetrieveAnalyticsResponse - client.projects.validate(project_id, \*\*params) -> ProjectValidateResponse From 57f522fbd04637849146636a83976fe696160a97 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 29 May 2025 03:09:34 +0000 Subject: [PATCH 09/12] chore(api): mark some methods as deprecated --- src/codex/resources/projects/entries.py | 27 ++- src/codex/resources/projects/projects.py | 27 ++- tests/api_resources/projects/test_entries.py | 164 ++++++++++--------- tests/api_resources/test_projects.py | 100 ++++++----- 4 files changed, 186 insertions(+), 132 deletions(-) diff --git a/src/codex/resources/projects/entries.py b/src/codex/resources/projects/entries.py index 346dd35..c6b43a4 100644 --- a/src/codex/resources/projects/entries.py +++ b/src/codex/resources/projects/entries.py @@ -2,6 +2,7 @@ from __future__ import annotations +import typing_extensions from typing import Iterable, Optional import httpx @@ -312,6 +313,7 @@ def publish_draft_answer( cast_to=Entry, ) + @typing_extensions.deprecated("deprecated") def query( self, project_id: str, @@ -707,6 +709,7 @@ async def publish_draft_answer( cast_to=Entry, ) + @typing_extensions.deprecated("deprecated") async def query( self, project_id: str, @@ -839,8 +842,10 @@ def __init__(self, entries: EntriesResource) -> None: self.publish_draft_answer = to_raw_response_wrapper( entries.publish_draft_answer, ) - self.query = to_raw_response_wrapper( - entries.query, + self.query = ( # pyright: ignore[reportDeprecated] + to_raw_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = to_raw_response_wrapper( entries.unpublish_answer, @@ -869,8 +874,10 @@ def __init__(self, entries: AsyncEntriesResource) -> None: self.publish_draft_answer = async_to_raw_response_wrapper( entries.publish_draft_answer, ) - self.query = async_to_raw_response_wrapper( - entries.query, + 
self.query = ( # pyright: ignore[reportDeprecated] + async_to_raw_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = async_to_raw_response_wrapper( entries.unpublish_answer, @@ -899,8 +906,10 @@ def __init__(self, entries: EntriesResource) -> None: self.publish_draft_answer = to_streamed_response_wrapper( entries.publish_draft_answer, ) - self.query = to_streamed_response_wrapper( - entries.query, + self.query = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = to_streamed_response_wrapper( entries.unpublish_answer, @@ -929,8 +938,10 @@ def __init__(self, entries: AsyncEntriesResource) -> None: self.publish_draft_answer = async_to_streamed_response_wrapper( entries.publish_draft_answer, ) - self.query = async_to_streamed_response_wrapper( - entries.query, + self.query = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = async_to_streamed_response_wrapper( entries.unpublish_answer, diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index 6195d1a..cf8c0f8 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -2,6 +2,7 @@ from __future__ import annotations +import typing_extensions from typing import Dict, List, Optional from typing_extensions import Literal @@ -330,6 +331,7 @@ def export( cast_to=object, ) + @typing_extensions.deprecated("deprecated") def increment_queries( self, project_id: str, @@ -872,6 +874,7 @@ async def export( cast_to=object, ) + @typing_extensions.deprecated("deprecated") async def increment_queries( self, project_id: str, @@ -1167,8 +1170,10 @@ def __init__(self, projects: ProjectsResource) -> None: self.export = to_raw_response_wrapper( projects.export, ) - self.increment_queries = to_raw_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: ignore[reportDeprecated] + to_raw_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = to_raw_response_wrapper( projects.retrieve_analytics, @@ -1212,8 +1217,10 @@ def __init__(self, projects: AsyncProjectsResource) -> None: self.export = async_to_raw_response_wrapper( projects.export, ) - self.increment_queries = async_to_raw_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: ignore[reportDeprecated] + async_to_raw_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = async_to_raw_response_wrapper( projects.retrieve_analytics, @@ -1257,8 +1264,10 @@ def __init__(self, projects: ProjectsResource) -> None: self.export = to_streamed_response_wrapper( projects.export, ) - self.increment_queries = to_streamed_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = to_streamed_response_wrapper( projects.retrieve_analytics, @@ -1302,8 +1311,10 @@ def __init__(self, projects: AsyncProjectsResource) -> None: self.export = async_to_streamed_response_wrapper( projects.export, ) - self.increment_queries = async_to_streamed_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: 
ignore[reportDeprecated] + async_to_streamed_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = async_to_streamed_response_wrapper( projects.retrieve_analytics, diff --git a/tests/api_resources/projects/test_entries.py b/tests/api_resources/projects/test_entries.py index 73a45ad..32b0452 100644 --- a/tests/api_resources/projects/test_entries.py +++ b/tests/api_resources/projects/test_entries.py @@ -15,6 +15,8 @@ EntryNotifySmeResponse, ) +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -382,40 +384,45 @@ def test_path_params_publish_draft_answer(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_method_query(self, client: Codex) -> None: - entry = client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + entry = client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize def test_method_query_with_all_params(self, client: Codex) -> None: - entry = client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - use_llm_matching=True, - client_metadata={}, - query_metadata={ - "context": "string", - "custom_metadata": {}, - "eval_scores": {"foo": 0}, - "evaluated_response": "evaluated_response", - }, - x_client_library_version="x-client-library-version", - x_integration_type="x-integration-type", - x_source="x-source", - x_stainless_package_version="x-stainless-package-version", - ) + with pytest.warns(DeprecationWarning): + entry = client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + use_llm_matching=True, + client_metadata={}, + query_metadata={ + "context": "string", + "custom_metadata": {}, + "eval_scores": {"foo": 0}, + "evaluated_response": "evaluated_response", + }, + x_client_library_version="x-client-library-version", + x_integration_type="x-integration-type", + x_source="x-source", + x_stainless_package_version="x-stainless-package-version", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize def test_raw_response_query(self, client: Codex) -> None: - response = client.projects.entries.with_raw_response.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + response = client.projects.entries.with_raw_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -425,26 +432,28 @@ def test_raw_response_query(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_streaming_response_query(self, client: Codex) -> None: - with client.projects.entries.with_streaming_response.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.projects.entries.with_streaming_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - entry = response.parse() - assert_matches_type(EntryQueryResponse, entry, path=["response"]) + entry = response.parse() + assert_matches_type(EntryQueryResponse, entry, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize def test_path_params_query(self, client: Codex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - client.projects.entries.with_raw_response.query( - project_id="", - question="question", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + client.projects.entries.with_raw_response.query( + project_id="", + question="question", + ) @pytest.mark.skip() @parametrize @@ -863,40 +872,45 @@ async def test_path_params_publish_draft_answer(self, async_client: AsyncCodex) @pytest.mark.skip() @parametrize async def test_method_query(self, async_client: AsyncCodex) -> None: - entry = await async_client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + entry = await async_client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize async def test_method_query_with_all_params(self, async_client: AsyncCodex) -> None: - entry = await async_client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - use_llm_matching=True, - client_metadata={}, - query_metadata={ - "context": "string", - "custom_metadata": {}, - "eval_scores": {"foo": 0}, - "evaluated_response": "evaluated_response", - }, - x_client_library_version="x-client-library-version", - x_integration_type="x-integration-type", - x_source="x-source", - x_stainless_package_version="x-stainless-package-version", - ) + with pytest.warns(DeprecationWarning): + entry = await async_client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + use_llm_matching=True, + client_metadata={}, + query_metadata={ + "context": "string", + "custom_metadata": {}, + "eval_scores": {"foo": 0}, + "evaluated_response": "evaluated_response", + }, + x_client_library_version="x-client-library-version", + x_integration_type="x-integration-type", + x_source="x-source", + x_stainless_package_version="x-stainless-package-version", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize async def test_raw_response_query(self, async_client: AsyncCodex) -> None: - response = await async_client.projects.entries.with_raw_response.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.projects.entries.with_raw_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -906,26 +920,28 @@ async def test_raw_response_query(self, async_client: AsyncCodex) -> None: @pytest.mark.skip() @parametrize async def test_streaming_response_query(self, async_client: AsyncCodex) -> None: - async with async_client.projects.entries.with_streaming_response.query( - 
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.projects.entries.with_streaming_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - entry = await response.parse() - assert_matches_type(EntryQueryResponse, entry, path=["response"]) + entry = await response.parse() + assert_matches_type(EntryQueryResponse, entry, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize async def test_path_params_query(self, async_client: AsyncCodex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - await async_client.projects.entries.with_raw_response.query( - project_id="", - question="question", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + await async_client.projects.entries.with_raw_response.query( + project_id="", + question="question", + ) @pytest.mark.skip() @parametrize diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index d5e0e1c..f7c3f01 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -17,6 +17,8 @@ ) from tests.utils import assert_matches_type +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -454,26 +456,31 @@ def test_path_params_export(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_method_increment_queries(self, client: Codex) -> None: - project = client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + project = client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize def test_method_increment_queries_with_all_params(self, client: Codex) -> None: - project = client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - count=0, - ) + with pytest.warns(DeprecationWarning): + project = client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + count=0, + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize def test_raw_response_increment_queries(self, client: Codex) -> None: - response = client.projects.with_raw_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + response = client.projects.with_raw_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -483,24 +490,26 @@ def test_raw_response_increment_queries(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_streaming_response_increment_queries(self, client: Codex) -> None: - with client.projects.with_streaming_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) as response: - assert not response.is_closed - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.projects.with_streaming_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - project = response.parse() - assert_matches_type(object, project, path=["response"]) + project = response.parse() + assert_matches_type(object, project, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize def test_path_params_increment_queries(self, client: Codex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - client.projects.with_raw_response.increment_queries( - project_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + client.projects.with_raw_response.increment_queries( + project_id="", + ) @pytest.mark.skip() @parametrize @@ -1081,26 +1090,31 @@ async def test_path_params_export(self, async_client: AsyncCodex) -> None: @pytest.mark.skip() @parametrize async def test_method_increment_queries(self, async_client: AsyncCodex) -> None: - project = await async_client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + project = await async_client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize async def test_method_increment_queries_with_all_params(self, async_client: AsyncCodex) -> None: - project = await async_client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - count=0, - ) + with pytest.warns(DeprecationWarning): + project = await async_client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + count=0, + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize async def test_raw_response_increment_queries(self, async_client: AsyncCodex) -> None: - response = await async_client.projects.with_raw_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.projects.with_raw_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1110,24 +1124,26 @@ async def test_raw_response_increment_queries(self, async_client: AsyncCodex) -> @pytest.mark.skip() @parametrize async def test_streaming_response_increment_queries(self, async_client: AsyncCodex) -> None: - async with async_client.projects.with_streaming_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.projects.with_streaming_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - project = await response.parse() - assert_matches_type(object, project, path=["response"]) + project 
= await response.parse() + assert_matches_type(object, project, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize async def test_path_params_increment_queries(self, async_client: AsyncCodex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - await async_client.projects.with_raw_response.increment_queries( - project_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + await async_client.projects.with_raw_response.increment_queries( + project_id="", + ) @pytest.mark.skip() @parametrize From ef4acf292c728a838aecd6539d278b8c128be68a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 11 Jun 2025 18:41:44 +0000 Subject: [PATCH 10/12] feat(api): api update --- .stats.yml | 2 +- src/codex/types/project_create_params.py | 3 +++ src/codex/types/project_list_response.py | 3 +++ src/codex/types/project_retrieve_response.py | 3 +++ src/codex/types/project_return_schema.py | 3 +++ src/codex/types/project_update_params.py | 3 +++ tests/api_resources/test_projects.py | 4 ++++ 7 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index e80f0e1..ddf7240 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: 0f1841fad65926e7ddfb22dd7a642b46 +openapi_spec_hash: dfccb5c181396678a22b9c079847889f config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py index 75892e0..3142755 100644 --- a/src/codex/types/project_create_params.py +++ b/src/codex/types/project_create_params.py @@ -55,6 +55,9 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): enabled: bool """Allows the evaluation to be disabled without removing it""" + is_default: bool + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py index 59d3bf8..d480573 100644 --- a/src/codex/types/project_list_response.py +++ b/src/codex/types/project_list_response.py @@ -47,6 +47,9 @@ class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel): enabled: Optional[bool] = None """Allows the evaluation to be disabled without removing it""" + is_default: Optional[bool] = None + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] = None """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index a631f0c..fb62cff 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -46,6 +46,9 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel): enabled: Optional[bool] = None """Allows the evaluation to be disabled without removing it""" + is_default: Optional[bool] = None + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] = None """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py index 7da2e61..420ec6e 100644 --- 
a/src/codex/types/project_return_schema.py +++ b/src/codex/types/project_return_schema.py @@ -46,6 +46,9 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel): enabled: Optional[bool] = None """Allows the evaluation to be disabled without removing it""" + is_default: Optional[bool] = None + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] = None """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index d58dd59..d199955 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -53,6 +53,9 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): enabled: bool """Allows the evaluation to be disabled without removing it""" + is_default: bool + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index f7c3f01..8ba69a1 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -50,6 +50,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", @@ -218,6 +219,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None: "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", @@ -684,6 +686,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", @@ -852,6 +855,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", From 8fcb74d6d5ef8db36a78031c9bf59f0840def1d4 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 02:20:36 +0000 Subject: [PATCH 11/12] chore(docs): remove reference to rye shell --- CONTRIBUTING.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b9fa9a1..548ff4c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,8 +17,7 @@ $ rye sync --all-features You can then run scripts using `rye run python script.py` or by activating the virtual environment: ```sh -$ rye shell -# or manually activate - https://docs.python.org/3/library/venv.html#how-venvs-work +# Activate the virtual environment - https://docs.python.org/3/library/venv.html#how-venvs-work $ source .venv/bin/activate # now you can omit the `rye run` prefix From d9499f6ccb7deac8948dc80342c9bf0f956d8397 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 11 Jun 2025 18:42:01 +0000 Subject: [PATCH 12/12] release: 0.1.0-alpha.21 --- 
.release-please-manifest.json | 2 +- CHANGELOG.md | 25 +++++++++++++++++++++++++ pyproject.toml | 2 +- src/codex/_version.py | 2 +- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index fac1407..7c31fce 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.20" + ".": "0.1.0-alpha.21" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f151d60..fd14df6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +## 0.1.0-alpha.21 (2025-06-11) + +Full Changelog: [v0.1.0-alpha.20...v0.1.0-alpha.21](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.20...v0.1.0-alpha.21) + +### Features + +* **api:** api update ([ef4acf2](https://github.com/cleanlab/codex-python/commit/ef4acf292c728a838aecd6539d278b8c128be68a)) +* **api:** api update ([3a52931](https://github.com/cleanlab/codex-python/commit/3a5293161e7313d7c18ec61be1b8e7ee56bad8c9)) +* **api:** api update ([7bbf57a](https://github.com/cleanlab/codex-python/commit/7bbf57ae5327ddd85e6729997a4f85b427758258)) +* **api:** api update ([40ae04a](https://github.com/cleanlab/codex-python/commit/40ae04a279ba1e2573d17a17e097f71d1347a3d3)) + + +### Bug Fixes + +* **docs/api:** remove references to nonexistent types ([7ba3858](https://github.com/cleanlab/codex-python/commit/7ba3858c1c968c093c676478a1c7e5e13b92c12a)) + + +### Chores + +* **api:** mark some methods as deprecated ([57f522f](https://github.com/cleanlab/codex-python/commit/57f522fbd04637849146636a83976fe696160a97)) +* **ci:** fix installation instructions ([3aa9884](https://github.com/cleanlab/codex-python/commit/3aa98843e0f042734eb5b74ea86c8dcca8636954)) +* **docs:** grammar improvements ([428e500](https://github.com/cleanlab/codex-python/commit/428e5001b6b5576f5383c0f2ffd3ad5fe085128a)) +* **docs:** remove reference to rye shell ([8fcb74d](https://github.com/cleanlab/codex-python/commit/8fcb74d6d5ef8db36a78031c9bf59f0840def1d4)) +* **internal:** codegen related update ([18f661d](https://github.com/cleanlab/codex-python/commit/18f661d21b849f15cbe85ce5063ef0dea877d89f)) + ## 0.1.0-alpha.20 (2025-05-15) Full Changelog: [v0.1.0-alpha.19...v0.1.0-alpha.20](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.19...v0.1.0-alpha.20) diff --git a/pyproject.toml b/pyproject.toml index 04d039a..55d73e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.20" +version = "0.1.0-alpha.21" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" diff --git a/src/codex/_version.py b/src/codex/_version.py index 44d6131..3b23c98 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.20" # x-release-please-version +__version__ = "0.1.0-alpha.21" # x-release-please-version
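
For readers of this patch series, the sketch below shows roughly how the `eval_config` field introduced in the `feat(api): api update` patches could be supplied through the generated client. It is an illustration only, not part of the patches: `client` is assumed to be an already-configured `codex.Codex` instance, the project ID is the placeholder UUID used in the tests, the eval keys, criteria text, and thresholds are made-up example values, and `projects.update` may require arguments beyond `project_id` and `config` (see `src/codex/types/project_update_params.py`). The dictionary keys mirror the TypedDicts and test payloads in the diffs above.

```python
# Illustrative sketch only -- not part of the patch series above.
# Assumes `client` is an already-configured codex.Codex instance; the project ID is the
# placeholder UUID from tests/api_resources/test_projects.py, and all eval names,
# criteria, and thresholds are made-up example values.
from codex import Codex

client = Codex()  # assumption: credentials supplied via environment/constructor as documented for the SDK

eval_config = {
    # Tune a built-in (default) eval: fail when the trustworthiness score drops below 0.6,
    # and allow the failure to escalate to Codex/SME review.
    "default_evals": {
        "trustworthiness": {
            "eval_key": "trustworthiness",
            "name": "Trustworthiness",
            "enabled": True,
            "priority": 0,
            "should_escalate": True,
            "threshold": 0.6,
            "threshold_direction": "below",
        },
    },
    # Register a project-specific custom eval; `is_default` (added in PATCH 10/12) marks it as non-built-in.
    "custom_evals": {
        "evals": {
            "politeness": {
                "eval_key": "politeness",
                "name": "Politeness",
                "criteria": "Is the response polite and professional?",
                "response_identifier": "Response",
                "enabled": True,
                "is_default": False,
                "priority": 1,
                "should_escalate": False,
                "threshold": 0.5,
                "threshold_direction": "below",
            },
        },
    },
}

project = client.projects.update(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",  # placeholder ID from the tests
    config={"eval_config": eval_config},
)
```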