From d12ca4b11cc6a0f022f2c3b8ab8752a6f600f504 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 15 May 2025 18:16:44 +0000
Subject: [PATCH 01/12] codegen metadata
---
.stats.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.stats.yml b/.stats.yml
index 12a0365..76c12f5 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
configured_endpoints: 44
-openapi_spec_hash: 9d81a4b0eca6d3629ba9d5432a65655c
+openapi_spec_hash: 19d3afd940d8ed57b76401ef026e5f47
config_hash: 659f65b6ccf5612986f920f7f9abbcb5
From 3aa98843e0f042734eb5b74ea86c8dcca8636954 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 16 May 2025 02:39:50 +0000
Subject: [PATCH 02/12] chore(ci): fix installation instructions
---
scripts/utils/upload-artifact.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
index ebb0478..8f922b5 100755
--- a/scripts/utils/upload-artifact.sh
+++ b/scripts/utils/upload-artifact.sh
@@ -18,7 +18,7 @@ UPLOAD_RESPONSE=$(tar -cz . | curl -v -X PUT \
if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then
echo -e "\033[32mUploaded build to Stainless storage.\033[0m"
- echo -e "\033[32mInstallation: npm install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
+ echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
else
echo -e "\033[31mFailed to upload artifact.\033[0m"
exit 1
From 18f661d21b849f15cbe85ce5063ef0dea877d89f Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Sat, 17 May 2025 02:50:02 +0000
Subject: [PATCH 03/12] chore(internal): codegen related update
---
scripts/utils/upload-artifact.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
index 8f922b5..62d150a 100755
--- a/scripts/utils/upload-artifact.sh
+++ b/scripts/utils/upload-artifact.sh
@@ -18,7 +18,7 @@ UPLOAD_RESPONSE=$(tar -cz . | curl -v -X PUT \
if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then
echo -e "\033[32mUploaded build to Stainless storage.\033[0m"
- echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
+ echo -e "\033[32mInstallation: pip install --pre 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
else
echo -e "\033[31mFailed to upload artifact.\033[0m"
exit 1
From 40ae04a279ba1e2573d17a17e097f71d1347a3d3 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 21 May 2025 18:16:58 +0000
Subject: [PATCH 04/12] feat(api): api update
---
.stats.yml | 2 +-
.../types/projects/cluster_list_response.py | 20 +++++++++++++++++++
src/codex/types/projects/entry.py | 20 +++++++++++++++++++
.../types/projects/entry_query_response.py | 20 +++++++++++++++++++
4 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/.stats.yml b/.stats.yml
index 76c12f5..aac346a 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
configured_endpoints: 44
-openapi_spec_hash: 19d3afd940d8ed57b76401ef026e5f47
+openapi_spec_hash: f25ca671adcc0b224451c721048d9220
config_hash: 659f65b6ccf5612986f920f7f9abbcb5
diff --git a/src/codex/types/projects/cluster_list_response.py b/src/codex/types/projects/cluster_list_response.py
index 2e8b542..1fc8bd5 100644
--- a/src/codex/types/projects/cluster_list_response.py
+++ b/src/codex/types/projects/cluster_list_response.py
@@ -13,6 +13,7 @@
"ManagedMetadataContextSufficiency",
"ManagedMetadataHTMLFormatScores",
"ManagedMetadataQueryEaseCustomized",
+ "ManagedMetadataResponseGroundedness",
"ManagedMetadataResponseHelpfulness",
"ManagedMetadataTrustworthiness",
]
@@ -82,6 +83,22 @@ class ManagedMetadataQueryEaseCustomized(BaseModel):
scores: Optional[List[float]] = None
+class ManagedMetadataResponseGroundedness(BaseModel):
+ average: Optional[float] = None
+ """The average of all scores."""
+
+ latest: Optional[float] = None
+ """The most recent score."""
+
+ max: Optional[float] = None
+ """The maximum score."""
+
+ min: Optional[float] = None
+ """The minimum score."""
+
+ scores: Optional[List[float]] = None
+
+
class ManagedMetadataResponseHelpfulness(BaseModel):
average: Optional[float] = None
"""The average of all scores."""
@@ -147,6 +164,9 @@ class ManagedMetadata(BaseModel):
query_ease_customized: Optional[ManagedMetadataQueryEaseCustomized] = None
"""Holds a list of scores and computes aggregate statistics."""
+ response_groundedness: Optional[ManagedMetadataResponseGroundedness] = None
+ """Holds a list of scores and computes aggregate statistics."""
+
response_helpfulness: Optional[ManagedMetadataResponseHelpfulness] = None
"""Holds a list of scores and computes aggregate statistics."""
diff --git a/src/codex/types/projects/entry.py b/src/codex/types/projects/entry.py
index eb2a221..3f7a86d 100644
--- a/src/codex/types/projects/entry.py
+++ b/src/codex/types/projects/entry.py
@@ -13,6 +13,7 @@
"ManagedMetadataContextSufficiency",
"ManagedMetadataHTMLFormatScores",
"ManagedMetadataQueryEaseCustomized",
+ "ManagedMetadataResponseGroundedness",
"ManagedMetadataResponseHelpfulness",
"ManagedMetadataTrustworthiness",
]
@@ -82,6 +83,22 @@ class ManagedMetadataQueryEaseCustomized(BaseModel):
scores: Optional[List[float]] = None
+class ManagedMetadataResponseGroundedness(BaseModel):
+ average: Optional[float] = None
+ """The average of all scores."""
+
+ latest: Optional[float] = None
+ """The most recent score."""
+
+ max: Optional[float] = None
+ """The maximum score."""
+
+ min: Optional[float] = None
+ """The minimum score."""
+
+ scores: Optional[List[float]] = None
+
+
class ManagedMetadataResponseHelpfulness(BaseModel):
average: Optional[float] = None
"""The average of all scores."""
@@ -147,6 +164,9 @@ class ManagedMetadata(BaseModel):
query_ease_customized: Optional[ManagedMetadataQueryEaseCustomized] = None
"""Holds a list of scores and computes aggregate statistics."""
+ response_groundedness: Optional[ManagedMetadataResponseGroundedness] = None
+ """Holds a list of scores and computes aggregate statistics."""
+
response_helpfulness: Optional[ManagedMetadataResponseHelpfulness] = None
"""Holds a list of scores and computes aggregate statistics."""
diff --git a/src/codex/types/projects/entry_query_response.py b/src/codex/types/projects/entry_query_response.py
index 318636b..cd5a4c9 100644
--- a/src/codex/types/projects/entry_query_response.py
+++ b/src/codex/types/projects/entry_query_response.py
@@ -12,6 +12,7 @@
"EntryManagedMetadataContextSufficiency",
"EntryManagedMetadataHTMLFormatScores",
"EntryManagedMetadataQueryEaseCustomized",
+ "EntryManagedMetadataResponseGroundedness",
"EntryManagedMetadataResponseHelpfulness",
"EntryManagedMetadataTrustworthiness",
]
@@ -81,6 +82,22 @@ class EntryManagedMetadataQueryEaseCustomized(BaseModel):
scores: Optional[List[float]] = None
+class EntryManagedMetadataResponseGroundedness(BaseModel):
+ average: Optional[float] = None
+ """The average of all scores."""
+
+ latest: Optional[float] = None
+ """The most recent score."""
+
+ max: Optional[float] = None
+ """The maximum score."""
+
+ min: Optional[float] = None
+ """The minimum score."""
+
+ scores: Optional[List[float]] = None
+
+
class EntryManagedMetadataResponseHelpfulness(BaseModel):
average: Optional[float] = None
"""The average of all scores."""
@@ -146,6 +163,9 @@ class EntryManagedMetadata(BaseModel):
query_ease_customized: Optional[EntryManagedMetadataQueryEaseCustomized] = None
"""Holds a list of scores and computes aggregate statistics."""
+ response_groundedness: Optional[EntryManagedMetadataResponseGroundedness] = None
+ """Holds a list of scores and computes aggregate statistics."""
+
response_helpfulness: Optional[EntryManagedMetadataResponseHelpfulness] = None
"""Holds a list of scores and computes aggregate statistics."""
From 7bbf57ae5327ddd85e6729997a4f85b427758258 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 21 May 2025 22:16:41 +0000
Subject: [PATCH 05/12] feat(api): api update
---
.stats.yml | 2 +-
tests/api_resources/test_projects.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.stats.yml b/.stats.yml
index aac346a..374e672 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
configured_endpoints: 44
-openapi_spec_hash: f25ca671adcc0b224451c721048d9220
+openapi_spec_hash: 67d5aeebff72f48ee4730227ca0b47c2
config_hash: 659f65b6ccf5612986f920f7f9abbcb5
diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py
index 19e41a0..5c29fdd 100644
--- a/tests/api_resources/test_projects.py
+++ b/tests/api_resources/test_projects.py
@@ -204,7 +204,7 @@ def test_method_list(self, client: Codex) -> None:
def test_method_list_with_all_params(self, client: Codex) -> None:
project = client.projects.list(
include_entry_counts=True,
- limit=0,
+ limit=1,
offset=0,
order="asc",
organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
@@ -699,7 +699,7 @@ async def test_method_list(self, async_client: AsyncCodex) -> None:
async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None:
project = await async_client.projects.list(
include_entry_counts=True,
- limit=0,
+ limit=1,
offset=0,
order="asc",
organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
From 428e5001b6b5576f5383c0f2ffd3ad5fe085128a Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 22 May 2025 02:29:17 +0000
Subject: [PATCH 06/12] chore(docs): grammar improvements
---
SECURITY.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/SECURITY.md b/SECURITY.md
index 9fc6ee2..0780828 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -16,11 +16,11 @@ before making any information public.
## Reporting Non-SDK Related Security Issues
If you encounter security issues that are not directly related to SDKs but pertain to the services
-or products provided by Codex please follow the respective company's security reporting guidelines.
+or products provided by Codex, please follow the respective company's security reporting guidelines.
### Codex Terms and Policies
-Please contact team@cleanlab.ai for any questions or concerns regarding security of our services.
+Please contact team@cleanlab.ai for any questions or concerns regarding the security of our services.
---
From 3a5293161e7313d7c18ec61be1b8e7ee56bad8c9 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 3 Jun 2025 21:53:29 +0000
Subject: [PATCH 07/12] feat(api): api update
---
.stats.yml | 2 +-
src/codex/types/project_create_params.py | 285 ++++++++++++++++++-
src/codex/types/project_list_response.py | 285 ++++++++++++++++++-
src/codex/types/project_retrieve_response.py | 284 +++++++++++++++++-
src/codex/types/project_return_schema.py | 284 +++++++++++++++++-
src/codex/types/project_update_params.py | 285 ++++++++++++++++++-
tests/api_resources/test_projects.py | 264 +++++++++++++++++
7 files changed, 1676 insertions(+), 13 deletions(-)
diff --git a/.stats.yml b/.stats.yml
index 374e672..e80f0e1 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
configured_endpoints: 44
-openapi_spec_hash: 67d5aeebff72f48ee4730227ca0b47c2
+openapi_spec_hash: 0f1841fad65926e7ddfb22dd7a642b46
config_hash: 659f65b6ccf5612986f920f7f9abbcb5
diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py
index ecdd194..75892e0 100644
--- a/src/codex/types/project_create_params.py
+++ b/src/codex/types/project_create_params.py
@@ -2,10 +2,22 @@
from __future__ import annotations
-from typing import Optional
-from typing_extensions import Required, TypedDict
+from typing import Dict, Optional
+from typing_extensions import Literal, Required, TypedDict
-__all__ = ["ProjectCreateParams", "Config"]
+__all__ = [
+ "ProjectCreateParams",
+ "Config",
+ "ConfigEvalConfig",
+ "ConfigEvalConfigCustomEvals",
+ "ConfigEvalConfigCustomEvalsEvals",
+ "ConfigEvalConfigDefaultEvals",
+ "ConfigEvalConfigDefaultEvalsContextSufficiency",
+ "ConfigEvalConfigDefaultEvalsQueryEase",
+ "ConfigEvalConfigDefaultEvalsResponseGroundedness",
+ "ConfigEvalConfigDefaultEvalsResponseHelpfulness",
+ "ConfigEvalConfigDefaultEvalsTrustworthiness",
+]
class ProjectCreateParams(TypedDict, total=False):
@@ -18,9 +30,276 @@ class ProjectCreateParams(TypedDict, total=False):
description: Optional[str]
+class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):
+ criteria: Required[str]
+ """
+ The evaluation criteria text that describes what aspect is being evaluated and
+ how
+ """
+
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ context_identifier: Optional[str]
+ """
+ The exact string used in your evaluation criteria to reference the retrieved
+ context.
+ """
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ query_identifier: Optional[str]
+ """
+ The exact string used in your evaluation criteria to reference the user's query.
+ """
+
+ response_identifier: Optional[str]
+ """
+ The exact string used in your evaluation criteria to reference the RAG/LLM
+ response.
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigCustomEvals(TypedDict, total=False):
+ evals: Dict[str, ConfigEvalConfigCustomEvalsEvals]
+
+
+class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvals(TypedDict, total=False):
+ context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ query_ease: ConfigEvalConfigDefaultEvalsQueryEase
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+
+class ConfigEvalConfig(TypedDict, total=False):
+ custom_evals: ConfigEvalConfigCustomEvals
+ """Configuration for custom evaluation metrics."""
+
+ default_evals: ConfigEvalConfigDefaultEvals
+ """Configuration for default evaluation metrics."""
+
+
class Config(TypedDict, total=False):
clustering_use_llm_matching: bool
+ eval_config: ConfigEvalConfig
+ """Configuration for project-specific evaluation metrics"""
+
llm_matching_model: str
llm_matching_quality_preset: str
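Usage note (outside the diff): eval_config is passed inside the existing config argument on project create/update. A hedged sketch under the TypedDicts above; the call shape follows the test fixture later in this patch, while the name and organization_id arguments and the concrete values are illustrative assumptions.

project = client.projects.create(
    name="My project",  # assumed required create field, not shown in this hunk
    organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",  # illustrative value
    config={
        "eval_config": {
            "default_evals": {
                "trustworthiness": {
                    "eval_key": "trustworthiness",  # Required in the TypedDict
                    "name": "Trustworthiness",      # Required in the TypedDict
                    "threshold": 0.7,
                    "threshold_direction": "below",  # fail when the score drops below 0.7
                    "should_escalate": True,
                },
            },
            "custom_evals": {
                "evals": {
                    "politeness": {
                        "criteria": "Rate whether the response is polite to the user.",
                        "eval_key": "politeness",
                        "name": "Politeness",
                        "threshold": 0.5,
                        "threshold_direction": "below",
                    },
                },
            },
        },
    },
)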
diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py
index 2b4fec4..59d3bf8 100644
--- a/src/codex/types/project_list_response.py
+++ b/src/codex/types/project_list_response.py
@@ -1,16 +1,297 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List, Optional
+from typing import Dict, List, Optional
from datetime import datetime
+from typing_extensions import Literal
from .._models import BaseModel
-__all__ = ["ProjectListResponse", "Project", "ProjectConfig"]
+__all__ = [
+ "ProjectListResponse",
+ "Project",
+ "ProjectConfig",
+ "ProjectConfigEvalConfig",
+ "ProjectConfigEvalConfigCustomEvals",
+ "ProjectConfigEvalConfigCustomEvalsEvals",
+ "ProjectConfigEvalConfigDefaultEvals",
+ "ProjectConfigEvalConfigDefaultEvalsContextSufficiency",
+ "ProjectConfigEvalConfigDefaultEvalsQueryEase",
+ "ProjectConfigEvalConfigDefaultEvalsResponseGroundedness",
+ "ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness",
+ "ProjectConfigEvalConfigDefaultEvalsTrustworthiness",
+]
+
+
+class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel):
+ criteria: str
+ """
+ The evaluation criteria text that describes what aspect is being evaluated and
+ how
+ """
+
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ context_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the retrieved
+ context.
+ """
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ query_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the user's query.
+ """
+
+ response_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the RAG/LLM
+ response.
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ProjectConfigEvalConfigCustomEvals(BaseModel):
+ evals: Optional[Dict[str, ProjectConfigEvalConfigCustomEvalsEvals]] = None
+
+
+class ProjectConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ProjectConfigEvalConfigDefaultEvalsQueryEase(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ProjectConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ProjectConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ProjectConfigEvalConfigDefaultEvals(BaseModel):
+ context_sufficiency: Optional[ProjectConfigEvalConfigDefaultEvalsContextSufficiency] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ query_ease: Optional[ProjectConfigEvalConfigDefaultEvalsQueryEase] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_groundedness: Optional[ProjectConfigEvalConfigDefaultEvalsResponseGroundedness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_helpfulness: Optional[ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ trustworthiness: Optional[ProjectConfigEvalConfigDefaultEvalsTrustworthiness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+
+class ProjectConfigEvalConfig(BaseModel):
+ custom_evals: Optional[ProjectConfigEvalConfigCustomEvals] = None
+ """Configuration for custom evaluation metrics."""
+
+ default_evals: Optional[ProjectConfigEvalConfigDefaultEvals] = None
+ """Configuration for default evaluation metrics."""
class ProjectConfig(BaseModel):
clustering_use_llm_matching: Optional[bool] = None
+ eval_config: Optional[ProjectConfigEvalConfig] = None
+ """Configuration for project-specific evaluation metrics"""
+
llm_matching_model: Optional[str] = None
llm_matching_quality_preset: Optional[str] = None
diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py
index 62209d3..a631f0c 100644
--- a/src/codex/types/project_retrieve_response.py
+++ b/src/codex/types/project_retrieve_response.py
@@ -1,16 +1,296 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Optional
+from typing import Dict, Optional
from datetime import datetime
+from typing_extensions import Literal
from .._models import BaseModel
-__all__ = ["ProjectRetrieveResponse", "Config"]
+__all__ = [
+ "ProjectRetrieveResponse",
+ "Config",
+ "ConfigEvalConfig",
+ "ConfigEvalConfigCustomEvals",
+ "ConfigEvalConfigCustomEvalsEvals",
+ "ConfigEvalConfigDefaultEvals",
+ "ConfigEvalConfigDefaultEvalsContextSufficiency",
+ "ConfigEvalConfigDefaultEvalsQueryEase",
+ "ConfigEvalConfigDefaultEvalsResponseGroundedness",
+ "ConfigEvalConfigDefaultEvalsResponseHelpfulness",
+ "ConfigEvalConfigDefaultEvalsTrustworthiness",
+]
+
+
+class ConfigEvalConfigCustomEvalsEvals(BaseModel):
+ criteria: str
+ """
+ The evaluation criteria text that describes what aspect is being evaluated and
+ how
+ """
+
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ context_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the retrieved
+ context.
+ """
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ query_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the user's query.
+ """
+
+ response_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the RAG/LLM
+ response.
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigCustomEvals(BaseModel):
+ evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None
+
+
+class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvals(BaseModel):
+ context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ query_ease: Optional[ConfigEvalConfigDefaultEvalsQueryEase] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_groundedness: Optional[ConfigEvalConfigDefaultEvalsResponseGroundedness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_helpfulness: Optional[ConfigEvalConfigDefaultEvalsResponseHelpfulness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ trustworthiness: Optional[ConfigEvalConfigDefaultEvalsTrustworthiness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+
+class ConfigEvalConfig(BaseModel):
+ custom_evals: Optional[ConfigEvalConfigCustomEvals] = None
+ """Configuration for custom evaluation metrics."""
+
+ default_evals: Optional[ConfigEvalConfigDefaultEvals] = None
+ """Configuration for default evaluation metrics."""
class Config(BaseModel):
clustering_use_llm_matching: Optional[bool] = None
+ eval_config: Optional[ConfigEvalConfig] = None
+ """Configuration for project-specific evaluation metrics"""
+
llm_matching_model: Optional[str] = None
llm_matching_quality_preset: Optional[str] = None
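Usage note (outside the diff): the same nesting is exposed on the read side. A hedged sketch of inspecting thresholds on a retrieved project; only Config.eval_config and the nested field names come from the response models above, while the retrieve call and the top-level config attribute are assumptions.

retrieved = client.projects.retrieve("project_id")  # placeholder ID
cfg = retrieved.config.eval_config if retrieved.config else None  # attribute access assumed
if cfg and cfg.default_evals and cfg.default_evals.trustworthiness:
    trust = cfg.default_evals.trustworthiness
    print(trust.eval_key, trust.threshold, trust.threshold_direction)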
diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py
index 51a6c1a..7da2e61 100644
--- a/src/codex/types/project_return_schema.py
+++ b/src/codex/types/project_return_schema.py
@@ -1,16 +1,296 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Optional
+from typing import Dict, Optional
from datetime import datetime
+from typing_extensions import Literal
from .._models import BaseModel
-__all__ = ["ProjectReturnSchema", "Config"]
+__all__ = [
+ "ProjectReturnSchema",
+ "Config",
+ "ConfigEvalConfig",
+ "ConfigEvalConfigCustomEvals",
+ "ConfigEvalConfigCustomEvalsEvals",
+ "ConfigEvalConfigDefaultEvals",
+ "ConfigEvalConfigDefaultEvalsContextSufficiency",
+ "ConfigEvalConfigDefaultEvalsQueryEase",
+ "ConfigEvalConfigDefaultEvalsResponseGroundedness",
+ "ConfigEvalConfigDefaultEvalsResponseHelpfulness",
+ "ConfigEvalConfigDefaultEvalsTrustworthiness",
+]
+
+
+class ConfigEvalConfigCustomEvalsEvals(BaseModel):
+ criteria: str
+ """
+ The evaluation criteria text that describes what aspect is being evaluated and
+ how
+ """
+
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ context_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the retrieved
+ context.
+ """
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ query_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the user's query.
+ """
+
+ response_identifier: Optional[str] = None
+ """
+ The exact string used in your evaluation criteria to reference the RAG/LLM
+ response.
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigCustomEvals(BaseModel):
+ evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None
+
+
+class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel):
+ eval_key: str
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: str
+ """Display name/label for the evaluation metric"""
+
+ enabled: Optional[bool] = None
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int] = None
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: Optional[bool] = None
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: Optional[float] = None
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Optional[Literal["above", "below"]] = None
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvals(BaseModel):
+ context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ query_ease: Optional[ConfigEvalConfigDefaultEvalsQueryEase] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_groundedness: Optional[ConfigEvalConfigDefaultEvalsResponseGroundedness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_helpfulness: Optional[ConfigEvalConfigDefaultEvalsResponseHelpfulness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ trustworthiness: Optional[ConfigEvalConfigDefaultEvalsTrustworthiness] = None
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+
+class ConfigEvalConfig(BaseModel):
+ custom_evals: Optional[ConfigEvalConfigCustomEvals] = None
+ """Configuration for custom evaluation metrics."""
+
+ default_evals: Optional[ConfigEvalConfigDefaultEvals] = None
+ """Configuration for default evaluation metrics."""
class Config(BaseModel):
clustering_use_llm_matching: Optional[bool] = None
+ eval_config: Optional[ConfigEvalConfig] = None
+ """Configuration for project-specific evaluation metrics"""
+
llm_matching_model: Optional[str] = None
llm_matching_quality_preset: Optional[str] = None
diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py
index 0a5aa54..d58dd59 100644
--- a/src/codex/types/project_update_params.py
+++ b/src/codex/types/project_update_params.py
@@ -2,10 +2,22 @@
from __future__ import annotations
-from typing import Optional
-from typing_extensions import Required, TypedDict
+from typing import Dict, Optional
+from typing_extensions import Literal, Required, TypedDict
-__all__ = ["ProjectUpdateParams", "Config"]
+__all__ = [
+ "ProjectUpdateParams",
+ "Config",
+ "ConfigEvalConfig",
+ "ConfigEvalConfigCustomEvals",
+ "ConfigEvalConfigCustomEvalsEvals",
+ "ConfigEvalConfigDefaultEvals",
+ "ConfigEvalConfigDefaultEvalsContextSufficiency",
+ "ConfigEvalConfigDefaultEvalsQueryEase",
+ "ConfigEvalConfigDefaultEvalsResponseGroundedness",
+ "ConfigEvalConfigDefaultEvalsResponseHelpfulness",
+ "ConfigEvalConfigDefaultEvalsTrustworthiness",
+]
class ProjectUpdateParams(TypedDict, total=False):
@@ -16,9 +28,276 @@ class ProjectUpdateParams(TypedDict, total=False):
description: Optional[str]
+class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):
+ criteria: Required[str]
+ """
+ The evaluation criteria text that describes what aspect is being evaluated and
+ how
+ """
+
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ context_identifier: Optional[str]
+ """
+ The exact string used in your evaluation criteria to reference the retrieved
+ context.
+ """
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ query_identifier: Optional[str]
+ """
+ The exact string used in your evaluation criteria to reference the user's query.
+ """
+
+ response_identifier: Optional[str]
+ """
+ The exact string used in your evaluation criteria to reference the RAG/LLM
+ response.
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigCustomEvals(TypedDict, total=False):
+ evals: Dict[str, ConfigEvalConfigCustomEvalsEvals]
+
+
+class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False):
+ eval_key: Required[str]
+ """
+ Unique key for eval metric - currently maps to the TrustworthyRAG name property
+ and eval_scores dictionary key to check against threshold
+ """
+
+ name: Required[str]
+ """Display name/label for the evaluation metric"""
+
+ enabled: bool
+ """Allows the evaluation to be disabled without removing it"""
+
+ priority: Optional[int]
+ """
+ Priority order for evals (lower number = higher priority) to determine primary
+ eval issue to surface
+ """
+
+ should_escalate: bool
+ """
+ If true, failing this eval means the response is considered bad and can trigger
+ escalation to Codex/SME
+ """
+
+ threshold: float
+ """Threshold value that determines if the evaluation fails"""
+
+ threshold_direction: Literal["above", "below"]
+ """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvals(TypedDict, total=False):
+ context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ query_ease: ConfigEvalConfigDefaultEvalsQueryEase
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+ trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness
+ """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+ The evaluation criteria and identifiers are immutable and system-managed, while
+ other properties like thresholds and priorities can be configured.
+ """
+
+
+class ConfigEvalConfig(TypedDict, total=False):
+ custom_evals: ConfigEvalConfigCustomEvals
+ """Configuration for custom evaluation metrics."""
+
+ default_evals: ConfigEvalConfigDefaultEvals
+ """Configuration for default evaluation metrics."""
+
+
class Config(TypedDict, total=False):
clustering_use_llm_matching: bool
+ eval_config: ConfigEvalConfig
+ """Configuration for project-specific evaluation metrics"""
+
llm_matching_model: str
llm_matching_quality_preset: str
diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py
index 5c29fdd..d5e0e1c 100644
--- a/tests/api_resources/test_projects.py
+++ b/tests/api_resources/test_projects.py
@@ -39,6 +39,72 @@ def test_method_create_with_all_params(self, client: Codex) -> None:
project = client.projects.create(
config={
"clustering_use_llm_matching": True,
+ "eval_config": {
+ "custom_evals": {
+ "evals": {
+ "foo": {
+ "criteria": "criteria",
+ "eval_key": "eval_key",
+ "name": "name",
+ "context_identifier": "context_identifier",
+ "enabled": True,
+ "priority": 0,
+ "query_identifier": "query_identifier",
+ "response_identifier": "response_identifier",
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ }
+ }
+ },
+ "default_evals": {
+ "context_sufficiency": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "query_ease": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_groundedness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_helpfulness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "trustworthiness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ },
+ },
"llm_matching_model": "llm_matching_model",
"llm_matching_quality_preset": "llm_matching_quality_preset",
"lower_llm_match_distance_threshold": 0,
@@ -141,6 +207,72 @@ def test_method_update_with_all_params(self, client: Codex) -> None:
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
config={
"clustering_use_llm_matching": True,
+ "eval_config": {
+ "custom_evals": {
+ "evals": {
+ "foo": {
+ "criteria": "criteria",
+ "eval_key": "eval_key",
+ "name": "name",
+ "context_identifier": "context_identifier",
+ "enabled": True,
+ "priority": 0,
+ "query_identifier": "query_identifier",
+ "response_identifier": "response_identifier",
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ }
+ }
+ },
+ "default_evals": {
+ "context_sufficiency": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "query_ease": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_groundedness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_helpfulness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "trustworthiness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ },
+ },
"llm_matching_model": "llm_matching_model",
"llm_matching_quality_preset": "llm_matching_quality_preset",
"lower_llm_match_distance_threshold": 0,
@@ -534,6 +666,72 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) ->
project = await async_client.projects.create(
config={
"clustering_use_llm_matching": True,
+ "eval_config": {
+ "custom_evals": {
+ "evals": {
+ "foo": {
+ "criteria": "criteria",
+ "eval_key": "eval_key",
+ "name": "name",
+ "context_identifier": "context_identifier",
+ "enabled": True,
+ "priority": 0,
+ "query_identifier": "query_identifier",
+ "response_identifier": "response_identifier",
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ }
+ }
+ },
+ "default_evals": {
+ "context_sufficiency": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "query_ease": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_groundedness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_helpfulness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "trustworthiness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ },
+ },
"llm_matching_model": "llm_matching_model",
"llm_matching_quality_preset": "llm_matching_quality_preset",
"lower_llm_match_distance_threshold": 0,
@@ -636,6 +834,72 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) ->
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
config={
"clustering_use_llm_matching": True,
+ "eval_config": {
+ "custom_evals": {
+ "evals": {
+ "foo": {
+ "criteria": "criteria",
+ "eval_key": "eval_key",
+ "name": "name",
+ "context_identifier": "context_identifier",
+ "enabled": True,
+ "priority": 0,
+ "query_identifier": "query_identifier",
+ "response_identifier": "response_identifier",
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ }
+ }
+ },
+ "default_evals": {
+ "context_sufficiency": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "query_ease": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_groundedness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "response_helpfulness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ "trustworthiness": {
+ "eval_key": "eval_key",
+ "name": "name",
+ "enabled": True,
+ "priority": 0,
+ "should_escalate": True,
+ "threshold": 0,
+ "threshold_direction": "above",
+ },
+ },
+ },
"llm_matching_model": "llm_matching_model",
"llm_matching_quality_preset": "llm_matching_quality_preset",
"lower_llm_match_distance_threshold": 0,
From 7ba3858c1c968c093c676478a1c7e5e13b92c12a Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 28 May 2025 03:26:20 +0000
Subject: [PATCH 08/12] fix(docs/api): remove references to nonexistent types
---
api.md | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/api.md b/api.md
index f3a2ea1..8aac76f 100644
--- a/api.md
+++ b/api.md
@@ -139,8 +139,6 @@ from codex.types import (
ProjectReturnSchema,
ProjectRetrieveResponse,
ProjectListResponse,
- ProjectExportResponse,
- ProjectIncrementQueriesResponse,
ProjectRetrieveAnalyticsResponse,
ProjectValidateResponse,
)
@@ -153,8 +151,8 @@ Methods:
- client.projects.update(project_id, \*\*params) -> ProjectReturnSchema
- client.projects.list(\*\*params) -> ProjectListResponse
- client.projects.delete(project_id) -> None
-- client.projects.export(project_id) -> ProjectExportResponse
-- client.projects.increment_queries(project_id, \*\*params) -> ProjectIncrementQueriesResponse
+- client.projects.export(project_id) -> object
+- client.projects.increment_queries(project_id, \*\*params) -> object
- client.projects.retrieve_analytics(project_id, \*\*params) -> ProjectRetrieveAnalyticsResponse
- client.projects.validate(project_id, \*\*params) -> ProjectValidateResponse
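With the nonexistent response models dropped from `api.md`, both methods are documented as returning a plain `object`. A small hedged sketch of what calling code can rely on after this patch, reusing the `client` from the earlier sketch and the placeholder UUID used throughout the tests:

```python
# Sketch only: neither call yields a dedicated response model anymore, so
# treat the results as untyped data (e.g. a dict parsed from JSON).
project_id = "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"  # placeholder UUID

export_data = client.projects.export(project_id)
counter = client.projects.increment_queries(project_id, count=1)

print(type(export_data), type(counter))  # plain objects, no typed models
```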
From 57f522fbd04637849146636a83976fe696160a97 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 29 May 2025 03:09:34 +0000
Subject: [PATCH 09/12] chore(api): mark some methods as deprecated
---
src/codex/resources/projects/entries.py | 27 ++-
src/codex/resources/projects/projects.py | 27 ++-
tests/api_resources/projects/test_entries.py | 164 ++++++++++---------
tests/api_resources/test_projects.py | 100 ++++++-----
4 files changed, 186 insertions(+), 132 deletions(-)
diff --git a/src/codex/resources/projects/entries.py b/src/codex/resources/projects/entries.py
index 346dd35..c6b43a4 100644
--- a/src/codex/resources/projects/entries.py
+++ b/src/codex/resources/projects/entries.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+import typing_extensions
from typing import Iterable, Optional
import httpx
@@ -312,6 +313,7 @@ def publish_draft_answer(
cast_to=Entry,
)
+ @typing_extensions.deprecated("deprecated")
def query(
self,
project_id: str,
@@ -707,6 +709,7 @@ async def publish_draft_answer(
cast_to=Entry,
)
+ @typing_extensions.deprecated("deprecated")
async def query(
self,
project_id: str,
@@ -839,8 +842,10 @@ def __init__(self, entries: EntriesResource) -> None:
self.publish_draft_answer = to_raw_response_wrapper(
entries.publish_draft_answer,
)
- self.query = to_raw_response_wrapper(
- entries.query,
+ self.query = ( # pyright: ignore[reportDeprecated]
+ to_raw_response_wrapper(
+ entries.query # pyright: ignore[reportDeprecated],
+ )
)
self.unpublish_answer = to_raw_response_wrapper(
entries.unpublish_answer,
@@ -869,8 +874,10 @@ def __init__(self, entries: AsyncEntriesResource) -> None:
self.publish_draft_answer = async_to_raw_response_wrapper(
entries.publish_draft_answer,
)
- self.query = async_to_raw_response_wrapper(
- entries.query,
+ self.query = ( # pyright: ignore[reportDeprecated]
+ async_to_raw_response_wrapper(
+ entries.query # pyright: ignore[reportDeprecated],
+ )
)
self.unpublish_answer = async_to_raw_response_wrapper(
entries.unpublish_answer,
@@ -899,8 +906,10 @@ def __init__(self, entries: EntriesResource) -> None:
self.publish_draft_answer = to_streamed_response_wrapper(
entries.publish_draft_answer,
)
- self.query = to_streamed_response_wrapper(
- entries.query,
+ self.query = ( # pyright: ignore[reportDeprecated]
+ to_streamed_response_wrapper(
+ entries.query # pyright: ignore[reportDeprecated],
+ )
)
self.unpublish_answer = to_streamed_response_wrapper(
entries.unpublish_answer,
@@ -929,8 +938,10 @@ def __init__(self, entries: AsyncEntriesResource) -> None:
self.publish_draft_answer = async_to_streamed_response_wrapper(
entries.publish_draft_answer,
)
- self.query = async_to_streamed_response_wrapper(
- entries.query,
+ self.query = ( # pyright: ignore[reportDeprecated]
+ async_to_streamed_response_wrapper(
+ entries.query # pyright: ignore[reportDeprecated],
+ )
)
self.unpublish_answer = async_to_streamed_response_wrapper(
entries.unpublish_answer,
diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py
index 6195d1a..cf8c0f8 100644
--- a/src/codex/resources/projects/projects.py
+++ b/src/codex/resources/projects/projects.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+import typing_extensions
from typing import Dict, List, Optional
from typing_extensions import Literal
@@ -330,6 +331,7 @@ def export(
cast_to=object,
)
+ @typing_extensions.deprecated("deprecated")
def increment_queries(
self,
project_id: str,
@@ -872,6 +874,7 @@ async def export(
cast_to=object,
)
+ @typing_extensions.deprecated("deprecated")
async def increment_queries(
self,
project_id: str,
@@ -1167,8 +1170,10 @@ def __init__(self, projects: ProjectsResource) -> None:
self.export = to_raw_response_wrapper(
projects.export,
)
- self.increment_queries = to_raw_response_wrapper(
- projects.increment_queries,
+ self.increment_queries = ( # pyright: ignore[reportDeprecated]
+ to_raw_response_wrapper(
+ projects.increment_queries # pyright: ignore[reportDeprecated],
+ )
)
self.retrieve_analytics = to_raw_response_wrapper(
projects.retrieve_analytics,
@@ -1212,8 +1217,10 @@ def __init__(self, projects: AsyncProjectsResource) -> None:
self.export = async_to_raw_response_wrapper(
projects.export,
)
- self.increment_queries = async_to_raw_response_wrapper(
- projects.increment_queries,
+ self.increment_queries = ( # pyright: ignore[reportDeprecated]
+ async_to_raw_response_wrapper(
+ projects.increment_queries # pyright: ignore[reportDeprecated],
+ )
)
self.retrieve_analytics = async_to_raw_response_wrapper(
projects.retrieve_analytics,
@@ -1257,8 +1264,10 @@ def __init__(self, projects: ProjectsResource) -> None:
self.export = to_streamed_response_wrapper(
projects.export,
)
- self.increment_queries = to_streamed_response_wrapper(
- projects.increment_queries,
+ self.increment_queries = ( # pyright: ignore[reportDeprecated]
+ to_streamed_response_wrapper(
+ projects.increment_queries # pyright: ignore[reportDeprecated],
+ )
)
self.retrieve_analytics = to_streamed_response_wrapper(
projects.retrieve_analytics,
@@ -1302,8 +1311,10 @@ def __init__(self, projects: AsyncProjectsResource) -> None:
self.export = async_to_streamed_response_wrapper(
projects.export,
)
- self.increment_queries = async_to_streamed_response_wrapper(
- projects.increment_queries,
+ self.increment_queries = ( # pyright: ignore[reportDeprecated]
+ async_to_streamed_response_wrapper(
+ projects.increment_queries # pyright: ignore[reportDeprecated],
+ )
)
self.retrieve_analytics = async_to_streamed_response_wrapper(
projects.retrieve_analytics,
diff --git a/tests/api_resources/projects/test_entries.py b/tests/api_resources/projects/test_entries.py
index 73a45ad..32b0452 100644
--- a/tests/api_resources/projects/test_entries.py
+++ b/tests/api_resources/projects/test_entries.py
@@ -15,6 +15,8 @@
EntryNotifySmeResponse,
)
+# pyright: reportDeprecated=false
+
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -382,40 +384,45 @@ def test_path_params_publish_draft_answer(self, client: Codex) -> None:
@pytest.mark.skip()
@parametrize
def test_method_query(self, client: Codex) -> None:
- entry = client.projects.entries.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- )
+ with pytest.warns(DeprecationWarning):
+ entry = client.projects.entries.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ )
+
assert_matches_type(EntryQueryResponse, entry, path=["response"])
@pytest.mark.skip()
@parametrize
def test_method_query_with_all_params(self, client: Codex) -> None:
- entry = client.projects.entries.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- use_llm_matching=True,
- client_metadata={},
- query_metadata={
- "context": "string",
- "custom_metadata": {},
- "eval_scores": {"foo": 0},
- "evaluated_response": "evaluated_response",
- },
- x_client_library_version="x-client-library-version",
- x_integration_type="x-integration-type",
- x_source="x-source",
- x_stainless_package_version="x-stainless-package-version",
- )
+ with pytest.warns(DeprecationWarning):
+ entry = client.projects.entries.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ use_llm_matching=True,
+ client_metadata={},
+ query_metadata={
+ "context": "string",
+ "custom_metadata": {},
+ "eval_scores": {"foo": 0},
+ "evaluated_response": "evaluated_response",
+ },
+ x_client_library_version="x-client-library-version",
+ x_integration_type="x-integration-type",
+ x_source="x-source",
+ x_stainless_package_version="x-stainless-package-version",
+ )
+
assert_matches_type(EntryQueryResponse, entry, path=["response"])
@pytest.mark.skip()
@parametrize
def test_raw_response_query(self, client: Codex) -> None:
- response = client.projects.entries.with_raw_response.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- )
+ with pytest.warns(DeprecationWarning):
+ response = client.projects.entries.with_raw_response.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ )
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -425,26 +432,28 @@ def test_raw_response_query(self, client: Codex) -> None:
@pytest.mark.skip()
@parametrize
def test_streaming_response_query(self, client: Codex) -> None:
- with client.projects.entries.with_streaming_response.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ with pytest.warns(DeprecationWarning):
+ with client.projects.entries.with_streaming_response.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- entry = response.parse()
- assert_matches_type(EntryQueryResponse, entry, path=["response"])
+ entry = response.parse()
+ assert_matches_type(EntryQueryResponse, entry, path=["response"])
assert cast(Any, response.is_closed) is True
@pytest.mark.skip()
@parametrize
def test_path_params_query(self, client: Codex) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
- client.projects.entries.with_raw_response.query(
- project_id="",
- question="question",
- )
+ with pytest.warns(DeprecationWarning):
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
+ client.projects.entries.with_raw_response.query(
+ project_id="",
+ question="question",
+ )
@pytest.mark.skip()
@parametrize
@@ -863,40 +872,45 @@ async def test_path_params_publish_draft_answer(self, async_client: AsyncCodex)
@pytest.mark.skip()
@parametrize
async def test_method_query(self, async_client: AsyncCodex) -> None:
- entry = await async_client.projects.entries.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- )
+ with pytest.warns(DeprecationWarning):
+ entry = await async_client.projects.entries.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ )
+
assert_matches_type(EntryQueryResponse, entry, path=["response"])
@pytest.mark.skip()
@parametrize
async def test_method_query_with_all_params(self, async_client: AsyncCodex) -> None:
- entry = await async_client.projects.entries.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- use_llm_matching=True,
- client_metadata={},
- query_metadata={
- "context": "string",
- "custom_metadata": {},
- "eval_scores": {"foo": 0},
- "evaluated_response": "evaluated_response",
- },
- x_client_library_version="x-client-library-version",
- x_integration_type="x-integration-type",
- x_source="x-source",
- x_stainless_package_version="x-stainless-package-version",
- )
+ with pytest.warns(DeprecationWarning):
+ entry = await async_client.projects.entries.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ use_llm_matching=True,
+ client_metadata={},
+ query_metadata={
+ "context": "string",
+ "custom_metadata": {},
+ "eval_scores": {"foo": 0},
+ "evaluated_response": "evaluated_response",
+ },
+ x_client_library_version="x-client-library-version",
+ x_integration_type="x-integration-type",
+ x_source="x-source",
+ x_stainless_package_version="x-stainless-package-version",
+ )
+
assert_matches_type(EntryQueryResponse, entry, path=["response"])
@pytest.mark.skip()
@parametrize
async def test_raw_response_query(self, async_client: AsyncCodex) -> None:
- response = await async_client.projects.entries.with_raw_response.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- )
+ with pytest.warns(DeprecationWarning):
+ response = await async_client.projects.entries.with_raw_response.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ )
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -906,26 +920,28 @@ async def test_raw_response_query(self, async_client: AsyncCodex) -> None:
@pytest.mark.skip()
@parametrize
async def test_streaming_response_query(self, async_client: AsyncCodex) -> None:
- async with async_client.projects.entries.with_streaming_response.query(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- question="question",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ with pytest.warns(DeprecationWarning):
+ async with async_client.projects.entries.with_streaming_response.query(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ question="question",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- entry = await response.parse()
- assert_matches_type(EntryQueryResponse, entry, path=["response"])
+ entry = await response.parse()
+ assert_matches_type(EntryQueryResponse, entry, path=["response"])
assert cast(Any, response.is_closed) is True
@pytest.mark.skip()
@parametrize
async def test_path_params_query(self, async_client: AsyncCodex) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
- await async_client.projects.entries.with_raw_response.query(
- project_id="",
- question="question",
- )
+ with pytest.warns(DeprecationWarning):
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
+ await async_client.projects.entries.with_raw_response.query(
+ project_id="",
+ question="question",
+ )
@pytest.mark.skip()
@parametrize
diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py
index d5e0e1c..f7c3f01 100644
--- a/tests/api_resources/test_projects.py
+++ b/tests/api_resources/test_projects.py
@@ -17,6 +17,8 @@
)
from tests.utils import assert_matches_type
+# pyright: reportDeprecated=false
+
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -454,26 +456,31 @@ def test_path_params_export(self, client: Codex) -> None:
@pytest.mark.skip()
@parametrize
def test_method_increment_queries(self, client: Codex) -> None:
- project = client.projects.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- )
+ with pytest.warns(DeprecationWarning):
+ project = client.projects.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ )
+
assert_matches_type(object, project, path=["response"])
@pytest.mark.skip()
@parametrize
def test_method_increment_queries_with_all_params(self, client: Codex) -> None:
- project = client.projects.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- count=0,
- )
+ with pytest.warns(DeprecationWarning):
+ project = client.projects.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ count=0,
+ )
+
assert_matches_type(object, project, path=["response"])
@pytest.mark.skip()
@parametrize
def test_raw_response_increment_queries(self, client: Codex) -> None:
- response = client.projects.with_raw_response.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- )
+ with pytest.warns(DeprecationWarning):
+ response = client.projects.with_raw_response.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ )
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -483,24 +490,26 @@ def test_raw_response_increment_queries(self, client: Codex) -> None:
@pytest.mark.skip()
@parametrize
def test_streaming_response_increment_queries(self, client: Codex) -> None:
- with client.projects.with_streaming_response.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ with pytest.warns(DeprecationWarning):
+ with client.projects.with_streaming_response.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- project = response.parse()
- assert_matches_type(object, project, path=["response"])
+ project = response.parse()
+ assert_matches_type(object, project, path=["response"])
assert cast(Any, response.is_closed) is True
@pytest.mark.skip()
@parametrize
def test_path_params_increment_queries(self, client: Codex) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
- client.projects.with_raw_response.increment_queries(
- project_id="",
- )
+ with pytest.warns(DeprecationWarning):
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
+ client.projects.with_raw_response.increment_queries(
+ project_id="",
+ )
@pytest.mark.skip()
@parametrize
@@ -1081,26 +1090,31 @@ async def test_path_params_export(self, async_client: AsyncCodex) -> None:
@pytest.mark.skip()
@parametrize
async def test_method_increment_queries(self, async_client: AsyncCodex) -> None:
- project = await async_client.projects.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- )
+ with pytest.warns(DeprecationWarning):
+ project = await async_client.projects.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ )
+
assert_matches_type(object, project, path=["response"])
@pytest.mark.skip()
@parametrize
async def test_method_increment_queries_with_all_params(self, async_client: AsyncCodex) -> None:
- project = await async_client.projects.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- count=0,
- )
+ with pytest.warns(DeprecationWarning):
+ project = await async_client.projects.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ count=0,
+ )
+
assert_matches_type(object, project, path=["response"])
@pytest.mark.skip()
@parametrize
async def test_raw_response_increment_queries(self, async_client: AsyncCodex) -> None:
- response = await async_client.projects.with_raw_response.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- )
+ with pytest.warns(DeprecationWarning):
+ response = await async_client.projects.with_raw_response.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ )
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -1110,24 +1124,26 @@ async def test_raw_response_increment_queries(self, async_client: AsyncCodex) ->
@pytest.mark.skip()
@parametrize
async def test_streaming_response_increment_queries(self, async_client: AsyncCodex) -> None:
- async with async_client.projects.with_streaming_response.increment_queries(
- project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ with pytest.warns(DeprecationWarning):
+ async with async_client.projects.with_streaming_response.increment_queries(
+ project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- project = await response.parse()
- assert_matches_type(object, project, path=["response"])
+ project = await response.parse()
+ assert_matches_type(object, project, path=["response"])
assert cast(Any, response.is_closed) is True
@pytest.mark.skip()
@parametrize
async def test_path_params_increment_queries(self, async_client: AsyncCodex) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
- await async_client.projects.with_raw_response.increment_queries(
- project_id="",
- )
+ with pytest.warns(DeprecationWarning):
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"):
+ await async_client.projects.with_raw_response.increment_queries(
+ project_id="",
+ )
@pytest.mark.skip()
@parametrize
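Because `query` and `increment_queries` are now decorated with `typing_extensions.deprecated`, pyright flags their use and calling them emits a `DeprecationWarning` at runtime, which is what the rewritten tests assert with `pytest.warns`. A hedged sketch of how downstream code might keep calling `increment_queries` without noisy warnings while it migrates off the method (the `client` and project ID are the same placeholders as above):

```python
import warnings

# Sketch only: silence the runtime DeprecationWarning emitted by the
# deprecated method while a migration is in progress.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", DeprecationWarning)
    client.projects.increment_queries(
        project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",  # placeholder UUID
        count=1,
    )
```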
From ef4acf292c728a838aecd6539d278b8c128be68a Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 11 Jun 2025 18:41:44 +0000
Subject: [PATCH 10/12] feat(api): api update
---
.stats.yml | 2 +-
src/codex/types/project_create_params.py | 3 +++
src/codex/types/project_list_response.py | 3 +++
src/codex/types/project_retrieve_response.py | 3 +++
src/codex/types/project_return_schema.py | 3 +++
src/codex/types/project_update_params.py | 3 +++
tests/api_resources/test_projects.py | 4 ++++
7 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/.stats.yml b/.stats.yml
index e80f0e1..ddf7240 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
configured_endpoints: 44
-openapi_spec_hash: 0f1841fad65926e7ddfb22dd7a642b46
+openapi_spec_hash: dfccb5c181396678a22b9c079847889f
config_hash: 659f65b6ccf5612986f920f7f9abbcb5
diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py
index 75892e0..3142755 100644
--- a/src/codex/types/project_create_params.py
+++ b/src/codex/types/project_create_params.py
@@ -55,6 +55,9 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):
enabled: bool
"""Allows the evaluation to be disabled without removing it"""
+ is_default: bool
+ """Whether the eval is a default, built-in eval or a custom eval"""
+
priority: Optional[int]
"""
Priority order for evals (lower number = higher priority) to determine primary
diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py
index 59d3bf8..d480573 100644
--- a/src/codex/types/project_list_response.py
+++ b/src/codex/types/project_list_response.py
@@ -47,6 +47,9 @@ class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel):
enabled: Optional[bool] = None
"""Allows the evaluation to be disabled without removing it"""
+ is_default: Optional[bool] = None
+ """Whether the eval is a default, built-in eval or a custom eval"""
+
priority: Optional[int] = None
"""
Priority order for evals (lower number = higher priority) to determine primary
diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py
index a631f0c..fb62cff 100644
--- a/src/codex/types/project_retrieve_response.py
+++ b/src/codex/types/project_retrieve_response.py
@@ -46,6 +46,9 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel):
enabled: Optional[bool] = None
"""Allows the evaluation to be disabled without removing it"""
+ is_default: Optional[bool] = None
+ """Whether the eval is a default, built-in eval or a custom eval"""
+
priority: Optional[int] = None
"""
Priority order for evals (lower number = higher priority) to determine primary
diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py
index 7da2e61..420ec6e 100644
--- a/src/codex/types/project_return_schema.py
+++ b/src/codex/types/project_return_schema.py
@@ -46,6 +46,9 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel):
enabled: Optional[bool] = None
"""Allows the evaluation to be disabled without removing it"""
+ is_default: Optional[bool] = None
+ """Whether the eval is a default, built-in eval or a custom eval"""
+
priority: Optional[int] = None
"""
Priority order for evals (lower number = higher priority) to determine primary
diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py
index d58dd59..d199955 100644
--- a/src/codex/types/project_update_params.py
+++ b/src/codex/types/project_update_params.py
@@ -53,6 +53,9 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):
enabled: bool
"""Allows the evaluation to be disabled without removing it"""
+ is_default: bool
+ """Whether the eval is a default, built-in eval or a custom eval"""
+
priority: Optional[int]
"""
Priority order for evals (lower number = higher priority) to determine primary
diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py
index f7c3f01..8ba69a1 100644
--- a/tests/api_resources/test_projects.py
+++ b/tests/api_resources/test_projects.py
@@ -50,6 +50,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None:
"name": "name",
"context_identifier": "context_identifier",
"enabled": True,
+ "is_default": True,
"priority": 0,
"query_identifier": "query_identifier",
"response_identifier": "response_identifier",
@@ -218,6 +219,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None:
"name": "name",
"context_identifier": "context_identifier",
"enabled": True,
+ "is_default": True,
"priority": 0,
"query_identifier": "query_identifier",
"response_identifier": "response_identifier",
@@ -684,6 +686,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) ->
"name": "name",
"context_identifier": "context_identifier",
"enabled": True,
+ "is_default": True,
"priority": 0,
"query_identifier": "query_identifier",
"response_identifier": "response_identifier",
@@ -852,6 +855,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) ->
"name": "name",
"context_identifier": "context_identifier",
"enabled": True,
+ "is_default": True,
"priority": 0,
"query_identifier": "query_identifier",
"response_identifier": "response_identifier",
From 8fcb74d6d5ef8db36a78031c9bf59f0840def1d4 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 3 Jun 2025 02:20:36 +0000
Subject: [PATCH 11/12] chore(docs): remove reference to rye shell
---
CONTRIBUTING.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b9fa9a1..548ff4c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -17,8 +17,7 @@ $ rye sync --all-features
You can then run scripts using `rye run python script.py` or by activating the virtual environment:
```sh
-$ rye shell
-# or manually activate - https://docs.python.org/3/library/venv.html#how-venvs-work
+# Activate the virtual environment - https://docs.python.org/3/library/venv.html#how-venvs-work
$ source .venv/bin/activate
# now you can omit the `rye run` prefix
From d9499f6ccb7deac8948dc80342c9bf0f956d8397 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 11 Jun 2025 18:42:01 +0000
Subject: [PATCH 12/12] release: 0.1.0-alpha.21
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 25 +++++++++++++++++++++++++
pyproject.toml | 2 +-
src/codex/_version.py | 2 +-
4 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index fac1407..7c31fce 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.20"
+ ".": "0.1.0-alpha.21"
}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f151d60..fd14df6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,30 @@
# Changelog
+## 0.1.0-alpha.21 (2025-06-11)
+
+Full Changelog: [v0.1.0-alpha.20...v0.1.0-alpha.21](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.20...v0.1.0-alpha.21)
+
+### Features
+
+* **api:** api update ([ef4acf2](https://github.com/cleanlab/codex-python/commit/ef4acf292c728a838aecd6539d278b8c128be68a))
+* **api:** api update ([3a52931](https://github.com/cleanlab/codex-python/commit/3a5293161e7313d7c18ec61be1b8e7ee56bad8c9))
+* **api:** api update ([7bbf57a](https://github.com/cleanlab/codex-python/commit/7bbf57ae5327ddd85e6729997a4f85b427758258))
+* **api:** api update ([40ae04a](https://github.com/cleanlab/codex-python/commit/40ae04a279ba1e2573d17a17e097f71d1347a3d3))
+
+
+### Bug Fixes
+
+* **docs/api:** remove references to nonexistent types ([7ba3858](https://github.com/cleanlab/codex-python/commit/7ba3858c1c968c093c676478a1c7e5e13b92c12a))
+
+
+### Chores
+
+* **api:** mark some methods as deprecated ([57f522f](https://github.com/cleanlab/codex-python/commit/57f522fbd04637849146636a83976fe696160a97))
+* **ci:** fix installation instructions ([3aa9884](https://github.com/cleanlab/codex-python/commit/3aa98843e0f042734eb5b74ea86c8dcca8636954))
+* **docs:** grammar improvements ([428e500](https://github.com/cleanlab/codex-python/commit/428e5001b6b5576f5383c0f2ffd3ad5fe085128a))
+* **docs:** remove reference to rye shell ([8fcb74d](https://github.com/cleanlab/codex-python/commit/8fcb74d6d5ef8db36a78031c9bf59f0840def1d4))
+* **internal:** codegen related update ([18f661d](https://github.com/cleanlab/codex-python/commit/18f661d21b849f15cbe85ce5063ef0dea877d89f))
+
## 0.1.0-alpha.20 (2025-05-15)
Full Changelog: [v0.1.0-alpha.19...v0.1.0-alpha.20](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.19...v0.1.0-alpha.20)
diff --git a/pyproject.toml b/pyproject.toml
index 04d039a..55d73e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "codex-sdk"
-version = "0.1.0-alpha.20"
+version = "0.1.0-alpha.21"
description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead."
dynamic = ["readme"]
license = "MIT"
diff --git a/src/codex/_version.py b/src/codex/_version.py
index 44d6131..3b23c98 100644
--- a/src/codex/_version.py
+++ b/src/codex/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "codex"
-__version__ = "0.1.0-alpha.20" # x-release-please-version
+__version__ = "0.1.0-alpha.21" # x-release-please-version
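After the release patch above, the new version is recorded both in the package metadata and in `src/codex/_version.py`. A quick hedged check that an environment picked up the prerelease; note the distribution is published as `codex-sdk`, while the import package is `codex`.

```python
# Sketch only: verify which prerelease is installed.
from importlib.metadata import version

from codex._version import __version__

print(__version__)           # expected: "0.1.0-alpha.21"
print(version("codex-sdk"))  # distribution version, PEP 440-normalized (e.g. "0.1.0a21")
```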