From d12ca4b11cc6a0f022f2c3b8ab8752a6f600f504 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 15 May 2025 18:16:44 +0000
Subject: [PATCH 01/12] codegen metadata

---
 .stats.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.stats.yml b/.stats.yml
index 12a0365..76c12f5 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
 configured_endpoints: 44
-openapi_spec_hash: 9d81a4b0eca6d3629ba9d5432a65655c
+openapi_spec_hash: 19d3afd940d8ed57b76401ef026e5f47
 config_hash: 659f65b6ccf5612986f920f7f9abbcb5

From 3aa98843e0f042734eb5b74ea86c8dcca8636954 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 16 May 2025 02:39:50 +0000
Subject: [PATCH 02/12] chore(ci): fix installation instructions

---
 scripts/utils/upload-artifact.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
index ebb0478..8f922b5 100755
--- a/scripts/utils/upload-artifact.sh
+++ b/scripts/utils/upload-artifact.sh
@@ -18,7 +18,7 @@ UPLOAD_RESPONSE=$(tar -cz . | curl -v -X PUT \
 
 if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then
   echo -e "\033[32mUploaded build to Stainless storage.\033[0m"
-  echo -e "\033[32mInstallation: npm install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
+  echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
 else
   echo -e "\033[31mFailed to upload artifact.\033[0m"
   exit 1

From 18f661d21b849f15cbe85ce5063ef0dea877d89f Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Sat, 17 May 2025 02:50:02 +0000
Subject: [PATCH 03/12] chore(internal): codegen related update

---
 scripts/utils/upload-artifact.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
index 8f922b5..62d150a 100755
--- a/scripts/utils/upload-artifact.sh
+++ b/scripts/utils/upload-artifact.sh
@@ -18,7 +18,7 @@ UPLOAD_RESPONSE=$(tar -cz .
| curl -v -X PUT \ if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then echo -e "\033[32mUploaded build to Stainless storage.\033[0m" - echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m" + echo -e "\033[32mInstallation: pip install --pre 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m" else echo -e "\033[31mFailed to upload artifact.\033[0m" exit 1 From 40ae04a279ba1e2573d17a17e097f71d1347a3d3 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 18:16:58 +0000 Subject: [PATCH 04/12] feat(api): api update --- .stats.yml | 2 +- .../types/projects/cluster_list_response.py | 20 +++++++++++++++++++ src/codex/types/projects/entry.py | 20 +++++++++++++++++++ .../types/projects/entry_query_response.py | 20 +++++++++++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 76c12f5..aac346a 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: 19d3afd940d8ed57b76401ef026e5f47 +openapi_spec_hash: f25ca671adcc0b224451c721048d9220 config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/src/codex/types/projects/cluster_list_response.py b/src/codex/types/projects/cluster_list_response.py index 2e8b542..1fc8bd5 100644 --- a/src/codex/types/projects/cluster_list_response.py +++ b/src/codex/types/projects/cluster_list_response.py @@ -13,6 +13,7 @@ "ManagedMetadataContextSufficiency", "ManagedMetadataHTMLFormatScores", "ManagedMetadataQueryEaseCustomized", + "ManagedMetadataResponseGroundedness", "ManagedMetadataResponseHelpfulness", "ManagedMetadataTrustworthiness", ] @@ -82,6 +83,22 @@ class ManagedMetadataQueryEaseCustomized(BaseModel): scores: Optional[List[float]] = None +class ManagedMetadataResponseGroundedness(BaseModel): + average: Optional[float] = None + """The average of all scores.""" + + latest: Optional[float] = None + """The most recent score.""" + + max: Optional[float] = None + """The maximum score.""" + + min: Optional[float] = None + """The minimum score.""" + + scores: Optional[List[float]] = None + + class ManagedMetadataResponseHelpfulness(BaseModel): average: Optional[float] = None """The average of all scores.""" @@ -147,6 +164,9 @@ class ManagedMetadata(BaseModel): query_ease_customized: Optional[ManagedMetadataQueryEaseCustomized] = None """Holds a list of scores and computes aggregate statistics.""" + response_groundedness: Optional[ManagedMetadataResponseGroundedness] = None + """Holds a list of scores and computes aggregate statistics.""" + response_helpfulness: Optional[ManagedMetadataResponseHelpfulness] = None """Holds a list of scores and computes aggregate statistics.""" diff --git a/src/codex/types/projects/entry.py b/src/codex/types/projects/entry.py index eb2a221..3f7a86d 100644 --- a/src/codex/types/projects/entry.py +++ b/src/codex/types/projects/entry.py @@ -13,6 +13,7 @@ "ManagedMetadataContextSufficiency", "ManagedMetadataHTMLFormatScores", "ManagedMetadataQueryEaseCustomized", + "ManagedMetadataResponseGroundedness", "ManagedMetadataResponseHelpfulness", "ManagedMetadataTrustworthiness", ] @@ -82,6 +83,22 @@ class ManagedMetadataQueryEaseCustomized(BaseModel): scores: Optional[List[float]] = None +class ManagedMetadataResponseGroundedness(BaseModel): + average: Optional[float] = None + """The average of all scores.""" + + latest: Optional[float] = None + """The most recent score.""" + + max: Optional[float] = None + """The maximum 
score.""" + + min: Optional[float] = None + """The minimum score.""" + + scores: Optional[List[float]] = None + + class ManagedMetadataResponseHelpfulness(BaseModel): average: Optional[float] = None """The average of all scores.""" @@ -147,6 +164,9 @@ class ManagedMetadata(BaseModel): query_ease_customized: Optional[ManagedMetadataQueryEaseCustomized] = None """Holds a list of scores and computes aggregate statistics.""" + response_groundedness: Optional[ManagedMetadataResponseGroundedness] = None + """Holds a list of scores and computes aggregate statistics.""" + response_helpfulness: Optional[ManagedMetadataResponseHelpfulness] = None """Holds a list of scores and computes aggregate statistics.""" diff --git a/src/codex/types/projects/entry_query_response.py b/src/codex/types/projects/entry_query_response.py index 318636b..cd5a4c9 100644 --- a/src/codex/types/projects/entry_query_response.py +++ b/src/codex/types/projects/entry_query_response.py @@ -12,6 +12,7 @@ "EntryManagedMetadataContextSufficiency", "EntryManagedMetadataHTMLFormatScores", "EntryManagedMetadataQueryEaseCustomized", + "EntryManagedMetadataResponseGroundedness", "EntryManagedMetadataResponseHelpfulness", "EntryManagedMetadataTrustworthiness", ] @@ -81,6 +82,22 @@ class EntryManagedMetadataQueryEaseCustomized(BaseModel): scores: Optional[List[float]] = None +class EntryManagedMetadataResponseGroundedness(BaseModel): + average: Optional[float] = None + """The average of all scores.""" + + latest: Optional[float] = None + """The most recent score.""" + + max: Optional[float] = None + """The maximum score.""" + + min: Optional[float] = None + """The minimum score.""" + + scores: Optional[List[float]] = None + + class EntryManagedMetadataResponseHelpfulness(BaseModel): average: Optional[float] = None """The average of all scores.""" @@ -146,6 +163,9 @@ class EntryManagedMetadata(BaseModel): query_ease_customized: Optional[EntryManagedMetadataQueryEaseCustomized] = None """Holds a list of scores and computes aggregate statistics.""" + response_groundedness: Optional[EntryManagedMetadataResponseGroundedness] = None + """Holds a list of scores and computes aggregate statistics.""" + response_helpfulness: Optional[EntryManagedMetadataResponseHelpfulness] = None """Holds a list of scores and computes aggregate statistics.""" From 7bbf57ae5327ddd85e6729997a4f85b427758258 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 22:16:41 +0000 Subject: [PATCH 05/12] feat(api): api update --- .stats.yml | 2 +- tests/api_resources/test_projects.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.stats.yml b/.stats.yml index aac346a..374e672 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: f25ca671adcc0b224451c721048d9220 +openapi_spec_hash: 67d5aeebff72f48ee4730227ca0b47c2 config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 19e41a0..5c29fdd 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -204,7 +204,7 @@ def test_method_list(self, client: Codex) -> None: def test_method_list_with_all_params(self, client: Codex) -> None: project = client.projects.list( include_entry_counts=True, - limit=0, + limit=1, offset=0, order="asc", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", @@ -699,7 +699,7 @@ async def test_method_list(self, async_client: 
AsyncCodex) -> None: async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.list( include_entry_counts=True, - limit=0, + limit=1, offset=0, order="asc", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", From 428e5001b6b5576f5383c0f2ffd3ad5fe085128a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 02:29:17 +0000 Subject: [PATCH 06/12] chore(docs): grammar improvements --- SECURITY.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 9fc6ee2..0780828 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,11 +16,11 @@ before making any information public. ## Reporting Non-SDK Related Security Issues If you encounter security issues that are not directly related to SDKs but pertain to the services -or products provided by Codex please follow the respective company's security reporting guidelines. +or products provided by Codex, please follow the respective company's security reporting guidelines. ### Codex Terms and Policies -Please contact team@cleanlab.ai for any questions or concerns regarding security of our services. +Please contact team@cleanlab.ai for any questions or concerns regarding the security of our services. --- From 3a5293161e7313d7c18ec61be1b8e7ee56bad8c9 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 21:53:29 +0000 Subject: [PATCH 07/12] feat(api): api update --- .stats.yml | 2 +- src/codex/types/project_create_params.py | 285 ++++++++++++++++++- src/codex/types/project_list_response.py | 285 ++++++++++++++++++- src/codex/types/project_retrieve_response.py | 284 +++++++++++++++++- src/codex/types/project_return_schema.py | 284 +++++++++++++++++- src/codex/types/project_update_params.py | 285 ++++++++++++++++++- tests/api_resources/test_projects.py | 264 +++++++++++++++++ 7 files changed, 1676 insertions(+), 13 deletions(-) diff --git a/.stats.yml b/.stats.yml index 374e672..e80f0e1 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: 67d5aeebff72f48ee4730227ca0b47c2 +openapi_spec_hash: 0f1841fad65926e7ddfb22dd7a642b46 config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py index ecdd194..75892e0 100644 --- a/src/codex/types/project_create_params.py +++ b/src/codex/types/project_create_params.py @@ -2,10 +2,22 @@ from __future__ import annotations -from typing import Optional -from typing_extensions import Required, TypedDict +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict -__all__ = ["ProjectCreateParams", "Config"] +__all__ = [ + "ProjectCreateParams", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] class ProjectCreateParams(TypedDict, total=False): @@ -18,9 +30,276 @@ class ProjectCreateParams(TypedDict, total=False): description: Optional[str] +class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): + criteria: Required[str] + """ + The evaluation criteria text that 
describes what aspect is being evaluated and + how + """ + + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(TypedDict, total=False): + evals: Dict[str, ConfigEvalConfigCustomEvalsEvals] + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + 
name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(TypedDict, total=False): + context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: ConfigEvalConfigDefaultEvalsQueryEase + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + +class ConfigEvalConfig(TypedDict, total=False): + custom_evals: ConfigEvalConfigCustomEvals + """Configuration for custom evaluation metrics.""" + + default_evals: ConfigEvalConfigDefaultEvals + """Configuration for default evaluation metrics.""" + + class Config(TypedDict, total=False): clustering_use_llm_matching: bool + eval_config: ConfigEvalConfig + """Configuration for project-specific evaluation metrics""" + llm_matching_model: str llm_matching_quality_preset: str diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py index 2b4fec4..59d3bf8 100644 --- a/src/codex/types/project_list_response.py +++ b/src/codex/types/project_list_response.py @@ -1,16 +1,297 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Optional +from typing import Dict, List, Optional from datetime import datetime +from typing_extensions import Literal from .._models import BaseModel -__all__ = ["ProjectListResponse", "Project", "ProjectConfig"] +__all__ = [ + "ProjectListResponse", + "Project", + "ProjectConfig", + "ProjectConfigEvalConfig", + "ProjectConfigEvalConfigCustomEvals", + "ProjectConfigEvalConfigCustomEvalsEvals", + "ProjectConfigEvalConfigDefaultEvals", + "ProjectConfigEvalConfigDefaultEvalsContextSufficiency", + "ProjectConfigEvalConfigDefaultEvalsQueryEase", + "ProjectConfigEvalConfigDefaultEvalsResponseGroundedness", + "ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ProjectConfigEvalConfigDefaultEvalsTrustworthiness", +] + + +class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel): + criteria: str + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigCustomEvals(BaseModel): + evals: Optional[Dict[str, ProjectConfigEvalConfigCustomEvalsEvals]] = None + + +class ProjectConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class 
ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ProjectConfigEvalConfigDefaultEvals(BaseModel): + context_sufficiency: Optional[ProjectConfigEvalConfigDefaultEvalsContextSufficiency] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: Optional[ProjectConfigEvalConfigDefaultEvalsQueryEase] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: Optional[ProjectConfigEvalConfigDefaultEvalsResponseGroundedness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: Optional[ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: Optional[ProjectConfigEvalConfigDefaultEvalsTrustworthiness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. 
+ + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + +class ProjectConfigEvalConfig(BaseModel): + custom_evals: Optional[ProjectConfigEvalConfigCustomEvals] = None + """Configuration for custom evaluation metrics.""" + + default_evals: Optional[ProjectConfigEvalConfigDefaultEvals] = None + """Configuration for default evaluation metrics.""" class ProjectConfig(BaseModel): clustering_use_llm_matching: Optional[bool] = None + eval_config: Optional[ProjectConfigEvalConfig] = None + """Configuration for project-specific evaluation metrics""" + llm_matching_model: Optional[str] = None llm_matching_quality_preset: Optional[str] = None diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index 62209d3..a631f0c 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -1,16 +1,296 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Dict, Optional from datetime import datetime +from typing_extensions import Literal from .._models import BaseModel -__all__ = ["ProjectRetrieveResponse", "Config"] +__all__ = [ + "ProjectRetrieveResponse", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] + + +class ConfigEvalConfigCustomEvalsEvals(BaseModel): + criteria: str + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(BaseModel): + evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + eval_key: str + """ + Unique key 
for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(BaseModel): + context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: Optional[ConfigEvalConfigDefaultEvalsQueryEase] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: Optional[ConfigEvalConfigDefaultEvalsResponseGroundedness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: Optional[ConfigEvalConfigDefaultEvalsResponseHelpfulness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: Optional[ConfigEvalConfigDefaultEvalsTrustworthiness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + +class ConfigEvalConfig(BaseModel): + custom_evals: Optional[ConfigEvalConfigCustomEvals] = None + """Configuration for custom evaluation metrics.""" + + default_evals: Optional[ConfigEvalConfigDefaultEvals] = None + """Configuration for default evaluation metrics.""" class Config(BaseModel): clustering_use_llm_matching: Optional[bool] = None + eval_config: Optional[ConfigEvalConfig] = None + """Configuration for project-specific evaluation metrics""" + llm_matching_model: Optional[str] = None llm_matching_quality_preset: Optional[str] = None diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py index 51a6c1a..7da2e61 100644 --- a/src/codex/types/project_return_schema.py +++ b/src/codex/types/project_return_schema.py @@ -1,16 +1,296 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Dict, Optional from datetime import datetime +from typing_extensions import Literal from .._models import BaseModel -__all__ = ["ProjectReturnSchema", "Config"] +__all__ = [ + "ProjectReturnSchema", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] + + +class ConfigEvalConfigCustomEvalsEvals(BaseModel): + criteria: str + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] = None + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(BaseModel): + evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + eval_key: str + """ + Unique key 
for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + eval_key: str + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: str + """Display name/label for the evaluation metric""" + + enabled: Optional[bool] = None + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] = None + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: Optional[bool] = None + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: Optional[float] = None + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Optional[Literal["above", "below"]] = None + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(BaseModel): + context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: Optional[ConfigEvalConfigDefaultEvalsQueryEase] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: Optional[ConfigEvalConfigDefaultEvalsResponseGroundedness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: Optional[ConfigEvalConfigDefaultEvalsResponseHelpfulness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: Optional[ConfigEvalConfigDefaultEvalsTrustworthiness] = None + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + +class ConfigEvalConfig(BaseModel): + custom_evals: Optional[ConfigEvalConfigCustomEvals] = None + """Configuration for custom evaluation metrics.""" + + default_evals: Optional[ConfigEvalConfigDefaultEvals] = None + """Configuration for default evaluation metrics.""" class Config(BaseModel): clustering_use_llm_matching: Optional[bool] = None + eval_config: Optional[ConfigEvalConfig] = None + """Configuration for project-specific evaluation metrics""" + llm_matching_model: Optional[str] = None llm_matching_quality_preset: Optional[str] = None diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index 0a5aa54..d58dd59 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -2,10 +2,22 @@ from __future__ import annotations -from typing import Optional -from typing_extensions import Required, TypedDict +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict -__all__ = ["ProjectUpdateParams", "Config"] +__all__ = [ + "ProjectUpdateParams", + "Config", + "ConfigEvalConfig", + "ConfigEvalConfigCustomEvals", + "ConfigEvalConfigCustomEvalsEvals", + "ConfigEvalConfigDefaultEvals", + "ConfigEvalConfigDefaultEvalsContextSufficiency", + "ConfigEvalConfigDefaultEvalsQueryEase", + "ConfigEvalConfigDefaultEvalsResponseGroundedness", + "ConfigEvalConfigDefaultEvalsResponseHelpfulness", + "ConfigEvalConfigDefaultEvalsTrustworthiness", +] class ProjectUpdateParams(TypedDict, total=False): @@ -16,9 +28,276 @@ class ProjectUpdateParams(TypedDict, total=False): description: Optional[str] +class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): + criteria: Required[str] + """ + The evaluation criteria text that describes what aspect is being evaluated and + how + """ + + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + context_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the retrieved + context. + """ + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + query_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the user's query. + """ + + response_identifier: Optional[str] + """ + The exact string used in your evaluation criteria to reference the RAG/LLM + response. 
+ """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigCustomEvals(TypedDict, total=False): + evals: Dict[str, ConfigEvalConfigCustomEvalsEvals] + + +class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: 
Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): + eval_key: Required[str] + """ + Unique key for eval metric - currently maps to the TrustworthyRAG name property + and eval_scores dictionary key to check against threshold + """ + + name: Required[str] + """Display name/label for the evaluation metric""" + + enabled: bool + """Allows the evaluation to be disabled without removing it""" + + priority: Optional[int] + """ + Priority order for evals (lower number = higher priority) to determine primary + eval issue to surface + """ + + should_escalate: bool + """ + If true, failing this eval means the response is considered bad and can trigger + escalation to Codex/SME + """ + + threshold: float + """Threshold value that determines if the evaluation fails""" + + threshold_direction: Literal["above", "below"] + """Whether the evaluation fails when score is above or below the threshold""" + + +class ConfigEvalConfigDefaultEvals(TypedDict, total=False): + context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + query_ease: ConfigEvalConfigDefaultEvalsQueryEase + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. + """ + + trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, while + other properties like thresholds and priorities can be configured. 
+ """ + + +class ConfigEvalConfig(TypedDict, total=False): + custom_evals: ConfigEvalConfigCustomEvals + """Configuration for custom evaluation metrics.""" + + default_evals: ConfigEvalConfigDefaultEvals + """Configuration for default evaluation metrics.""" + + class Config(TypedDict, total=False): clustering_use_llm_matching: bool + eval_config: ConfigEvalConfig + """Configuration for project-specific evaluation metrics""" + llm_matching_model: str llm_matching_quality_preset: str diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 5c29fdd..d5e0e1c 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -39,6 +39,72 @@ def test_method_create_with_all_params(self, client: Codex) -> None: project = client.projects.create( config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, @@ -141,6 +207,72 @@ def test_method_update_with_all_params(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + 
"response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, @@ -534,6 +666,72 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> project = await async_client.projects.create( config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, @@ -636,6 +834,72 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", config={ "clustering_use_llm_matching": True, + "eval_config": { + "custom_evals": { + "evals": { + "foo": { + "criteria": "criteria", + "eval_key": "eval_key", + "name": "name", + "context_identifier": "context_identifier", + "enabled": True, + "priority": 0, + "query_identifier": "query_identifier", + "response_identifier": "response_identifier", + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + } + } + }, + "default_evals": { + "context_sufficiency": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "query_ease": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_groundedness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "response_helpfulness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + 
"should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + "trustworthiness": { + "eval_key": "eval_key", + "name": "name", + "enabled": True, + "priority": 0, + "should_escalate": True, + "threshold": 0, + "threshold_direction": "above", + }, + }, + }, "llm_matching_model": "llm_matching_model", "llm_matching_quality_preset": "llm_matching_quality_preset", "lower_llm_match_distance_threshold": 0, From 7ba3858c1c968c093c676478a1c7e5e13b92c12a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 28 May 2025 03:26:20 +0000 Subject: [PATCH 08/12] fix(docs/api): remove references to nonexistent types --- api.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/api.md b/api.md index f3a2ea1..8aac76f 100644 --- a/api.md +++ b/api.md @@ -139,8 +139,6 @@ from codex.types import ( ProjectReturnSchema, ProjectRetrieveResponse, ProjectListResponse, - ProjectExportResponse, - ProjectIncrementQueriesResponse, ProjectRetrieveAnalyticsResponse, ProjectValidateResponse, ) @@ -153,8 +151,8 @@ Methods: - client.projects.update(project_id, \*\*params) -> ProjectReturnSchema - client.projects.list(\*\*params) -> ProjectListResponse - client.projects.delete(project_id) -> None -- client.projects.export(project_id) -> object -- client.projects.increment_queries(project_id, \*\*params) -> object +- client.projects.export(project_id) -> object +- client.projects.increment_queries(project_id, \*\*params) -> object - client.projects.retrieve_analytics(project_id, \*\*params) -> ProjectRetrieveAnalyticsResponse - client.projects.validate(project_id, \*\*params) -> ProjectValidateResponse From 57f522fbd04637849146636a83976fe696160a97 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 29 May 2025 03:09:34 +0000 Subject: [PATCH 09/12] chore(api): mark some methods as deprecated --- src/codex/resources/projects/entries.py | 27 ++- src/codex/resources/projects/projects.py | 27 ++- tests/api_resources/projects/test_entries.py | 164 ++++++++++--------- tests/api_resources/test_projects.py | 100 ++++++----- 4 files changed, 186 insertions(+), 132 deletions(-) diff --git a/src/codex/resources/projects/entries.py b/src/codex/resources/projects/entries.py index 346dd35..c6b43a4 100644 --- a/src/codex/resources/projects/entries.py +++ b/src/codex/resources/projects/entries.py @@ -2,6 +2,7 @@ from __future__ import annotations +import typing_extensions from typing import Iterable, Optional import httpx @@ -312,6 +313,7 @@ def publish_draft_answer( cast_to=Entry, ) + @typing_extensions.deprecated("deprecated") def query( self, project_id: str, @@ -707,6 +709,7 @@ async def publish_draft_answer( cast_to=Entry, ) + @typing_extensions.deprecated("deprecated") async def query( self, project_id: str, @@ -839,8 +842,10 @@ def __init__(self, entries: EntriesResource) -> None: self.publish_draft_answer = to_raw_response_wrapper( entries.publish_draft_answer, ) - self.query = to_raw_response_wrapper( - entries.query, + self.query = ( # pyright: ignore[reportDeprecated] + to_raw_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = to_raw_response_wrapper( entries.unpublish_answer, @@ -869,8 +874,10 @@ def __init__(self, entries: AsyncEntriesResource) -> None: self.publish_draft_answer = async_to_raw_response_wrapper( entries.publish_draft_answer, ) - self.query = async_to_raw_response_wrapper( - entries.query, + 
self.query = ( # pyright: ignore[reportDeprecated] + async_to_raw_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = async_to_raw_response_wrapper( entries.unpublish_answer, @@ -899,8 +906,10 @@ def __init__(self, entries: EntriesResource) -> None: self.publish_draft_answer = to_streamed_response_wrapper( entries.publish_draft_answer, ) - self.query = to_streamed_response_wrapper( - entries.query, + self.query = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = to_streamed_response_wrapper( entries.unpublish_answer, @@ -929,8 +938,10 @@ def __init__(self, entries: AsyncEntriesResource) -> None: self.publish_draft_answer = async_to_streamed_response_wrapper( entries.publish_draft_answer, ) - self.query = async_to_streamed_response_wrapper( - entries.query, + self.query = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + entries.query # pyright: ignore[reportDeprecated], + ) ) self.unpublish_answer = async_to_streamed_response_wrapper( entries.unpublish_answer, diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index 6195d1a..cf8c0f8 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -2,6 +2,7 @@ from __future__ import annotations +import typing_extensions from typing import Dict, List, Optional from typing_extensions import Literal @@ -330,6 +331,7 @@ def export( cast_to=object, ) + @typing_extensions.deprecated("deprecated") def increment_queries( self, project_id: str, @@ -872,6 +874,7 @@ async def export( cast_to=object, ) + @typing_extensions.deprecated("deprecated") async def increment_queries( self, project_id: str, @@ -1167,8 +1170,10 @@ def __init__(self, projects: ProjectsResource) -> None: self.export = to_raw_response_wrapper( projects.export, ) - self.increment_queries = to_raw_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: ignore[reportDeprecated] + to_raw_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = to_raw_response_wrapper( projects.retrieve_analytics, @@ -1212,8 +1217,10 @@ def __init__(self, projects: AsyncProjectsResource) -> None: self.export = async_to_raw_response_wrapper( projects.export, ) - self.increment_queries = async_to_raw_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: ignore[reportDeprecated] + async_to_raw_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = async_to_raw_response_wrapper( projects.retrieve_analytics, @@ -1257,8 +1264,10 @@ def __init__(self, projects: ProjectsResource) -> None: self.export = to_streamed_response_wrapper( projects.export, ) - self.increment_queries = to_streamed_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = to_streamed_response_wrapper( projects.retrieve_analytics, @@ -1302,8 +1311,10 @@ def __init__(self, projects: AsyncProjectsResource) -> None: self.export = async_to_streamed_response_wrapper( projects.export, ) - self.increment_queries = async_to_streamed_response_wrapper( - projects.increment_queries, + self.increment_queries = ( # pyright: 
ignore[reportDeprecated] + async_to_streamed_response_wrapper( + projects.increment_queries # pyright: ignore[reportDeprecated], + ) ) self.retrieve_analytics = async_to_streamed_response_wrapper( projects.retrieve_analytics, diff --git a/tests/api_resources/projects/test_entries.py b/tests/api_resources/projects/test_entries.py index 73a45ad..32b0452 100644 --- a/tests/api_resources/projects/test_entries.py +++ b/tests/api_resources/projects/test_entries.py @@ -15,6 +15,8 @@ EntryNotifySmeResponse, ) +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -382,40 +384,45 @@ def test_path_params_publish_draft_answer(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_method_query(self, client: Codex) -> None: - entry = client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + entry = client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize def test_method_query_with_all_params(self, client: Codex) -> None: - entry = client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - use_llm_matching=True, - client_metadata={}, - query_metadata={ - "context": "string", - "custom_metadata": {}, - "eval_scores": {"foo": 0}, - "evaluated_response": "evaluated_response", - }, - x_client_library_version="x-client-library-version", - x_integration_type="x-integration-type", - x_source="x-source", - x_stainless_package_version="x-stainless-package-version", - ) + with pytest.warns(DeprecationWarning): + entry = client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + use_llm_matching=True, + client_metadata={}, + query_metadata={ + "context": "string", + "custom_metadata": {}, + "eval_scores": {"foo": 0}, + "evaluated_response": "evaluated_response", + }, + x_client_library_version="x-client-library-version", + x_integration_type="x-integration-type", + x_source="x-source", + x_stainless_package_version="x-stainless-package-version", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize def test_raw_response_query(self, client: Codex) -> None: - response = client.projects.entries.with_raw_response.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + response = client.projects.entries.with_raw_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -425,26 +432,28 @@ def test_raw_response_query(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_streaming_response_query(self, client: Codex) -> None: - with client.projects.entries.with_streaming_response.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.projects.entries.with_streaming_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - entry = response.parse() - assert_matches_type(EntryQueryResponse, entry, path=["response"]) + entry = response.parse() + assert_matches_type(EntryQueryResponse, entry, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize def test_path_params_query(self, client: Codex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - client.projects.entries.with_raw_response.query( - project_id="", - question="question", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + client.projects.entries.with_raw_response.query( + project_id="", + question="question", + ) @pytest.mark.skip() @parametrize @@ -863,40 +872,45 @@ async def test_path_params_publish_draft_answer(self, async_client: AsyncCodex) @pytest.mark.skip() @parametrize async def test_method_query(self, async_client: AsyncCodex) -> None: - entry = await async_client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + entry = await async_client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize async def test_method_query_with_all_params(self, async_client: AsyncCodex) -> None: - entry = await async_client.projects.entries.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - use_llm_matching=True, - client_metadata={}, - query_metadata={ - "context": "string", - "custom_metadata": {}, - "eval_scores": {"foo": 0}, - "evaluated_response": "evaluated_response", - }, - x_client_library_version="x-client-library-version", - x_integration_type="x-integration-type", - x_source="x-source", - x_stainless_package_version="x-stainless-package-version", - ) + with pytest.warns(DeprecationWarning): + entry = await async_client.projects.entries.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + use_llm_matching=True, + client_metadata={}, + query_metadata={ + "context": "string", + "custom_metadata": {}, + "eval_scores": {"foo": 0}, + "evaluated_response": "evaluated_response", + }, + x_client_library_version="x-client-library-version", + x_integration_type="x-integration-type", + x_source="x-source", + x_stainless_package_version="x-stainless-package-version", + ) + assert_matches_type(EntryQueryResponse, entry, path=["response"]) @pytest.mark.skip() @parametrize async def test_raw_response_query(self, async_client: AsyncCodex) -> None: - response = await async_client.projects.entries.with_raw_response.query( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.projects.entries.with_raw_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -906,26 +920,28 @@ async def test_raw_response_query(self, async_client: AsyncCodex) -> None: @pytest.mark.skip() @parametrize async def test_streaming_response_query(self, async_client: AsyncCodex) -> None: - async with async_client.projects.entries.with_streaming_response.query( - 
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.projects.entries.with_streaming_response.query( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + question="question", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - entry = await response.parse() - assert_matches_type(EntryQueryResponse, entry, path=["response"]) + entry = await response.parse() + assert_matches_type(EntryQueryResponse, entry, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize async def test_path_params_query(self, async_client: AsyncCodex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - await async_client.projects.entries.with_raw_response.query( - project_id="", - question="question", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + await async_client.projects.entries.with_raw_response.query( + project_id="", + question="question", + ) @pytest.mark.skip() @parametrize diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index d5e0e1c..f7c3f01 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -17,6 +17,8 @@ ) from tests.utils import assert_matches_type +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -454,26 +456,31 @@ def test_path_params_export(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_method_increment_queries(self, client: Codex) -> None: - project = client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + project = client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize def test_method_increment_queries_with_all_params(self, client: Codex) -> None: - project = client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - count=0, - ) + with pytest.warns(DeprecationWarning): + project = client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + count=0, + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize def test_raw_response_increment_queries(self, client: Codex) -> None: - response = client.projects.with_raw_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + response = client.projects.with_raw_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -483,24 +490,26 @@ def test_raw_response_increment_queries(self, client: Codex) -> None: @pytest.mark.skip() @parametrize def test_streaming_response_increment_queries(self, client: Codex) -> None: - with client.projects.with_streaming_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) as response: - assert not response.is_closed - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.projects.with_streaming_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - project = response.parse() - assert_matches_type(object, project, path=["response"]) + project = response.parse() + assert_matches_type(object, project, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize def test_path_params_increment_queries(self, client: Codex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - client.projects.with_raw_response.increment_queries( - project_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + client.projects.with_raw_response.increment_queries( + project_id="", + ) @pytest.mark.skip() @parametrize @@ -1081,26 +1090,31 @@ async def test_path_params_export(self, async_client: AsyncCodex) -> None: @pytest.mark.skip() @parametrize async def test_method_increment_queries(self, async_client: AsyncCodex) -> None: - project = await async_client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + project = await async_client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize async def test_method_increment_queries_with_all_params(self, async_client: AsyncCodex) -> None: - project = await async_client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - count=0, - ) + with pytest.warns(DeprecationWarning): + project = await async_client.projects.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + count=0, + ) + assert_matches_type(object, project, path=["response"]) @pytest.mark.skip() @parametrize async def test_raw_response_increment_queries(self, async_client: AsyncCodex) -> None: - response = await async_client.projects.with_raw_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.projects.with_raw_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1110,24 +1124,26 @@ async def test_raw_response_increment_queries(self, async_client: AsyncCodex) -> @pytest.mark.skip() @parametrize async def test_streaming_response_increment_queries(self, async_client: AsyncCodex) -> None: - async with async_client.projects.with_streaming_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.projects.with_streaming_response.increment_queries( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - project = await response.parse() - assert_matches_type(object, project, path=["response"]) + project 
= await response.parse() + assert_matches_type(object, project, path=["response"]) assert cast(Any, response.is_closed) is True @pytest.mark.skip() @parametrize async def test_path_params_increment_queries(self, async_client: AsyncCodex) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - await async_client.projects.with_raw_response.increment_queries( - project_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): + await async_client.projects.with_raw_response.increment_queries( + project_id="", + ) @pytest.mark.skip() @parametrize From ef4acf292c728a838aecd6539d278b8c128be68a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 11 Jun 2025 18:41:44 +0000 Subject: [PATCH 10/12] feat(api): api update --- .stats.yml | 2 +- src/codex/types/project_create_params.py | 3 +++ src/codex/types/project_list_response.py | 3 +++ src/codex/types/project_retrieve_response.py | 3 +++ src/codex/types/project_return_schema.py | 3 +++ src/codex/types/project_update_params.py | 3 +++ tests/api_resources/test_projects.py | 4 ++++ 7 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index e80f0e1..ddf7240 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: 0f1841fad65926e7ddfb22dd7a642b46 +openapi_spec_hash: dfccb5c181396678a22b9c079847889f config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py index 75892e0..3142755 100644 --- a/src/codex/types/project_create_params.py +++ b/src/codex/types/project_create_params.py @@ -55,6 +55,9 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): enabled: bool """Allows the evaluation to be disabled without removing it""" + is_default: bool + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py index 59d3bf8..d480573 100644 --- a/src/codex/types/project_list_response.py +++ b/src/codex/types/project_list_response.py @@ -47,6 +47,9 @@ class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel): enabled: Optional[bool] = None """Allows the evaluation to be disabled without removing it""" + is_default: Optional[bool] = None + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] = None """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index a631f0c..fb62cff 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -46,6 +46,9 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel): enabled: Optional[bool] = None """Allows the evaluation to be disabled without removing it""" + is_default: Optional[bool] = None + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] = None """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py index 7da2e61..420ec6e 100644 --- 
a/src/codex/types/project_return_schema.py +++ b/src/codex/types/project_return_schema.py @@ -46,6 +46,9 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel): enabled: Optional[bool] = None """Allows the evaluation to be disabled without removing it""" + is_default: Optional[bool] = None + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] = None """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index d58dd59..d199955 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -53,6 +53,9 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): enabled: bool """Allows the evaluation to be disabled without removing it""" + is_default: bool + """Whether the eval is a default, built-in eval or a custom eval""" + priority: Optional[int] """ Priority order for evals (lower number = higher priority) to determine primary diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index f7c3f01..8ba69a1 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -50,6 +50,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", @@ -218,6 +219,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None: "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", @@ -684,6 +686,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", @@ -852,6 +855,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> "name": "name", "context_identifier": "context_identifier", "enabled": True, + "is_default": True, "priority": 0, "query_identifier": "query_identifier", "response_identifier": "response_identifier", From 8fcb74d6d5ef8db36a78031c9bf59f0840def1d4 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 02:20:36 +0000 Subject: [PATCH 11/12] chore(docs): remove reference to rye shell --- CONTRIBUTING.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b9fa9a1..548ff4c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,8 +17,7 @@ $ rye sync --all-features You can then run scripts using `rye run python script.py` or by activating the virtual environment: ```sh -$ rye shell -# or manually activate - https://docs.python.org/3/library/venv.html#how-venvs-work +# Activate the virtual environment - https://docs.python.org/3/library/venv.html#how-venvs-work $ source .venv/bin/activate # now you can omit the `rye run` prefix From d9499f6ccb7deac8948dc80342c9bf0f956d8397 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 11 Jun 2025 18:42:01 +0000 Subject: [PATCH 12/12] release: 0.1.0-alpha.21 --- 
.release-please-manifest.json | 2 +- CHANGELOG.md | 25 +++++++++++++++++++++++++ pyproject.toml | 2 +- src/codex/_version.py | 2 +- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index fac1407..7c31fce 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.20" + ".": "0.1.0-alpha.21" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f151d60..fd14df6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +## 0.1.0-alpha.21 (2025-06-11) + +Full Changelog: [v0.1.0-alpha.20...v0.1.0-alpha.21](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.20...v0.1.0-alpha.21) + +### Features + +* **api:** api update ([ef4acf2](https://github.com/cleanlab/codex-python/commit/ef4acf292c728a838aecd6539d278b8c128be68a)) +* **api:** api update ([3a52931](https://github.com/cleanlab/codex-python/commit/3a5293161e7313d7c18ec61be1b8e7ee56bad8c9)) +* **api:** api update ([7bbf57a](https://github.com/cleanlab/codex-python/commit/7bbf57ae5327ddd85e6729997a4f85b427758258)) +* **api:** api update ([40ae04a](https://github.com/cleanlab/codex-python/commit/40ae04a279ba1e2573d17a17e097f71d1347a3d3)) + + +### Bug Fixes + +* **docs/api:** remove references to nonexistent types ([7ba3858](https://github.com/cleanlab/codex-python/commit/7ba3858c1c968c093c676478a1c7e5e13b92c12a)) + + +### Chores + +* **api:** mark some methods as deprecated ([57f522f](https://github.com/cleanlab/codex-python/commit/57f522fbd04637849146636a83976fe696160a97)) +* **ci:** fix installation instructions ([3aa9884](https://github.com/cleanlab/codex-python/commit/3aa98843e0f042734eb5b74ea86c8dcca8636954)) +* **docs:** grammar improvements ([428e500](https://github.com/cleanlab/codex-python/commit/428e5001b6b5576f5383c0f2ffd3ad5fe085128a)) +* **docs:** remove reference to rye shell ([8fcb74d](https://github.com/cleanlab/codex-python/commit/8fcb74d6d5ef8db36a78031c9bf59f0840def1d4)) +* **internal:** codegen related update ([18f661d](https://github.com/cleanlab/codex-python/commit/18f661d21b849f15cbe85ce5063ef0dea877d89f)) + ## 0.1.0-alpha.20 (2025-05-15) Full Changelog: [v0.1.0-alpha.19...v0.1.0-alpha.20](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.19...v0.1.0-alpha.20) diff --git a/pyproject.toml b/pyproject.toml index 04d039a..55d73e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.20" +version = "0.1.0-alpha.21" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" diff --git a/src/codex/_version.py b/src/codex/_version.py index 44d6131..3b23c98 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.20" # x-release-please-version +__version__ = "0.1.0-alpha.21" # x-release-please-version
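
For readers of this patch series, the sketch below shows roughly how the `eval_config` field introduced in the `feat(api): api update` patches could be supplied through the generated client. It is an illustration only, not part of the patches: `client` is assumed to be an already-configured `codex.Codex` instance, the project ID is the placeholder UUID used in the tests, the eval keys, criteria text, and thresholds are made-up example values, and `projects.update` may require arguments beyond `project_id` and `config` (see `src/codex/types/project_update_params.py`). The dictionary keys mirror the TypedDicts and test payloads in the diffs above.

```python
# Illustrative sketch only -- not part of the patch series above.
# Assumes `client` is an already-configured codex.Codex instance; the project ID is the
# placeholder UUID from tests/api_resources/test_projects.py, and all eval names,
# criteria, and thresholds are made-up example values.
from codex import Codex

client = Codex()  # assumption: credentials supplied via environment/constructor as documented for the SDK

eval_config = {
    # Tune a built-in (default) eval: fail when the trustworthiness score drops below 0.6,
    # and allow the failure to escalate to Codex/SME review.
    "default_evals": {
        "trustworthiness": {
            "eval_key": "trustworthiness",
            "name": "Trustworthiness",
            "enabled": True,
            "priority": 0,
            "should_escalate": True,
            "threshold": 0.6,
            "threshold_direction": "below",
        },
    },
    # Register a project-specific custom eval; `is_default` (added in PATCH 10/12) marks it as non-built-in.
    "custom_evals": {
        "evals": {
            "politeness": {
                "eval_key": "politeness",
                "name": "Politeness",
                "criteria": "Is the response polite and professional?",
                "response_identifier": "Response",
                "enabled": True,
                "is_default": False,
                "priority": 1,
                "should_escalate": False,
                "threshold": 0.5,
                "threshold_direction": "below",
            },
        },
    },
}

project = client.projects.update(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",  # placeholder ID from the tests
    config={"eval_config": eval_config},
)
```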