diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index ac9a2e7..55d2025 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -6,4 +6,4 @@ USER vscode
RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash
ENV PATH=/home/vscode/.rye/shims:$PATH
-RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc
+RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index bbeb30b..c17fdc1 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -24,6 +24,9 @@
}
}
}
+ },
+ "features": {
+ "ghcr.io/devcontainers/features/node:1": {}
}
// Features to add to the dev container. More info: https://containers.dev/features.
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index ee49ac2..fd0ccba 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.11"
+ ".": "0.1.0-alpha.12"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 966331a..7982133 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1 +1 @@
-configured_endpoints: 34
+configured_endpoints: 37
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 172b1d6..abb1602 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,43 @@
# Changelog
+## 0.1.0-alpha.12 (2025-03-11)
+
+Full Changelog: [v0.1.0-alpha.11...v0.1.0-alpha.12](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.11...v0.1.0-alpha.12)
+
+### Features
+
+* **api:** add tlm routes ([#79](https://github.com/cleanlab/codex-python/issues/79)) ([783282d](https://github.com/cleanlab/codex-python/commit/783282da5cef0d7fbbadbeb826153622ec9a37d1))
+* **api:** api update ([#60](https://github.com/cleanlab/codex-python/issues/60)) ([f28da42](https://github.com/cleanlab/codex-python/commit/f28da423ea6350df2422a6b3c984044686cb4674))
+* **api:** api update ([#67](https://github.com/cleanlab/codex-python/issues/67)) ([5697955](https://github.com/cleanlab/codex-python/commit/569795521774bd2ac303dcaf8058e791f1af501c))
+* **client:** allow passing `NotGiven` for body ([#70](https://github.com/cleanlab/codex-python/issues/70)) ([f022d08](https://github.com/cleanlab/codex-python/commit/f022d082ad3e07ba3600c0d9e8becefad96ca175))
+* **client:** send `X-Stainless-Read-Timeout` header ([#63](https://github.com/cleanlab/codex-python/issues/63)) ([5904ed6](https://github.com/cleanlab/codex-python/commit/5904ed630f3dce437f3eb0f248d6a96b7c237e19))
+
+
+### Bug Fixes
+
+* asyncify on non-asyncio runtimes ([#69](https://github.com/cleanlab/codex-python/issues/69)) ([dc7519f](https://github.com/cleanlab/codex-python/commit/dc7519f876a99cdb58f4b634de45989e44c53c88))
+* **client:** mark some request bodies as optional ([f022d08](https://github.com/cleanlab/codex-python/commit/f022d082ad3e07ba3600c0d9e8becefad96ca175))
+
+
+### Chores
+
+* **docs:** update client docstring ([#75](https://github.com/cleanlab/codex-python/issues/75)) ([5b371a6](https://github.com/cleanlab/codex-python/commit/5b371a629dbd7763e00d8fd4315c4e437b4f0145))
+* **internal:** bump ruff dependency ([#62](https://github.com/cleanlab/codex-python/issues/62)) ([123ccca](https://github.com/cleanlab/codex-python/commit/123ccca213572048ca6678900414e746516a9de1))
+* **internal:** change default timeout to an int ([#61](https://github.com/cleanlab/codex-python/issues/61)) ([66fc9b7](https://github.com/cleanlab/codex-python/commit/66fc9b758ba38b160b7d6b17b94f294f248e0ecd))
+* **internal:** fix devcontainers setup ([#71](https://github.com/cleanlab/codex-python/issues/71)) ([9ec8473](https://github.com/cleanlab/codex-python/commit/9ec847324c47ab63d9cf39d50f392367585065cf))
+* **internal:** fix type traversing dictionary params ([#64](https://github.com/cleanlab/codex-python/issues/64)) ([648fc48](https://github.com/cleanlab/codex-python/commit/648fc489ce7f9827bfc2354e93d470b6e4b7e1bf))
+* **internal:** minor type handling changes ([#65](https://github.com/cleanlab/codex-python/issues/65)) ([27aa5db](https://github.com/cleanlab/codex-python/commit/27aa5db50e0aa13eb2c4d88196a4ac70681ae808))
+* **internal:** properly set __pydantic_private__ ([#72](https://github.com/cleanlab/codex-python/issues/72)) ([9765c39](https://github.com/cleanlab/codex-python/commit/9765c3979b7856713e75175a76d342f6be956dea))
+* **internal:** remove unused http client options forwarding ([#76](https://github.com/cleanlab/codex-python/issues/76)) ([c5ed0fd](https://github.com/cleanlab/codex-python/commit/c5ed0fdc13238df5ecf8cbfd17e15974c6d1b24b))
+* **internal:** update client tests ([#68](https://github.com/cleanlab/codex-python/issues/68)) ([9297d25](https://github.com/cleanlab/codex-python/commit/9297d25f8f6e59af3fa610539d9736f5f0af2fe2))
+* **internal:** version bump ([#58](https://github.com/cleanlab/codex-python/issues/58)) ([d032df2](https://github.com/cleanlab/codex-python/commit/d032df2296313bf0d2b2712672756ed185afb0e0))
+
+
+### Documentation
+
+* revise readme docs about nested params ([#77](https://github.com/cleanlab/codex-python/issues/77)) ([649ec25](https://github.com/cleanlab/codex-python/commit/649ec251abd53beb368cb68d134a54b35338d327))
+* update URLs from stainlessapi.com to stainless.com ([#73](https://github.com/cleanlab/codex-python/issues/73)) ([8f500b4](https://github.com/cleanlab/codex-python/commit/8f500b4c0af5c050350107ca7e567a289c7cf8f9))
+
## 0.1.0-alpha.11 (2025-01-30)
Full Changelog: [v0.1.0-alpha.10...v0.1.0-alpha.11](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.10...v0.1.0-alpha.11)
diff --git a/SECURITY.md b/SECURITY.md
index 54f6446..9fc6ee2 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -2,9 +2,9 @@
## Reporting Security Issues
-This SDK is generated by [Stainless Software Inc](http://stainlessapi.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken.
+This SDK is generated by [Stainless Software Inc](http://stainless.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken.
-To report a security issue, please contact the Stainless team at security@stainlessapi.com.
+To report a security issue, please contact the Stainless team at security@stainless.com.
## Responsible Disclosure
diff --git a/api.md b/api.md
index b2fc2f0..5e25833 100644
--- a/api.md
+++ b/api.md
@@ -119,6 +119,18 @@ Methods:
- client.users.myself.organizations.list() -> UserOrganizationsSchema
+## Verification
+
+Types:
+
+```python
+from codex.types.users import VerificationResendResponse
+```
+
+Methods:
+
+- client.users.verification.resend() -> VerificationResendResponse
+
# Projects
Types:
@@ -175,3 +187,16 @@ Methods:
- client.projects.entries.delete(entry_id, \*, project_id) -> None
- client.projects.entries.add_question(project_id, \*\*params) -> Entry
- client.projects.entries.query(project_id, \*\*params) -> Optional[Entry]
+
+# Tlm
+
+Types:
+
+```python
+from codex.types import TlmPromptResponse, TlmScoreResponse
+```
+
+Methods:
+
+- client.tlm.prompt(\*\*params) -> TlmPromptResponse
+- client.tlm.score(\*\*params) -> TlmScoreResponse
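
The new `users.verification` and `tlm` resources documented above can be called directly on a configured client. A minimal usage sketch (the API key value and prompt/response strings are placeholders; keyword arguments follow the signatures in `src/codex/resources/tlm.py`):

```python
# Hedged sketch of the endpoints added in this release; the api_key value
# and prompt/response strings are illustrative placeholders.
from codex import Codex

client = Codex(api_key="<your-api-key>")

# Resend the verification email for the current user.
verification = client.users.verification.resend()

# Prompt the Trustworthy Language Model.
prompt_result = client.tlm.prompt(
    prompt="What is the capital of France?",
    quality_preset="medium",
)

# Score an existing prompt/response pair for trustworthiness.
score_result = client.tlm.score(
    prompt="What is the capital of France?",
    response="Paris",
)
```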
diff --git a/pyproject.toml b/pyproject.toml
index 2641c49..ede4351 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "codex-sdk"
-version = "0.1.0-alpha.11"
+version = "0.1.0-alpha.12"
description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead."
dynamic = ["readme"]
license = "MIT"
@@ -177,7 +177,7 @@ select = [
"T201",
"T203",
# misuse of typing.TYPE_CHECKING
- "TCH004",
+ "TC004",
# import rules
"TID251",
]
diff --git a/requirements-dev.lock b/requirements-dev.lock
index ef07871..1961e8d 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -78,7 +78,7 @@ pytz==2023.3.post1
# via dirty-equals
respx==0.22.0
rich==13.7.1
-ruff==0.6.9
+ruff==0.9.4
setuptools==68.2.2
# via nodeenv
six==1.16.0
diff --git a/scripts/test b/scripts/test
index 4fa5698..2b87845 100755
--- a/scripts/test
+++ b/scripts/test
@@ -52,6 +52,8 @@ else
echo
fi
+export DEFER_PYDANTIC_BUILD=false
+
echo "==> Running tests"
rye run pytest "$@"
diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py
index 37b3d94..0cf2bd2 100644
--- a/scripts/utils/ruffen-docs.py
+++ b/scripts/utils/ruffen-docs.py
@@ -47,7 +47,7 @@ def _md_match(match: Match[str]) -> str:
with _collect_error(match):
code = format_code_block(code)
code = textwrap.indent(code, match["indent"])
- return f'{match["before"]}{code}{match["after"]}'
+ return f"{match['before']}{code}{match['after']}"
def _pycon_match(match: Match[str]) -> str:
code = ""
@@ -97,7 +97,7 @@ def finish_fragment() -> None:
def _md_pycon_match(match: Match[str]) -> str:
code = _pycon_match(match)
code = textwrap.indent(code, match["indent"])
- return f'{match["before"]}{code}{match["after"]}'
+ return f"{match['before']}{code}{match['after']}"
src = MD_RE.sub(_md_match, src)
src = MD_PYCON_RE.sub(_md_pycon_match, src)
diff --git a/src/codex/_base_client.py b/src/codex/_base_client.py
index 9090e75..273341b 100644
--- a/src/codex/_base_client.py
+++ b/src/codex/_base_client.py
@@ -9,7 +9,6 @@
import inspect
import logging
import platform
-import warnings
import email.utils
from types import TracebackType
from random import random
@@ -36,7 +35,7 @@
import httpx
import distro
import pydantic
-from httpx import URL, Limits
+from httpx import URL
from pydantic import PrivateAttr
from . import _exceptions
@@ -51,19 +50,16 @@
Timeout,
NotGiven,
ResponseT,
- Transport,
AnyMapping,
PostParser,
- ProxiesTypes,
RequestFiles,
HttpxSendArgs,
- AsyncTransport,
RequestOptions,
HttpxRequestFiles,
ModelBuilderProtocol,
)
from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping
-from ._compat import model_copy, model_dump
+from ._compat import PYDANTIC_V2, model_copy, model_dump
from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type
from ._response import (
APIResponse,
@@ -207,6 +203,9 @@ def _set_private_attributes(
model: Type[_T],
options: FinalRequestOptions,
) -> None:
+ if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None:
+ self.__pydantic_private__ = {}
+
self._model = model
self._client = client
self._options = options
@@ -292,6 +291,9 @@ def _set_private_attributes(
client: AsyncAPIClient,
options: FinalRequestOptions,
) -> None:
+ if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None:
+ self.__pydantic_private__ = {}
+
self._model = model
self._client = client
self._options = options
@@ -331,9 +333,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]):
_base_url: URL
max_retries: int
timeout: Union[float, Timeout, None]
- _limits: httpx.Limits
- _proxies: ProxiesTypes | None
- _transport: Transport | AsyncTransport | None
_strict_response_validation: bool
_idempotency_header: str | None
_default_stream_cls: type[_DefaultStreamT] | None = None
@@ -346,9 +345,6 @@ def __init__(
_strict_response_validation: bool,
max_retries: int = DEFAULT_MAX_RETRIES,
timeout: float | Timeout | None = DEFAULT_TIMEOUT,
- limits: httpx.Limits,
- transport: Transport | AsyncTransport | None,
- proxies: ProxiesTypes | None,
custom_headers: Mapping[str, str] | None = None,
custom_query: Mapping[str, object] | None = None,
) -> None:
@@ -356,9 +352,6 @@ def __init__(
self._base_url = self._enforce_trailing_slash(URL(base_url))
self.max_retries = max_retries
self.timeout = timeout
- self._limits = limits
- self._proxies = proxies
- self._transport = transport
self._custom_headers = custom_headers or {}
self._custom_query = custom_query or {}
self._strict_response_validation = _strict_response_validation
@@ -418,10 +411,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0
if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
- # Don't set the retry count header if it was already set or removed by the caller. We check
+ # Don't set these headers if they were already set or removed by the caller. We check
# `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
- if "x-stainless-retry-count" not in (header.lower() for header in custom_headers):
+ lower_custom_headers = [header.lower() for header in custom_headers]
+ if "x-stainless-retry-count" not in lower_custom_headers:
headers["x-stainless-retry-count"] = str(retries_taken)
+ if "x-stainless-read-timeout" not in lower_custom_headers:
+ timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout
+ if isinstance(timeout, Timeout):
+ timeout = timeout.read
+ if timeout is not None:
+ headers["x-stainless-read-timeout"] = str(timeout)
return headers
@@ -511,7 +511,7 @@ def _build_request(
# so that passing a `TypedDict` doesn't cause an error.
# https://github.com/microsoft/pyright/issues/3526#event-6715453066
params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None,
- json=json_data,
+ json=json_data if is_given(json_data) else None,
files=files,
**kwargs,
)
@@ -787,46 +787,11 @@ def __init__(
base_url: str | URL,
max_retries: int = DEFAULT_MAX_RETRIES,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
- transport: Transport | None = None,
- proxies: ProxiesTypes | None = None,
- limits: Limits | None = None,
http_client: httpx.Client | None = None,
custom_headers: Mapping[str, str] | None = None,
custom_query: Mapping[str, object] | None = None,
_strict_response_validation: bool,
) -> None:
- kwargs: dict[str, Any] = {}
- if limits is not None:
- warnings.warn(
- "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead",
- category=DeprecationWarning,
- stacklevel=3,
- )
- if http_client is not None:
- raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`")
- else:
- limits = DEFAULT_CONNECTION_LIMITS
-
- if transport is not None:
- kwargs["transport"] = transport
- warnings.warn(
- "The `transport` argument is deprecated. The `http_client` argument should be passed instead",
- category=DeprecationWarning,
- stacklevel=3,
- )
- if http_client is not None:
- raise ValueError("The `http_client` argument is mutually exclusive with `transport`")
-
- if proxies is not None:
- kwargs["proxies"] = proxies
- warnings.warn(
- "The `proxies` argument is deprecated. The `http_client` argument should be passed instead",
- category=DeprecationWarning,
- stacklevel=3,
- )
- if http_client is not None:
- raise ValueError("The `http_client` argument is mutually exclusive with `proxies`")
-
if not is_given(timeout):
# if the user passed in a custom http client with a non-default
# timeout set then we use that timeout.
@@ -847,12 +812,9 @@ def __init__(
super().__init__(
version=version,
- limits=limits,
# cast to a valid type because mypy doesn't understand our type narrowing
timeout=cast(Timeout, timeout),
- proxies=proxies,
base_url=base_url,
- transport=transport,
max_retries=max_retries,
custom_query=custom_query,
custom_headers=custom_headers,
@@ -862,9 +824,6 @@ def __init__(
base_url=base_url,
# cast to a valid type because mypy doesn't understand our type narrowing
timeout=cast(Timeout, timeout),
- limits=limits,
- follow_redirects=True,
- **kwargs, # type: ignore
)
def is_closed(self) -> bool:
@@ -1359,45 +1318,10 @@ def __init__(
_strict_response_validation: bool,
max_retries: int = DEFAULT_MAX_RETRIES,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
- transport: AsyncTransport | None = None,
- proxies: ProxiesTypes | None = None,
- limits: Limits | None = None,
http_client: httpx.AsyncClient | None = None,
custom_headers: Mapping[str, str] | None = None,
custom_query: Mapping[str, object] | None = None,
) -> None:
- kwargs: dict[str, Any] = {}
- if limits is not None:
- warnings.warn(
- "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead",
- category=DeprecationWarning,
- stacklevel=3,
- )
- if http_client is not None:
- raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`")
- else:
- limits = DEFAULT_CONNECTION_LIMITS
-
- if transport is not None:
- kwargs["transport"] = transport
- warnings.warn(
- "The `transport` argument is deprecated. The `http_client` argument should be passed instead",
- category=DeprecationWarning,
- stacklevel=3,
- )
- if http_client is not None:
- raise ValueError("The `http_client` argument is mutually exclusive with `transport`")
-
- if proxies is not None:
- kwargs["proxies"] = proxies
- warnings.warn(
- "The `proxies` argument is deprecated. The `http_client` argument should be passed instead",
- category=DeprecationWarning,
- stacklevel=3,
- )
- if http_client is not None:
- raise ValueError("The `http_client` argument is mutually exclusive with `proxies`")
-
if not is_given(timeout):
# if the user passed in a custom http client with a non-default
# timeout set then we use that timeout.
@@ -1419,11 +1343,8 @@ def __init__(
super().__init__(
version=version,
base_url=base_url,
- limits=limits,
# cast to a valid type because mypy doesn't understand our type narrowing
timeout=cast(Timeout, timeout),
- proxies=proxies,
- transport=transport,
max_retries=max_retries,
custom_query=custom_query,
custom_headers=custom_headers,
@@ -1433,9 +1354,6 @@ def __init__(
base_url=base_url,
# cast to a valid type because mypy doesn't understand our type narrowing
timeout=cast(Timeout, timeout),
- limits=limits,
- follow_redirects=True,
- **kwargs, # type: ignore
)
def is_closed(self) -> bool:
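
With the deprecated `transport`, `proxies`, and `limits` constructor arguments removed above, connection tuning now goes through `http_client`. A hedged sketch, assuming the top-level `Codex` client forwards `http_client` to the base client as in other Stainless-generated SDKs:

```python
# Sketch: supply connection limits (and any transport/proxy setup) via an
# explicit httpx.Client instead of the removed constructor arguments.
import httpx

from codex import Codex

http_client = httpx.Client(
    limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
    timeout=httpx.Timeout(60.0, connect=5.0),
)

client = Codex(api_key="<your-api-key>", http_client=http_client)
```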
diff --git a/src/codex/_client.py b/src/codex/_client.py
index 09aaafd..9bdd5f0 100644
--- a/src/codex/_client.py
+++ b/src/codex/_client.py
@@ -25,7 +25,7 @@
get_async_library,
)
from ._version import __version__
-from .resources import health
+from .resources import tlm, health
from ._streaming import Stream as Stream, AsyncStream as AsyncStream
from ._exceptions import APIStatusError
from ._base_client import (
@@ -61,6 +61,7 @@ class Codex(SyncAPIClient):
organizations: organizations.OrganizationsResource
users: users.UsersResource
projects: projects.ProjectsResource
+ tlm: tlm.TlmResource
with_raw_response: CodexWithRawResponse
with_streaming_response: CodexWithStreamedResponse
@@ -141,6 +142,7 @@ def __init__(
self.organizations = organizations.OrganizationsResource(self)
self.users = users.UsersResource(self)
self.projects = projects.ProjectsResource(self)
+ self.tlm = tlm.TlmResource(self)
self.with_raw_response = CodexWithRawResponse(self)
self.with_streaming_response = CodexWithStreamedResponse(self)
@@ -291,6 +293,7 @@ class AsyncCodex(AsyncAPIClient):
organizations: organizations.AsyncOrganizationsResource
users: users.AsyncUsersResource
projects: projects.AsyncProjectsResource
+ tlm: tlm.AsyncTlmResource
with_raw_response: AsyncCodexWithRawResponse
with_streaming_response: AsyncCodexWithStreamedResponse
@@ -325,7 +328,7 @@ def __init__(
# part of our public interface in the future.
_strict_response_validation: bool = False,
) -> None:
- """Construct a new async Codex client instance."""
+ """Construct a new async AsyncCodex client instance."""
self.api_key = api_key
self.access_key = access_key
@@ -371,6 +374,7 @@ def __init__(
self.organizations = organizations.AsyncOrganizationsResource(self)
self.users = users.AsyncUsersResource(self)
self.projects = projects.AsyncProjectsResource(self)
+ self.tlm = tlm.AsyncTlmResource(self)
self.with_raw_response = AsyncCodexWithRawResponse(self)
self.with_streaming_response = AsyncCodexWithStreamedResponse(self)
@@ -522,6 +526,7 @@ def __init__(self, client: Codex) -> None:
self.organizations = organizations.OrganizationsResourceWithRawResponse(client.organizations)
self.users = users.UsersResourceWithRawResponse(client.users)
self.projects = projects.ProjectsResourceWithRawResponse(client.projects)
+ self.tlm = tlm.TlmResourceWithRawResponse(client.tlm)
class AsyncCodexWithRawResponse:
@@ -530,6 +535,7 @@ def __init__(self, client: AsyncCodex) -> None:
self.organizations = organizations.AsyncOrganizationsResourceWithRawResponse(client.organizations)
self.users = users.AsyncUsersResourceWithRawResponse(client.users)
self.projects = projects.AsyncProjectsResourceWithRawResponse(client.projects)
+ self.tlm = tlm.AsyncTlmResourceWithRawResponse(client.tlm)
class CodexWithStreamedResponse:
@@ -538,6 +544,7 @@ def __init__(self, client: Codex) -> None:
self.organizations = organizations.OrganizationsResourceWithStreamingResponse(client.organizations)
self.users = users.UsersResourceWithStreamingResponse(client.users)
self.projects = projects.ProjectsResourceWithStreamingResponse(client.projects)
+ self.tlm = tlm.TlmResourceWithStreamingResponse(client.tlm)
class AsyncCodexWithStreamedResponse:
@@ -546,6 +553,7 @@ def __init__(self, client: AsyncCodex) -> None:
self.organizations = organizations.AsyncOrganizationsResourceWithStreamingResponse(client.organizations)
self.users = users.AsyncUsersResourceWithStreamingResponse(client.users)
self.projects = projects.AsyncProjectsResourceWithStreamingResponse(client.projects)
+ self.tlm = tlm.AsyncTlmResourceWithStreamingResponse(client.tlm)
Client = Codex
diff --git a/src/codex/_constants.py b/src/codex/_constants.py
index a2ac3b6..6ddf2c7 100644
--- a/src/codex/_constants.py
+++ b/src/codex/_constants.py
@@ -6,7 +6,7 @@
OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
# default timeout is 1 minute
-DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0)
+DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0)
DEFAULT_MAX_RETRIES = 2
DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20)
diff --git a/src/codex/_models.py b/src/codex/_models.py
index 9a918aa..c4401ff 100644
--- a/src/codex/_models.py
+++ b/src/codex/_models.py
@@ -172,7 +172,7 @@ def to_json(
@override
def __str__(self) -> str:
# mypy complains about an invalid self arg
- return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc]
+ return f"{self.__repr_name__()}({self.__repr_str__(', ')})" # type: ignore[misc]
# Override the 'construct' method in a way that supports recursive parsing without validation.
# Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836.
@@ -426,10 +426,16 @@ def construct_type(*, value: object, type_: object) -> object:
If the given value does not match the expected type then it is returned as-is.
"""
+
+ # store a reference to the original type we were given before we extract any inner
+ # types so that we can properly resolve forward references in `TypeAliasType` annotations
+ original_type = None
+
# we allow `object` as the input type because otherwise, passing things like
# `Literal['value']` will be reported as a type error by type checkers
type_ = cast("type[object]", type_)
if is_type_alias_type(type_):
+ original_type = type_ # type: ignore[unreachable]
type_ = type_.__value__ # type: ignore[unreachable]
# unwrap `Annotated[T, ...]` -> `T`
@@ -446,7 +452,7 @@ def construct_type(*, value: object, type_: object) -> object:
if is_union(origin):
try:
- return validate_type(type_=cast("type[object]", type_), value=value)
+ return validate_type(type_=cast("type[object]", original_type or type_), value=value)
except Exception:
pass
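
The `original_type` bookkeeping above matters because validating against the unwrapped `__value__` of a `TypeAliasType` loses the context needed to resolve forward references declared on the alias. A standalone illustration of the alias shape involved, assuming pydantic v2 and `typing_extensions` are available:

```python
# Standalone illustration: a TypeAliasType wrapping a union. construct_type now
# validates against the alias itself (original_type) rather than its unwrapped
# __value__, which is what lets forward references on the alias resolve.
from typing import Union

from pydantic import TypeAdapter
from typing_extensions import TypeAliasType

Result = TypeAliasType("Result", Union[int, str])

print(TypeAdapter(Result).validate_python("ok"))  # 'ok'
print(TypeAdapter(Result).validate_python(3))     # 3
```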
diff --git a/src/codex/_utils/_sync.py b/src/codex/_utils/_sync.py
index 8b3aaf2..ad7ec71 100644
--- a/src/codex/_utils/_sync.py
+++ b/src/codex/_utils/_sync.py
@@ -7,16 +7,20 @@
from typing import Any, TypeVar, Callable, Awaitable
from typing_extensions import ParamSpec
+import anyio
+import sniffio
+import anyio.to_thread
+
T_Retval = TypeVar("T_Retval")
T_ParamSpec = ParamSpec("T_ParamSpec")
if sys.version_info >= (3, 9):
- to_thread = asyncio.to_thread
+ _asyncio_to_thread = asyncio.to_thread
else:
# backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
# for Python 3.8 support
- async def to_thread(
+ async def _asyncio_to_thread(
func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
) -> Any:
"""Asynchronously run function *func* in a separate thread.
@@ -34,6 +38,17 @@ async def to_thread(
return await loop.run_in_executor(None, func_call)
+async def to_thread(
+ func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
+) -> T_Retval:
+ if sniffio.current_async_library() == "asyncio":
+ return await _asyncio_to_thread(func, *args, **kwargs)
+
+ return await anyio.to_thread.run_sync(
+ functools.partial(func, *args, **kwargs),
+ )
+
+
# inspired by `asyncer`, https://github.com/tiangolo/asyncer
def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
"""
diff --git a/src/codex/_utils/_transform.py b/src/codex/_utils/_transform.py
index a6b62ca..18afd9d 100644
--- a/src/codex/_utils/_transform.py
+++ b/src/codex/_utils/_transform.py
@@ -25,7 +25,7 @@
is_annotated_type,
strip_annotated_type,
)
-from .._compat import model_dump, is_typeddict
+from .._compat import get_origin, model_dump, is_typeddict
_T = TypeVar("_T")
@@ -164,9 +164,14 @@ def _transform_recursive(
inner_type = annotation
stripped_type = strip_annotated_type(inner_type)
+ origin = get_origin(stripped_type) or stripped_type
if is_typeddict(stripped_type) and is_mapping(data):
return _transform_typeddict(data, stripped_type)
+ if origin == dict and is_mapping(data):
+ items_type = get_args(stripped_type)[1]
+ return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()}
+
if (
# List[T]
(is_list_type(stripped_type) and is_list(data))
@@ -307,9 +312,14 @@ async def _async_transform_recursive(
inner_type = annotation
stripped_type = strip_annotated_type(inner_type)
+ origin = get_origin(stripped_type) or stripped_type
if is_typeddict(stripped_type) and is_mapping(data):
return await _async_transform_typeddict(data, stripped_type)
+ if origin == dict and is_mapping(data):
+ items_type = get_args(stripped_type)[1]
+ return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()}
+
if (
# List[T]
(is_list_type(stripped_type) and is_list(data))
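
The new `origin == dict` branch means `Dict[str, T]` annotations now get their values transformed recursively against `T`, mirroring the existing list handling. A simplified standalone sketch of the dispatch (the real `_transform_recursive` also handles TypedDicts, unions, and field aliasing):

```python
# Simplified sketch of origin-based dispatch over dict/list annotations;
# the leaf case is where the SDK applies per-field aliasing and formatting.
from typing import Any, Dict, List, get_args, get_origin


def transform_values(data: Any, annotation: Any) -> Any:
    origin = get_origin(annotation) or annotation
    if origin is dict and isinstance(data, dict):
        value_type = get_args(annotation)[1]
        return {key: transform_values(value, value_type) for key, value in data.items()}
    if origin is list and isinstance(data, list):
        (item_type,) = get_args(annotation)
        return [transform_values(item, item_type) for item in data]
    return data


print(transform_values({"scores": [1, 2, 3]}, Dict[str, List[int]]))  # {'scores': [1, 2, 3]}
```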
diff --git a/src/codex/_version.py b/src/codex/_version.py
index fd5f708..fe7cc73 100644
--- a/src/codex/_version.py
+++ b/src/codex/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "codex"
-__version__ = "0.1.0-alpha.11" # x-release-please-version
+__version__ = "0.1.0-alpha.12" # x-release-please-version
diff --git a/src/codex/resources/__init__.py b/src/codex/resources/__init__.py
index b96b725..f91f0e4 100644
--- a/src/codex/resources/__init__.py
+++ b/src/codex/resources/__init__.py
@@ -1,5 +1,13 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .tlm import (
+ TlmResource,
+ AsyncTlmResource,
+ TlmResourceWithRawResponse,
+ AsyncTlmResourceWithRawResponse,
+ TlmResourceWithStreamingResponse,
+ AsyncTlmResourceWithStreamingResponse,
+)
from .users import (
UsersResource,
AsyncUsersResource,
@@ -58,4 +66,10 @@
"AsyncProjectsResourceWithRawResponse",
"ProjectsResourceWithStreamingResponse",
"AsyncProjectsResourceWithStreamingResponse",
+ "TlmResource",
+ "AsyncTlmResource",
+ "TlmResourceWithRawResponse",
+ "AsyncTlmResourceWithRawResponse",
+ "TlmResourceWithStreamingResponse",
+ "AsyncTlmResourceWithStreamingResponse",
]
diff --git a/src/codex/resources/projects/entries.py b/src/codex/resources/projects/entries.py
index d307d3d..2fcc8e0 100644
--- a/src/codex/resources/projects/entries.py
+++ b/src/codex/resources/projects/entries.py
@@ -70,6 +70,8 @@ def create(
"""
Create a knowledge entry for a project.
+ Raises: HTTPException: If an existing entry is found with the same question.
+
Args:
extra_headers: Send extra headers
@@ -138,6 +140,7 @@ def update(
*,
project_id: str,
answer: Optional[str] | NotGiven = NOT_GIVEN,
+ frequency_count: Optional[int] | NotGiven = NOT_GIVEN,
question: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -167,6 +170,7 @@ def update(
body=maybe_transform(
{
"answer": answer,
+ "frequency_count": frequency_count,
"question": question,
},
entry_update_params.EntryUpdateParams,
@@ -318,8 +322,10 @@ def query(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Optional[Entry]:
- """
- Query knowledge for a project.
+ """Query knowledge for a project.
+
+ Also increments the frequency_count for the
+ matching entry if found.
Returns the matching entry if found and answered, otherwise returns None. This
allows the client to distinguish between: (1) no matching question found
@@ -383,6 +389,8 @@ async def create(
"""
Create a knowledge entry for a project.
+ Raises: HTTPException: If an existing entry is found with the same question.
+
Args:
extra_headers: Send extra headers
@@ -451,6 +459,7 @@ async def update(
*,
project_id: str,
answer: Optional[str] | NotGiven = NOT_GIVEN,
+ frequency_count: Optional[int] | NotGiven = NOT_GIVEN,
question: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -480,6 +489,7 @@ async def update(
body=await async_maybe_transform(
{
"answer": answer,
+ "frequency_count": frequency_count,
"question": question,
},
entry_update_params.EntryUpdateParams,
@@ -631,8 +641,10 @@ async def query(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Optional[Entry]:
- """
- Query knowledge for a project.
+ """Query knowledge for a project.
+
+ Also increments the frequency_count for the
+ matching entry if found.
Returns the matching entry if found and answered, otherwise returns None. This
allows the client to distinguish between: (1) no matching question found
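
A hedged usage sketch of the updated entries methods (IDs are placeholders; the positional `entry_id`/`project_id` arguments mirror the signatures listed in api.md, and the `question` keyword for `query()` is assumed from `entry_query_params`):

```python
# Hypothetical sketch; entry/project IDs are placeholders and the `question`
# keyword argument for query() is an assumption.
from codex import Codex

client = Codex(api_key="<your-api-key>")

# Reset the new frequency_count field on an existing knowledge entry.
entry = client.projects.entries.update(
    "<entry-id>",
    project_id="<project-id>",
    frequency_count=0,
)

# query() now also increments frequency_count on the matching entry, if any.
match = client.projects.entries.query(
    "<project-id>",
    question="How do I rotate my API key?",
)
if match is None:
    print("No answered entry matched this question.")
```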
diff --git a/src/codex/resources/tlm.py b/src/codex/resources/tlm.py
new file mode 100644
index 0000000..c6585d0
--- /dev/null
+++ b/src/codex/resources/tlm.py
@@ -0,0 +1,656 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ..types import tlm_score_params, tlm_prompt_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.tlm_score_response import TlmScoreResponse
+from ..types.tlm_prompt_response import TlmPromptResponse
+
+__all__ = ["TlmResource", "AsyncTlmResource"]
+
+
+class TlmResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> TlmResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers
+ """
+ return TlmResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> TlmResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response
+ """
+ return TlmResourceWithStreamingResponse(self)
+
+ def prompt(
+ self,
+ *,
+ prompt: str,
+ constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+ options: Optional[tlm_prompt_params.Options] | NotGiven = NOT_GIVEN,
+ quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN,
+ task: Optional[str] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TlmPromptResponse:
+ """
+ Prompts the TLM API.
+
+ Args:
+ options: Typed dict of advanced configuration options for the Trustworthy Language Model.
+ Many of these configurations are determined by the quality preset selected
+ (learn about quality presets in the TLM [initialization method](./#class-tlm)).
+ Specifying TLMOptions values directly overrides any default values set from the
+ quality preset.
+
+ For all options described below, higher settings will lead to longer runtimes
+ and may consume more tokens internally. You may not be able to run long prompts
+ (or prompts with long responses) in your account, unless your token/rate limits
+ are increased. If you hit token limit issues, try lower/less expensive
+ TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to
+ increase your limits.
+
+ The default values corresponding to each quality preset are:
+
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8,
+ `use_self_reflection` = True.
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4,
+ `use_self_reflection` = True.
+ - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0,
+ `use_self_reflection` = False. When using `get_trustworthiness_score()` on
+ "base" preset, a cheaper self-reflection will be used to compute the
+ trustworthiness score.
+
+ By default, the TLM uses the "medium" quality preset. The default base LLM
+ `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets.
+ You can set custom values for these arguments regardless of the quality preset
+ specified.
+
+ Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview",
+ "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku",
+ "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default =
+ "gpt-4o-mini"): Underlying base LLM to use (better models yield better results,
+ faster models yield faster/cheaper results). - Models still in beta: "o1",
+ "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite",
+ "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1",
+ "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro",
+ "gpt-4o-mini".
+
+ max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring).
+ Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes.
+ If you experience token/rate limit errors while using TLM, try lowering this number.
+ For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512.
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+          custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/api/tlm/prompt",
+ body=maybe_transform(
+ {
+ "prompt": prompt,
+ "constrain_outputs": constrain_outputs,
+ "options": options,
+ "quality_preset": quality_preset,
+ "task": task,
+ },
+ tlm_prompt_params.TlmPromptParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TlmPromptResponse,
+ )
+
+ def score(
+ self,
+ *,
+ prompt: str,
+ response: str,
+ constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+ options: Optional[tlm_score_params.Options] | NotGiven = NOT_GIVEN,
+ quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN,
+ task: Optional[str] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TlmScoreResponse:
+ """
+ Scores the TLM API.
+
+ TODO:
+
+ - Track query count in DB
+ - Enforce hard cap on queries for users w/o credit card on file
+
+ Args:
+ options: Typed dict of advanced configuration options for the Trustworthy Language Model.
+ Many of these configurations are determined by the quality preset selected
+ (learn about quality presets in the TLM [initialization method](./#class-tlm)).
+ Specifying TLMOptions values directly overrides any default values set from the
+ quality preset.
+
+ For all options described below, higher settings will lead to longer runtimes
+ and may consume more tokens internally. You may not be able to run long prompts
+ (or prompts with long responses) in your account, unless your token/rate limits
+ are increased. If you hit token limit issues, try lower/less expensive
+ TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to
+ increase your limits.
+
+ The default values corresponding to each quality preset are:
+
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8,
+ `use_self_reflection` = True.
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4,
+ `use_self_reflection` = True.
+ - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0,
+ `use_self_reflection` = False. When using `get_trustworthiness_score()` on
+ "base" preset, a cheaper self-reflection will be used to compute the
+ trustworthiness score.
+
+ By default, the TLM uses the "medium" quality preset. The default base LLM
+ `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets.
+ You can set custom values for these arguments regardless of the quality preset
+ specified.
+
+ Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview",
+ "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku",
+ "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default =
+ "gpt-4o-mini"): Underlying base LLM to use (better models yield better results,
+ faster models yield faster/cheaper results). - Models still in beta: "o1",
+ "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite",
+ "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1",
+ "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro",
+ "gpt-4o-mini".
+
+ max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring).
+ Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes.
+ If you experience token/rate limit errors while using TLM, try lowering this number.
+ For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512.
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+          custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/api/tlm/score",
+ body=maybe_transform(
+ {
+ "prompt": prompt,
+ "response": response,
+ "constrain_outputs": constrain_outputs,
+ "options": options,
+ "quality_preset": quality_preset,
+ "task": task,
+ },
+ tlm_score_params.TlmScoreParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TlmScoreResponse,
+ )
+
+
+class AsyncTlmResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncTlmResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncTlmResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncTlmResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response
+ """
+ return AsyncTlmResourceWithStreamingResponse(self)
+
+ async def prompt(
+ self,
+ *,
+ prompt: str,
+ constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+ options: Optional[tlm_prompt_params.Options] | NotGiven = NOT_GIVEN,
+ quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN,
+ task: Optional[str] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TlmPromptResponse:
+ """
+ Prompts the TLM API.
+
+ Args:
+ options: Typed dict of advanced configuration options for the Trustworthy Language Model.
+ Many of these configurations are determined by the quality preset selected
+ (learn about quality presets in the TLM [initialization method](./#class-tlm)).
+ Specifying TLMOptions values directly overrides any default values set from the
+ quality preset.
+
+ For all options described below, higher settings will lead to longer runtimes
+ and may consume more tokens internally. You may not be able to run long prompts
+ (or prompts with long responses) in your account, unless your token/rate limits
+ are increased. If you hit token limit issues, try lower/less expensive
+ TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to
+ increase your limits.
+
+ The default values corresponding to each quality preset are:
+
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8,
+ `use_self_reflection` = True.
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4,
+ `use_self_reflection` = True.
+ - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0,
+ `use_self_reflection` = False. When using `get_trustworthiness_score()` on
+ "base" preset, a cheaper self-reflection will be used to compute the
+ trustworthiness score.
+
+ By default, the TLM uses the "medium" quality preset. The default base LLM
+ `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets.
+ You can set custom values for these arguments regardless of the quality preset
+ specified.
+
+ Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview",
+ "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku",
+ "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default =
+ "gpt-4o-mini"): Underlying base LLM to use (better models yield better results,
+ faster models yield faster/cheaper results). - Models still in beta: "o1",
+ "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite",
+ "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1",
+ "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro",
+ "gpt-4o-mini".
+
+ max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring).
+ Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes.
+ If you experience token/rate limit errors while using TLM, try lowering this number.
+ For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512.
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+          custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/api/tlm/prompt",
+ body=await async_maybe_transform(
+ {
+ "prompt": prompt,
+ "constrain_outputs": constrain_outputs,
+ "options": options,
+ "quality_preset": quality_preset,
+ "task": task,
+ },
+ tlm_prompt_params.TlmPromptParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TlmPromptResponse,
+ )
+
+ async def score(
+ self,
+ *,
+ prompt: str,
+ response: str,
+ constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+ options: Optional[tlm_score_params.Options] | NotGiven = NOT_GIVEN,
+ quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN,
+ task: Optional[str] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> TlmScoreResponse:
+ """
+ Scores the trustworthiness of a prompt/response pair via the TLM API.
+
+ TODO:
+
+ - Track query count in DB
+ - Enforce hard cap on queries for users w/o credit card on file
+
+ Args:
+ options: Typed dict of advanced configuration options for the Trustworthy Language Model.
+ Many of these configurations are determined by the quality preset selected
+ (learn about quality presets in the TLM [initialization method](./#class-tlm)).
+ Specifying TLMOptions values directly overrides any default values set from the
+ quality preset.
+
+ For all options described below, higher settings will lead to longer runtimes
+ and may consume more tokens internally. You may not be able to run long prompts
+ (or prompts with long responses) in your account, unless your token/rate limits
+ are increased. If you hit token limit issues, try lower/less expensive
+ TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to
+ increase your limits.
+
+ The default values corresponding to each quality preset are:
+
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8,
+ `use_self_reflection` = True.
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4,
+ `use_self_reflection` = True.
+ - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0,
+ `use_self_reflection` = False. When using `get_trustworthiness_score()` on
+ "base" preset, a cheaper self-reflection will be used to compute the
+ trustworthiness score.
+
+ By default, the TLM uses the "medium" quality preset. The default base LLM
+ `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets.
+ You can set custom values for these arguments regardless of the quality preset
+ specified.
+
+ model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview",
+ "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku",
+ "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = "gpt-4o-mini"): Underlying base LLM to use
+ (better models yield better results, faster models yield faster/cheaper results).
+ - Models still in beta: "o1", "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", "nova-pro".
+ - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", "claude-3.7-sonnet".
+ - Recommended models for low latency/costs: "nova-micro", "gpt-4o-mini".
+
+ max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring).
+ Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes.
+ If you experience token/rate limit errors while using TLM, try lowering this number.
+ For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512.
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and consume more tokens internally).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this to False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+ custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/api/tlm/score",
+ body=await async_maybe_transform(
+ {
+ "prompt": prompt,
+ "response": response,
+ "constrain_outputs": constrain_outputs,
+ "options": options,
+ "quality_preset": quality_preset,
+ "task": task,
+ },
+ tlm_score_params.TlmScoreParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=TlmScoreResponse,
+ )
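+
+ # Illustrative usage sketch (editor's note, not generated from the OpenAPI spec):
+ # with the same hypothetical AsyncCodex client as above, the score endpoint grades
+ # an existing prompt/response pair; the strings below are placeholders.
+ #
+ #     scored = await client.tlm.score(
+ #         prompt="What is the capital of France?",
+ #         response="Paris",
+ #         quality_preset="low",
+ #     )
+ #     print(scored.trustworthiness_score, scored.log)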
+
+
+class TlmResourceWithRawResponse:
+ def __init__(self, tlm: TlmResource) -> None:
+ self._tlm = tlm
+
+ self.prompt = to_raw_response_wrapper(
+ tlm.prompt,
+ )
+ self.score = to_raw_response_wrapper(
+ tlm.score,
+ )
+
+
+class AsyncTlmResourceWithRawResponse:
+ def __init__(self, tlm: AsyncTlmResource) -> None:
+ self._tlm = tlm
+
+ self.prompt = async_to_raw_response_wrapper(
+ tlm.prompt,
+ )
+ self.score = async_to_raw_response_wrapper(
+ tlm.score,
+ )
+
+
+class TlmResourceWithStreamingResponse:
+ def __init__(self, tlm: TlmResource) -> None:
+ self._tlm = tlm
+
+ self.prompt = to_streamed_response_wrapper(
+ tlm.prompt,
+ )
+ self.score = to_streamed_response_wrapper(
+ tlm.score,
+ )
+
+
+class AsyncTlmResourceWithStreamingResponse:
+ def __init__(self, tlm: AsyncTlmResource) -> None:
+ self._tlm = tlm
+
+ self.prompt = async_to_streamed_response_wrapper(
+ tlm.prompt,
+ )
+ self.score = async_to_streamed_response_wrapper(
+ tlm.score,
+ )
diff --git a/src/codex/resources/users/__init__.py b/src/codex/resources/users/__init__.py
index 18ed37e..9618f58 100644
--- a/src/codex/resources/users/__init__.py
+++ b/src/codex/resources/users/__init__.py
@@ -16,6 +16,14 @@
MyselfResourceWithStreamingResponse,
AsyncMyselfResourceWithStreamingResponse,
)
+from .verification import (
+ VerificationResource,
+ AsyncVerificationResource,
+ VerificationResourceWithRawResponse,
+ AsyncVerificationResourceWithRawResponse,
+ VerificationResourceWithStreamingResponse,
+ AsyncVerificationResourceWithStreamingResponse,
+)
__all__ = [
"MyselfResource",
@@ -24,6 +32,12 @@
"AsyncMyselfResourceWithRawResponse",
"MyselfResourceWithStreamingResponse",
"AsyncMyselfResourceWithStreamingResponse",
+ "VerificationResource",
+ "AsyncVerificationResource",
+ "VerificationResourceWithRawResponse",
+ "AsyncVerificationResourceWithRawResponse",
+ "VerificationResourceWithStreamingResponse",
+ "AsyncVerificationResourceWithStreamingResponse",
"UsersResource",
"AsyncUsersResource",
"UsersResourceWithRawResponse",
diff --git a/src/codex/resources/users/users.py b/src/codex/resources/users/users.py
index fb7ee0f..a7d9d2a 100644
--- a/src/codex/resources/users/users.py
+++ b/src/codex/resources/users/users.py
@@ -22,6 +22,14 @@
async_to_streamed_response_wrapper,
)
from ...types.user import User
+from .verification import (
+ VerificationResource,
+ AsyncVerificationResource,
+ VerificationResourceWithRawResponse,
+ AsyncVerificationResourceWithRawResponse,
+ VerificationResourceWithStreamingResponse,
+ AsyncVerificationResourceWithStreamingResponse,
+)
from .myself.myself import (
MyselfResource,
AsyncMyselfResource,
@@ -40,6 +48,10 @@ class UsersResource(SyncAPIResource):
def myself(self) -> MyselfResource:
return MyselfResource(self._client)
+ @cached_property
+ def verification(self) -> VerificationResource:
+ return VerificationResource(self._client)
+
@cached_property
def with_raw_response(self) -> UsersResourceWithRawResponse:
"""
@@ -114,6 +126,10 @@ class AsyncUsersResource(AsyncAPIResource):
def myself(self) -> AsyncMyselfResource:
return AsyncMyselfResource(self._client)
+ @cached_property
+ def verification(self) -> AsyncVerificationResource:
+ return AsyncVerificationResource(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncUsersResourceWithRawResponse:
"""
@@ -195,6 +211,10 @@ def __init__(self, users: UsersResource) -> None:
def myself(self) -> MyselfResourceWithRawResponse:
return MyselfResourceWithRawResponse(self._users.myself)
+ @cached_property
+ def verification(self) -> VerificationResourceWithRawResponse:
+ return VerificationResourceWithRawResponse(self._users.verification)
+
class AsyncUsersResourceWithRawResponse:
def __init__(self, users: AsyncUsersResource) -> None:
@@ -208,6 +228,10 @@ def __init__(self, users: AsyncUsersResource) -> None:
def myself(self) -> AsyncMyselfResourceWithRawResponse:
return AsyncMyselfResourceWithRawResponse(self._users.myself)
+ @cached_property
+ def verification(self) -> AsyncVerificationResourceWithRawResponse:
+ return AsyncVerificationResourceWithRawResponse(self._users.verification)
+
class UsersResourceWithStreamingResponse:
def __init__(self, users: UsersResource) -> None:
@@ -221,6 +245,10 @@ def __init__(self, users: UsersResource) -> None:
def myself(self) -> MyselfResourceWithStreamingResponse:
return MyselfResourceWithStreamingResponse(self._users.myself)
+ @cached_property
+ def verification(self) -> VerificationResourceWithStreamingResponse:
+ return VerificationResourceWithStreamingResponse(self._users.verification)
+
class AsyncUsersResourceWithStreamingResponse:
def __init__(self, users: AsyncUsersResource) -> None:
@@ -233,3 +261,7 @@ def __init__(self, users: AsyncUsersResource) -> None:
@cached_property
def myself(self) -> AsyncMyselfResourceWithStreamingResponse:
return AsyncMyselfResourceWithStreamingResponse(self._users.myself)
+
+ @cached_property
+ def verification(self) -> AsyncVerificationResourceWithStreamingResponse:
+ return AsyncVerificationResourceWithStreamingResponse(self._users.verification)
diff --git a/src/codex/resources/users/verification.py b/src/codex/resources/users/verification.py
new file mode 100644
index 0000000..e75326e
--- /dev/null
+++ b/src/codex/resources/users/verification.py
@@ -0,0 +1,135 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.users.verification_resend_response import VerificationResendResponse
+
+__all__ = ["VerificationResource", "AsyncVerificationResource"]
+
+
+class VerificationResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> VerificationResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers
+ """
+ return VerificationResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> VerificationResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response
+ """
+ return VerificationResourceWithStreamingResponse(self)
+
+ def resend(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> VerificationResendResponse:
+ """Resend verification email to the specified user through Auth0."""
+ return self._post(
+ "/api/users/verification/resend",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VerificationResendResponse,
+ )
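+
+ # Illustrative usage sketch (editor's note, not generated from the OpenAPI spec):
+ # with a hypothetical, already-configured Codex client, resending the verification
+ # email is a single call with no required parameters:
+ #
+ #     client.users.verification.resend()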
+
+
+class AsyncVerificationResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncVerificationResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncVerificationResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncVerificationResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response
+ """
+ return AsyncVerificationResourceWithStreamingResponse(self)
+
+ async def resend(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> VerificationResendResponse:
+ """Resend verification email to the specified user through Auth0."""
+ return await self._post(
+ "/api/users/verification/resend",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VerificationResendResponse,
+ )
+
+
+class VerificationResourceWithRawResponse:
+ def __init__(self, verification: VerificationResource) -> None:
+ self._verification = verification
+
+ self.resend = to_raw_response_wrapper(
+ verification.resend,
+ )
+
+
+class AsyncVerificationResourceWithRawResponse:
+ def __init__(self, verification: AsyncVerificationResource) -> None:
+ self._verification = verification
+
+ self.resend = async_to_raw_response_wrapper(
+ verification.resend,
+ )
+
+
+class VerificationResourceWithStreamingResponse:
+ def __init__(self, verification: VerificationResource) -> None:
+ self._verification = verification
+
+ self.resend = to_streamed_response_wrapper(
+ verification.resend,
+ )
+
+
+class AsyncVerificationResourceWithStreamingResponse:
+ def __init__(self, verification: AsyncVerificationResource) -> None:
+ self._verification = verification
+
+ self.resend = async_to_streamed_response_wrapper(
+ verification.resend,
+ )
diff --git a/src/codex/types/__init__.py b/src/codex/types/__init__.py
index f7ec95b..8f241bc 100644
--- a/src/codex/types/__init__.py
+++ b/src/codex/types/__init__.py
@@ -3,7 +3,11 @@
from __future__ import annotations
from .user import User as User
+from .tlm_score_params import TlmScoreParams as TlmScoreParams
+from .tlm_prompt_params import TlmPromptParams as TlmPromptParams
+from .tlm_score_response import TlmScoreResponse as TlmScoreResponse
from .project_list_params import ProjectListParams as ProjectListParams
+from .tlm_prompt_response import TlmPromptResponse as TlmPromptResponse
from .health_check_response import HealthCheckResponse as HealthCheckResponse
from .project_create_params import ProjectCreateParams as ProjectCreateParams
from .project_list_response import ProjectListResponse as ProjectListResponse
diff --git a/src/codex/types/projects/entry.py b/src/codex/types/projects/entry.py
index d3e1fc5..4621cc4 100644
--- a/src/codex/types/projects/entry.py
+++ b/src/codex/types/projects/entry.py
@@ -18,3 +18,5 @@ class Entry(BaseModel):
answer: Optional[str] = None
answered_at: Optional[datetime] = None
+
+ frequency_count: Optional[int] = None
diff --git a/src/codex/types/projects/entry_update_params.py b/src/codex/types/projects/entry_update_params.py
index 0a676f3..ba10549 100644
--- a/src/codex/types/projects/entry_update_params.py
+++ b/src/codex/types/projects/entry_update_params.py
@@ -13,4 +13,6 @@ class EntryUpdateParams(TypedDict, total=False):
answer: Optional[str]
+ frequency_count: Optional[int]
+
question: Optional[str]
diff --git a/src/codex/types/tlm_prompt_params.py b/src/codex/types/tlm_prompt_params.py
new file mode 100644
index 0000000..860f1a7
--- /dev/null
+++ b/src/codex/types/tlm_prompt_params.py
@@ -0,0 +1,127 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["TlmPromptParams", "Options"]
+
+
+class TlmPromptParams(TypedDict, total=False):
+ prompt: Required[str]
+
+ constrain_outputs: Optional[List[str]]
+
+ options: Optional[Options]
+ """
+ Typed dict of advanced configuration options for the Trustworthy Language Model.
+ Many of these configurations are determined by the quality preset selected
+ (learn about quality presets in the TLM [initialization method](./#class-tlm)).
+ Specifying TLMOptions values directly overrides any default values set from the
+ quality preset.
+
+ For all options described below, higher settings will lead to longer runtimes
+ and may consume more tokens internally. You may not be able to run long prompts
+ (or prompts with long responses) in your account, unless your token/rate limits
+ are increased. If you hit token limit issues, try lower/less expensive
+ TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to
+ increase your limits.
+
+ The default values corresponding to each quality preset are:
+
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8,
+ `use_self_reflection` = True.
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4,
+ `use_self_reflection` = True.
+ - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0,
+ `use_self_reflection` = False. When using `get_trustworthiness_score()` on
+ "base" preset, a cheaper self-reflection will be used to compute the
+ trustworthiness score.
+
+ By default, the TLM uses the "medium" quality preset. The default base LLM
+ `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets.
+ You can set custom values for these arguments regardless of the quality preset
+ specified.
+
+ model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview",
+ "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku",
+ "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = "gpt-4o-mini"): Underlying base LLM to use
+ (better models yield better results, faster models yield faster/cheaper results).
+ - Models still in beta: "o1", "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", "nova-pro".
+ - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", "claude-3.7-sonnet".
+ - Recommended models for low latency/costs: "nova-micro", "gpt-4o-mini".
+
+ max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring).
+ Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes.
+ If you experience token/rate limit errors while using TLM, try lowering this number.
+ For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512.
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and consume more tokens internally).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this to False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+ custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
+ """
+
+ quality_preset: Literal["best", "high", "medium", "low", "base"]
+
+ task: Optional[str]
+
+
+class Options(TypedDict, total=False):
+ custom_eval_criteria: Iterable[object]
+
+ log: List[str]
+
+ max_tokens: int
+
+ model: str
+
+ num_candidate_responses: int
+
+ num_consistency_samples: int
+
+ reasoning_effort: str
+
+ similarity_measure: str
+
+ use_self_reflection: bool
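+
+
+ # Illustrative sketch (editor's note, not generated from the OpenAPI spec): a
+ # hypothetical Options value conforming to the TypedDict above; every field is
+ # optional and the specific values are placeholders, not recommended defaults.
+ #
+ #     options: Options = {
+ #         "model": "gpt-4o",
+ #         "max_tokens": 256,
+ #         "num_consistency_samples": 4,
+ #         "reasoning_effort": "low",
+ #         "log": ["explanation"],
+ #         "custom_eval_criteria": [{"name": "conciseness", "criteria": "Responses should be brief."}],
+ #     }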
diff --git a/src/codex/types/tlm_prompt_response.py b/src/codex/types/tlm_prompt_response.py
new file mode 100644
index 0000000..d939c00
--- /dev/null
+++ b/src/codex/types/tlm_prompt_response.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["TlmPromptResponse"]
+
+
+class TlmPromptResponse(BaseModel):
+ response: str
+
+ trustworthiness_score: float
+
+ log: Optional[object] = None
diff --git a/src/codex/types/tlm_score_params.py b/src/codex/types/tlm_score_params.py
new file mode 100644
index 0000000..213da42
--- /dev/null
+++ b/src/codex/types/tlm_score_params.py
@@ -0,0 +1,129 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["TlmScoreParams", "Options"]
+
+
+class TlmScoreParams(TypedDict, total=False):
+ prompt: Required[str]
+
+ response: Required[str]
+
+ constrain_outputs: Optional[List[str]]
+
+ options: Optional[Options]
+ """
+ Typed dict of advanced configuration options for the Trustworthy Language Model.
+ Many of these configurations are determined by the quality preset selected
+ (learn about quality presets in the TLM [initialization method](./#class-tlm)).
+ Specifying TLMOptions values directly overrides any default values set from the
+ quality preset.
+
+ For all options described below, higher settings will lead to longer runtimes
+ and may consume more tokens internally. You may not be able to run long prompts
+ (or prompts with long responses) in your account, unless your token/rate limits
+ are increased. If you hit token limit issues, try lower/less expensive
+ TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to
+ increase your limits.
+
+ The default values corresponding to each quality preset are:
+
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8,
+ `use_self_reflection` = True. This preset improves LLM responses.
+ - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8,
+ `use_self_reflection` = True.
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4,
+ `use_self_reflection` = True.
+ - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0,
+ `use_self_reflection` = False. When using `get_trustworthiness_score()` on
+ "base" preset, a cheaper self-reflection will be used to compute the
+ trustworthiness score.
+
+ By default, the TLM uses the "medium" quality preset. The default base LLM
+ `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets.
+ You can set custom values for these arguments regardless of the quality preset
+ specified.
+
+ model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview",
+ "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku",
+ "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = "gpt-4o-mini"): Underlying base LLM to use
+ (better models yield better results, faster models yield faster/cheaper results).
+ - Models still in beta: "o1", "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet",
+ "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", "nova-pro".
+ - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", "claude-3.7-sonnet".
+ - Recommended models for low latency/costs: "nova-micro", "gpt-4o-mini".
+
+ max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring).
+ Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes.
+ If you experience token/rate limit errors while using TLM, try lowering this number.
+ For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512.
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and consume more tokens internally).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this to False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+ custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
+ """
+
+ quality_preset: Literal["best", "high", "medium", "low", "base"]
+
+ task: Optional[str]
+
+
+class Options(TypedDict, total=False):
+ custom_eval_criteria: Iterable[object]
+
+ log: List[str]
+
+ max_tokens: int
+
+ model: str
+
+ num_candidate_responses: int
+
+ num_consistency_samples: int
+
+ reasoning_effort: str
+
+ similarity_measure: str
+
+ use_self_reflection: bool
diff --git a/src/codex/types/tlm_score_response.py b/src/codex/types/tlm_score_response.py
new file mode 100644
index 0000000..e92b2e0
--- /dev/null
+++ b/src/codex/types/tlm_score_response.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["TlmScoreResponse"]
+
+
+class TlmScoreResponse(BaseModel):
+ trustworthiness_score: float
+
+ log: Optional[object] = None
diff --git a/src/codex/types/users/__init__.py b/src/codex/types/users/__init__.py
index 4256bd7..438bc6f 100644
--- a/src/codex/types/users/__init__.py
+++ b/src/codex/types/users/__init__.py
@@ -3,3 +3,4 @@
from __future__ import annotations
from .user_schema import UserSchema as UserSchema
+from .verification_resend_response import VerificationResendResponse as VerificationResendResponse
diff --git a/src/codex/types/users/verification_resend_response.py b/src/codex/types/users/verification_resend_response.py
new file mode 100644
index 0000000..6617ff5
--- /dev/null
+++ b/src/codex/types/users/verification_resend_response.py
@@ -0,0 +1,8 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict
+from typing_extensions import TypeAlias
+
+__all__ = ["VerificationResendResponse"]
+
+VerificationResendResponse: TypeAlias = Dict[str, str]
diff --git a/tests/api_resources/projects/test_entries.py b/tests/api_resources/projects/test_entries.py
index 5b51ec1..026add4 100644
--- a/tests/api_resources/projects/test_entries.py
+++ b/tests/api_resources/projects/test_entries.py
@@ -144,6 +144,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None:
entry_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
answer="answer",
+ frequency_count=0,
question="question",
)
assert_matches_type(Entry, entry, path=["response"])
@@ -519,6 +520,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) ->
entry_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
answer="answer",
+ frequency_count=0,
question="question",
)
assert_matches_type(Entry, entry, path=["response"])
diff --git a/tests/api_resources/test_tlm.py b/tests/api_resources/test_tlm.py
new file mode 100644
index 0000000..32d5a67
--- /dev/null
+++ b/tests/api_resources/test_tlm.py
@@ -0,0 +1,254 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from codex import Codex, AsyncCodex
+from codex.types import TlmScoreResponse, TlmPromptResponse
+from tests.utils import assert_matches_type
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestTlm:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_prompt(self, client: Codex) -> None:
+ tlm = client.tlm.prompt(
+ prompt="prompt",
+ )
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_prompt_with_all_params(self, client: Codex) -> None:
+ tlm = client.tlm.prompt(
+ prompt="prompt",
+ constrain_outputs=["string"],
+ options={
+ "custom_eval_criteria": [{}],
+ "log": ["string"],
+ "max_tokens": 0,
+ "model": "model",
+ "num_candidate_responses": 0,
+ "num_consistency_samples": 0,
+ "reasoning_effort": "reasoning_effort",
+ "similarity_measure": "similarity_measure",
+ "use_self_reflection": True,
+ },
+ quality_preset="best",
+ task="task",
+ )
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_raw_response_prompt(self, client: Codex) -> None:
+ response = client.tlm.with_raw_response.prompt(
+ prompt="prompt",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ tlm = response.parse()
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_streaming_response_prompt(self, client: Codex) -> None:
+ with client.tlm.with_streaming_response.prompt(
+ prompt="prompt",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ tlm = response.parse()
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_score(self, client: Codex) -> None:
+ tlm = client.tlm.score(
+ prompt="prompt",
+ response="response",
+ )
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_score_with_all_params(self, client: Codex) -> None:
+ tlm = client.tlm.score(
+ prompt="prompt",
+ response="response",
+ constrain_outputs=["string"],
+ options={
+ "custom_eval_criteria": [{}],
+ "log": ["string"],
+ "max_tokens": 0,
+ "model": "model",
+ "num_candidate_responses": 0,
+ "num_consistency_samples": 0,
+ "reasoning_effort": "reasoning_effort",
+ "similarity_measure": "similarity_measure",
+ "use_self_reflection": True,
+ },
+ quality_preset="best",
+ task="task",
+ )
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_raw_response_score(self, client: Codex) -> None:
+ response = client.tlm.with_raw_response.score(
+ prompt="prompt",
+ response="response",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ tlm = response.parse()
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_streaming_response_score(self, client: Codex) -> None:
+ with client.tlm.with_streaming_response.score(
+ prompt="prompt",
+ response="response",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ tlm = response.parse()
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncTlm:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_prompt(self, async_client: AsyncCodex) -> None:
+ tlm = await async_client.tlm.prompt(
+ prompt="prompt",
+ )
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> None:
+ tlm = await async_client.tlm.prompt(
+ prompt="prompt",
+ constrain_outputs=["string"],
+ options={
+ "custom_eval_criteria": [{}],
+ "log": ["string"],
+ "max_tokens": 0,
+ "model": "model",
+ "num_candidate_responses": 0,
+ "num_consistency_samples": 0,
+ "reasoning_effort": "reasoning_effort",
+ "similarity_measure": "similarity_measure",
+ "use_self_reflection": True,
+ },
+ quality_preset="best",
+ task="task",
+ )
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_raw_response_prompt(self, async_client: AsyncCodex) -> None:
+ response = await async_client.tlm.with_raw_response.prompt(
+ prompt="prompt",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ tlm = await response.parse()
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_streaming_response_prompt(self, async_client: AsyncCodex) -> None:
+ async with async_client.tlm.with_streaming_response.prompt(
+ prompt="prompt",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ tlm = await response.parse()
+ assert_matches_type(TlmPromptResponse, tlm, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_score(self, async_client: AsyncCodex) -> None:
+ tlm = await async_client.tlm.score(
+ prompt="prompt",
+ response="response",
+ )
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> None:
+ tlm = await async_client.tlm.score(
+ prompt="prompt",
+ response="response",
+ constrain_outputs=["string"],
+ options={
+ "custom_eval_criteria": [{}],
+ "log": ["string"],
+ "max_tokens": 0,
+ "model": "model",
+ "num_candidate_responses": 0,
+ "num_consistency_samples": 0,
+ "reasoning_effort": "reasoning_effort",
+ "similarity_measure": "similarity_measure",
+ "use_self_reflection": True,
+ },
+ quality_preset="best",
+ task="task",
+ )
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_raw_response_score(self, async_client: AsyncCodex) -> None:
+ response = await async_client.tlm.with_raw_response.score(
+ prompt="prompt",
+ response="response",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ tlm = await response.parse()
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_streaming_response_score(self, async_client: AsyncCodex) -> None:
+ async with async_client.tlm.with_streaming_response.score(
+ prompt="prompt",
+ response="response",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ tlm = await response.parse()
+ assert_matches_type(TlmScoreResponse, tlm, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/users/test_verification.py b/tests/api_resources/users/test_verification.py
new file mode 100644
index 0000000..8332327
--- /dev/null
+++ b/tests/api_resources/users/test_verification.py
@@ -0,0 +1,78 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from codex import Codex, AsyncCodex
+from tests.utils import assert_matches_type
+from codex.types.users import VerificationResendResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestVerification:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_method_resend(self, client: Codex) -> None:
+ verification = client.users.verification.resend()
+ assert_matches_type(VerificationResendResponse, verification, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_raw_response_resend(self, client: Codex) -> None:
+ response = client.users.verification.with_raw_response.resend()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ verification = response.parse()
+ assert_matches_type(VerificationResendResponse, verification, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ def test_streaming_response_resend(self, client: Codex) -> None:
+ with client.users.verification.with_streaming_response.resend() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ verification = response.parse()
+ assert_matches_type(VerificationResendResponse, verification, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncVerification:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_method_resend(self, async_client: AsyncCodex) -> None:
+ verification = await async_client.users.verification.resend()
+ assert_matches_type(VerificationResendResponse, verification, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_raw_response_resend(self, async_client: AsyncCodex) -> None:
+ response = await async_client.users.verification.with_raw_response.resend()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ verification = await response.parse()
+ assert_matches_type(VerificationResendResponse, verification, path=["response"])
+
+ @pytest.mark.skip()
+ @parametrize
+ async def test_streaming_response_resend(self, async_client: AsyncCodex) -> None:
+ async with async_client.users.verification.with_streaming_response.resend() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ verification = await response.parse()
+ assert_matches_type(VerificationResendResponse, verification, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/test_client.py b/tests/test_client.py
index b421541..0b0b783 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -23,10 +23,12 @@
from codex import Codex, AsyncCodex, APIResponseValidationError
from codex._types import Omit
+from codex._utils import maybe_transform
from codex._models import BaseModel, FinalRequestOptions
from codex._constants import RAW_RESPONSE_HEADER
from codex._exceptions import APIStatusError, APITimeoutError, APIResponseValidationError
from codex._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options
+from codex.types.project_create_params import ProjectCreateParams
from .utils import update_env
@@ -680,7 +682,13 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No
with pytest.raises(APITimeoutError):
self.client.post(
"/api/projects/",
- body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")),
+ body=cast(
+ object,
+ maybe_transform(
+ dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"),
+ ProjectCreateParams,
+ ),
+ ),
cast_to=httpx.Response,
options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
)
@@ -695,7 +703,13 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non
with pytest.raises(APIStatusError):
self.client.post(
"/api/projects/",
- body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")),
+ body=cast(
+ object,
+ maybe_transform(
+ dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"),
+ ProjectCreateParams,
+ ),
+ ),
cast_to=httpx.Response,
options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
)
@@ -1425,7 +1439,13 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter)
with pytest.raises(APITimeoutError):
await self.client.post(
"/api/projects/",
- body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")),
+ body=cast(
+ object,
+ maybe_transform(
+ dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"),
+ ProjectCreateParams,
+ ),
+ ),
cast_to=httpx.Response,
options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
)
@@ -1440,7 +1460,13 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter)
with pytest.raises(APIStatusError):
await self.client.post(
"/api/projects/",
- body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")),
+ body=cast(
+ object,
+ maybe_transform(
+ dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"),
+ ProjectCreateParams,
+ ),
+ ),
cast_to=httpx.Response,
options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
)
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 2e91888..324f31a 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -2,7 +2,7 @@
import io
import pathlib
-from typing import Any, List, Union, TypeVar, Iterable, Optional, cast
+from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast
from datetime import date, datetime
from typing_extensions import Required, Annotated, TypedDict
@@ -388,6 +388,15 @@ def my_iter() -> Iterable[Baz8]:
}
+@parametrize
+@pytest.mark.asyncio
+async def test_dictionary_items(use_async: bool) -> None:
+ class DictItems(TypedDict):
+ foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")]
+
+ assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}}
+
+
class TypedDictIterableUnionStr(TypedDict):
foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")]