diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index ac9a2e7..55d2025 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -6,4 +6,4 @@ USER vscode RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH -RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc +RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bbeb30b..c17fdc1 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -24,6 +24,9 @@ } } } + }, + "features": { + "ghcr.io/devcontainers/features/node:1": {} } // Features to add to the dev container. More info: https://containers.dev/features. diff --git a/.release-please-manifest.json b/.release-please-manifest.json index ee49ac2..fd0ccba 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.11" + ".": "0.1.0-alpha.12" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 966331a..7982133 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1 +1 @@ -configured_endpoints: 34 +configured_endpoints: 37 diff --git a/CHANGELOG.md b/CHANGELOG.md index 172b1d6..abb1602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +## 0.1.0-alpha.12 (2025-03-11) + +Full Changelog: [v0.1.0-alpha.11...v0.1.0-alpha.12](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.11...v0.1.0-alpha.12) + +### Features + +* **api:** add tlm routes ([#79](https://github.com/cleanlab/codex-python/issues/79)) ([783282d](https://github.com/cleanlab/codex-python/commit/783282da5cef0d7fbbadbeb826153622ec9a37d1)) +* **api:** api update ([#60](https://github.com/cleanlab/codex-python/issues/60)) ([f28da42](https://github.com/cleanlab/codex-python/commit/f28da423ea6350df2422a6b3c984044686cb4674)) +* **api:** api update ([#67](https://github.com/cleanlab/codex-python/issues/67)) ([5697955](https://github.com/cleanlab/codex-python/commit/569795521774bd2ac303dcaf8058e791f1af501c)) +* **client:** allow passing `NotGiven` for body ([#70](https://github.com/cleanlab/codex-python/issues/70)) ([f022d08](https://github.com/cleanlab/codex-python/commit/f022d082ad3e07ba3600c0d9e8becefad96ca175)) +* **client:** send `X-Stainless-Read-Timeout` header ([#63](https://github.com/cleanlab/codex-python/issues/63)) ([5904ed6](https://github.com/cleanlab/codex-python/commit/5904ed630f3dce437f3eb0f248d6a96b7c237e19)) + + +### Bug Fixes + +* asyncify on non-asyncio runtimes ([#69](https://github.com/cleanlab/codex-python/issues/69)) ([dc7519f](https://github.com/cleanlab/codex-python/commit/dc7519f876a99cdb58f4b634de45989e44c53c88)) +* **client:** mark some request bodies as optional ([f022d08](https://github.com/cleanlab/codex-python/commit/f022d082ad3e07ba3600c0d9e8becefad96ca175)) + + +### Chores + +* **docs:** update client docstring ([#75](https://github.com/cleanlab/codex-python/issues/75)) ([5b371a6](https://github.com/cleanlab/codex-python/commit/5b371a629dbd7763e00d8fd4315c4e437b4f0145)) +* **internal:** bummp ruff dependency 
([#62](https://github.com/cleanlab/codex-python/issues/62)) ([123ccca](https://github.com/cleanlab/codex-python/commit/123ccca213572048ca6678900414e746516a9de1)) +* **internal:** change default timeout to an int ([#61](https://github.com/cleanlab/codex-python/issues/61)) ([66fc9b7](https://github.com/cleanlab/codex-python/commit/66fc9b758ba38b160b7d6b17b94f294f248e0ecd)) +* **internal:** fix devcontainers setup ([#71](https://github.com/cleanlab/codex-python/issues/71)) ([9ec8473](https://github.com/cleanlab/codex-python/commit/9ec847324c47ab63d9cf39d50f392367585065cf)) +* **internal:** fix type traversing dictionary params ([#64](https://github.com/cleanlab/codex-python/issues/64)) ([648fc48](https://github.com/cleanlab/codex-python/commit/648fc489ce7f9827bfc2354e93d470b6e4b7e1bf)) +* **internal:** minor type handling changes ([#65](https://github.com/cleanlab/codex-python/issues/65)) ([27aa5db](https://github.com/cleanlab/codex-python/commit/27aa5db50e0aa13eb2c4d88196a4ac70681ae808)) +* **internal:** properly set __pydantic_private__ ([#72](https://github.com/cleanlab/codex-python/issues/72)) ([9765c39](https://github.com/cleanlab/codex-python/commit/9765c3979b7856713e75175a76d342f6be956dea)) +* **internal:** remove unused http client options forwarding ([#76](https://github.com/cleanlab/codex-python/issues/76)) ([c5ed0fd](https://github.com/cleanlab/codex-python/commit/c5ed0fdc13238df5ecf8cbfd17e15974c6d1b24b)) +* **internal:** update client tests ([#68](https://github.com/cleanlab/codex-python/issues/68)) ([9297d25](https://github.com/cleanlab/codex-python/commit/9297d25f8f6e59af3fa610539d9736f5f0af2fe2)) +* **internal:** version bump ([#58](https://github.com/cleanlab/codex-python/issues/58)) ([d032df2](https://github.com/cleanlab/codex-python/commit/d032df2296313bf0d2b2712672756ed185afb0e0)) + + +### Documentation + +* revise readme docs about nested params ([#77](https://github.com/cleanlab/codex-python/issues/77)) ([649ec25](https://github.com/cleanlab/codex-python/commit/649ec251abd53beb368cb68d134a54b35338d327)) +* update URLs from stainlessapi.com to stainless.com ([#73](https://github.com/cleanlab/codex-python/issues/73)) ([8f500b4](https://github.com/cleanlab/codex-python/commit/8f500b4c0af5c050350107ca7e567a289c7cf8f9)) + ## 0.1.0-alpha.11 (2025-01-30) Full Changelog: [v0.1.0-alpha.10...v0.1.0-alpha.11](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.10...v0.1.0-alpha.11) diff --git a/SECURITY.md b/SECURITY.md index 54f6446..9fc6ee2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,9 +2,9 @@ ## Reporting Security Issues -This SDK is generated by [Stainless Software Inc](http://stainlessapi.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. +This SDK is generated by [Stainless Software Inc](http://stainless.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. 
-To report a security issue, please contact the Stainless team at security@stainlessapi.com. +To report a security issue, please contact the Stainless team at security@stainless.com. ## Responsible Disclosure diff --git a/api.md b/api.md index b2fc2f0..5e25833 100644 --- a/api.md +++ b/api.md @@ -119,6 +119,18 @@ Methods: - client.users.myself.organizations.list() -> UserOrganizationsSchema +## Verification + +Types: + +```python +from codex.types.users import VerificationResendResponse +``` + +Methods: + +- client.users.verification.resend() -> VerificationResendResponse + # Projects Types: @@ -175,3 +187,16 @@ Methods: - client.projects.entries.delete(entry_id, \*, project_id) -> None - client.projects.entries.add_question(project_id, \*\*params) -> Entry - client.projects.entries.query(project_id, \*\*params) -> Optional[Entry] + +# Tlm + +Types: + +```python +from codex.types import TlmPromptResponse, TlmScoreResponse +``` + +Methods: + +- client.tlm.prompt(\*\*params) -> TlmPromptResponse +- client.tlm.score(\*\*params) -> TlmScoreResponse diff --git a/pyproject.toml b/pyproject.toml index 2641c49..ede4351 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.11" +version = "0.1.0-alpha.12" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" @@ -177,7 +177,7 @@ select = [ "T201", "T203", # misuse of typing.TYPE_CHECKING - "TCH004", + "TC004", # import rules "TID251", ] diff --git a/requirements-dev.lock b/requirements-dev.lock index ef07871..1961e8d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -78,7 +78,7 @@ pytz==2023.3.post1 # via dirty-equals respx==0.22.0 rich==13.7.1 -ruff==0.6.9 +ruff==0.9.4 setuptools==68.2.2 # via nodeenv six==1.16.0 diff --git a/scripts/test b/scripts/test index 4fa5698..2b87845 100755 --- a/scripts/test +++ b/scripts/test @@ -52,6 +52,8 @@ else echo fi +export DEFER_PYDANTIC_BUILD=false + echo "==> Running tests" rye run pytest "$@" diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py index 37b3d94..0cf2bd2 100644 --- a/scripts/utils/ruffen-docs.py +++ b/scripts/utils/ruffen-docs.py @@ -47,7 +47,7 @@ def _md_match(match: Match[str]) -> str: with _collect_error(match): code = format_code_block(code) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" def _pycon_match(match: Match[str]) -> str: code = "" @@ -97,7 +97,7 @@ def finish_fragment() -> None: def _md_pycon_match(match: Match[str]) -> str: code = _pycon_match(match) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" src = MD_RE.sub(_md_match, src) src = MD_PYCON_RE.sub(_md_pycon_match, src) diff --git a/src/codex/_base_client.py b/src/codex/_base_client.py index 9090e75..273341b 100644 --- a/src/codex/_base_client.py +++ b/src/codex/_base_client.py @@ -9,7 +9,6 @@ import inspect import logging import platform -import warnings import email.utils from types import TracebackType from random import random @@ -36,7 +35,7 @@ import httpx import distro import pydantic -from httpx import URL, Limits +from httpx import URL from pydantic import PrivateAttr from . 
import _exceptions @@ -51,19 +50,16 @@ Timeout, NotGiven, ResponseT, - Transport, AnyMapping, PostParser, - ProxiesTypes, RequestFiles, HttpxSendArgs, - AsyncTransport, RequestOptions, HttpxRequestFiles, ModelBuilderProtocol, ) from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping -from ._compat import model_copy, model_dump +from ._compat import PYDANTIC_V2, model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( APIResponse, @@ -207,6 +203,9 @@ def _set_private_attributes( model: Type[_T], options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -292,6 +291,9 @@ def _set_private_attributes( client: AsyncAPIClient, options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -331,9 +333,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]): _base_url: URL max_retries: int timeout: Union[float, Timeout, None] - _limits: httpx.Limits - _proxies: ProxiesTypes | None - _transport: Transport | AsyncTransport | None _strict_response_validation: bool _idempotency_header: str | None _default_stream_cls: type[_DefaultStreamT] | None = None @@ -346,9 +345,6 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None = DEFAULT_TIMEOUT, - limits: httpx.Limits, - transport: Transport | AsyncTransport | None, - proxies: ProxiesTypes | None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: @@ -356,9 +352,6 @@ def __init__( self._base_url = self._enforce_trailing_slash(URL(base_url)) self.max_retries = max_retries self.timeout = timeout - self._limits = limits - self._proxies = proxies - self._transport = transport self._custom_headers = custom_headers or {} self._custom_query = custom_query or {} self._strict_response_validation = _strict_response_validation @@ -418,10 +411,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0 if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: headers[idempotency_header] = options.idempotency_key or self._idempotency_key() - # Don't set the retry count header if it was already set or removed by the caller. We check + # Don't set these headers if they were already set or removed by the caller. We check # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. - if "x-stainless-retry-count" not in (header.lower() for header in custom_headers): + lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) return headers @@ -511,7 +511,7 @@ def _build_request( # so that passing a `TypedDict` doesn't cause an error. 
# https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, - json=json_data, + json=json_data if is_given(json_data) else None, files=files, **kwargs, ) @@ -787,46 +787,11 @@ def __init__( base_url: str | URL, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: Transport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, _strict_response_validation: bool, ) -> None: - kwargs: dict[str, Any] = {} - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - kwargs["transport"] = transport - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - kwargs["proxies"] = proxies - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -847,12 +812,9 @@ def __init__( super().__init__( version=version, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, base_url=base_url, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -862,9 +824,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - limits=limits, - follow_redirects=True, - **kwargs, # type: ignore ) def is_closed(self) -> bool: @@ -1359,45 +1318,10 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: AsyncTransport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.AsyncClient | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: - kwargs: dict[str, Any] = {} - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - kwargs["transport"] = transport - warnings.warn( - "The `transport` argument is deprecated. 
The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - kwargs["proxies"] = proxies - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -1419,11 +1343,8 @@ def __init__( super().__init__( version=version, base_url=base_url, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -1433,9 +1354,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - limits=limits, - follow_redirects=True, - **kwargs, # type: ignore ) def is_closed(self) -> bool: diff --git a/src/codex/_client.py b/src/codex/_client.py index 09aaafd..9bdd5f0 100644 --- a/src/codex/_client.py +++ b/src/codex/_client.py @@ -25,7 +25,7 @@ get_async_library, ) from ._version import __version__ -from .resources import health +from .resources import tlm, health from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import APIStatusError from ._base_client import ( @@ -61,6 +61,7 @@ class Codex(SyncAPIClient): organizations: organizations.OrganizationsResource users: users.UsersResource projects: projects.ProjectsResource + tlm: tlm.TlmResource with_raw_response: CodexWithRawResponse with_streaming_response: CodexWithStreamedResponse @@ -141,6 +142,7 @@ def __init__( self.organizations = organizations.OrganizationsResource(self) self.users = users.UsersResource(self) self.projects = projects.ProjectsResource(self) + self.tlm = tlm.TlmResource(self) self.with_raw_response = CodexWithRawResponse(self) self.with_streaming_response = CodexWithStreamedResponse(self) @@ -291,6 +293,7 @@ class AsyncCodex(AsyncAPIClient): organizations: organizations.AsyncOrganizationsResource users: users.AsyncUsersResource projects: projects.AsyncProjectsResource + tlm: tlm.AsyncTlmResource with_raw_response: AsyncCodexWithRawResponse with_streaming_response: AsyncCodexWithStreamedResponse @@ -325,7 +328,7 @@ def __init__( # part of our public interface in the future. 
_strict_response_validation: bool = False, ) -> None: - """Construct a new async Codex client instance.""" + """Construct a new async AsyncCodex client instance.""" self.api_key = api_key self.access_key = access_key @@ -371,6 +374,7 @@ def __init__( self.organizations = organizations.AsyncOrganizationsResource(self) self.users = users.AsyncUsersResource(self) self.projects = projects.AsyncProjectsResource(self) + self.tlm = tlm.AsyncTlmResource(self) self.with_raw_response = AsyncCodexWithRawResponse(self) self.with_streaming_response = AsyncCodexWithStreamedResponse(self) @@ -522,6 +526,7 @@ def __init__(self, client: Codex) -> None: self.organizations = organizations.OrganizationsResourceWithRawResponse(client.organizations) self.users = users.UsersResourceWithRawResponse(client.users) self.projects = projects.ProjectsResourceWithRawResponse(client.projects) + self.tlm = tlm.TlmResourceWithRawResponse(client.tlm) class AsyncCodexWithRawResponse: @@ -530,6 +535,7 @@ def __init__(self, client: AsyncCodex) -> None: self.organizations = organizations.AsyncOrganizationsResourceWithRawResponse(client.organizations) self.users = users.AsyncUsersResourceWithRawResponse(client.users) self.projects = projects.AsyncProjectsResourceWithRawResponse(client.projects) + self.tlm = tlm.AsyncTlmResourceWithRawResponse(client.tlm) class CodexWithStreamedResponse: @@ -538,6 +544,7 @@ def __init__(self, client: Codex) -> None: self.organizations = organizations.OrganizationsResourceWithStreamingResponse(client.organizations) self.users = users.UsersResourceWithStreamingResponse(client.users) self.projects = projects.ProjectsResourceWithStreamingResponse(client.projects) + self.tlm = tlm.TlmResourceWithStreamingResponse(client.tlm) class AsyncCodexWithStreamedResponse: @@ -546,6 +553,7 @@ def __init__(self, client: AsyncCodex) -> None: self.organizations = organizations.AsyncOrganizationsResourceWithStreamingResponse(client.organizations) self.users = users.AsyncUsersResourceWithStreamingResponse(client.users) self.projects = projects.AsyncProjectsResourceWithStreamingResponse(client.projects) + self.tlm = tlm.AsyncTlmResourceWithStreamingResponse(client.tlm) Client = Codex diff --git a/src/codex/_constants.py b/src/codex/_constants.py index a2ac3b6..6ddf2c7 100644 --- a/src/codex/_constants.py +++ b/src/codex/_constants.py @@ -6,7 +6,7 @@ OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 1 minute -DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0) DEFAULT_MAX_RETRIES = 2 DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20) diff --git a/src/codex/_models.py b/src/codex/_models.py index 9a918aa..c4401ff 100644 --- a/src/codex/_models.py +++ b/src/codex/_models.py @@ -172,7 +172,7 @@ def to_json( @override def __str__(self) -> str: # mypy complains about an invalid self arg - return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + return f"{self.__repr_name__()}({self.__repr_str__(', ')})" # type: ignore[misc] # Override the 'construct' method in a way that supports recursive parsing without validation. # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. @@ -426,10 +426,16 @@ def construct_type(*, value: object, type_: object) -> object: If the given value does not match the expected type then it is returned as-is. 
""" + + # store a reference to the original type we were given before we extract any inner + # types so that we can properly resolve forward references in `TypeAliasType` annotations + original_type = None + # we allow `object` as the input type because otherwise, passing things like # `Literal['value']` will be reported as a type error by type checkers type_ = cast("type[object]", type_) if is_type_alias_type(type_): + original_type = type_ # type: ignore[unreachable] type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` @@ -446,7 +452,7 @@ def construct_type(*, value: object, type_: object) -> object: if is_union(origin): try: - return validate_type(type_=cast("type[object]", type_), value=value) + return validate_type(type_=cast("type[object]", original_type or type_), value=value) except Exception: pass diff --git a/src/codex/_utils/_sync.py b/src/codex/_utils/_sync.py index 8b3aaf2..ad7ec71 100644 --- a/src/codex/_utils/_sync.py +++ b/src/codex/_utils/_sync.py @@ -7,16 +7,20 @@ from typing import Any, TypeVar, Callable, Awaitable from typing_extensions import ParamSpec +import anyio +import sniffio +import anyio.to_thread + T_Retval = TypeVar("T_Retval") T_ParamSpec = ParamSpec("T_ParamSpec") if sys.version_info >= (3, 9): - to_thread = asyncio.to_thread + _asyncio_to_thread = asyncio.to_thread else: # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread # for Python 3.8 support - async def to_thread( + async def _asyncio_to_thread( func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs ) -> Any: """Asynchronously run function *func* in a separate thread. @@ -34,6 +38,17 @@ async def to_thread( return await loop.run_in_executor(None, func_call) +async def to_thread( + func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs +) -> T_Retval: + if sniffio.current_async_library() == "asyncio": + return await _asyncio_to_thread(func, *args, **kwargs) + + return await anyio.to_thread.run_sync( + functools.partial(func, *args, **kwargs), + ) + + # inspired by `asyncer`, https://github.com/tiangolo/asyncer def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: """ diff --git a/src/codex/_utils/_transform.py b/src/codex/_utils/_transform.py index a6b62ca..18afd9d 100644 --- a/src/codex/_utils/_transform.py +++ b/src/codex/_utils/_transform.py @@ -25,7 +25,7 @@ is_annotated_type, strip_annotated_type, ) -from .._compat import model_dump, is_typeddict +from .._compat import get_origin, model_dump, is_typeddict _T = TypeVar("_T") @@ -164,9 +164,14 @@ def _transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) @@ -307,9 +312,14 @@ async def _async_transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return await _async_transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type 
= get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) diff --git a/src/codex/_version.py b/src/codex/_version.py index fd5f708..fe7cc73 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.11" # x-release-please-version +__version__ = "0.1.0-alpha.12" # x-release-please-version diff --git a/src/codex/resources/__init__.py b/src/codex/resources/__init__.py index b96b725..f91f0e4 100644 --- a/src/codex/resources/__init__.py +++ b/src/codex/resources/__init__.py @@ -1,5 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .tlm import ( + TlmResource, + AsyncTlmResource, + TlmResourceWithRawResponse, + AsyncTlmResourceWithRawResponse, + TlmResourceWithStreamingResponse, + AsyncTlmResourceWithStreamingResponse, +) from .users import ( UsersResource, AsyncUsersResource, @@ -58,4 +66,10 @@ "AsyncProjectsResourceWithRawResponse", "ProjectsResourceWithStreamingResponse", "AsyncProjectsResourceWithStreamingResponse", + "TlmResource", + "AsyncTlmResource", + "TlmResourceWithRawResponse", + "AsyncTlmResourceWithRawResponse", + "TlmResourceWithStreamingResponse", + "AsyncTlmResourceWithStreamingResponse", ] diff --git a/src/codex/resources/projects/entries.py b/src/codex/resources/projects/entries.py index d307d3d..2fcc8e0 100644 --- a/src/codex/resources/projects/entries.py +++ b/src/codex/resources/projects/entries.py @@ -70,6 +70,8 @@ def create( """ Create a knowledge entry for a project. + Raises: HTTPException: If an existing entry is found with the same question. + Args: extra_headers: Send extra headers @@ -138,6 +140,7 @@ def update( *, project_id: str, answer: Optional[str] | NotGiven = NOT_GIVEN, + frequency_count: Optional[int] | NotGiven = NOT_GIVEN, question: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -167,6 +170,7 @@ def update( body=maybe_transform( { "answer": answer, + "frequency_count": frequency_count, "question": question, }, entry_update_params.EntryUpdateParams, @@ -318,8 +322,10 @@ def query( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Optional[Entry]: - """ - Query knowledge for a project. + """Query knowledge for a project. + + Also increments the frequency_count for the + matching entry if found. Returns the matching entry if found and answered, otherwise returns None. This allows the client to distinguish between: (1) no matching question found @@ -383,6 +389,8 @@ async def create( """ Create a knowledge entry for a project. + Raises: HTTPException: If an existing entry is found with the same question. + Args: extra_headers: Send extra headers @@ -451,6 +459,7 @@ async def update( *, project_id: str, answer: Optional[str] | NotGiven = NOT_GIVEN, + frequency_count: Optional[int] | NotGiven = NOT_GIVEN, question: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -480,6 +489,7 @@ async def update( body=await async_maybe_transform( { "answer": answer, + "frequency_count": frequency_count, "question": question, }, entry_update_params.EntryUpdateParams, @@ -631,8 +641,10 @@ async def query( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Optional[Entry]: - """ - Query knowledge for a project. + """Query knowledge for a project. + + Also increments the frequency_count for the + matching entry if found. Returns the matching entry if found and answered, otherwise returns None. This allows the client to distinguish between: (1) no matching question found diff --git a/src/codex/resources/tlm.py b/src/codex/resources/tlm.py new file mode 100644 index 0000000..c6585d0 --- /dev/null +++ b/src/codex/resources/tlm.py @@ -0,0 +1,656 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import Literal + +import httpx + +from ..types import tlm_score_params, tlm_prompt_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.tlm_score_response import TlmScoreResponse +from ..types.tlm_prompt_response import TlmPromptResponse + +__all__ = ["TlmResource", "AsyncTlmResource"] + + +class TlmResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TlmResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return TlmResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TlmResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return TlmResourceWithStreamingResponse(self) + + def prompt( + self, + *, + prompt: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_prompt_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmPromptResponse: + """ + Prompts the TLM API. + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. 
+ Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. 
+ + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. + Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/api/tlm/prompt", + body=maybe_transform( + { + "prompt": prompt, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_prompt_params.TlmPromptParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmPromptResponse, + ) + + def score( + self, + *, + prompt: str, + response: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_score_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmScoreResponse: + """ + Scores the TLM API. + + TODO: + + - Track query count in DB + - Enforce hard cap on queries for users w/o credit card on file + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. 
For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. + Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/api/tlm/score", + body=maybe_transform( + { + "prompt": prompt, + "response": response, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_score_params.TlmScoreParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmScoreResponse, + ) + + +class AsyncTlmResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncTlmResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return AsyncTlmResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTlmResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return AsyncTlmResourceWithStreamingResponse(self) + + async def prompt( + self, + *, + prompt: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_prompt_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmPromptResponse: + """ + Prompts the TLM API. + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. 
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. 
+ + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. + Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/api/tlm/prompt", + body=await async_maybe_transform( + { + "prompt": prompt, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_prompt_params.TlmPromptParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmPromptResponse, + ) + + async def score( + self, + *, + prompt: str, + response: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_score_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmScoreResponse: + """ + Scores the TLM API. + + TODO: + + - Track query count in DB + - Enforce hard cap on queries for users w/o credit card on file + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. 
You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. 
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this to False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+ custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
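To make the options above concrete, here is a minimal sketch of calling the new async score route added in this diff. It assumes an already-configured AsyncCodex client (construction and authentication are omitted), and the prompt/response strings plus the helper name score_example are placeholders, not part of the generated SDK.

    from codex import AsyncCodex

    async def score_example(client: AsyncCodex) -> None:
        # Placeholder prompt/response pair; option values mirror the documented "low" preset.
        result = await client.tlm.score(
            prompt="What is the capital of France?",
            response="Paris",
            quality_preset="low",
            options={
                "model": "gpt-4o-mini",
                "num_consistency_samples": 4,
                "use_self_reflection": True,
                "log": ["explanation"],
            },
        )
        # TlmScoreResponse exposes trustworthiness_score (float) and an optional log payload.
        print(result.trustworthiness_score)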
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/api/tlm/score", + body=await async_maybe_transform( + { + "prompt": prompt, + "response": response, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_score_params.TlmScoreParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmScoreResponse, + ) + + +class TlmResourceWithRawResponse: + def __init__(self, tlm: TlmResource) -> None: + self._tlm = tlm + + self.prompt = to_raw_response_wrapper( + tlm.prompt, + ) + self.score = to_raw_response_wrapper( + tlm.score, + ) + + +class AsyncTlmResourceWithRawResponse: + def __init__(self, tlm: AsyncTlmResource) -> None: + self._tlm = tlm + + self.prompt = async_to_raw_response_wrapper( + tlm.prompt, + ) + self.score = async_to_raw_response_wrapper( + tlm.score, + ) + + +class TlmResourceWithStreamingResponse: + def __init__(self, tlm: TlmResource) -> None: + self._tlm = tlm + + self.prompt = to_streamed_response_wrapper( + tlm.prompt, + ) + self.score = to_streamed_response_wrapper( + tlm.score, + ) + + +class AsyncTlmResourceWithStreamingResponse: + def __init__(self, tlm: AsyncTlmResource) -> None: + self._tlm = tlm + + self.prompt = async_to_streamed_response_wrapper( + tlm.prompt, + ) + self.score = async_to_streamed_response_wrapper( + tlm.score, + ) diff --git a/src/codex/resources/users/__init__.py b/src/codex/resources/users/__init__.py index 18ed37e..9618f58 100644 --- a/src/codex/resources/users/__init__.py +++ b/src/codex/resources/users/__init__.py @@ -16,6 +16,14 @@ MyselfResourceWithStreamingResponse, AsyncMyselfResourceWithStreamingResponse, ) +from .verification import ( + VerificationResource, + AsyncVerificationResource, + VerificationResourceWithRawResponse, + AsyncVerificationResourceWithRawResponse, + VerificationResourceWithStreamingResponse, + AsyncVerificationResourceWithStreamingResponse, +) __all__ = [ "MyselfResource", @@ -24,6 +32,12 @@ "AsyncMyselfResourceWithRawResponse", "MyselfResourceWithStreamingResponse", "AsyncMyselfResourceWithStreamingResponse", + "VerificationResource", + "AsyncVerificationResource", + "VerificationResourceWithRawResponse", + "AsyncVerificationResourceWithRawResponse", + "VerificationResourceWithStreamingResponse", + "AsyncVerificationResourceWithStreamingResponse", "UsersResource", "AsyncUsersResource", "UsersResourceWithRawResponse", diff --git a/src/codex/resources/users/users.py b/src/codex/resources/users/users.py index fb7ee0f..a7d9d2a 100644 --- a/src/codex/resources/users/users.py +++ b/src/codex/resources/users/users.py @@ -22,6 +22,14 @@ async_to_streamed_response_wrapper, ) from ...types.user import User +from .verification import ( + VerificationResource, + AsyncVerificationResource, + VerificationResourceWithRawResponse, + AsyncVerificationResourceWithRawResponse, + VerificationResourceWithStreamingResponse, + AsyncVerificationResourceWithStreamingResponse, +) from .myself.myself import ( MyselfResource, AsyncMyselfResource, @@ -40,6 +48,10 @@ class UsersResource(SyncAPIResource): def myself(self) -> MyselfResource: return MyselfResource(self._client) + @cached_property + def verification(self) -> VerificationResource: + return 
VerificationResource(self._client) + @cached_property def with_raw_response(self) -> UsersResourceWithRawResponse: """ @@ -114,6 +126,10 @@ class AsyncUsersResource(AsyncAPIResource): def myself(self) -> AsyncMyselfResource: return AsyncMyselfResource(self._client) + @cached_property + def verification(self) -> AsyncVerificationResource: + return AsyncVerificationResource(self._client) + @cached_property def with_raw_response(self) -> AsyncUsersResourceWithRawResponse: """ @@ -195,6 +211,10 @@ def __init__(self, users: UsersResource) -> None: def myself(self) -> MyselfResourceWithRawResponse: return MyselfResourceWithRawResponse(self._users.myself) + @cached_property + def verification(self) -> VerificationResourceWithRawResponse: + return VerificationResourceWithRawResponse(self._users.verification) + class AsyncUsersResourceWithRawResponse: def __init__(self, users: AsyncUsersResource) -> None: @@ -208,6 +228,10 @@ def __init__(self, users: AsyncUsersResource) -> None: def myself(self) -> AsyncMyselfResourceWithRawResponse: return AsyncMyselfResourceWithRawResponse(self._users.myself) + @cached_property + def verification(self) -> AsyncVerificationResourceWithRawResponse: + return AsyncVerificationResourceWithRawResponse(self._users.verification) + class UsersResourceWithStreamingResponse: def __init__(self, users: UsersResource) -> None: @@ -221,6 +245,10 @@ def __init__(self, users: UsersResource) -> None: def myself(self) -> MyselfResourceWithStreamingResponse: return MyselfResourceWithStreamingResponse(self._users.myself) + @cached_property + def verification(self) -> VerificationResourceWithStreamingResponse: + return VerificationResourceWithStreamingResponse(self._users.verification) + class AsyncUsersResourceWithStreamingResponse: def __init__(self, users: AsyncUsersResource) -> None: @@ -233,3 +261,7 @@ def __init__(self, users: AsyncUsersResource) -> None: @cached_property def myself(self) -> AsyncMyselfResourceWithStreamingResponse: return AsyncMyselfResourceWithStreamingResponse(self._users.myself) + + @cached_property + def verification(self) -> AsyncVerificationResourceWithStreamingResponse: + return AsyncVerificationResourceWithStreamingResponse(self._users.verification) diff --git a/src/codex/resources/users/verification.py b/src/codex/resources/users/verification.py new file mode 100644 index 0000000..e75326e --- /dev/null +++ b/src/codex/resources/users/verification.py @@ -0,0 +1,135 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.users.verification_resend_response import VerificationResendResponse + +__all__ = ["VerificationResource", "AsyncVerificationResource"] + + +class VerificationResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> VerificationResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return VerificationResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VerificationResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return VerificationResourceWithStreamingResponse(self) + + def resend( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VerificationResendResponse: + """Resend verification email to the specified user through Auth0.""" + return self._post( + "/api/users/verification/resend", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VerificationResendResponse, + ) + + +class AsyncVerificationResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncVerificationResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return AsyncVerificationResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVerificationResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return AsyncVerificationResourceWithStreamingResponse(self) + + async def resend( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VerificationResendResponse: + """Resend verification email to the specified user through Auth0.""" + return await self._post( + "/api/users/verification/resend", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VerificationResendResponse, + ) + + +class VerificationResourceWithRawResponse: + def __init__(self, verification: VerificationResource) -> None: + self._verification = verification + + self.resend = to_raw_response_wrapper( + verification.resend, + ) + + +class AsyncVerificationResourceWithRawResponse: + def __init__(self, verification: AsyncVerificationResource) -> None: + self._verification = verification + + self.resend = async_to_raw_response_wrapper( + verification.resend, + ) + + +class VerificationResourceWithStreamingResponse: + def __init__(self, verification: VerificationResource) -> None: + self._verification = verification + + self.resend = to_streamed_response_wrapper( + verification.resend, + ) + + +class AsyncVerificationResourceWithStreamingResponse: + def __init__(self, verification: AsyncVerificationResource) -> None: + self._verification = verification + + self.resend = async_to_streamed_response_wrapper( + verification.resend, + ) diff --git a/src/codex/types/__init__.py b/src/codex/types/__init__.py index f7ec95b..8f241bc 100644 --- a/src/codex/types/__init__.py +++ b/src/codex/types/__init__.py @@ -3,7 +3,11 @@ from __future__ import annotations from .user import User as User +from .tlm_score_params import TlmScoreParams as TlmScoreParams +from .tlm_prompt_params import TlmPromptParams as TlmPromptParams +from .tlm_score_response import TlmScoreResponse as TlmScoreResponse from .project_list_params import ProjectListParams as ProjectListParams +from .tlm_prompt_response import TlmPromptResponse as TlmPromptResponse from .health_check_response import HealthCheckResponse as HealthCheckResponse from .project_create_params import ProjectCreateParams as ProjectCreateParams from .project_list_response import ProjectListResponse as ProjectListResponse diff --git a/src/codex/types/projects/entry.py b/src/codex/types/projects/entry.py index d3e1fc5..4621cc4 100644 --- a/src/codex/types/projects/entry.py +++ b/src/codex/types/projects/entry.py @@ -18,3 +18,5 @@ class Entry(BaseModel): answer: Optional[str] = None answered_at: Optional[datetime] = None + + frequency_count: Optional[int] = None diff --git a/src/codex/types/projects/entry_update_params.py b/src/codex/types/projects/entry_update_params.py index 0a676f3..ba10549 100644 --- a/src/codex/types/projects/entry_update_params.py +++ b/src/codex/types/projects/entry_update_params.py @@ -13,4 +13,6 @@ class EntryUpdateParams(TypedDict, total=False): answer: Optional[str] + frequency_count: Optional[int] + question: Optional[str] diff --git a/src/codex/types/tlm_prompt_params.py b/src/codex/types/tlm_prompt_params.py new file mode 100644 index 0000000..860f1a7 --- /dev/null +++ b/src/codex/types/tlm_prompt_params.py @@ -0,0 +1,127 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
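Before the new TLM parameter types below, the verification resource added above can be exercised roughly as follows. This is a sketch only: it assumes a configured synchronous Codex client, and the wrapper function name is invented for illustration.

    from codex import Codex

    def resend_verification_email(client: Codex) -> None:
        # POST /api/users/verification/resend asks Auth0 to re-send the verification email.
        result = client.users.verification.resend()
        # VerificationResendResponse is a plain dict[str, str].
        print(result)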
+ +from __future__ import annotations + +from typing import List, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TlmPromptParams", "Options"] + + +class TlmPromptParams(TypedDict, total=False): + prompt: Required[str] + + constrain_outputs: Optional[List[str]] + + options: Optional[Options] + """ + Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. 
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this to False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+ custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
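For reference, a rough sketch of how these prompt parameters could be passed through the generated client. The question, the constrained outputs, the custom criterion text, and the helper name prompt_example are all invented for illustration; only parameter and field names documented above are used.

    from codex import Codex

    def prompt_example(client: Codex) -> None:
        # constrain_outputs restricts the returned response to the listed strings.
        result = client.tlm.prompt(
            prompt="Is the sky blue? Answer yes or no.",
            constrain_outputs=["yes", "no"],
            options={
                "reasoning_effort": "none",
                "similarity_measure": "string",
                "custom_eval_criteria": [
                    {"name": "conciseness", "criteria": "The response should be a single word."},
                ],
            },
        )
        # TlmPromptResponse carries the response text and its trustworthiness score.
        print(result.response, result.trustworthiness_score)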
+ """ + + quality_preset: Literal["best", "high", "medium", "low", "base"] + + task: Optional[str] + + +class Options(TypedDict, total=False): + custom_eval_criteria: Iterable[object] + + log: List[str] + + max_tokens: int + + model: str + + num_candidate_responses: int + + num_consistency_samples: int + + reasoning_effort: str + + similarity_measure: str + + use_self_reflection: bool diff --git a/src/codex/types/tlm_prompt_response.py b/src/codex/types/tlm_prompt_response.py new file mode 100644 index 0000000..d939c00 --- /dev/null +++ b/src/codex/types/tlm_prompt_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["TlmPromptResponse"] + + +class TlmPromptResponse(BaseModel): + response: str + + trustworthiness_score: float + + log: Optional[object] = None diff --git a/src/codex/types/tlm_score_params.py b/src/codex/types/tlm_score_params.py new file mode 100644 index 0000000..213da42 --- /dev/null +++ b/src/codex/types/tlm_score_params.py @@ -0,0 +1,129 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TlmScoreParams", "Options"] + + +class TlmScoreParams(TypedDict, total=False): + prompt: Required[str] + + response: Required[str] + + constrain_outputs: Optional[List[str]] + + options: Optional[Options] + """ + Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. 
+ + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. 
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + """ + + quality_preset: Literal["best", "high", "medium", "low", "base"] + + task: Optional[str] + + +class Options(TypedDict, total=False): + custom_eval_criteria: Iterable[object] + + log: List[str] + + max_tokens: int + + model: str + + num_candidate_responses: int + + num_consistency_samples: int + + reasoning_effort: str + + similarity_measure: str + + use_self_reflection: bool diff --git a/src/codex/types/tlm_score_response.py b/src/codex/types/tlm_score_response.py new file mode 100644 index 0000000..e92b2e0 --- /dev/null +++ b/src/codex/types/tlm_score_response.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["TlmScoreResponse"] + + +class TlmScoreResponse(BaseModel): + trustworthiness_score: float + + log: Optional[object] = None diff --git a/src/codex/types/users/__init__.py b/src/codex/types/users/__init__.py index 4256bd7..438bc6f 100644 --- a/src/codex/types/users/__init__.py +++ b/src/codex/types/users/__init__.py @@ -3,3 +3,4 @@ from __future__ import annotations from .user_schema import UserSchema as UserSchema +from .verification_resend_response import VerificationResendResponse as VerificationResendResponse diff --git a/src/codex/types/users/verification_resend_response.py b/src/codex/types/users/verification_resend_response.py new file mode 100644 index 0000000..6617ff5 --- /dev/null +++ b/src/codex/types/users/verification_resend_response.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
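Since TlmScoreParams is now exported from codex.types, a request body can also be built as a typed dict before it is sent. A small sketch, assuming the caller wants the cheap "base" preset; the prompt/response text is a placeholder.

    from codex.types import TlmScoreParams

    # Illustrative payload only; field names come from the TypedDict above.
    params: TlmScoreParams = {
        "prompt": "Summarize the ticket in one sentence.",
        "response": "The user cannot reset their password.",
        "quality_preset": "base",
        "options": {
            "reasoning_effort": "none",
            "similarity_measure": "string",
            "max_tokens": 64,
        },
    }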
+ +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["VerificationResendResponse"] + +VerificationResendResponse: TypeAlias = Dict[str, str] diff --git a/tests/api_resources/projects/test_entries.py b/tests/api_resources/projects/test_entries.py index 5b51ec1..026add4 100644 --- a/tests/api_resources/projects/test_entries.py +++ b/tests/api_resources/projects/test_entries.py @@ -144,6 +144,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None: entry_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", answer="answer", + frequency_count=0, question="question", ) assert_matches_type(Entry, entry, path=["response"]) @@ -519,6 +520,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> entry_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", answer="answer", + frequency_count=0, question="question", ) assert_matches_type(Entry, entry, path=["response"]) diff --git a/tests/api_resources/test_tlm.py b/tests/api_resources/test_tlm.py new file mode 100644 index 0000000..32d5a67 --- /dev/null +++ b/tests/api_resources/test_tlm.py @@ -0,0 +1,254 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from codex import Codex, AsyncCodex +from codex.types import TlmScoreResponse, TlmPromptResponse +from tests.utils import assert_matches_type + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestTlm: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_prompt(self, client: Codex) -> None: + tlm = client.tlm.prompt( + prompt="prompt", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_prompt_with_all_params(self, client: Codex) -> None: + tlm = client.tlm.prompt( + prompt="prompt", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_prompt(self, client: Codex) -> None: + response = client.tlm.with_raw_response.prompt( + prompt="prompt", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_prompt(self, client: Codex) -> None: + with client.tlm.with_streaming_response.prompt( + prompt="prompt", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_score(self, client: Codex) -> None: + tlm = client.tlm.score( + prompt="prompt", + response="response", + ) + 
assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_score_with_all_params(self, client: Codex) -> None: + tlm = client.tlm.score( + prompt="prompt", + response="response", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_score(self, client: Codex) -> None: + response = client.tlm.with_raw_response.score( + prompt="prompt", + response="response", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_score(self, client: Codex) -> None: + with client.tlm.with_streaming_response.score( + prompt="prompt", + response="response", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncTlm: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + async def test_method_prompt(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.prompt( + prompt="prompt", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.prompt( + prompt="prompt", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_prompt(self, async_client: AsyncCodex) -> None: + response = await async_client.tlm.with_raw_response.prompt( + prompt="prompt", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = await response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_prompt(self, async_client: AsyncCodex) -> None: + async with async_client.tlm.with_streaming_response.prompt( + prompt="prompt", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = await response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_score(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.score( + 
prompt="prompt", + response="response", + ) + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.score( + prompt="prompt", + response="response", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_score(self, async_client: AsyncCodex) -> None: + response = await async_client.tlm.with_raw_response.score( + prompt="prompt", + response="response", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = await response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_score(self, async_client: AsyncCodex) -> None: + async with async_client.tlm.with_streaming_response.score( + prompt="prompt", + response="response", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = await response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/users/test_verification.py b/tests/api_resources/users/test_verification.py new file mode 100644 index 0000000..8332327 --- /dev/null +++ b/tests/api_resources/users/test_verification.py @@ -0,0 +1,78 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
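The tests that follow exercise the raw- and streaming-response wrappers for the resend route. Outside the test harness, the raw variant looks roughly like this; client setup is omitted and the function name is illustrative.

    from codex import Codex

    def resend_and_inspect(client: Codex) -> None:
        # .with_raw_response wraps the HTTP response; .parse() yields the usual payload.
        raw = client.users.verification.with_raw_response.resend()
        verification = raw.parse()
        print(verification)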
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from codex import Codex, AsyncCodex +from tests.utils import assert_matches_type +from codex.types.users import VerificationResendResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestVerification: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_resend(self, client: Codex) -> None: + verification = client.users.verification.resend() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_resend(self, client: Codex) -> None: + response = client.users.verification.with_raw_response.resend() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + verification = response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_resend(self, client: Codex) -> None: + with client.users.verification.with_streaming_response.resend() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + verification = response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncVerification: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + async def test_method_resend(self, async_client: AsyncCodex) -> None: + verification = await async_client.users.verification.resend() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_resend(self, async_client: AsyncCodex) -> None: + response = await async_client.users.verification.with_raw_response.resend() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + verification = await response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_resend(self, async_client: AsyncCodex) -> None: + async with async_client.users.verification.with_streaming_response.resend() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + verification = await response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/test_client.py b/tests/test_client.py index b421541..0b0b783 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -23,10 +23,12 @@ from codex import Codex, AsyncCodex, APIResponseValidationError from codex._types import Omit +from codex._utils import maybe_transform from codex._models import BaseModel, FinalRequestOptions from codex._constants import RAW_RESPONSE_HEADER from codex._exceptions import APIStatusError, APITimeoutError, APIResponseValidationError from codex._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options +from codex.types.project_create_params import ProjectCreateParams from .utils import 
update_env @@ -680,7 +682,13 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No with pytest.raises(APITimeoutError): self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) @@ -695,7 +703,13 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non with pytest.raises(APIStatusError): self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) @@ -1425,7 +1439,13 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) with pytest.raises(APITimeoutError): await self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) @@ -1440,7 +1460,13 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) with pytest.raises(APIStatusError): await self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) diff --git a/tests/test_transform.py b/tests/test_transform.py index 2e91888..324f31a 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -2,7 +2,7 @@ import io import pathlib -from typing import Any, List, Union, TypeVar, Iterable, Optional, cast +from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast from datetime import date, datetime from typing_extensions import Required, Annotated, TypedDict @@ -388,6 +388,15 @@ def my_iter() -> Iterable[Baz8]: } +@parametrize +@pytest.mark.asyncio +async def test_dictionary_items(use_async: bool) -> None: + class DictItems(TypedDict): + foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] + + assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}} + + class TypedDictIterableUnionStr(TypedDict): foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")]
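Finally, the updated client tests now run request bodies through maybe_transform before posting them. A short sketch of that helper in isolation, using only the imports the tests themselves rely on; the printed result is whatever the transform produces for these placeholder values.

    from codex._utils import maybe_transform
    from codex.types.project_create_params import ProjectCreateParams

    # Mirrors how tests/test_client.py builds the /api/projects/ request body.
    body = maybe_transform(
        dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"),
        ProjectCreateParams,
    )
    print(body)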