diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index ac9a2e7..55d2025 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -6,4 +6,4 @@ USER vscode RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH -RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc +RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bbeb30b..c17fdc1 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -24,6 +24,9 @@ } } } + }, + "features": { + "ghcr.io/devcontainers/features/node:1": {} } // Features to add to the dev container. More info: https://containers.dev/features. diff --git a/.release-please-manifest.json b/.release-please-manifest.json index ee49ac2..fd0ccba 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.11" + ".": "0.1.0-alpha.12" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 966331a..7982133 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1 +1 @@ -configured_endpoints: 34 +configured_endpoints: 37 diff --git a/CHANGELOG.md b/CHANGELOG.md index 172b1d6..abb1602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +## 0.1.0-alpha.12 (2025-03-11) + +Full Changelog: [v0.1.0-alpha.11...v0.1.0-alpha.12](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.11...v0.1.0-alpha.12) + +### Features + +* **api:** add tlm routes ([#79](https://github.com/cleanlab/codex-python/issues/79)) ([783282d](https://github.com/cleanlab/codex-python/commit/783282da5cef0d7fbbadbeb826153622ec9a37d1)) +* **api:** api update ([#60](https://github.com/cleanlab/codex-python/issues/60)) ([f28da42](https://github.com/cleanlab/codex-python/commit/f28da423ea6350df2422a6b3c984044686cb4674)) +* **api:** api update ([#67](https://github.com/cleanlab/codex-python/issues/67)) ([5697955](https://github.com/cleanlab/codex-python/commit/569795521774bd2ac303dcaf8058e791f1af501c)) +* **client:** allow passing `NotGiven` for body ([#70](https://github.com/cleanlab/codex-python/issues/70)) ([f022d08](https://github.com/cleanlab/codex-python/commit/f022d082ad3e07ba3600c0d9e8becefad96ca175)) +* **client:** send `X-Stainless-Read-Timeout` header ([#63](https://github.com/cleanlab/codex-python/issues/63)) ([5904ed6](https://github.com/cleanlab/codex-python/commit/5904ed630f3dce437f3eb0f248d6a96b7c237e19)) + + +### Bug Fixes + +* asyncify on non-asyncio runtimes ([#69](https://github.com/cleanlab/codex-python/issues/69)) ([dc7519f](https://github.com/cleanlab/codex-python/commit/dc7519f876a99cdb58f4b634de45989e44c53c88)) +* **client:** mark some request bodies as optional ([f022d08](https://github.com/cleanlab/codex-python/commit/f022d082ad3e07ba3600c0d9e8becefad96ca175)) + + +### Chores + +* **docs:** update client docstring ([#75](https://github.com/cleanlab/codex-python/issues/75)) ([5b371a6](https://github.com/cleanlab/codex-python/commit/5b371a629dbd7763e00d8fd4315c4e437b4f0145)) +* **internal:** bummp ruff dependency 
([#62](https://github.com/cleanlab/codex-python/issues/62)) ([123ccca](https://github.com/cleanlab/codex-python/commit/123ccca213572048ca6678900414e746516a9de1)) +* **internal:** change default timeout to an int ([#61](https://github.com/cleanlab/codex-python/issues/61)) ([66fc9b7](https://github.com/cleanlab/codex-python/commit/66fc9b758ba38b160b7d6b17b94f294f248e0ecd)) +* **internal:** fix devcontainers setup ([#71](https://github.com/cleanlab/codex-python/issues/71)) ([9ec8473](https://github.com/cleanlab/codex-python/commit/9ec847324c47ab63d9cf39d50f392367585065cf)) +* **internal:** fix type traversing dictionary params ([#64](https://github.com/cleanlab/codex-python/issues/64)) ([648fc48](https://github.com/cleanlab/codex-python/commit/648fc489ce7f9827bfc2354e93d470b6e4b7e1bf)) +* **internal:** minor type handling changes ([#65](https://github.com/cleanlab/codex-python/issues/65)) ([27aa5db](https://github.com/cleanlab/codex-python/commit/27aa5db50e0aa13eb2c4d88196a4ac70681ae808)) +* **internal:** properly set __pydantic_private__ ([#72](https://github.com/cleanlab/codex-python/issues/72)) ([9765c39](https://github.com/cleanlab/codex-python/commit/9765c3979b7856713e75175a76d342f6be956dea)) +* **internal:** remove unused http client options forwarding ([#76](https://github.com/cleanlab/codex-python/issues/76)) ([c5ed0fd](https://github.com/cleanlab/codex-python/commit/c5ed0fdc13238df5ecf8cbfd17e15974c6d1b24b)) +* **internal:** update client tests ([#68](https://github.com/cleanlab/codex-python/issues/68)) ([9297d25](https://github.com/cleanlab/codex-python/commit/9297d25f8f6e59af3fa610539d9736f5f0af2fe2)) +* **internal:** version bump ([#58](https://github.com/cleanlab/codex-python/issues/58)) ([d032df2](https://github.com/cleanlab/codex-python/commit/d032df2296313bf0d2b2712672756ed185afb0e0)) + + +### Documentation + +* revise readme docs about nested params ([#77](https://github.com/cleanlab/codex-python/issues/77)) ([649ec25](https://github.com/cleanlab/codex-python/commit/649ec251abd53beb368cb68d134a54b35338d327)) +* update URLs from stainlessapi.com to stainless.com ([#73](https://github.com/cleanlab/codex-python/issues/73)) ([8f500b4](https://github.com/cleanlab/codex-python/commit/8f500b4c0af5c050350107ca7e567a289c7cf8f9)) + ## 0.1.0-alpha.11 (2025-01-30) Full Changelog: [v0.1.0-alpha.10...v0.1.0-alpha.11](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.10...v0.1.0-alpha.11) diff --git a/SECURITY.md b/SECURITY.md index 54f6446..9fc6ee2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,9 +2,9 @@ ## Reporting Security Issues -This SDK is generated by [Stainless Software Inc](http://stainlessapi.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. +This SDK is generated by [Stainless Software Inc](http://stainless.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. 
-To report a security issue, please contact the Stainless team at security@stainlessapi.com. +To report a security issue, please contact the Stainless team at security@stainless.com. ## Responsible Disclosure diff --git a/api.md b/api.md index b2fc2f0..5e25833 100644 --- a/api.md +++ b/api.md @@ -119,6 +119,18 @@ Methods: - client.users.myself.organizations.list() -> UserOrganizationsSchema +## Verification + +Types: + +```python +from codex.types.users import VerificationResendResponse +``` + +Methods: + +- client.users.verification.resend() -> VerificationResendResponse + # Projects Types: @@ -175,3 +187,16 @@ Methods: - client.projects.entries.delete(entry_id, \*, project_id) -> None - client.projects.entries.add_question(project_id, \*\*params) -> Entry - client.projects.entries.query(project_id, \*\*params) -> Optional[Entry] + +# Tlm + +Types: + +```python +from codex.types import TlmPromptResponse, TlmScoreResponse +``` + +Methods: + +- client.tlm.prompt(\*\*params) -> TlmPromptResponse +- client.tlm.score(\*\*params) -> TlmScoreResponse diff --git a/pyproject.toml b/pyproject.toml index 2641c49..ede4351 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.11" +version = "0.1.0-alpha.12" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" @@ -177,7 +177,7 @@ select = [ "T201", "T203", # misuse of typing.TYPE_CHECKING - "TCH004", + "TC004", # import rules "TID251", ] diff --git a/requirements-dev.lock b/requirements-dev.lock index ef07871..1961e8d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -78,7 +78,7 @@ pytz==2023.3.post1 # via dirty-equals respx==0.22.0 rich==13.7.1 -ruff==0.6.9 +ruff==0.9.4 setuptools==68.2.2 # via nodeenv six==1.16.0 diff --git a/scripts/test b/scripts/test index 4fa5698..2b87845 100755 --- a/scripts/test +++ b/scripts/test @@ -52,6 +52,8 @@ else echo fi +export DEFER_PYDANTIC_BUILD=false + echo "==> Running tests" rye run pytest "$@" diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py index 37b3d94..0cf2bd2 100644 --- a/scripts/utils/ruffen-docs.py +++ b/scripts/utils/ruffen-docs.py @@ -47,7 +47,7 @@ def _md_match(match: Match[str]) -> str: with _collect_error(match): code = format_code_block(code) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" def _pycon_match(match: Match[str]) -> str: code = "" @@ -97,7 +97,7 @@ def finish_fragment() -> None: def _md_pycon_match(match: Match[str]) -> str: code = _pycon_match(match) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" src = MD_RE.sub(_md_match, src) src = MD_PYCON_RE.sub(_md_pycon_match, src) diff --git a/src/codex/_base_client.py b/src/codex/_base_client.py index 9090e75..273341b 100644 --- a/src/codex/_base_client.py +++ b/src/codex/_base_client.py @@ -9,7 +9,6 @@ import inspect import logging import platform -import warnings import email.utils from types import TracebackType from random import random @@ -36,7 +35,7 @@ import httpx import distro import pydantic -from httpx import URL, Limits +from httpx import URL from pydantic import PrivateAttr from . 
import _exceptions @@ -51,19 +50,16 @@ Timeout, NotGiven, ResponseT, - Transport, AnyMapping, PostParser, - ProxiesTypes, RequestFiles, HttpxSendArgs, - AsyncTransport, RequestOptions, HttpxRequestFiles, ModelBuilderProtocol, ) from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping -from ._compat import model_copy, model_dump +from ._compat import PYDANTIC_V2, model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( APIResponse, @@ -207,6 +203,9 @@ def _set_private_attributes( model: Type[_T], options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -292,6 +291,9 @@ def _set_private_attributes( client: AsyncAPIClient, options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -331,9 +333,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]): _base_url: URL max_retries: int timeout: Union[float, Timeout, None] - _limits: httpx.Limits - _proxies: ProxiesTypes | None - _transport: Transport | AsyncTransport | None _strict_response_validation: bool _idempotency_header: str | None _default_stream_cls: type[_DefaultStreamT] | None = None @@ -346,9 +345,6 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None = DEFAULT_TIMEOUT, - limits: httpx.Limits, - transport: Transport | AsyncTransport | None, - proxies: ProxiesTypes | None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: @@ -356,9 +352,6 @@ def __init__( self._base_url = self._enforce_trailing_slash(URL(base_url)) self.max_retries = max_retries self.timeout = timeout - self._limits = limits - self._proxies = proxies - self._transport = transport self._custom_headers = custom_headers or {} self._custom_query = custom_query or {} self._strict_response_validation = _strict_response_validation @@ -418,10 +411,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0 if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: headers[idempotency_header] = options.idempotency_key or self._idempotency_key() - # Don't set the retry count header if it was already set or removed by the caller. We check + # Don't set these headers if they were already set or removed by the caller. We check # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. - if "x-stainless-retry-count" not in (header.lower() for header in custom_headers): + lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) return headers @@ -511,7 +511,7 @@ def _build_request( # so that passing a `TypedDict` doesn't cause an error. 
# https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, - json=json_data, + json=json_data if is_given(json_data) else None, files=files, **kwargs, ) @@ -787,46 +787,11 @@ def __init__( base_url: str | URL, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: Transport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, _strict_response_validation: bool, ) -> None: - kwargs: dict[str, Any] = {} - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - kwargs["transport"] = transport - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - kwargs["proxies"] = proxies - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -847,12 +812,9 @@ def __init__( super().__init__( version=version, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, base_url=base_url, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -862,9 +824,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - limits=limits, - follow_redirects=True, - **kwargs, # type: ignore ) def is_closed(self) -> bool: @@ -1359,45 +1318,10 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: AsyncTransport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.AsyncClient | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: - kwargs: dict[str, Any] = {} - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - kwargs["transport"] = transport - warnings.warn( - "The `transport` argument is deprecated. 
The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - kwargs["proxies"] = proxies - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -1419,11 +1343,8 @@ def __init__( super().__init__( version=version, base_url=base_url, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -1433,9 +1354,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - limits=limits, - follow_redirects=True, - **kwargs, # type: ignore ) def is_closed(self) -> bool: diff --git a/src/codex/_client.py b/src/codex/_client.py index 09aaafd..9bdd5f0 100644 --- a/src/codex/_client.py +++ b/src/codex/_client.py @@ -25,7 +25,7 @@ get_async_library, ) from ._version import __version__ -from .resources import health +from .resources import tlm, health from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import APIStatusError from ._base_client import ( @@ -61,6 +61,7 @@ class Codex(SyncAPIClient): organizations: organizations.OrganizationsResource users: users.UsersResource projects: projects.ProjectsResource + tlm: tlm.TlmResource with_raw_response: CodexWithRawResponse with_streaming_response: CodexWithStreamedResponse @@ -141,6 +142,7 @@ def __init__( self.organizations = organizations.OrganizationsResource(self) self.users = users.UsersResource(self) self.projects = projects.ProjectsResource(self) + self.tlm = tlm.TlmResource(self) self.with_raw_response = CodexWithRawResponse(self) self.with_streaming_response = CodexWithStreamedResponse(self) @@ -291,6 +293,7 @@ class AsyncCodex(AsyncAPIClient): organizations: organizations.AsyncOrganizationsResource users: users.AsyncUsersResource projects: projects.AsyncProjectsResource + tlm: tlm.AsyncTlmResource with_raw_response: AsyncCodexWithRawResponse with_streaming_response: AsyncCodexWithStreamedResponse @@ -325,7 +328,7 @@ def __init__( # part of our public interface in the future. 
_strict_response_validation: bool = False, ) -> None: - """Construct a new async Codex client instance.""" + """Construct a new async AsyncCodex client instance.""" self.api_key = api_key self.access_key = access_key @@ -371,6 +374,7 @@ def __init__( self.organizations = organizations.AsyncOrganizationsResource(self) self.users = users.AsyncUsersResource(self) self.projects = projects.AsyncProjectsResource(self) + self.tlm = tlm.AsyncTlmResource(self) self.with_raw_response = AsyncCodexWithRawResponse(self) self.with_streaming_response = AsyncCodexWithStreamedResponse(self) @@ -522,6 +526,7 @@ def __init__(self, client: Codex) -> None: self.organizations = organizations.OrganizationsResourceWithRawResponse(client.organizations) self.users = users.UsersResourceWithRawResponse(client.users) self.projects = projects.ProjectsResourceWithRawResponse(client.projects) + self.tlm = tlm.TlmResourceWithRawResponse(client.tlm) class AsyncCodexWithRawResponse: @@ -530,6 +535,7 @@ def __init__(self, client: AsyncCodex) -> None: self.organizations = organizations.AsyncOrganizationsResourceWithRawResponse(client.organizations) self.users = users.AsyncUsersResourceWithRawResponse(client.users) self.projects = projects.AsyncProjectsResourceWithRawResponse(client.projects) + self.tlm = tlm.AsyncTlmResourceWithRawResponse(client.tlm) class CodexWithStreamedResponse: @@ -538,6 +544,7 @@ def __init__(self, client: Codex) -> None: self.organizations = organizations.OrganizationsResourceWithStreamingResponse(client.organizations) self.users = users.UsersResourceWithStreamingResponse(client.users) self.projects = projects.ProjectsResourceWithStreamingResponse(client.projects) + self.tlm = tlm.TlmResourceWithStreamingResponse(client.tlm) class AsyncCodexWithStreamedResponse: @@ -546,6 +553,7 @@ def __init__(self, client: AsyncCodex) -> None: self.organizations = organizations.AsyncOrganizationsResourceWithStreamingResponse(client.organizations) self.users = users.AsyncUsersResourceWithStreamingResponse(client.users) self.projects = projects.AsyncProjectsResourceWithStreamingResponse(client.projects) + self.tlm = tlm.AsyncTlmResourceWithStreamingResponse(client.tlm) Client = Codex diff --git a/src/codex/_constants.py b/src/codex/_constants.py index a2ac3b6..6ddf2c7 100644 --- a/src/codex/_constants.py +++ b/src/codex/_constants.py @@ -6,7 +6,7 @@ OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 1 minute -DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0) DEFAULT_MAX_RETRIES = 2 DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20) diff --git a/src/codex/_models.py b/src/codex/_models.py index 9a918aa..c4401ff 100644 --- a/src/codex/_models.py +++ b/src/codex/_models.py @@ -172,7 +172,7 @@ def to_json( @override def __str__(self) -> str: # mypy complains about an invalid self arg - return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + return f"{self.__repr_name__()}({self.__repr_str__(', ')})" # type: ignore[misc] # Override the 'construct' method in a way that supports recursive parsing without validation. # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. @@ -426,10 +426,16 @@ def construct_type(*, value: object, type_: object) -> object: If the given value does not match the expected type then it is returned as-is. 
""" + + # store a reference to the original type we were given before we extract any inner + # types so that we can properly resolve forward references in `TypeAliasType` annotations + original_type = None + # we allow `object` as the input type because otherwise, passing things like # `Literal['value']` will be reported as a type error by type checkers type_ = cast("type[object]", type_) if is_type_alias_type(type_): + original_type = type_ # type: ignore[unreachable] type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` @@ -446,7 +452,7 @@ def construct_type(*, value: object, type_: object) -> object: if is_union(origin): try: - return validate_type(type_=cast("type[object]", type_), value=value) + return validate_type(type_=cast("type[object]", original_type or type_), value=value) except Exception: pass diff --git a/src/codex/_utils/_sync.py b/src/codex/_utils/_sync.py index 8b3aaf2..ad7ec71 100644 --- a/src/codex/_utils/_sync.py +++ b/src/codex/_utils/_sync.py @@ -7,16 +7,20 @@ from typing import Any, TypeVar, Callable, Awaitable from typing_extensions import ParamSpec +import anyio +import sniffio +import anyio.to_thread + T_Retval = TypeVar("T_Retval") T_ParamSpec = ParamSpec("T_ParamSpec") if sys.version_info >= (3, 9): - to_thread = asyncio.to_thread + _asyncio_to_thread = asyncio.to_thread else: # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread # for Python 3.8 support - async def to_thread( + async def _asyncio_to_thread( func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs ) -> Any: """Asynchronously run function *func* in a separate thread. @@ -34,6 +38,17 @@ async def to_thread( return await loop.run_in_executor(None, func_call) +async def to_thread( + func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs +) -> T_Retval: + if sniffio.current_async_library() == "asyncio": + return await _asyncio_to_thread(func, *args, **kwargs) + + return await anyio.to_thread.run_sync( + functools.partial(func, *args, **kwargs), + ) + + # inspired by `asyncer`, https://github.com/tiangolo/asyncer def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: """ diff --git a/src/codex/_utils/_transform.py b/src/codex/_utils/_transform.py index a6b62ca..18afd9d 100644 --- a/src/codex/_utils/_transform.py +++ b/src/codex/_utils/_transform.py @@ -25,7 +25,7 @@ is_annotated_type, strip_annotated_type, ) -from .._compat import model_dump, is_typeddict +from .._compat import get_origin, model_dump, is_typeddict _T = TypeVar("_T") @@ -164,9 +164,14 @@ def _transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) @@ -307,9 +312,14 @@ async def _async_transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return await _async_transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type 
= get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) diff --git a/src/codex/_version.py b/src/codex/_version.py index fd5f708..fe7cc73 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.11" # x-release-please-version +__version__ = "0.1.0-alpha.12" # x-release-please-version diff --git a/src/codex/resources/__init__.py b/src/codex/resources/__init__.py index b96b725..f91f0e4 100644 --- a/src/codex/resources/__init__.py +++ b/src/codex/resources/__init__.py @@ -1,5 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .tlm import ( + TlmResource, + AsyncTlmResource, + TlmResourceWithRawResponse, + AsyncTlmResourceWithRawResponse, + TlmResourceWithStreamingResponse, + AsyncTlmResourceWithStreamingResponse, +) from .users import ( UsersResource, AsyncUsersResource, @@ -58,4 +66,10 @@ "AsyncProjectsResourceWithRawResponse", "ProjectsResourceWithStreamingResponse", "AsyncProjectsResourceWithStreamingResponse", + "TlmResource", + "AsyncTlmResource", + "TlmResourceWithRawResponse", + "AsyncTlmResourceWithRawResponse", + "TlmResourceWithStreamingResponse", + "AsyncTlmResourceWithStreamingResponse", ] diff --git a/src/codex/resources/projects/entries.py b/src/codex/resources/projects/entries.py index d307d3d..2fcc8e0 100644 --- a/src/codex/resources/projects/entries.py +++ b/src/codex/resources/projects/entries.py @@ -70,6 +70,8 @@ def create( """ Create a knowledge entry for a project. + Raises: HTTPException: If an existing entry is found with the same question. + Args: extra_headers: Send extra headers @@ -138,6 +140,7 @@ def update( *, project_id: str, answer: Optional[str] | NotGiven = NOT_GIVEN, + frequency_count: Optional[int] | NotGiven = NOT_GIVEN, question: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -167,6 +170,7 @@ def update( body=maybe_transform( { "answer": answer, + "frequency_count": frequency_count, "question": question, }, entry_update_params.EntryUpdateParams, @@ -318,8 +322,10 @@ def query( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Optional[Entry]: - """ - Query knowledge for a project. + """Query knowledge for a project. + + Also increments the frequency_count for the + matching entry if found. Returns the matching entry if found and answered, otherwise returns None. This allows the client to distinguish between: (1) no matching question found @@ -383,6 +389,8 @@ async def create( """ Create a knowledge entry for a project. + Raises: HTTPException: If an existing entry is found with the same question. + Args: extra_headers: Send extra headers @@ -451,6 +459,7 @@ async def update( *, project_id: str, answer: Optional[str] | NotGiven = NOT_GIVEN, + frequency_count: Optional[int] | NotGiven = NOT_GIVEN, question: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -480,6 +489,7 @@ async def update( body=await async_maybe_transform( { "answer": answer, + "frequency_count": frequency_count, "question": question, }, entry_update_params.EntryUpdateParams, @@ -631,8 +641,10 @@ async def query( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Optional[Entry]: - """ - Query knowledge for a project. + """Query knowledge for a project. + + Also increments the frequency_count for the + matching entry if found. Returns the matching entry if found and answered, otherwise returns None. This allows the client to distinguish between: (1) no matching question found diff --git a/src/codex/resources/tlm.py b/src/codex/resources/tlm.py new file mode 100644 index 0000000..c6585d0 --- /dev/null +++ b/src/codex/resources/tlm.py @@ -0,0 +1,656 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import Literal + +import httpx + +from ..types import tlm_score_params, tlm_prompt_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.tlm_score_response import TlmScoreResponse +from ..types.tlm_prompt_response import TlmPromptResponse + +__all__ = ["TlmResource", "AsyncTlmResource"] + + +class TlmResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TlmResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return TlmResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TlmResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return TlmResourceWithStreamingResponse(self) + + def prompt( + self, + *, + prompt: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_prompt_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmPromptResponse: + """ + Prompts the TLM API. + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. 
+ Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. 
+ + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. + Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/api/tlm/prompt", + body=maybe_transform( + { + "prompt": prompt, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_prompt_params.TlmPromptParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmPromptResponse, + ) + + def score( + self, + *, + prompt: str, + response: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_score_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmScoreResponse: + """ + Scores the TLM API. + + TODO: + + - Track query count in DB + - Enforce hard cap on queries for users w/o credit card on file + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. 
For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. + Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/api/tlm/score", + body=maybe_transform( + { + "prompt": prompt, + "response": response, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_score_params.TlmScoreParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmScoreResponse, + ) + + +class AsyncTlmResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncTlmResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return AsyncTlmResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTlmResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return AsyncTlmResourceWithStreamingResponse(self) + + async def prompt( + self, + *, + prompt: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_prompt_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmPromptResponse: + """ + Prompts the TLM API. + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. 
+ - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. 
+ + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. + Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/api/tlm/prompt", + body=await async_maybe_transform( + { + "prompt": prompt, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_prompt_params.TlmPromptParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmPromptResponse, + ) + + async def score( + self, + *, + prompt: str, + response: str, + constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + options: Optional[tlm_score_params.Options] | NotGiven = NOT_GIVEN, + quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, + task: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TlmScoreResponse: + """ + Scores the TLM API. + + TODO: + + - Track query count in DB + - Enforce hard cap on queries for users w/o credit card on file + + Args: + options: Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. 
You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. 
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this to False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+ custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
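To make the options above concrete, here is a minimal sketch of calling the new async score route added in this diff. It assumes an already-configured AsyncCodex client (construction and authentication are omitted), and the prompt/response strings plus the helper name score_example are placeholders, not part of the generated SDK.

    from codex import AsyncCodex

    async def score_example(client: AsyncCodex) -> None:
        # Placeholder prompt/response pair; option values mirror the documented "low" preset.
        result = await client.tlm.score(
            prompt="What is the capital of France?",
            response="Paris",
            quality_preset="low",
            options={
                "model": "gpt-4o-mini",
                "num_consistency_samples": 4,
                "use_self_reflection": True,
                "log": ["explanation"],
            },
        )
        # TlmScoreResponse exposes trustworthiness_score (float) and an optional log payload.
        print(result.trustworthiness_score)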
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/api/tlm/score", + body=await async_maybe_transform( + { + "prompt": prompt, + "response": response, + "constrain_outputs": constrain_outputs, + "options": options, + "quality_preset": quality_preset, + "task": task, + }, + tlm_score_params.TlmScoreParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TlmScoreResponse, + ) + + +class TlmResourceWithRawResponse: + def __init__(self, tlm: TlmResource) -> None: + self._tlm = tlm + + self.prompt = to_raw_response_wrapper( + tlm.prompt, + ) + self.score = to_raw_response_wrapper( + tlm.score, + ) + + +class AsyncTlmResourceWithRawResponse: + def __init__(self, tlm: AsyncTlmResource) -> None: + self._tlm = tlm + + self.prompt = async_to_raw_response_wrapper( + tlm.prompt, + ) + self.score = async_to_raw_response_wrapper( + tlm.score, + ) + + +class TlmResourceWithStreamingResponse: + def __init__(self, tlm: TlmResource) -> None: + self._tlm = tlm + + self.prompt = to_streamed_response_wrapper( + tlm.prompt, + ) + self.score = to_streamed_response_wrapper( + tlm.score, + ) + + +class AsyncTlmResourceWithStreamingResponse: + def __init__(self, tlm: AsyncTlmResource) -> None: + self._tlm = tlm + + self.prompt = async_to_streamed_response_wrapper( + tlm.prompt, + ) + self.score = async_to_streamed_response_wrapper( + tlm.score, + ) diff --git a/src/codex/resources/users/__init__.py b/src/codex/resources/users/__init__.py index 18ed37e..9618f58 100644 --- a/src/codex/resources/users/__init__.py +++ b/src/codex/resources/users/__init__.py @@ -16,6 +16,14 @@ MyselfResourceWithStreamingResponse, AsyncMyselfResourceWithStreamingResponse, ) +from .verification import ( + VerificationResource, + AsyncVerificationResource, + VerificationResourceWithRawResponse, + AsyncVerificationResourceWithRawResponse, + VerificationResourceWithStreamingResponse, + AsyncVerificationResourceWithStreamingResponse, +) __all__ = [ "MyselfResource", @@ -24,6 +32,12 @@ "AsyncMyselfResourceWithRawResponse", "MyselfResourceWithStreamingResponse", "AsyncMyselfResourceWithStreamingResponse", + "VerificationResource", + "AsyncVerificationResource", + "VerificationResourceWithRawResponse", + "AsyncVerificationResourceWithRawResponse", + "VerificationResourceWithStreamingResponse", + "AsyncVerificationResourceWithStreamingResponse", "UsersResource", "AsyncUsersResource", "UsersResourceWithRawResponse", diff --git a/src/codex/resources/users/users.py b/src/codex/resources/users/users.py index fb7ee0f..a7d9d2a 100644 --- a/src/codex/resources/users/users.py +++ b/src/codex/resources/users/users.py @@ -22,6 +22,14 @@ async_to_streamed_response_wrapper, ) from ...types.user import User +from .verification import ( + VerificationResource, + AsyncVerificationResource, + VerificationResourceWithRawResponse, + AsyncVerificationResourceWithRawResponse, + VerificationResourceWithStreamingResponse, + AsyncVerificationResourceWithStreamingResponse, +) from .myself.myself import ( MyselfResource, AsyncMyselfResource, @@ -40,6 +48,10 @@ class UsersResource(SyncAPIResource): def myself(self) -> MyselfResource: return MyselfResource(self._client) + @cached_property + def verification(self) -> VerificationResource: + return 
VerificationResource(self._client) + @cached_property def with_raw_response(self) -> UsersResourceWithRawResponse: """ @@ -114,6 +126,10 @@ class AsyncUsersResource(AsyncAPIResource): def myself(self) -> AsyncMyselfResource: return AsyncMyselfResource(self._client) + @cached_property + def verification(self) -> AsyncVerificationResource: + return AsyncVerificationResource(self._client) + @cached_property def with_raw_response(self) -> AsyncUsersResourceWithRawResponse: """ @@ -195,6 +211,10 @@ def __init__(self, users: UsersResource) -> None: def myself(self) -> MyselfResourceWithRawResponse: return MyselfResourceWithRawResponse(self._users.myself) + @cached_property + def verification(self) -> VerificationResourceWithRawResponse: + return VerificationResourceWithRawResponse(self._users.verification) + class AsyncUsersResourceWithRawResponse: def __init__(self, users: AsyncUsersResource) -> None: @@ -208,6 +228,10 @@ def __init__(self, users: AsyncUsersResource) -> None: def myself(self) -> AsyncMyselfResourceWithRawResponse: return AsyncMyselfResourceWithRawResponse(self._users.myself) + @cached_property + def verification(self) -> AsyncVerificationResourceWithRawResponse: + return AsyncVerificationResourceWithRawResponse(self._users.verification) + class UsersResourceWithStreamingResponse: def __init__(self, users: UsersResource) -> None: @@ -221,6 +245,10 @@ def __init__(self, users: UsersResource) -> None: def myself(self) -> MyselfResourceWithStreamingResponse: return MyselfResourceWithStreamingResponse(self._users.myself) + @cached_property + def verification(self) -> VerificationResourceWithStreamingResponse: + return VerificationResourceWithStreamingResponse(self._users.verification) + class AsyncUsersResourceWithStreamingResponse: def __init__(self, users: AsyncUsersResource) -> None: @@ -233,3 +261,7 @@ def __init__(self, users: AsyncUsersResource) -> None: @cached_property def myself(self) -> AsyncMyselfResourceWithStreamingResponse: return AsyncMyselfResourceWithStreamingResponse(self._users.myself) + + @cached_property + def verification(self) -> AsyncVerificationResourceWithStreamingResponse: + return AsyncVerificationResourceWithStreamingResponse(self._users.verification) diff --git a/src/codex/resources/users/verification.py b/src/codex/resources/users/verification.py new file mode 100644 index 0000000..e75326e --- /dev/null +++ b/src/codex/resources/users/verification.py @@ -0,0 +1,135 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.users.verification_resend_response import VerificationResendResponse + +__all__ = ["VerificationResource", "AsyncVerificationResource"] + + +class VerificationResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> VerificationResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return VerificationResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VerificationResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return VerificationResourceWithStreamingResponse(self) + + def resend( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VerificationResendResponse: + """Resend verification email to the specified user through Auth0.""" + return self._post( + "/api/users/verification/resend", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VerificationResendResponse, + ) + + +class AsyncVerificationResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncVerificationResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/cleanlab/codex-python#accessing-raw-response-data-eg-headers + """ + return AsyncVerificationResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVerificationResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/cleanlab/codex-python#with_streaming_response + """ + return AsyncVerificationResourceWithStreamingResponse(self) + + async def resend( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VerificationResendResponse: + """Resend verification email to the specified user through Auth0.""" + return await self._post( + "/api/users/verification/resend", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VerificationResendResponse, + ) + + +class VerificationResourceWithRawResponse: + def __init__(self, verification: VerificationResource) -> None: + self._verification = verification + + self.resend = to_raw_response_wrapper( + verification.resend, + ) + + +class AsyncVerificationResourceWithRawResponse: + def __init__(self, verification: AsyncVerificationResource) -> None: + self._verification = verification + + self.resend = async_to_raw_response_wrapper( + verification.resend, + ) + + +class VerificationResourceWithStreamingResponse: + def __init__(self, verification: VerificationResource) -> None: + self._verification = verification + + self.resend = to_streamed_response_wrapper( + verification.resend, + ) + + +class AsyncVerificationResourceWithStreamingResponse: + def __init__(self, verification: AsyncVerificationResource) -> None: + self._verification = verification + + self.resend = async_to_streamed_response_wrapper( + verification.resend, + ) diff --git a/src/codex/types/__init__.py b/src/codex/types/__init__.py index f7ec95b..8f241bc 100644 --- a/src/codex/types/__init__.py +++ b/src/codex/types/__init__.py @@ -3,7 +3,11 @@ from __future__ import annotations from .user import User as User +from .tlm_score_params import TlmScoreParams as TlmScoreParams +from .tlm_prompt_params import TlmPromptParams as TlmPromptParams +from .tlm_score_response import TlmScoreResponse as TlmScoreResponse from .project_list_params import ProjectListParams as ProjectListParams +from .tlm_prompt_response import TlmPromptResponse as TlmPromptResponse from .health_check_response import HealthCheckResponse as HealthCheckResponse from .project_create_params import ProjectCreateParams as ProjectCreateParams from .project_list_response import ProjectListResponse as ProjectListResponse diff --git a/src/codex/types/projects/entry.py b/src/codex/types/projects/entry.py index d3e1fc5..4621cc4 100644 --- a/src/codex/types/projects/entry.py +++ b/src/codex/types/projects/entry.py @@ -18,3 +18,5 @@ class Entry(BaseModel): answer: Optional[str] = None answered_at: Optional[datetime] = None + + frequency_count: Optional[int] = None diff --git a/src/codex/types/projects/entry_update_params.py b/src/codex/types/projects/entry_update_params.py index 0a676f3..ba10549 100644 --- a/src/codex/types/projects/entry_update_params.py +++ b/src/codex/types/projects/entry_update_params.py @@ -13,4 +13,6 @@ class EntryUpdateParams(TypedDict, total=False): answer: Optional[str] + frequency_count: Optional[int] + question: Optional[str] diff --git a/src/codex/types/tlm_prompt_params.py b/src/codex/types/tlm_prompt_params.py new file mode 100644 index 0000000..860f1a7 --- /dev/null +++ b/src/codex/types/tlm_prompt_params.py @@ -0,0 +1,127 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
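Before the new TLM parameter types below, the verification resource added above can be exercised roughly as follows. This is a sketch only: it assumes a configured synchronous Codex client, and the wrapper function name is invented for illustration.

    from codex import Codex

    def resend_verification_email(client: Codex) -> None:
        # POST /api/users/verification/resend asks Auth0 to re-send the verification email.
        result = client.users.verification.resend()
        # VerificationResendResponse is a plain dict[str, str].
        print(result)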
+ +from __future__ import annotations + +from typing import List, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TlmPromptParams", "Options"] + + +class TlmPromptParams(TypedDict, total=False): + prompt: Required[str] + + constrain_outputs: Optional[List[str]] + + options: Optional[Options] + """ + Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. + + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. 
+
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM.
+ TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens).
+ This parameter must be between 1 and 20.
+ When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it.
+
+ num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency.
+ Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes.
+ This consistency helps quantify the epistemic uncertainty associated with
+ strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response.
+ TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible.
+
+ use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it
+ generated and self-evaluate this response.
+ Setting this to False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes.
+ Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts
+ and catches answers that are obviously incorrect/bad.
+
+ similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures
+ similarity between sampled responses considered by the model in the consistency assessment.
+ Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap),
+ "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model).
+ Set this to "string" to improve latency/costs.
+
+ reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens)
+ when considering alternative possible responses and double-checking responses.
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs.
+
+ log (list[str], default = []): optionally specify additional logs or metadata that TLM should return.
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness.
+
+ custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evaluation criteria.
+ The expected input format is a list of dictionaries, where each dictionary has the following keys:
+ - name: Name of the evaluation criteria.
+ - criteria: Instructions specifying the evaluation criteria.
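For reference, a rough sketch of how these prompt parameters could be passed through the generated client. The question, the constrained outputs, the custom criterion text, and the helper name prompt_example are all invented for illustration; only parameter and field names documented above are used.

    from codex import Codex

    def prompt_example(client: Codex) -> None:
        # constrain_outputs restricts the returned response to the listed strings.
        result = client.tlm.prompt(
            prompt="Is the sky blue? Answer yes or no.",
            constrain_outputs=["yes", "no"],
            options={
                "reasoning_effort": "none",
                "similarity_measure": "string",
                "custom_eval_criteria": [
                    {"name": "conciseness", "criteria": "The response should be a single word."},
                ],
            },
        )
        # TlmPromptResponse carries the response text and its trustworthiness score.
        print(result.response, result.trustworthiness_score)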
+ """ + + quality_preset: Literal["best", "high", "medium", "low", "base"] + + task: Optional[str] + + +class Options(TypedDict, total=False): + custom_eval_criteria: Iterable[object] + + log: List[str] + + max_tokens: int + + model: str + + num_candidate_responses: int + + num_consistency_samples: int + + reasoning_effort: str + + similarity_measure: str + + use_self_reflection: bool diff --git a/src/codex/types/tlm_prompt_response.py b/src/codex/types/tlm_prompt_response.py new file mode 100644 index 0000000..d939c00 --- /dev/null +++ b/src/codex/types/tlm_prompt_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["TlmPromptResponse"] + + +class TlmPromptResponse(BaseModel): + response: str + + trustworthiness_score: float + + log: Optional[object] = None diff --git a/src/codex/types/tlm_score_params.py b/src/codex/types/tlm_score_params.py new file mode 100644 index 0000000..213da42 --- /dev/null +++ b/src/codex/types/tlm_score_params.py @@ -0,0 +1,129 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TlmScoreParams", "Options"] + + +class TlmScoreParams(TypedDict, total=False): + prompt: Required[str] + + response: Required[str] + + constrain_outputs: Optional[List[str]] + + options: Optional[Options] + """ + Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the + quality preset. + + For all options described below, higher settings will lead to longer runtimes + and may consume more tokens internally. You may not be able to run long prompts + (or prompts with long responses) in your account, unless your token/rate limits + are increased. If you hit token limit issues, try lower/less expensive + TLMOptions to be able to run longer prompts/responses, or contact Cleanlab to + increase your limits. + + The default values corresponding to each quality preset are: + + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a cheaper self-reflection will be used to compute the + trustworthiness score. + + By default, the TLM uses the "medium" quality preset. The default base LLM + `model` used is "gpt-4o-mini", and `max_tokens` is 512 for all quality presets. + You can set custom values for these arguments regardless of the quality preset + specified. 
+ + Args: model ({"gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "o1-preview", + "gpt-3.5-turbo-16k", "gpt-4", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", + "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = + "gpt-4o-mini"): Underlying base LLM to use (better models yield better results, + faster models yield faster/cheaper results). - Models still in beta: "o1", + "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-3.7-sonnet", + "claude-3.5-sonnet-v2", "claude-3.5-haiku", "nova-micro", "nova-lite", + "nova-pro". - Recommended models for accuracy: "gpt-4o", "o3-mini", "o1", + "claude-3.7-sonnet". - Recommended models for low latency/costs: "nova-micro", + "gpt-4o-mini". + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). + Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher costs/runtimes. + If you experience token/rate limit errors while using TLM, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated by TLM. + TLM scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + Higher values here can produce better (more accurate) responses from the TLM, but at higher costs/runtimes (and internally consumes more tokens). + This parameter must be between 1 and 20. + When it is 1, TLM simply returns a standard LLM response and does not attempt to auto-improve it. + + num_consistency_samples (int, default = 8): the amount of internal sampling to evaluate LLM response consistency. + Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher costs/runtimes. + This consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM internally measures consistency via the degree of contradiction between sampled responses that the model considers equally plausible. + + use_self_reflection (bool, default = `True`): whether the LLM is asked to self-reflect upon the response it + generated and self-evaluate this response. + Setting this False disables self-reflection and may worsen trustworthiness scores, but will reduce costs/runtimes. + Self-reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches answers that are obviously incorrect/bad. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large"}, default = "semantic"): how the trustworthiness scoring algorithm measures + similarity between sampled responses considered by the model in the consistency assessment. + Supported similarity measures include "semantic" (based on natural language inference), "string" (based on character/word overlap), + "embedding" (based on embedding similarity), and "embedding_large" (based on embedding similarity with a larger embedding model). + Set this to "string" to improve latency/costs. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much the LLM can reason (number of thinking tokens) + when considering alternative possible responses and double-checking responses. 
+ Higher efforts here may produce better TLM trustworthiness scores and LLM responses. Reduce this value to improve latency/costs. + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + """ + + quality_preset: Literal["best", "high", "medium", "low", "base"] + + task: Optional[str] + + +class Options(TypedDict, total=False): + custom_eval_criteria: Iterable[object] + + log: List[str] + + max_tokens: int + + model: str + + num_candidate_responses: int + + num_consistency_samples: int + + reasoning_effort: str + + similarity_measure: str + + use_self_reflection: bool diff --git a/src/codex/types/tlm_score_response.py b/src/codex/types/tlm_score_response.py new file mode 100644 index 0000000..e92b2e0 --- /dev/null +++ b/src/codex/types/tlm_score_response.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["TlmScoreResponse"] + + +class TlmScoreResponse(BaseModel): + trustworthiness_score: float + + log: Optional[object] = None diff --git a/src/codex/types/users/__init__.py b/src/codex/types/users/__init__.py index 4256bd7..438bc6f 100644 --- a/src/codex/types/users/__init__.py +++ b/src/codex/types/users/__init__.py @@ -3,3 +3,4 @@ from __future__ import annotations from .user_schema import UserSchema as UserSchema +from .verification_resend_response import VerificationResendResponse as VerificationResendResponse diff --git a/src/codex/types/users/verification_resend_response.py b/src/codex/types/users/verification_resend_response.py new file mode 100644 index 0000000..6617ff5 --- /dev/null +++ b/src/codex/types/users/verification_resend_response.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
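Since TlmScoreParams is now exported from codex.types, a request body can also be built as a typed dict before it is sent. A small sketch, assuming the caller wants the cheap "base" preset; the prompt/response text is a placeholder.

    from codex.types import TlmScoreParams

    # Illustrative payload only; field names come from the TypedDict above.
    params: TlmScoreParams = {
        "prompt": "Summarize the ticket in one sentence.",
        "response": "The user cannot reset their password.",
        "quality_preset": "base",
        "options": {
            "reasoning_effort": "none",
            "similarity_measure": "string",
            "max_tokens": 64,
        },
    }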
+ +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["VerificationResendResponse"] + +VerificationResendResponse: TypeAlias = Dict[str, str] diff --git a/tests/api_resources/projects/test_entries.py b/tests/api_resources/projects/test_entries.py index 5b51ec1..026add4 100644 --- a/tests/api_resources/projects/test_entries.py +++ b/tests/api_resources/projects/test_entries.py @@ -144,6 +144,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None: entry_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", answer="answer", + frequency_count=0, question="question", ) assert_matches_type(Entry, entry, path=["response"]) @@ -519,6 +520,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> entry_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", answer="answer", + frequency_count=0, question="question", ) assert_matches_type(Entry, entry, path=["response"]) diff --git a/tests/api_resources/test_tlm.py b/tests/api_resources/test_tlm.py new file mode 100644 index 0000000..32d5a67 --- /dev/null +++ b/tests/api_resources/test_tlm.py @@ -0,0 +1,254 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from codex import Codex, AsyncCodex +from codex.types import TlmScoreResponse, TlmPromptResponse +from tests.utils import assert_matches_type + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestTlm: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_prompt(self, client: Codex) -> None: + tlm = client.tlm.prompt( + prompt="prompt", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_prompt_with_all_params(self, client: Codex) -> None: + tlm = client.tlm.prompt( + prompt="prompt", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_prompt(self, client: Codex) -> None: + response = client.tlm.with_raw_response.prompt( + prompt="prompt", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_prompt(self, client: Codex) -> None: + with client.tlm.with_streaming_response.prompt( + prompt="prompt", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_score(self, client: Codex) -> None: + tlm = client.tlm.score( + prompt="prompt", + response="response", + ) + 
assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_score_with_all_params(self, client: Codex) -> None: + tlm = client.tlm.score( + prompt="prompt", + response="response", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_score(self, client: Codex) -> None: + response = client.tlm.with_raw_response.score( + prompt="prompt", + response="response", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_score(self, client: Codex) -> None: + with client.tlm.with_streaming_response.score( + prompt="prompt", + response="response", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncTlm: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + async def test_method_prompt(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.prompt( + prompt="prompt", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.prompt( + prompt="prompt", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_prompt(self, async_client: AsyncCodex) -> None: + response = await async_client.tlm.with_raw_response.prompt( + prompt="prompt", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = await response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_prompt(self, async_client: AsyncCodex) -> None: + async with async_client.tlm.with_streaming_response.prompt( + prompt="prompt", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = await response.parse() + assert_matches_type(TlmPromptResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_score(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.score( + 
prompt="prompt", + response="response", + ) + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> None: + tlm = await async_client.tlm.score( + prompt="prompt", + response="response", + constrain_outputs=["string"], + options={ + "custom_eval_criteria": [{}], + "log": ["string"], + "max_tokens": 0, + "model": "model", + "num_candidate_responses": 0, + "num_consistency_samples": 0, + "reasoning_effort": "reasoning_effort", + "similarity_measure": "similarity_measure", + "use_self_reflection": True, + }, + quality_preset="best", + task="task", + ) + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_score(self, async_client: AsyncCodex) -> None: + response = await async_client.tlm.with_raw_response.score( + prompt="prompt", + response="response", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + tlm = await response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_score(self, async_client: AsyncCodex) -> None: + async with async_client.tlm.with_streaming_response.score( + prompt="prompt", + response="response", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + tlm = await response.parse() + assert_matches_type(TlmScoreResponse, tlm, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/users/test_verification.py b/tests/api_resources/users/test_verification.py new file mode 100644 index 0000000..8332327 --- /dev/null +++ b/tests/api_resources/users/test_verification.py @@ -0,0 +1,78 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
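The tests that follow exercise the raw- and streaming-response wrappers for the resend route. Outside the test harness, the raw variant looks roughly like this; client setup is omitted and the function name is illustrative.

    from codex import Codex

    def resend_and_inspect(client: Codex) -> None:
        # .with_raw_response wraps the HTTP response; .parse() yields the usual payload.
        raw = client.users.verification.with_raw_response.resend()
        verification = raw.parse()
        print(verification)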
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from codex import Codex, AsyncCodex +from tests.utils import assert_matches_type +from codex.types.users import VerificationResendResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestVerification: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_resend(self, client: Codex) -> None: + verification = client.users.verification.resend() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_resend(self, client: Codex) -> None: + response = client.users.verification.with_raw_response.resend() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + verification = response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_resend(self, client: Codex) -> None: + with client.users.verification.with_streaming_response.resend() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + verification = response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncVerification: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + async def test_method_resend(self, async_client: AsyncCodex) -> None: + verification = await async_client.users.verification.resend() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_resend(self, async_client: AsyncCodex) -> None: + response = await async_client.users.verification.with_raw_response.resend() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + verification = await response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_resend(self, async_client: AsyncCodex) -> None: + async with async_client.users.verification.with_streaming_response.resend() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + verification = await response.parse() + assert_matches_type(VerificationResendResponse, verification, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/test_client.py b/tests/test_client.py index b421541..0b0b783 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -23,10 +23,12 @@ from codex import Codex, AsyncCodex, APIResponseValidationError from codex._types import Omit +from codex._utils import maybe_transform from codex._models import BaseModel, FinalRequestOptions from codex._constants import RAW_RESPONSE_HEADER from codex._exceptions import APIStatusError, APITimeoutError, APIResponseValidationError from codex._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options +from codex.types.project_create_params import ProjectCreateParams from .utils import 
update_env @@ -680,7 +682,13 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No with pytest.raises(APITimeoutError): self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) @@ -695,7 +703,13 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non with pytest.raises(APIStatusError): self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) @@ -1425,7 +1439,13 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) with pytest.raises(APITimeoutError): await self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) @@ -1440,7 +1460,13 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) with pytest.raises(APIStatusError): await self.client.post( "/api/projects/", - body=cast(object, dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e")), + body=cast( + object, + maybe_transform( + dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"), + ProjectCreateParams, + ), + ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, ) diff --git a/tests/test_transform.py b/tests/test_transform.py index 2e91888..324f31a 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -2,7 +2,7 @@ import io import pathlib -from typing import Any, List, Union, TypeVar, Iterable, Optional, cast +from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast from datetime import date, datetime from typing_extensions import Required, Annotated, TypedDict @@ -388,6 +388,15 @@ def my_iter() -> Iterable[Baz8]: } +@parametrize +@pytest.mark.asyncio +async def test_dictionary_items(use_async: bool) -> None: + class DictItems(TypedDict): + foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] + + assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}} + + class TypedDictIterableUnionStr(TypedDict): foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")]
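Finally, the updated client tests now run request bodies through maybe_transform before posting them. A short sketch of that helper in isolation, using only the imports the tests themselves rely on; the printed result is whatever the transform produces for these placeholder values.

    from codex._utils import maybe_transform
    from codex.types.project_create_params import ProjectCreateParams

    # Mirrors how tests/test_client.py builds the /api/projects/ request body.
    body = maybe_transform(
        dict(config={}, name="name", organization_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"),
        ProjectCreateParams,
    )
    print(body)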