diff --git a/core/bifrost.go b/core/bifrost.go
index 57ab5e89ab..23aaeefe73 100644
--- a/core/bifrost.go
+++ b/core/bifrost.go
@@ -2313,6 +2313,123 @@ func (bifrost *Bifrost) FileContentRequest(ctx *schemas.BifrostContext, req *sch
 	return response.FileContentResponse, nil
 }
 
+// CachedContentCreateRequest creates a new cached content (Gemini / Vertex AI named cache lifecycle).
+// It rejects a nil req or an empty Provider/Model, and uses bifrost.ctx when ctx is nil.
+func (bifrost *Bifrost) CachedContentCreateRequest(ctx *schemas.BifrostContext, req *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	switch {
+	case req == nil:
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "cached content create request is nil"}}
+	case req.Provider == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "provider is required for cached content create request"}}
+	case req.Model == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "model is required for cached content create request"}}
+	}
+	if ctx == nil {
+		ctx = bifrost.ctx
+	}
+	envelope := bifrost.getBifrostRequest()
+	envelope.CachedContentCreateRequest = req
+	envelope.RequestType = schemas.CachedContentCreateRequest
+	resp, bifrostErr := bifrost.handleRequest(ctx, envelope)
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
+	return resp.CachedContentCreateResponse, nil
+}
+
+// CachedContentListRequest lists cached contents; req must be non-nil with Provider set, and a nil ctx falls back to bifrost.ctx.
+func (bifrost *Bifrost) CachedContentListRequest(ctx *schemas.BifrostContext, req *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	switch {
+	case req == nil:
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "cached content list request is nil"}}
+	case req.Provider == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "provider is required for cached content list request"}}
+	}
+	if ctx == nil {
+		ctx = bifrost.ctx
+	}
+	envelope := bifrost.getBifrostRequest()
+	envelope.CachedContentListRequest = req
+	envelope.RequestType = schemas.CachedContentListRequest
+	resp, bifrostErr := bifrost.handleRequest(ctx, envelope)
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
+	return resp.CachedContentListResponse, nil
+}
+
+// CachedContentRetrieveRequest retrieves a single cached content by name.
+// It rejects a nil req or an empty Provider/Name, and uses bifrost.ctx when ctx is nil.
+func (bifrost *Bifrost) CachedContentRetrieveRequest(ctx *schemas.BifrostContext, req *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	switch {
+	case req == nil:
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "cached content retrieve request is nil"}}
+	case req.Provider == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "provider is required for cached content retrieve request"}}
+	case req.Name == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "name is required for cached content retrieve request"}}
+	}
+	if ctx == nil {
+		ctx = bifrost.ctx
+	}
+	envelope := bifrost.getBifrostRequest()
+	envelope.CachedContentRetrieveRequest = req
+	envelope.RequestType = schemas.CachedContentRetrieveRequest
+	resp, bifrostErr := bifrost.handleRequest(ctx, envelope)
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
+	return resp.CachedContentRetrieveResponse, nil
+}
+
+// CachedContentUpdateRequest updates expiration on a cached content.
+// It rejects a nil req or an empty Provider/Name, and uses bifrost.ctx when ctx is nil.
+func (bifrost *Bifrost) CachedContentUpdateRequest(ctx *schemas.BifrostContext, req *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	switch {
+	case req == nil:
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "cached content update request is nil"}}
+	case req.Provider == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "provider is required for cached content update request"}}
+	case req.Name == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "name is required for cached content update request"}}
+	}
+	if ctx == nil {
+		ctx = bifrost.ctx
+	}
+	envelope := bifrost.getBifrostRequest()
+	envelope.CachedContentUpdateRequest = req
+	envelope.RequestType = schemas.CachedContentUpdateRequest
+	resp, bifrostErr := bifrost.handleRequest(ctx, envelope)
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
+	return resp.CachedContentUpdateResponse, nil
+}
+
+// CachedContentDeleteRequest deletes a cached content by name.
+// It rejects a nil req or an empty Provider/Name, and uses bifrost.ctx when ctx is nil.
+func (bifrost *Bifrost) CachedContentDeleteRequest(ctx *schemas.BifrostContext, req *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	switch {
+	case req == nil:
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "cached content delete request is nil"}}
+	case req.Provider == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "provider is required for cached content delete request"}}
+	case req.Name == "":
+		return nil, &schemas.BifrostError{IsBifrostError: false, Error: &schemas.ErrorField{Message: "name is required for cached content delete request"}}
+	}
+	if ctx == nil {
+		ctx = bifrost.ctx
+	}
+	envelope := bifrost.getBifrostRequest()
+	envelope.CachedContentDeleteRequest = req
+	envelope.RequestType = schemas.CachedContentDeleteRequest
+	resp, bifrostErr := bifrost.handleRequest(ctx, envelope)
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
+	return resp.CachedContentDeleteResponse, nil
+}
+
 func (bifrost *Bifrost) Passthrough(
 	ctx *schemas.BifrostContext,
 	provider schemas.ModelProvider,
@@ -5608,8 +5725,9 @@ func (bifrost *Bifrost) requestWorker(provider schemas.Provider, config *schemas
 				isMultiKeyBatchOp := isBatchRequestType(req.RequestType) && req.RequestType != schemas.BatchCreateRequest
 				isMultiKeyFileOp := isFileRequestType(req.RequestType) && req.RequestType != schemas.FileUploadRequest
 				isMultiKeyContainerOp := isContainerRequestType(req.RequestType) && req.RequestType != schemas.ContainerCreateRequest && req.RequestType != schemas.ContainerFileCreateRequest
+				isMultiKeyCachedContentOp := isCachedContentRequestType(req.RequestType) && req.RequestType != schemas.CachedContentCreateRequest
 
-				if isMultiKeyBatchOp || isMultiKeyFileOp || isMultiKeyContainerOp {
+				if isMultiKeyBatchOp || isMultiKeyFileOp || isMultiKeyContainerOp || isMultiKeyCachedContentOp {
 					var modelPtr *string
 					if model != "" {
 						modelPtr = &model
@@ -6033,6 +6151,36 @@ func (bifrost *Bifrost) handleProviderRequest(provider schemas.Provider, config
 			return nil, bifrostError
 		}
 		response.FileContentResponse = fileContentResponse
+	case schemas.CachedContentCreateRequest:
+		cachedContentCreateResponse, bifrostError := provider.CachedContentCreate(req.Context, key, req.BifrostRequest.CachedContentCreateRequest)
+		if bifrostError != nil {
+			return nil, bifrostError
+		}
+		response.CachedContentCreateResponse = cachedContentCreateResponse
+	case schemas.CachedContentListRequest:
+		cachedContentListResponse, bifrostError := provider.CachedContentList(req.Context, keys, req.BifrostRequest.CachedContentListRequest)
+		if bifrostError != nil {
+			return nil, bifrostError
+		}
+		response.CachedContentListResponse = cachedContentListResponse
+	case schemas.CachedContentRetrieveRequest:
+		cachedContentRetrieveResponse, bifrostError := provider.CachedContentRetrieve(req.Context, keys, req.BifrostRequest.CachedContentRetrieveRequest)
+		if bifrostError != nil {
+			return nil, bifrostError
+		}
+		response.CachedContentRetrieveResponse = cachedContentRetrieveResponse
+	case schemas.CachedContentUpdateRequest:
+		cachedContentUpdateResponse, bifrostError := provider.CachedContentUpdate(req.Context, keys, req.BifrostRequest.CachedContentUpdateRequest)
+		if bifrostError != nil {
+			return nil, bifrostError
+		}
+		response.CachedContentUpdateResponse = cachedContentUpdateResponse
+	case schemas.CachedContentDeleteRequest:
+		cachedContentDeleteResponse, bifrostError := provider.CachedContentDelete(req.Context, keys, req.BifrostRequest.CachedContentDeleteRequest)
+		if bifrostError != nil {
+			return nil, bifrostError
+		}
+		response.CachedContentDeleteResponse = cachedContentDeleteResponse
 	case schemas.BatchCreateRequest:
 		batchCreateResponse, bifrostError := provider.BatchCreate(req.Context, key, req.BifrostRequest.BatchCreateRequest)
 		if bifrostError != nil {
@@ -6920,6 +7068,11 @@ func resetBifrostRequest(req *schemas.BifrostRequest) {
 	req.FileRetrieveRequest = nil
 	req.FileDeleteRequest = nil
 	req.FileContentRequest = nil
+	req.CachedContentCreateRequest = nil
+	req.CachedContentListRequest = nil
+	req.CachedContentRetrieveRequest = nil
+	req.CachedContentUpdateRequest = nil
+	req.CachedContentDeleteRequest = nil
 	req.BatchCreateRequest = nil
 	req.BatchListRequest = nil
 	req.BatchRetrieveRequest = nil
diff --git a/core/providers/anthropic/cachedcontents.go b/core/providers/anthropic/cachedcontents.go
new file mode 100644
index 0000000000..ece6a13a87
--- /dev/null
+++ b/core/providers/anthropic/cachedcontents.go
@@ -0,0 +1,34 @@
+package anthropic
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on AnthropicProvider: it ignores ctx, key and request
+// and always returns the error built by providerUtils.NewUnsupportedOperationError.
+// Only Gemini and Vertex AI implement the named cached-content lifecycle; other providers
+// either lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *AnthropicProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on AnthropicProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AnthropicProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on AnthropicProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AnthropicProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on AnthropicProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AnthropicProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on AnthropicProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AnthropicProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/azure/cachedcontents.go b/core/providers/azure/cachedcontents.go
new file mode 100644
index 0000000000..1042c06f16
--- /dev/null
+++ b/core/providers/azure/cachedcontents.go
@@ -0,0 +1,34 @@
+package azure
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on AzureProvider: it ignores ctx, key and request
+// and always returns the error built by providerUtils.NewUnsupportedOperationError.
+// Only Gemini and Vertex AI implement the named cached-content lifecycle; other providers
+// either lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *AzureProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on AzureProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AzureProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on AzureProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AzureProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on AzureProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AzureProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on AzureProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *AzureProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/bedrock/bedrock_test.go b/core/providers/bedrock/bedrock_test.go
index 40ccb91ce5..685de3c9d9 100644
--- a/core/providers/bedrock/bedrock_test.go
+++ b/core/providers/bedrock/bedrock_test.go
@@ -2605,7 +2605,7 @@ func TestToolResultJSONParsingResponsesAPI(t *testing.T) {
 				},
 			}
 
-			messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(input)
+			messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
 			require.NoError(t, err)
 			require.Len(t, messages, 1)
 
@@ -4510,7 +4510,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) {
 			},
 		}
 
-		messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(input)
+		messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
 		require.NoError(t, err)
 		require.Len(t, messages, 1)
 
@@ -4554,7 +4554,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) {
 			},
 		}
 
-		messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(input)
+		messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
 		require.NoError(t, err)
 		require.Len(t, messages, 1)
 
@@ -4587,7 +4587,7 @@ func TestToolResultImageContentResponsesAPI(t *testing.T) {
 			},
 		}
 
-		messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(input)
+		messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(context.Background(), input)
 		require.NoError(t, err)
 		require.Len(t, messages, 1)
 
diff --git a/core/providers/bedrock/cachedcontents.go b/core/providers/bedrock/cachedcontents.go
new file mode 100644
index 0000000000..5a822f8aa0
--- /dev/null
+++ b/core/providers/bedrock/cachedcontents.go
@@ -0,0 +1,34 @@
+package bedrock
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on BedrockProvider: it ignores ctx, key and request
+// and always returns the error built by providerUtils.NewUnsupportedOperationError.
+// Only Gemini and Vertex AI implement the named cached-content lifecycle; other providers
+// either lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *BedrockProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on BedrockProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *BedrockProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on BedrockProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *BedrockProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on BedrockProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *BedrockProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on BedrockProvider: it ignores its arguments and always returns an unsupported-operation error.
+func (provider *BedrockProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/bedrock/chat.go b/core/providers/bedrock/chat.go
index 444120589f..abc21e4019 100644
--- a/core/providers/bedrock/chat.go
+++ b/core/providers/bedrock/chat.go
@@ -24,7 +24,7 @@ func ToBedrockChatCompletionRequest(ctx *schemas.BifrostContext, bifrostReq *sch
 	}
 
 	// Convert messages and system messages
-	messages, systemMessages, err := convertMessages(bifrostReq.Input)
+	messages, systemMessages, err := convertMessages(ctx, bifrostReq.Input)
 	if err != nil {
 		return nil, fmt.Errorf("failed to convert messages: %w", err)
 	}
diff --git a/core/providers/bedrock/responses.go b/core/providers/bedrock/responses.go
index 7fbb228c14..32a43f4b3a 100644
--- a/core/providers/bedrock/responses.go
+++ b/core/providers/bedrock/responses.go
@@ -2,6 +2,7 @@ package bedrock
 
 import (
 	"bytes"
+	"context"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
@@ -1664,7 +1665,7 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.
 
 	// map bifrost messages to bedrock messages using the new conversion method
 	if bifrostReq.Input != nil {
-		messages, systemMessages, err := ConvertBifrostMessagesToBedrockMessages(bifrostReq.Input)
+		messages, systemMessages, err := ConvertBifrostMessagesToBedrockMessages(ctx, bifrostReq.Input)
 		if err != nil {
 			return nil, fmt.Errorf("failed to convert Responses messages: %w", err)
 		}
@@ -2089,8 +2090,10 @@ func ToBedrockConverseResponse(bifrostResp *schemas.BifrostResponsesResponse) (*
 	}
 
 	if len(bifrostResp.Output) > 0 {
-		// Convert Bifrost messages back to Bedrock messages using the new conversion method
-		bedrockMessages, _, err := ConvertBifrostMessagesToBedrockMessages(bifrostResp.Output)
+		// Convert Bifrost messages back to Bedrock messages using the new conversion method.
+		// Response-side conversion does not perform outbound fetches in practice (model output
+		// blocks already carry inline data), so context.Background() is acceptable here.
+		bedrockMessages, _, err := ConvertBifrostMessagesToBedrockMessages(context.Background(), bifrostResp.Output)
 		if err != nil {
 			return nil, fmt.Errorf("failed to convert bifrost output messages: %w", err)
 		}
@@ -2487,8 +2490,9 @@ func (m *ToolCallStateManager) HasPendingResults() bool {
 
 // ConvertBifrostMessagesToBedrockMessages converts an array of Bifrost ResponsesMessage to Bedrock message format
 // This is the main conversion method from Bifrost to Bedrock - handles all message types and returns messages + system messages
-// Uses a state machine to properly track and manage tool call lifecycles
-func ConvertBifrostMessagesToBedrockMessages(bifrostMessages []schemas.ResponsesMessage) ([]BedrockMessage, []BedrockSystemMessage, error) {
+// Uses a state machine to properly track and manage tool call lifecycles.
+// The ctx is propagated to URL fetches inside content blocks.
+func ConvertBifrostMessagesToBedrockMessages(ctx context.Context, bifrostMessages []schemas.ResponsesMessage) ([]BedrockMessage, []BedrockSystemMessage, error) {
 	var bedrockMessages []BedrockMessage
 	var systemMessages []BedrockSystemMessage
 	var pendingReasoningContentBlocks []BedrockContentBlock
@@ -2641,7 +2645,7 @@ func ConvertBifrostMessagesToBedrockMessages(bifrostMessages []schemas.Responses
 							} else if block.Type == schemas.ResponsesInputMessageContentBlockTypeImage &&
 								block.ResponsesInputMessageContentBlockImage != nil &&
 								block.ResponsesInputMessageContentBlockImage.ImageURL != nil {
-								imageSource, err := convertImageToBedrockSource(*block.ResponsesInputMessageContentBlockImage.ImageURL)
+								imageSource, err := convertImageToBedrockSource(ctx, *block.ResponsesInputMessageContentBlockImage.ImageURL)
 								if err != nil {
 									// Bedrock only supports base64 data URIs for images. If conversion
 									// fails (e.g. remote URL), the image is dropped from the tool result
@@ -2828,7 +2832,7 @@ func ConvertBifrostMessagesToBedrockMessages(bifrostMessages []schemas.Responses
 				systemMessages = append(systemMessages, systemMsgs...)
 			} else {
 				// Convert user/assistant text message
-				bedrockMsg := convertBifrostMessageToBedrockMessage(&msg)
+				bedrockMsg := convertBifrostMessageToBedrockMessage(ctx, &msg)
 				if bedrockMsg != nil {
 					bedrockMessages = append(bedrockMessages, *bedrockMsg)
 				}
@@ -2935,8 +2939,9 @@ func convertBifrostMessageToBedrockSystemMessages(msg *schemas.ResponsesMessage)
 	return systemMessages
 }
 
-// convertBifrostMessageToBedrockMessage converts a regular Bifrost message to Bedrock message
-func convertBifrostMessageToBedrockMessage(msg *schemas.ResponsesMessage) *BedrockMessage {
+// convertBifrostMessageToBedrockMessage converts a regular Bifrost message to Bedrock message.
+// The ctx is propagated to URL fetches inside content blocks.
+func convertBifrostMessageToBedrockMessage(ctx context.Context, msg *schemas.ResponsesMessage) *BedrockMessage {
 	// Ensure Content is present
 	if msg.Content == nil {
 		return nil
@@ -2947,7 +2952,7 @@ func convertBifrostMessageToBedrockMessage(msg *schemas.ResponsesMessage) *Bedro
 	}
 
 	// Convert content
-	contentBlocks, err := convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks(*msg.Content)
+	contentBlocks, err := convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks(ctx, *msg.Content)
 	if err != nil {
 		return nil
 	}
@@ -3315,8 +3320,9 @@ func convertBifrostReasoningToBedrockReasoning(msg *schemas.ResponsesMessage) []
 	return reasoningBlocks
 }
 
-// convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks converts Bifrost content to Bedrock content blocks
-func convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks(content schemas.ResponsesMessageContent) ([]BedrockContentBlock, error) {
+// convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks converts Bifrost content to Bedrock content blocks.
+// The ctx is propagated to URL fetches inside image blocks.
+func convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks(ctx context.Context, content schemas.ResponsesMessageContent) ([]BedrockContentBlock, error) {
 	var blocks []BedrockContentBlock
 
 	if content.ContentStr != nil {
@@ -3332,7 +3338,7 @@ func convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks(content s
 				bedrockBlock.Text = block.Text
 			case schemas.ResponsesInputMessageContentBlockTypeImage:
 				if block.ResponsesInputMessageContentBlockImage != nil && block.ResponsesInputMessageContentBlockImage.ImageURL != nil {
-					imageSource, err := convertImageToBedrockSource(*block.ResponsesInputMessageContentBlockImage.ImageURL)
+					imageSource, err := convertImageToBedrockSource(ctx, *block.ResponsesInputMessageContentBlockImage.ImageURL)
 					if err != nil {
 						return nil, fmt.Errorf("failed to convert image in responses content block: %w", err)
 					}
diff --git a/core/providers/bedrock/utils.go b/core/providers/bedrock/utils.go
index ee8cca2775..4c845a79d9 100644
--- a/core/providers/bedrock/utils.go
+++ b/core/providers/bedrock/utils.go
@@ -2,9 +2,11 @@ package bedrock
 
 import (
 	"bytes"
+	"context"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"mime"
 	"regexp"
 	"strings"
 
@@ -555,8 +557,9 @@ func ensureChatToolConfigForConversation(bifrostReq *schemas.BifrostChatRequest,
 }
 
 // convertMessages converts Bifrost messages to Bedrock format
-// Returns regular messages and system messages separately
-func convertMessages(bifrostMessages []schemas.ChatMessage) ([]BedrockMessage, []BedrockSystemMessage, error) {
+// Returns regular messages and system messages separately.
+// The ctx is propagated to URL fetches inside individual messages.
+func convertMessages(ctx context.Context, bifrostMessages []schemas.ChatMessage) ([]BedrockMessage, []BedrockSystemMessage, error) {
 	var messages []BedrockMessage
 	var systemMessages []BedrockSystemMessage
 
@@ -573,7 +576,7 @@ func convertMessages(bifrostMessages []schemas.ChatMessage) ([]BedrockMessage, [
 
 		case schemas.ChatMessageRoleUser, schemas.ChatMessageRoleAssistant:
 			// Convert regular message
-			bedrockMsg, err := convertMessage(msg)
+			bedrockMsg, err := convertMessage(ctx, msg)
 			if err != nil {
 				return nil, nil, fmt.Errorf("failed to convert message: %w", err)
 			}
@@ -591,7 +594,7 @@ func convertMessages(bifrostMessages []schemas.ChatMessage) ([]BedrockMessage, [
 			}
 
 			// Convert all collected tool messages into a single Bedrock message
-			bedrockMsg, err := convertToolMessages(toolMessages)
+			bedrockMsg, err := convertToolMessages(ctx, toolMessages)
 			if err != nil {
 				return nil, nil, fmt.Errorf("failed to convert tool messages: %w", err)
 			}
@@ -647,8 +650,9 @@ func convertSystemMessages(msg schemas.ChatMessage) ([]BedrockSystemMessage, err
 	return systemMsgs, nil
 }
 
-// convertMessage converts a Bifrost message to Bedrock format
-func convertMessage(msg schemas.ChatMessage) (BedrockMessage, error) {
+// convertMessage converts a Bifrost message to Bedrock format.
+// The ctx is propagated to URL fetches inside content blocks.
+func convertMessage(ctx context.Context, msg schemas.ChatMessage) (BedrockMessage, error) {
 	bedrockMsg := BedrockMessage{
 		Role: BedrockMessageRole(msg.Role),
 	}
@@ -657,7 +661,7 @@ func convertMessage(msg schemas.ChatMessage) (BedrockMessage, error) {
 	var contentBlocks []BedrockContentBlock
 	if msg.Content != nil {
 		var err error
-		contentBlocks, err = convertContent(*msg.Content)
+		contentBlocks, err = convertContent(ctx, *msg.Content)
 		if err != nil {
 			return BedrockMessage{}, fmt.Errorf("failed to convert content: %w", err)
 		}
@@ -691,8 +695,9 @@ func convertMessage(msg schemas.ChatMessage) (BedrockMessage, error) {
 	return bedrockMsg, nil
 }
 
-// convertToolMessages converts multiple consecutive Bifrost tool messages to a single Bedrock message
-func convertToolMessages(msgs []schemas.ChatMessage) (BedrockMessage, error) {
+// convertToolMessages converts multiple consecutive Bifrost tool messages to a single Bedrock message.
+// The ctx is propagated to URL fetches inside tool result image blocks.
+func convertToolMessages(ctx context.Context, msgs []schemas.ChatMessage) (BedrockMessage, error) {
 	if len(msgs) == 0 {
 		return BedrockMessage{}, fmt.Errorf("no tool messages provided")
 	}
@@ -761,7 +766,7 @@ func convertToolMessages(msgs []schemas.ChatMessage) (BedrockMessage, error) {
 					}
 				case schemas.ChatContentBlockTypeImage:
 					if block.ImageURLStruct != nil {
-						imageSource, err := convertImageToBedrockSource(block.ImageURLStruct.URL)
+						imageSource, err := convertImageToBedrockSource(ctx, block.ImageURLStruct.URL)
 						if err != nil {
 							return BedrockMessage{}, fmt.Errorf("failed to convert image in tool result: %w", err)
 						}
@@ -805,8 +810,9 @@ func convertToolMessages(msgs []schemas.ChatMessage) (BedrockMessage, error) {
 	return bedrockMsg, nil
 }
 
-// convertContent converts Bifrost message content to Bedrock content blocks
-func convertContent(content schemas.ChatMessageContent) ([]BedrockContentBlock, error) {
+// convertContent converts Bifrost message content to Bedrock content blocks.
+// The ctx is propagated to URL fetches inside individual content blocks.
+func convertContent(ctx context.Context, content schemas.ChatMessageContent) ([]BedrockContentBlock, error) {
 	var contentBlocks []BedrockContentBlock
 	if content.ContentStr != nil && *content.ContentStr != "" {
 		// Simple text content (skip empty strings as Bedrock rejects blank text)
@@ -816,7 +822,7 @@ func convertContent(content schemas.ChatMessageContent) ([]BedrockContentBlock,
 	} else if content.ContentBlocks != nil {
 		// Multi-modal content
 		for _, block := range content.ContentBlocks {
-			bedrockBlocks, err := convertContentBlock(block)
+			bedrockBlocks, err := convertContentBlock(ctx, block)
 			if err != nil {
 				return nil, fmt.Errorf("failed to convert content block: %w", err)
 			}
@@ -827,8 +833,9 @@ func convertContent(content schemas.ChatMessageContent) ([]BedrockContentBlock,
 	return contentBlocks, nil
 }
 
-// convertContentBlock converts a Bifrost content block to Bedrock format
-func convertContentBlock(block schemas.ChatContentBlock) ([]BedrockContentBlock, error) {
+// convertContentBlock converts a Bifrost content block to Bedrock format.
+// The ctx is propagated to URL fetches for image and document blocks.
+func convertContentBlock(ctx context.Context, block schemas.ChatContentBlock) ([]BedrockContentBlock, error) {
 	// Handle Bedrock native format where type may be empty but text is set directly
 	// This occurs when requests are sent in Bedrock's native format (e.g., from Claude Code)
 	// In Bedrock format: {"text": "hello"} vs OpenAI format: {"type": "text", "text": "hello"}
@@ -866,7 +873,7 @@ func convertContentBlock(block schemas.ChatContentBlock) ([]BedrockContentBlock,
 			return nil, fmt.Errorf("image_url block missing image_url field")
 		}
 
-		imageSource, err := convertImageToBedrockSource(block.ImageURLStruct.URL)
+		imageSource, err := convertImageToBedrockSource(ctx, block.ImageURLStruct.URL)
 		if err != nil {
 			return nil, fmt.Errorf("failed to convert image: %w", err)
 		}
@@ -931,6 +938,51 @@ func convertContentBlock(block schemas.ChatContentBlock) ([]BedrockContentBlock,
 			}
 		}
 
+		// URL-sourced document: fetch and inline the bytes (Bedrock Converse only
+		// accepts inline source bytes, not remote URLs).
+		if block.File.FileURL != nil && *block.File.FileURL != "" {
+			fetchedMediaType, fetchedB64, fetchErr := providerUtils.FetchAndEncodeURL(ctx, *block.File.FileURL)
+			if fetchErr != nil {
+				return nil, fetchErr
+			}
+			// Refine format from response Content-Type when present (more reliable
+			// than file extension or upstream-declared media type). Normalize to
+			// strip parameters (e.g. "; charset=utf-8") and lowercase the base type.
+			if mt, _, err := mime.ParseMediaType(fetchedMediaType); err == nil {
+				fetchedMediaType = mt
+			}
+			switch fetchedMediaType {
+			case "application/pdf":
+				documentSource.Format = "pdf"
+			case "text/plain":
+				documentSource.Format = "txt"
+				isText = true
+			case "text/markdown":
+				documentSource.Format = "md"
+				isText = true
+			case "text/html":
+				documentSource.Format = "html"
+				isText = true
+			case "text/csv":
+				documentSource.Format = "csv"
+				isText = true
+			case "application/msword":
+				documentSource.Format = "doc"
+			case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+				documentSource.Format = "docx"
+			case "application/vnd.ms-excel":
+				documentSource.Format = "xls"
+			case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+				documentSource.Format = "xlsx"
+			}
+			documentSource.Source.Bytes = &fetchedB64
+			return []BedrockContentBlock{
+				{
+					Document: documentSource,
+				},
+			}, nil
+		}
+
 		// Handle file data - strip data URL prefix if present
 		if block.File.FileData != nil {
 			fileData := *block.File.FileData
@@ -982,45 +1034,59 @@ func convertContentBlock(block schemas.ChatContentBlock) ([]BedrockContentBlock,
 	}
 }
 
-// convertImageToBedrockSource converts a Bifrost image URL to Bedrock image source
-// Uses centralized utility functions like Anthropic converter
-// Returns an error for URL-based images (non-base64) since Bedrock requires base64 data
-func convertImageToBedrockSource(imageURL string) (*BedrockImageSource, error) {
-	// Use centralized utility functions from schemas package
+// convertImageToBedrockSource converts a Bifrost image URL to Bedrock image source.
+// Bedrock Converse requires inline base64 bytes - it does not accept remote URLs.
+// For data: URLs (already base64), use the bytes directly. For http(s) URLs, fetch
+// the image and inline it via providerUtils.FetchAndEncodeURL. The ctx is propagated to the
+// fetch so request cancellation/deadlines abort in-flight downloads.
+func convertImageToBedrockSource(ctx context.Context, imageURL string) (*BedrockImageSource, error) {
 	sanitizedURL, err := schemas.SanitizeImageURL(imageURL)
 	if err != nil {
 		return nil, fmt.Errorf("failed to sanitize image URL: %w", err)
 	}
 	urlTypeInfo := schemas.ExtractURLTypeInfo(sanitizedURL)
 
-	// Check if this is a URL-based image (not base64/data URI)
-	if urlTypeInfo.Type != schemas.ImageContentTypeBase64 || urlTypeInfo.DataURLWithoutPrefix == nil {
-		return nil, fmt.Errorf("only base64-encoded images (data URI format) are supported; remote image URLs are not allowed")
+	var encoded *string
+	var mediaType string
+	if urlTypeInfo.MediaType != nil {
+		mediaType = *urlTypeInfo.MediaType
 	}
 
-	// Determine format from media type or default to jpeg
-	format := "jpeg"
-	if urlTypeInfo.MediaType != nil {
-		switch *urlTypeInfo.MediaType {
-		case "image/png":
-			format = "png"
-		case "image/gif":
-			format = "gif"
-		case "image/webp":
-			format = "webp"
-		case "image/jpeg", "image/jpg":
-			format = "jpeg"
+	if urlTypeInfo.Type == schemas.ImageContentTypeBase64 && urlTypeInfo.DataURLWithoutPrefix != nil {
+		encoded = urlTypeInfo.DataURLWithoutPrefix
+	} else {
+		fetchedMediaType, fetchedB64, fetchErr := providerUtils.FetchAndEncodeURL(ctx, sanitizedURL)
+		if fetchErr != nil {
+			return nil, fetchErr
 		}
+		// Prefer the response Content-Type over an extension-inferred media type.
+		if fetchedMediaType != "" {
+			mediaType = fetchedMediaType
+		}
+		encoded = &fetchedB64
 	}
 
-	imageSource := &BedrockImageSource{
+	if mt, _, err := mime.ParseMediaType(mediaType); err == nil {
+		mediaType = mt
+	}
+	format := "jpeg"
+	switch mediaType {
+	case "image/png":
+		format = "png"
+	case "image/gif":
+		format = "gif"
+	case "image/webp":
+		format = "webp"
+	case "image/jpeg", "image/jpg":
+		format = "jpeg"
+	}
+
+	return &BedrockImageSource{
 		Format: format,
 		Source: BedrockImageSourceData{
-			Bytes: urlTypeInfo.DataURLWithoutPrefix,
+			Bytes: encoded,
 		},
-	}
-
-	return imageSource, nil
+	}, nil
 }
 
 // convertResponseFormatToTool converts a response_format parameter to a Bedrock tool
diff --git a/core/providers/cerebras/cachedcontents.go b/core/providers/cerebras/cachedcontents.go
new file mode 100644
index 0000000000..416676de00
--- /dev/null
+++ b/core/providers/cerebras/cachedcontents.go
@@ -0,0 +1,34 @@
+package cerebras
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is not supported by CerebrasProvider and always returns an
+// unsupported-operation error. Only Gemini and Vertex AI implement the named
+// cached-content lifecycle; other providers either lack named cache management
+// entirely or handle caching implicitly via per-message cache_control markers.
+func (provider *CerebrasProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is not supported by CerebrasProvider; it always returns an unsupported-operation error.
+func (provider *CerebrasProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is not supported by CerebrasProvider; it always returns an unsupported-operation error.
+func (provider *CerebrasProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is not supported by CerebrasProvider; it always returns an unsupported-operation error.
+func (provider *CerebrasProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is not supported by CerebrasProvider; it always returns an unsupported-operation error.
+func (provider *CerebrasProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/cohere/cachedcontents.go b/core/providers/cohere/cachedcontents.go
new file mode 100644
index 0000000000..e8e3cef794
--- /dev/null
+++ b/core/providers/cohere/cachedcontents.go
@@ -0,0 +1,34 @@
+package cohere
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is not supported by CohereProvider and always returns an
+// unsupported-operation error. Only Gemini and Vertex AI implement the named
+// cached-content lifecycle; other providers either lack named cache management
+// entirely or handle caching implicitly via per-message cache_control markers.
+func (provider *CohereProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is not supported by CohereProvider; it always returns an unsupported-operation error.
+func (provider *CohereProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is not supported by CohereProvider; it always returns an unsupported-operation error.
+func (provider *CohereProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is not supported by CohereProvider; it always returns an unsupported-operation error.
+func (provider *CohereProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is not supported by CohereProvider; it always returns an unsupported-operation error.
+func (provider *CohereProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/elevenlabs/cachedcontents.go b/core/providers/elevenlabs/cachedcontents.go
new file mode 100644
index 0000000000..802a318637
--- /dev/null
+++ b/core/providers/elevenlabs/cachedcontents.go
@@ -0,0 +1,34 @@
+package elevenlabs
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is not supported by ElevenlabsProvider and always returns an
+// unsupported-operation error. Only Gemini and Vertex AI implement the named
+// cached-content lifecycle; other providers either lack named cache management
+// entirely or handle caching implicitly via per-message cache_control markers.
+func (provider *ElevenlabsProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is not supported by ElevenlabsProvider; it always returns an unsupported-operation error.
+func (provider *ElevenlabsProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is not supported by ElevenlabsProvider; it always returns an unsupported-operation error.
+func (provider *ElevenlabsProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is not supported by ElevenlabsProvider; it always returns an unsupported-operation error.
+func (provider *ElevenlabsProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is not supported by ElevenlabsProvider; it always returns an unsupported-operation error.
+func (provider *ElevenlabsProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/fireworks/cachedcontents.go b/core/providers/fireworks/cachedcontents.go
new file mode 100644
index 0000000000..5810b4b263
--- /dev/null
+++ b/core/providers/fireworks/cachedcontents.go
@@ -0,0 +1,34 @@
+package fireworks
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is not supported by FireworksProvider and always returns an
+// unsupported-operation error. Only Gemini and Vertex AI implement the named
+// cached-content lifecycle; other providers either lack named cache management
+// entirely or handle caching implicitly via per-message cache_control markers.
+func (provider *FireworksProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is not supported by FireworksProvider; it always returns an unsupported-operation error.
+func (provider *FireworksProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is not supported by FireworksProvider; it always returns an unsupported-operation error.
+func (provider *FireworksProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is not supported by FireworksProvider; it always returns an unsupported-operation error.
+func (provider *FireworksProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is not supported by FireworksProvider; it always returns an unsupported-operation error.
+func (provider *FireworksProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/gemini/cachedcontents.go b/core/providers/gemini/cachedcontents.go
new file mode 100644
index 0000000000..7ec3d79544
--- /dev/null
+++ b/core/providers/gemini/cachedcontents.go
@@ -0,0 +1,581 @@
+package gemini
+
+import (
+	"fmt"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/bytedance/sonic"
+	"github.com/valyala/fasthttp"
+
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// geminiCachedContent mirrors the Gemini API CachedContent resource. It is
+// marshaled as the create/update request body and unmarshaled from responses.
+//
+// API ref: https://ai.google.dev/api/caching#CachedContent
+type geminiCachedContent struct {
+	Name              string         `json:"name,omitempty"`
+	DisplayName       string         `json:"displayName,omitempty"`
+	Model             string         `json:"model,omitempty"`
+	SystemInstruction any            `json:"systemInstruction,omitempty"`
+	Contents          []any          `json:"contents,omitempty"`
+	Tools             []any          `json:"tools,omitempty"`
+	ToolConfig        any            `json:"toolConfig,omitempty"`
+	CreateTime        string         `json:"createTime,omitempty"`
+	UpdateTime        string         `json:"updateTime,omitempty"`
+	ExpireTime        string         `json:"expireTime,omitempty"`
+	TTL               string         `json:"ttl,omitempty"` // set on request bodies; not copied into Bifrost objects by toBifrostObject
+	UsageMetadata     map[string]any `json:"usageMetadata,omitempty"`
+}
+
+type geminiCachedContentList struct {
+	CachedContents []geminiCachedContent `json:"cachedContents"`          // no omitempty: a nil slice marshals as null
+	NextPageToken  string                `json:"nextPageToken,omitempty"` // omitted from JSON when empty
+}
+
+// toBifrostObject copies the resource into the shared
+// schemas.CachedContentObject representation. Every field except TTL is
+// carried over unchanged.
+func (g *geminiCachedContent) toBifrostObject() schemas.CachedContentObject {
+	obj := schemas.CachedContentObject{Name: g.Name, DisplayName: g.DisplayName, Model: g.Model}
+	obj.SystemInstruction = g.SystemInstruction
+	obj.Contents = g.Contents
+	obj.Tools = g.Tools
+	obj.ToolConfig = g.ToolConfig
+	obj.CreateTime = g.CreateTime
+	obj.UpdateTime = g.UpdateTime
+	obj.ExpireTime = g.ExpireTime
+	obj.UsageMetadata = g.UsageMetadata
+	return obj
+}
+
+// cachedContentObjectToWire converts a shared CachedContentObject into the
+// Gemini camelCase wire struct, field by field. TTL is left empty because the
+// shared object is not read for it here.
+func cachedContentObjectToWire(obj schemas.CachedContentObject) geminiCachedContent {
+	var wire geminiCachedContent
+	wire.Name = obj.Name
+	wire.DisplayName = obj.DisplayName
+	wire.Model = obj.Model
+	wire.SystemInstruction = obj.SystemInstruction
+	wire.Contents = obj.Contents
+	wire.Tools = obj.Tools
+	wire.ToolConfig = obj.ToolConfig
+	wire.CreateTime = obj.CreateTime
+	wire.UpdateTime = obj.UpdateTime
+	wire.ExpireTime = obj.ExpireTime
+	wire.UsageMetadata = obj.UsageMetadata
+	return wire
+}
+
+// ToGeminiCachedContentCreateResponse converts a Bifrost create response into
+// the Gemini camelCase CachedContent wire struct; a nil resp yields nil.
+func ToGeminiCachedContentCreateResponse(resp *schemas.BifrostCachedContentCreateResponse) interface{} {
+	if resp == nil {
+		return nil
+	}
+	var wire geminiCachedContent
+	wire.Name = resp.Name
+	wire.DisplayName = resp.DisplayName
+	wire.Model = resp.Model
+	wire.SystemInstruction = resp.SystemInstruction
+	wire.Contents = resp.Contents
+	wire.Tools = resp.Tools
+	wire.ToolConfig = resp.ToolConfig
+	wire.CreateTime = resp.CreateTime
+	wire.UpdateTime = resp.UpdateTime
+	wire.ExpireTime = resp.ExpireTime
+	wire.UsageMetadata = resp.UsageMetadata
+	return wire
+}
+
+// ToGeminiCachedContentListResponse renders a Bifrost list response as the
+// Gemini wire shape (cachedContents/nextPageToken). A nil resp yields nil, and
+// an empty CachedContents list leaves the wire slice nil, so it is serialized
+// as `"cachedContents": null` because that field has no omitempty tag.
+func ToGeminiCachedContentListResponse(resp *schemas.BifrostCachedContentListResponse) interface{} {
+	if resp == nil {
+		return nil
+	}
+	var items []geminiCachedContent
+	if n := len(resp.CachedContents); n > 0 {
+		items = make([]geminiCachedContent, 0, n)
+		for _, obj := range resp.CachedContents {
+			items = append(items, cachedContentObjectToWire(obj))
+		}
+	}
+	return geminiCachedContentList{CachedContents: items, NextPageToken: resp.NextPageToken}
+}
+
+// ToGeminiCachedContentRetrieveResponse converts a Bifrost retrieve response
+// into the Gemini camelCase CachedContent wire struct; a nil resp yields nil.
+func ToGeminiCachedContentRetrieveResponse(resp *schemas.BifrostCachedContentRetrieveResponse) interface{} {
+	if resp == nil {
+		return nil
+	}
+	var wire geminiCachedContent
+	wire.Name = resp.Name
+	wire.DisplayName = resp.DisplayName
+	wire.Model = resp.Model
+	wire.SystemInstruction = resp.SystemInstruction
+	wire.Contents = resp.Contents
+	wire.Tools = resp.Tools
+	wire.ToolConfig = resp.ToolConfig
+	wire.CreateTime = resp.CreateTime
+	wire.UpdateTime = resp.UpdateTime
+	wire.ExpireTime = resp.ExpireTime
+	wire.UsageMetadata = resp.UsageMetadata
+	return wire
+}
+
+// ToGeminiCachedContentUpdateResponse converts a Bifrost update response into
+// the Gemini camelCase CachedContent wire struct; a nil resp yields nil.
+func ToGeminiCachedContentUpdateResponse(resp *schemas.BifrostCachedContentUpdateResponse) interface{} {
+	if resp == nil {
+		return nil
+	}
+	var wire geminiCachedContent
+	wire.Name = resp.Name
+	wire.DisplayName = resp.DisplayName
+	wire.Model = resp.Model
+	wire.SystemInstruction = resp.SystemInstruction
+	wire.Contents = resp.Contents
+	wire.Tools = resp.Tools
+	wire.ToolConfig = resp.ToolConfig
+	wire.CreateTime = resp.CreateTime
+	wire.UpdateTime = resp.UpdateTime
+	wire.ExpireTime = resp.ExpireTime
+	wire.UsageMetadata = resp.UsageMetadata
+	return wire
+}
+
+// ToGeminiCachedContentDeleteResponse ignores its argument and returns an empty
+// struct, so the payload serializes as `{}` instead of the Bifrost-internal shape.
+func ToGeminiCachedContentDeleteResponse(_ *schemas.BifrostCachedContentDeleteResponse) interface{} {
+	var empty struct{}
+	return empty
+}
+
+func validateTTLExpireMutex(ttl, expireTime *string) *schemas.BifrostError {
+	if ttl == nil || *ttl == "" || expireTime == nil || *expireTime == "" {
+		return nil // at most one of ttl / expire_time is set, so there is no conflict
+	}
+	return providerUtils.NewBifrostOperationError("ttl and expire_time are mutually exclusive", nil)
+}
+
+// normalizeCachedContentName returns name prefixed with "cachedContents/".
+// A name that already starts with that prefix is returned unchanged; the
+// prefix is stripped at most once before being re-added.
+func normalizeCachedContentName(name string) string {
+	return "cachedContents/" + strings.TrimPrefix(name, "cachedContents/")
+}
+
+// CachedContentCreate creates a cached content by POSTing the request as JSON to
+// {BaseURL}/cachedContents with the given key, then maps the response fields.
+func (provider *GeminiProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	if err := providerUtils.CheckOperationAllowed(schemas.Gemini, provider.customProviderConfig, schemas.CachedContentCreateRequest); err != nil {
+		return nil, err
+	}
+	if err := validateTTLExpireMutex(request.TTL, request.ExpireTime); err != nil {
+		return nil, err
+	}
+	if request.Model == "" {
+		return nil, providerUtils.NewBifrostOperationError("model is required for cached content create", nil)
+	}
+
+	model := request.Model
+	if !strings.HasPrefix(model, "models/") { // send the model as a "models/<id>" resource name
+		model = "models/" + model
+	}
+
+	body := geminiCachedContent{
+		Model:             model,
+		SystemInstruction: request.SystemInstruction,
+		Contents:          request.Contents,
+		Tools:             request.Tools,
+		ToolConfig:        request.ToolConfig,
+	}
+	if request.DisplayName != nil {
+		body.DisplayName = *request.DisplayName
+	}
+	if request.TTL != nil {
+		body.TTL = *request.TTL
+	}
+	if request.ExpireTime != nil {
+		body.ExpireTime = *request.ExpireTime
+	}
+
+	jsonBody, err := sonic.Marshal(body)
+	if err != nil {
+		return nil, providerUtils.NewBifrostOperationError("failed to marshal cached content create body", err)
+	}
+
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	requestURL := fmt.Sprintf("%s/cachedContents", provider.networkConfig.BaseURL)
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodPost)
+	req.Header.SetContentType("application/json")
+	if key.Value.GetValue() != "" { // the API key header is only sent when the key has a value
+		req.Header.Set("x-goog-api-key", key.Value.GetValue())
+	}
+	req.SetBody(jsonBody)
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait()
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is parsed as a Gemini error
+		return nil, parseGeminiError(resp)
+	}
+
+	respBody, decErr := providerUtils.CheckAndDecodeBody(resp)
+	if decErr != nil {
+		return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decErr)
+	}
+
+	var geminiResp geminiCachedContent
+	if err := sonic.Unmarshal(respBody, &geminiResp); err != nil {
+		return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	return &schemas.BifrostCachedContentCreateResponse{
+		Name:              geminiResp.Name,
+		DisplayName:       geminiResp.DisplayName,
+		Model:             geminiResp.Model,
+		SystemInstruction: geminiResp.SystemInstruction,
+		Contents:          geminiResp.Contents,
+		Tools:             geminiResp.Tools,
+		ToolConfig:        geminiResp.ToolConfig,
+		CreateTime:        geminiResp.CreateTime,
+		UpdateTime:        geminiResp.UpdateTime,
+		ExpireTime:        geminiResp.ExpireTime,
+		UsageMetadata:     geminiResp.UsageMetadata,
+		ExtraFields: schemas.BifrostResponseExtraFields{
+			Latency: latency.Milliseconds(),
+		},
+	}, nil
+}
+
+// cachedContentListByKey GETs one page of cached contents with a single key; it returns the converted page plus the request latency and leaves ExtraFields unset.
+func (provider *GeminiProvider) cachedContentListByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, time.Duration, *schemas.BifrostError) {
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	requestURL := fmt.Sprintf("%s/cachedContents", provider.networkConfig.BaseURL)
+	queryArgs := url.Values{}
+	if request.PageSize > 0 { // pageSize is only sent when positive
+		queryArgs.Set("pageSize", strconv.Itoa(request.PageSize))
+	}
+	if request.PageToken != nil && *request.PageToken != "" {
+		queryArgs.Set("pageToken", *request.PageToken)
+	}
+	if len(queryArgs) > 0 {
+		requestURL += "?" + queryArgs.Encode()
+	}
+
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodGet)
+	req.Header.SetContentType("application/json")
+	if key.Value.GetValue() != "" { // the API key header is only sent when the key has a value
+		req.Header.Set("x-goog-api-key", key.Value.GetValue())
+	}
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait()
+	if bifrostErr != nil {
+		return nil, latency, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is parsed as a Gemini error
+		return nil, latency, parseGeminiError(resp)
+	}
+
+	respBody, decErr := providerUtils.CheckAndDecodeBody(resp)
+	if decErr != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decErr)
+	}
+
+	var geminiList geminiCachedContentList
+	if err := sonic.Unmarshal(respBody, &geminiList); err != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	bifrostObjects := make([]schemas.CachedContentObject, 0, len(geminiList.CachedContents))
+	for i := range geminiList.CachedContents {
+		bifrostObjects = append(bifrostObjects, geminiList.CachedContents[i].toBifrostObject())
+	}
+
+	return &schemas.BifrostCachedContentListResponse{
+		CachedContents: bifrostObjects,
+		NextPageToken:  geminiList.NextPageToken,
+	}, latency, nil
+}
+
+// CachedContentList lists cached contents, trying keys in order; the first success is returned, otherwise the last key's error.
+func (provider *GeminiProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	if opErr := providerUtils.CheckOperationAllowed(schemas.Gemini, provider.customProviderConfig, schemas.CachedContentListRequest); opErr != nil {
+		return nil, opErr
+	}
+	if len(keys) == 0 {
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content list", nil)
+	}
+	var failure *schemas.BifrostError
+	for _, key := range keys {
+		listResp, elapsed, keyErr := provider.cachedContentListByKey(ctx, key, request)
+		if keyErr != nil {
+			failure = keyErr
+			continue
+		}
+		listResp.ExtraFields = schemas.BifrostResponseExtraFields{Latency: elapsed.Milliseconds()}
+		return listResp, nil
+	}
+	return nil, failure
+}
+
+// cachedContentRetrieveByKey GETs a single cached content with one key; it returns the mapped response plus the request latency and leaves ExtraFields unset.
+func (provider *GeminiProvider) cachedContentRetrieveByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, time.Duration, *schemas.BifrostError) {
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	name := normalizeCachedContentName(request.Name) // accepts "<id>" or "cachedContents/<id>"
+	requestURL := fmt.Sprintf("%s/%s", provider.networkConfig.BaseURL, name)
+
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodGet)
+	req.Header.SetContentType("application/json")
+	if key.Value.GetValue() != "" { // the API key header is only sent when the key has a value
+		req.Header.Set("x-goog-api-key", key.Value.GetValue())
+	}
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait()
+	if bifrostErr != nil {
+		return nil, latency, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is parsed as a Gemini error
+		return nil, latency, parseGeminiError(resp)
+	}
+
+	respBody, decErr := providerUtils.CheckAndDecodeBody(resp)
+	if decErr != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decErr)
+	}
+
+	var geminiResp geminiCachedContent
+	if err := sonic.Unmarshal(respBody, &geminiResp); err != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	return &schemas.BifrostCachedContentRetrieveResponse{
+		Name:              geminiResp.Name,
+		DisplayName:       geminiResp.DisplayName,
+		Model:             geminiResp.Model,
+		SystemInstruction: geminiResp.SystemInstruction,
+		Contents:          geminiResp.Contents,
+		Tools:             geminiResp.Tools,
+		ToolConfig:        geminiResp.ToolConfig,
+		CreateTime:        geminiResp.CreateTime,
+		UpdateTime:        geminiResp.UpdateTime,
+		ExpireTime:        geminiResp.ExpireTime,
+		UsageMetadata:     geminiResp.UsageMetadata,
+	}, latency, nil
+}
+
+// CachedContentRetrieve fetches a cached content by name, trying keys in order; the first success is returned, otherwise the last key's error.
+func (provider *GeminiProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	if opErr := providerUtils.CheckOperationAllowed(schemas.Gemini, provider.customProviderConfig, schemas.CachedContentRetrieveRequest); opErr != nil {
+		return nil, opErr
+	}
+	if request.Name == "" {
+		return nil, providerUtils.NewBifrostOperationError("name is required for cached content retrieve", nil)
+	}
+	if len(keys) == 0 {
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content retrieve", nil)
+	}
+	var failure *schemas.BifrostError
+	for _, key := range keys {
+		found, elapsed, keyErr := provider.cachedContentRetrieveByKey(ctx, key, request)
+		if keyErr != nil {
+			failure = keyErr
+			continue
+		}
+		found.ExtraFields = schemas.BifrostResponseExtraFields{Latency: elapsed.Milliseconds()}
+		return found, nil
+	}
+	return nil, failure
+}
+
+// cachedContentUpdateByKey PATCHes the named cached content with one key; ttl / expireTime are sent only when non-empty.
+func (provider *GeminiProvider) cachedContentUpdateByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, time.Duration, *schemas.BifrostError) {
+	var payload geminiCachedContent
+	var maskFields []string // field names listed in the updateMask query parameter
+	if ttl := request.TTL; ttl != nil && *ttl != "" {
+		payload.TTL = *ttl
+		maskFields = append(maskFields, "ttl")
+	}
+	if expireAt := request.ExpireTime; expireAt != nil && *expireAt != "" {
+		payload.ExpireTime = *expireAt
+		maskFields = append(maskFields, "expireTime")
+	}
+
+	encoded, encodeErr := sonic.Marshal(payload)
+	if encodeErr != nil {
+		return nil, 0, providerUtils.NewBifrostOperationError("failed to marshal cached content update body", encodeErr)
+	}
+
+	// Request and response objects are released again when this function returns.
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	endpoint := fmt.Sprintf("%s/%s", provider.networkConfig.BaseURL, normalizeCachedContentName(request.Name))
+	if len(maskFields) > 0 {
+		endpoint += "?updateMask=" + strings.Join(maskFields, ",")
+	}
+
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(endpoint)
+	req.Header.SetMethod(http.MethodPatch)
+	req.Header.SetContentType("application/json")
+	if apiKey := key.Value.GetValue(); apiKey != "" {
+		req.Header.Set("x-goog-api-key", apiKey)
+	}
+	req.SetBody(encoded)
+
+	latency, reqErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait()
+	if reqErr != nil {
+		return nil, latency, reqErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK {
+		return nil, latency, parseGeminiError(resp)
+	}
+
+	raw, decodeErr := providerUtils.CheckAndDecodeBody(resp)
+	if decodeErr != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decodeErr)
+	}
+
+	var updated geminiCachedContent
+	if err := sonic.Unmarshal(raw, &updated); err != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	return &schemas.BifrostCachedContentUpdateResponse{
+		Name:              updated.Name,
+		DisplayName:       updated.DisplayName,
+		Model:             updated.Model,
+		SystemInstruction: updated.SystemInstruction,
+		Contents:          updated.Contents,
+		Tools:             updated.Tools,
+		ToolConfig:        updated.ToolConfig,
+		CreateTime:        updated.CreateTime,
+		UpdateTime:        updated.UpdateTime,
+		ExpireTime:        updated.ExpireTime,
+		UsageMetadata:     updated.UsageMetadata,
+	}, latency, nil
+}
+
+// CachedContentUpdate changes the expiration (ttl or expire_time) of a cached content. Keys are tried
+// in order: first success wins, else the last key's error. A nil request is rejected like an empty name.
+func (provider *GeminiProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	if err := providerUtils.CheckOperationAllowed(schemas.Gemini, provider.customProviderConfig, schemas.CachedContentUpdateRequest); err != nil {
+		return nil, err
+	}
+	if request == nil || request.Name == "" {
+		return nil, providerUtils.NewBifrostOperationError("name is required for cached content update", nil)
+	}
+	if err := validateTTLExpireMutex(request.TTL, request.ExpireTime); err != nil {
+		return nil, err
+	}
+	if (request.TTL == nil || *request.TTL == "") && (request.ExpireTime == nil || *request.ExpireTime == "") {
+		return nil, providerUtils.NewBifrostOperationError("either ttl or expire_time must be set for cached content update", nil)
+	}
+	if len(keys) == 0 {
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content update", nil)
+	}
+	var lastErr *schemas.BifrostError
+	for _, key := range keys {
+		resp, latency, bifrostErr := provider.cachedContentUpdateByKey(ctx, key, request)
+		if bifrostErr == nil {
+			resp.ExtraFields = schemas.BifrostResponseExtraFields{Latency: latency.Milliseconds()}
+			return resp, nil
+		}
+		lastErr = bifrostErr
+	}
+	return nil, lastErr
+}
+
+// cachedContentDeleteByKey issues a DELETE for the named cached content with one key; HTTP 200 and 204 both count as deleted.
+func (provider *GeminiProvider) cachedContentDeleteByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, time.Duration, *schemas.BifrostError) {
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	name := normalizeCachedContentName(request.Name)
+	requestURL := fmt.Sprintf("%s/%s", provider.networkConfig.BaseURL, name)
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodDelete)
+	if key.Value.GetValue() != "" {
+		req.Header.Set("x-goog-api-key", key.Value.GetValue())
+	}
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait()
+	if bifrostErr != nil {
+		return nil, latency, bifrostErr
+	}
+	// A successful DELETE may carry no body, so accept 204 No Content as well as 200 OK.
+	if status := resp.StatusCode(); status != fasthttp.StatusOK && status != fasthttp.StatusNoContent {
+		return nil, latency, parseGeminiError(resp)
+	}
+
+	return &schemas.BifrostCachedContentDeleteResponse{
+		Name:    name,
+		Deleted: true,
+	}, latency, nil
+}
+
+// CachedContentDelete deletes a cached content by name. Keys are tried in order: the first
+// success is returned, otherwise the last key's error. A nil request is rejected like an empty name.
+func (provider *GeminiProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	if err := providerUtils.CheckOperationAllowed(schemas.Gemini, provider.customProviderConfig, schemas.CachedContentDeleteRequest); err != nil {
+		return nil, err
+	}
+	if request == nil || request.Name == "" {
+		return nil, providerUtils.NewBifrostOperationError("name is required for cached content delete", nil)
+	}
+	if len(keys) == 0 {
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content delete", nil)
+	}
+	var lastErr *schemas.BifrostError
+	for _, key := range keys {
+		resp, latency, bifrostErr := provider.cachedContentDeleteByKey(ctx, key, request)
+		if bifrostErr == nil {
+			resp.ExtraFields = schemas.BifrostResponseExtraFields{Latency: latency.Milliseconds()}
+			return resp, nil
+		}
+		lastErr = bifrostErr
+	}
+	return nil, lastErr
+}
diff --git a/core/providers/groq/cachedcontents.go b/core/providers/groq/cachedcontents.go
new file mode 100644
index 0000000000..b7a8d7eef1
--- /dev/null
+++ b/core/providers/groq/cachedcontents.go
@@ -0,0 +1,34 @@
+package groq
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on GroqProvider: it ignores its arguments and always returns a
+// nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *GroqProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on GroqProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *GroqProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on GroqProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *GroqProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on GroqProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *GroqProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on GroqProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *GroqProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/huggingface/cachedcontents.go b/core/providers/huggingface/cachedcontents.go
new file mode 100644
index 0000000000..7da51f6061
--- /dev/null
+++ b/core/providers/huggingface/cachedcontents.go
@@ -0,0 +1,34 @@
+package huggingface
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on HuggingFaceProvider: it ignores its arguments and always returns
+// a nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *HuggingFaceProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on HuggingFaceProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *HuggingFaceProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on HuggingFaceProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *HuggingFaceProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on HuggingFaceProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *HuggingFaceProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on HuggingFaceProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *HuggingFaceProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/mistral/cachedcontents.go b/core/providers/mistral/cachedcontents.go
new file mode 100644
index 0000000000..8a650487ed
--- /dev/null
+++ b/core/providers/mistral/cachedcontents.go
@@ -0,0 +1,34 @@
+package mistral
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on MistralProvider: it ignores its arguments and always returns a
+// nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *MistralProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on MistralProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *MistralProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on MistralProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *MistralProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on MistralProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *MistralProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on MistralProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *MistralProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/nebius/cachedcontents.go b/core/providers/nebius/cachedcontents.go
new file mode 100644
index 0000000000..894e162c5f
--- /dev/null
+++ b/core/providers/nebius/cachedcontents.go
@@ -0,0 +1,34 @@
+package nebius
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on NebiusProvider: it ignores its arguments and always returns a
+// nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *NebiusProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on NebiusProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *NebiusProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on NebiusProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *NebiusProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on NebiusProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *NebiusProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on NebiusProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *NebiusProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/ollama/cachedcontents.go b/core/providers/ollama/cachedcontents.go
new file mode 100644
index 0000000000..f3933502a0
--- /dev/null
+++ b/core/providers/ollama/cachedcontents.go
@@ -0,0 +1,34 @@
+package ollama
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on OllamaProvider: it ignores its arguments and always returns a
+// nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *OllamaProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on OllamaProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OllamaProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on OllamaProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OllamaProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on OllamaProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OllamaProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on OllamaProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OllamaProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/openai/cachedcontents.go b/core/providers/openai/cachedcontents.go
new file mode 100644
index 0000000000..4542f8b597
--- /dev/null
+++ b/core/providers/openai/cachedcontents.go
@@ -0,0 +1,34 @@
+package openai
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on OpenAIProvider: it ignores its arguments and always returns a
+// nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *OpenAIProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on OpenAIProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenAIProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on OpenAIProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenAIProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on OpenAIProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenAIProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on OpenAIProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenAIProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/openrouter/cachedcontents.go b/core/providers/openrouter/cachedcontents.go
new file mode 100644
index 0000000000..d310838389
--- /dev/null
+++ b/core/providers/openrouter/cachedcontents.go
@@ -0,0 +1,34 @@
+package openrouter
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on OpenRouterProvider: it ignores its arguments and always returns
+// a nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *OpenRouterProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on OpenRouterProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenRouterProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on OpenRouterProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenRouterProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on OpenRouterProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenRouterProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on OpenRouterProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *OpenRouterProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/parasail/cachedcontents.go b/core/providers/parasail/cachedcontents.go
new file mode 100644
index 0000000000..bcec0c6a39
--- /dev/null
+++ b/core/providers/parasail/cachedcontents.go
@@ -0,0 +1,34 @@
+package parasail
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on ParasailProvider: it ignores its arguments and always returns a
+// nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *ParasailProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on ParasailProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *ParasailProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on ParasailProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *ParasailProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on ParasailProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *ParasailProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on ParasailProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *ParasailProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/perplexity/cachedcontents.go b/core/providers/perplexity/cachedcontents.go
new file mode 100644
index 0000000000..9c2608f5ee
--- /dev/null
+++ b/core/providers/perplexity/cachedcontents.go
@@ -0,0 +1,34 @@
+package perplexity
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on PerplexityProvider: it ignores its arguments and always returns
+// a nil response plus providerUtils.NewUnsupportedOperationError for this provider's key. Only Gemini
+// and Vertex AI implement the named cached-content lifecycle (Google AI Studio + Vertex AI); other
+// providers lack named cache management or cache implicitly via per-message cache_control markers.
+func (provider *PerplexityProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is unsupported on PerplexityProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *PerplexityProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is unsupported on PerplexityProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *PerplexityProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is unsupported on PerplexityProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *PerplexityProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is unsupported on PerplexityProvider; it always returns a nil response and an unsupported-operation error (see CachedContentCreate).
+func (provider *PerplexityProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/replicate/cachedcontents.go b/core/providers/replicate/cachedcontents.go
new file mode 100644
index 0000000000..01a2cb9739
--- /dev/null
+++ b/core/providers/replicate/cachedcontents.go
@@ -0,0 +1,34 @@
+package replicate
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on ReplicateProvider and always returns an
+// unsupported-operation error. Only Gemini and Vertex AI implement the named
+// cached-content lifecycle; other providers either lack named cache management
+// entirely or handle caching implicitly via per-message cache_control markers.
+func (provider *ReplicateProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is not supported by ReplicateProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *ReplicateProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is not supported by ReplicateProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *ReplicateProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is not supported by ReplicateProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *ReplicateProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is not supported by ReplicateProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *ReplicateProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/runway/cachedcontents.go b/core/providers/runway/cachedcontents.go
new file mode 100644
index 0000000000..1a3f54cd52
--- /dev/null
+++ b/core/providers/runway/cachedcontents.go
@@ -0,0 +1,34 @@
+package runway
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on RunwayProvider and always returns an
+// unsupported-operation error. Only Gemini and Vertex AI implement the named
+// cached-content lifecycle; other providers either lack named cache management
+// entirely or handle caching implicitly via per-message cache_control markers.
+func (provider *RunwayProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is not supported by RunwayProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *RunwayProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is not supported by RunwayProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *RunwayProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is not supported by RunwayProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *RunwayProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is not supported by RunwayProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *RunwayProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/sgl/cachedcontents.go b/core/providers/sgl/cachedcontents.go
new file mode 100644
index 0000000000..bf15ab3843
--- /dev/null
+++ b/core/providers/sgl/cachedcontents.go
@@ -0,0 +1,34 @@
+package sgl
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is unsupported on SGLProvider and always returns an
+// unsupported-operation error. Only Gemini and Vertex AI implement the named
+// cached-content lifecycle; other providers either lack named cache management
+// entirely or handle caching implicitly via per-message cache_control markers.
+func (provider *SGLProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList is not supported by SGLProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *SGLProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve is not supported by SGLProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *SGLProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate is not supported by SGLProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *SGLProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete is not supported by SGLProvider; it always returns an unsupported-operation error (see CachedContentCreate).
+func (provider *SGLProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/utils/fetch.go b/core/providers/utils/fetch.go
new file mode 100644
index 0000000000..709f241703
--- /dev/null
+++ b/core/providers/utils/fetch.go
@@ -0,0 +1,118 @@
+package utils
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"net/netip"
+	"net/url"
+	"strings"
+	"time"
+)
+
+// FetchAndEncodeURL downloads a remote resource (image, document, etc.) and
+// returns its Content-Type (parameters stripped) plus the base64-encoded body.
+// Used by providers (Bedrock Converse, Anthropic-on-Vertex) whose upstream
+// surface only accepts inline bytes, not remote URLs. Bounded by a 20s overall
+// timeout and a 25 MiB body cap; non-2xx responses are errors. ctx is honored
+// for cancellation and deadlines; pass context.Background() if none is available.
+//
+// SSRF-hardened: only http/https schemes are accepted, and the dialer rejects
+// hosts that resolve to any non-public address (see isPublicIP). The check runs
+// at dial time and the validated IP is dialed directly, so DNS rebinding cannot
+// bypass it. Redirect targets go through the same scheme + dial-time checks.
+// Each call uses its own transport with keep-alives disabled (no idle conns).
+func FetchAndEncodeURL(ctx context.Context, resourceURL string) (mediaType string, encoded string, err error) {
+	const maxBytes int64 = 25 * 1024 * 1024
+
+	parsed, err := url.Parse(resourceURL)
+	if err != nil {
+		return "", "", fmt.Errorf("invalid resource URL %q: %w", resourceURL, err)
+	}
+	if parsed.Scheme != "http" && parsed.Scheme != "https" {
+		return "", "", fmt.Errorf("unsupported URL scheme %q (only http/https allowed)", parsed.Scheme)
+	}
+
+	dialer := &net.Dialer{Timeout: 10 * time.Second}
+	transport := &http.Transport{
+		// Single-use transport: pooled keep-alive conns would never be reused, only leaked.
+		DisableKeepAlives: true,
+		DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+			host, port, splitErr := net.SplitHostPort(addr)
+			if splitErr != nil {
+				return nil, splitErr
+			}
+			ips, lookupErr := (&net.Resolver{}).LookupIP(ctx, "ip", host)
+			if lookupErr != nil {
+				return nil, lookupErr
+			}
+			for _, ip := range ips {
+				if !isPublicIP(ip) {
+					return nil, fmt.Errorf("blocked fetch to non-public address %s", ip.String())
+				}
+			}
+			// Dial the first validated IP directly to close the DNS-rebinding TOCTOU.
+			return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].String(), port))
+		},
+	}
+	client := &http.Client{
+		Timeout:   20 * time.Second,
+		Transport: transport,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			if req.URL.Scheme != "http" && req.URL.Scheme != "https" {
+				return fmt.Errorf("blocked redirect to unsupported scheme %q", req.URL.Scheme)
+			}
+			if len(via) >= 10 {
+				return fmt.Errorf("stopped after 10 redirects")
+			}
+			return nil
+		},
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, resourceURL, nil)
+	if err != nil {
+		return "", "", fmt.Errorf("invalid resource URL %q: %w", resourceURL, err)
+	}
+	req.Header.Set("User-Agent", "bifrost-fetch/1")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", "", fmt.Errorf("failed to fetch from %q: %w", resourceURL, err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return "", "", fmt.Errorf("fetch %q returned non-2xx status %d", resourceURL, resp.StatusCode)
+	}
+
+	body, err := io.ReadAll(io.LimitReader(resp.Body, maxBytes+1)) // +1 byte so an oversized body is detectable
+	if err != nil {
+		return "", "", fmt.Errorf("failed to read body from %q: %w", resourceURL, err)
+	}
+	if int64(len(body)) > maxBytes {
+		return "", "", fmt.Errorf("resource at %q exceeds %d-byte limit", resourceURL, maxBytes)
+	}
+
+	mediaType, _, _ = strings.Cut(resp.Header.Get("Content-Type"), ";") // drop "; charset=..." style parameters
+	mediaType = strings.TrimSpace(mediaType)
+
+	return mediaType, base64.StdEncoding.EncodeToString(body), nil
+}
+
+// isPublicIP reports whether the given IP address is safe to fetch from, i.e. not
+// loopback, private/unique-local, CGNAT (RFC 6598), link-local, multicast, or unspecified.
+func isPublicIP(ip net.IP) bool {
+	addr, ok := netip.AddrFromSlice(ip)
+	if !ok { // nil or wrong-length slice
+		return false
+	}
+	addr = addr.Unmap() // judge ::ffff:a.b.c.d by the IPv4 address it wraps
+	if addr.IsLoopback() || addr.IsPrivate() || addr.IsLinkLocalUnicast() || addr.IsMulticast() ||
+		addr.IsUnspecified() || netip.MustParsePrefix("100.64.0.0/10").Contains(addr) {
+		return false
+	}
+	return true
+}
diff --git a/core/providers/vertex/cachedcontents.go b/core/providers/vertex/cachedcontents.go
new file mode 100644
index 0000000000..4f04e005ce
--- /dev/null
+++ b/core/providers/vertex/cachedcontents.go
@@ -0,0 +1,538 @@
+package vertex
+
+import (
+	"fmt"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/bytedance/sonic"
+	"github.com/valyala/fasthttp"
+
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// vertexCachedContent is the JSON wire shape of a Vertex AI CachedContent resource,
+// used both as the request body (create/update) and to decode responses. Keys are
+// camelCase; on create, Model carries the full publisher path (see expandVertexModelPath).
+//
+// API ref: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create
+type vertexCachedContent struct {
+	Name              string         `json:"name,omitempty"`
+	DisplayName       string         `json:"displayName,omitempty"`
+	Model             string         `json:"model,omitempty"`
+	SystemInstruction any            `json:"systemInstruction,omitempty"`
+	Contents          []any          `json:"contents,omitempty"`
+	Tools             []any          `json:"tools,omitempty"`
+	ToolConfig        any            `json:"toolConfig,omitempty"`
+	CreateTime        string         `json:"createTime,omitempty"`
+	UpdateTime        string         `json:"updateTime,omitempty"`
+	ExpireTime        string         `json:"expireTime,omitempty"`
+	TTL               string         `json:"ttl,omitempty"` // set on create/update bodies; not copied by toBifrostObject
+	UsageMetadata     map[string]any `json:"usageMetadata,omitempty"`
+}
+
+type vertexCachedContentList struct {
+	CachedContents []vertexCachedContent `json:"cachedContents"`          // entries decoded from the list response
+	NextPageToken  string                `json:"nextPageToken,omitempty"` // forwarded as the Bifrost response's NextPageToken
+}
+
+func (v *vertexCachedContent) toBifrostObject() schemas.CachedContentObject {
+	// Copy every field except TTL, which this conversion
+	// does not carry over to the Bifrost object.
+	var obj schemas.CachedContentObject
+	obj.Name = v.Name
+	obj.DisplayName = v.DisplayName
+	obj.Model = v.Model
+	obj.SystemInstruction = v.SystemInstruction
+	obj.Contents = v.Contents
+	obj.Tools = v.Tools
+	obj.ToolConfig = v.ToolConfig
+	obj.CreateTime, obj.UpdateTime, obj.ExpireTime = v.CreateTime, v.UpdateTime, v.ExpireTime
+	obj.UsageMetadata = v.UsageMetadata
+	return obj
+}
+
+func validateVertexTTLExpireMutex(ttl, expireTime *string) *schemas.BifrostError {
+	if ttl == nil || *ttl == "" || expireTime == nil || *expireTime == "" {
+		return nil // at most one of the two is set, so there is no conflict
+	}
+	return providerUtils.NewBifrostOperationError("ttl and expire_time are mutually exclusive", nil)
+}
+
+// expandVertexCachedContentName returns the full Vertex resource path for a cached
+// content. A bare id ("abc123") or "cachedContents/abc123" is placed under
+// "projects/{p}/locations/{l}/cachedContents/"; names starting with "projects/" are returned as is.
+func expandVertexCachedContentName(name, projectID, region string) string {
+	if !strings.HasPrefix(name, "projects/") {
+		id := strings.TrimPrefix(name, "cachedContents/")
+		name = fmt.Sprintf("projects/%s/locations/%s/cachedContents/%s", projectID, region, id)
+	}
+	return name
+}
+
+// expandVertexModelPath returns the full Vertex publisher path for a model. Bare ids
+// ("gemini-2.5-pro") and "models/{id}" are expanded; "projects/..." paths are returned as is.
+func expandVertexModelPath(model, projectID, region string) string {
+	if !strings.HasPrefix(model, "projects/") {
+		id := strings.TrimPrefix(model, "models/")
+		model = fmt.Sprintf("projects/%s/locations/%s/publishers/google/models/%s", projectID, region, id)
+	}
+	return model
+}
+
+// vertexAuthHeaders obtains an access token for the key and sets it as the request's Bearer Authorization header.
+func vertexAuthHeaders(req *fasthttp.Request, key schemas.Key) *schemas.BifrostError {
+	src, srcErr := getAuthTokenSource(key)
+	if srcErr != nil {
+		return providerUtils.NewBifrostOperationError("error creating auth token source", srcErr)
+	}
+	tok, tokErr := src.Token()
+	if tokErr != nil {
+		return providerUtils.NewBifrostOperationError("error getting auth token", tokErr)
+	}
+	req.Header.Set("Authorization", "Bearer "+tok.AccessToken)
+	return nil
+}
+
+// vertexCachedContentBaseURL returns the v1 cachedContents collection URL for the
+// given region and project, built on top of getVertexProjectLocationURL.
+func vertexCachedContentBaseURL(region, projectID string) string {
+	return getVertexProjectLocationURL(region, "v1", projectID) + "/cachedContents"
+}
+
+// CachedContentCreate creates a new cached content by POSTing to Vertex AI's
+// /v1/projects/{p}/locations/{l}/cachedContents endpoint using the given key.
+func (provider *VertexProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	if err := validateVertexTTLExpireMutex(request.TTL, request.ExpireTime); err != nil {
+		return nil, err
+	}
+	if request.Model == "" {
+		return nil, providerUtils.NewBifrostOperationError("model is required for cached content create", nil)
+	}
+
+	projectID := key.VertexKeyConfig.ProjectID.GetValue()
+	if projectID == "" {
+		return nil, providerUtils.NewConfigurationError("project_id is not set in vertex key config")
+	}
+	region := key.VertexKeyConfig.Region.GetValue()
+	if region == "" {
+		return nil, providerUtils.NewConfigurationError("region is not set in vertex key config")
+	}
+
+	body := vertexCachedContent{
+		Model:             expandVertexModelPath(request.Model, projectID, region), // bare ids become the full publisher path
+		SystemInstruction: request.SystemInstruction,
+		Contents:          request.Contents,
+		Tools:             request.Tools,
+		ToolConfig:        request.ToolConfig,
+	}
+	if request.DisplayName != nil {
+		body.DisplayName = *request.DisplayName
+	}
+	if request.TTL != nil {
+		body.TTL = *request.TTL
+	}
+	if request.ExpireTime != nil {
+		body.ExpireTime = *request.ExpireTime
+	}
+
+	jsonBody, err := sonic.Marshal(body)
+	if err != nil {
+		return nil, providerUtils.NewBifrostOperationError("failed to marshal cached content create body", err)
+	}
+
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	requestURL := vertexCachedContentBaseURL(region, projectID)
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodPost)
+	req.Header.SetContentType("application/json")
+	if authErr := vertexAuthHeaders(req, key); authErr != nil {
+		return nil, authErr
+	}
+	req.SetBody(jsonBody)
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait() // deferred last, so it runs before the Release* defers above (LIFO)
+	if bifrostErr != nil {
+		return nil, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is reported as a provider API error
+		return nil, parseVertexCachedContentError(resp)
+	}
+
+	respBody, decErr := providerUtils.CheckAndDecodeBody(resp)
+	if decErr != nil {
+		return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decErr)
+	}
+
+	var vResp vertexCachedContent
+	if err := sonic.Unmarshal(respBody, &vResp); err != nil {
+		return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	return &schemas.BifrostCachedContentCreateResponse{
+		Name:              vResp.Name,
+		DisplayName:       vResp.DisplayName,
+		Model:             vResp.Model,
+		SystemInstruction: vResp.SystemInstruction,
+		Contents:          vResp.Contents,
+		Tools:             vResp.Tools,
+		ToolConfig:        vResp.ToolConfig,
+		CreateTime:        vResp.CreateTime,
+		UpdateTime:        vResp.UpdateTime,
+		ExpireTime:        vResp.ExpireTime,
+		UsageMetadata:     vResp.UsageMetadata,
+		ExtraFields: schemas.BifrostResponseExtraFields{
+			Latency: latency.Milliseconds(),
+		},
+	}, nil
+}
+
+func (provider *VertexProvider) cachedContentListByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, time.Duration, *schemas.BifrostError) {
+	projectID := key.VertexKeyConfig.ProjectID.GetValue()
+	if projectID == "" {
+		return nil, 0, providerUtils.NewConfigurationError("project_id is not set in vertex key config")
+	}
+	region := key.VertexKeyConfig.Region.GetValue()
+	if region == "" {
+		return nil, 0, providerUtils.NewConfigurationError("region is not set in vertex key config")
+	}
+
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	requestURL := vertexCachedContentBaseURL(region, projectID)
+	queryArgs := url.Values{}
+	if request.PageSize > 0 { // non-positive page sizes are left out of the query
+		queryArgs.Set("pageSize", strconv.Itoa(request.PageSize))
+	}
+	if request.PageToken != nil && *request.PageToken != "" {
+		queryArgs.Set("pageToken", *request.PageToken)
+	}
+	if len(queryArgs) > 0 {
+		requestURL += "?" + queryArgs.Encode()
+	}
+
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodGet)
+	req.Header.SetContentType("application/json")
+	if authErr := vertexAuthHeaders(req, key); authErr != nil {
+		return nil, 0, authErr
+	}
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait() // deferred last, so it runs before the Release* defers above (LIFO)
+	if bifrostErr != nil {
+		return nil, latency, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is reported as a provider API error
+		return nil, latency, parseVertexCachedContentError(resp)
+	}
+
+	respBody, decErr := providerUtils.CheckAndDecodeBody(resp)
+	if decErr != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decErr)
+	}
+
+	var vList vertexCachedContentList
+	if err := sonic.Unmarshal(respBody, &vList); err != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	bifrostObjects := make([]schemas.CachedContentObject, 0, len(vList.CachedContents))
+	for i := range vList.CachedContents {
+		bifrostObjects = append(bifrostObjects, vList.CachedContents[i].toBifrostObject())
+	}
+
+	return &schemas.BifrostCachedContentListResponse{ // ExtraFields is left for the caller (CachedContentList) to fill in
+		CachedContents: bifrostObjects,
+		NextPageToken:  vList.NextPageToken,
+	}, latency, nil
+}
+
+// CachedContentList tries each key in order and returns the first successful listing, else the last key's error.
+func (provider *VertexProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	if len(keys) == 0 {
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content list", nil)
+	}
+	var lastErr *schemas.BifrostError
+	for _, key := range keys {
+		listed, elapsed, keyErr := provider.cachedContentListByKey(ctx, key, request)
+		if lastErr = keyErr; keyErr != nil {
+			continue
+		}
+		listed.ExtraFields = schemas.BifrostResponseExtraFields{Latency: elapsed.Milliseconds()}
+		return listed, nil
+	}
+	return nil, lastErr
+}
+
+func (provider *VertexProvider) cachedContentRetrieveByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, time.Duration, *schemas.BifrostError) {
+	projectID := key.VertexKeyConfig.ProjectID.GetValue()
+	if projectID == "" {
+		return nil, 0, providerUtils.NewConfigurationError("project_id is not set in vertex key config")
+	}
+	region := key.VertexKeyConfig.Region.GetValue()
+	if region == "" {
+		return nil, 0, providerUtils.NewConfigurationError("region is not set in vertex key config")
+	}
+
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	name := expandVertexCachedContentName(request.Name, projectID, region) // accepts a bare id, "cachedContents/{id}", or a full path
+	requestURL := fmt.Sprintf("%s/%s", getVertexAPIBaseURL(region, "v1"), name)
+
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodGet)
+	req.Header.SetContentType("application/json")
+	if authErr := vertexAuthHeaders(req, key); authErr != nil {
+		return nil, 0, authErr
+	}
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait() // deferred last, so it runs before the Release* defers above (LIFO)
+	if bifrostErr != nil {
+		return nil, latency, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is reported as a provider API error
+		return nil, latency, parseVertexCachedContentError(resp)
+	}
+
+	respBody, decErr := providerUtils.CheckAndDecodeBody(resp)
+	if decErr != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decErr)
+	}
+
+	var vResp vertexCachedContent
+	if err := sonic.Unmarshal(respBody, &vResp); err != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	return &schemas.BifrostCachedContentRetrieveResponse{ // ExtraFields is left for the caller (CachedContentRetrieve) to fill in
+		Name:              vResp.Name,
+		DisplayName:       vResp.DisplayName,
+		Model:             vResp.Model,
+		SystemInstruction: vResp.SystemInstruction,
+		Contents:          vResp.Contents,
+		Tools:             vResp.Tools,
+		ToolConfig:        vResp.ToolConfig,
+		CreateTime:        vResp.CreateTime,
+		UpdateTime:        vResp.UpdateTime,
+		ExpireTime:        vResp.ExpireTime,
+		UsageMetadata:     vResp.UsageMetadata,
+	}, latency, nil
+}
+
+// CachedContentRetrieve fetches one cached content by name, trying each key in order until one succeeds.
+func (provider *VertexProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	switch {
+	case request.Name == "":
+		return nil, providerUtils.NewBifrostOperationError("name is required for cached content retrieve", nil)
+	case len(keys) == 0:
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content retrieve", nil)
+	}
+	var lastErr *schemas.BifrostError
+	for _, key := range keys {
+		found, elapsed, keyErr := provider.cachedContentRetrieveByKey(ctx, key, request)
+		if lastErr = keyErr; keyErr != nil {
+			continue
+		}
+		found.ExtraFields = schemas.BifrostResponseExtraFields{Latency: elapsed.Milliseconds()}
+		return found, nil
+	}
+	return nil, lastErr
+}
+
+func (provider *VertexProvider) cachedContentUpdateByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, time.Duration, *schemas.BifrostError) {
+	projectID := key.VertexKeyConfig.ProjectID.GetValue()
+	if projectID == "" {
+		return nil, 0, providerUtils.NewConfigurationError("project_id is not set in vertex key config")
+	}
+	region := key.VertexKeyConfig.Region.GetValue()
+	if region == "" {
+		return nil, 0, providerUtils.NewConfigurationError("region is not set in vertex key config")
+	}
+
+	body := vertexCachedContent{}
+	updateMaskFields := []string{} // names of the body fields actually being changed
+	if request.TTL != nil && *request.TTL != "" {
+		body.TTL = *request.TTL
+		updateMaskFields = append(updateMaskFields, "ttl")
+	}
+	if request.ExpireTime != nil && *request.ExpireTime != "" {
+		body.ExpireTime = *request.ExpireTime
+		updateMaskFields = append(updateMaskFields, "expireTime")
+	}
+
+	jsonBody, marshalErr := sonic.Marshal(body)
+	if marshalErr != nil {
+		return nil, 0, providerUtils.NewBifrostOperationError("failed to marshal cached content update body", marshalErr)
+	}
+
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	name := expandVertexCachedContentName(request.Name, projectID, region)
+	requestURL := fmt.Sprintf("%s/%s", getVertexAPIBaseURL(region, "v1"), name)
+	if len(updateMaskFields) > 0 { // CachedContentUpdate rejects requests that set neither field, so this normally holds
+		requestURL += "?updateMask=" + strings.Join(updateMaskFields, ",")
+	}
+
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodPatch)
+	req.Header.SetContentType("application/json")
+	if authErr := vertexAuthHeaders(req, key); authErr != nil {
+		return nil, 0, authErr
+	}
+	req.SetBody(jsonBody)
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait() // deferred last, so it runs before the Release* defers above (LIFO)
+	if bifrostErr != nil {
+		return nil, latency, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is reported as a provider API error
+		return nil, latency, parseVertexCachedContentError(resp)
+	}
+
+	respBody, decErr := providerUtils.CheckAndDecodeBody(resp)
+	if decErr != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, decErr)
+	}
+
+	var vResp vertexCachedContent
+	if err := sonic.Unmarshal(respBody, &vResp); err != nil {
+		return nil, latency, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err)
+	}
+
+	return &schemas.BifrostCachedContentUpdateResponse{ // ExtraFields is left for the caller (CachedContentUpdate) to fill in
+		Name:              vResp.Name,
+		DisplayName:       vResp.DisplayName,
+		Model:             vResp.Model,
+		SystemInstruction: vResp.SystemInstruction,
+		Contents:          vResp.Contents,
+		Tools:             vResp.Tools,
+		ToolConfig:        vResp.ToolConfig,
+		CreateTime:        vResp.CreateTime,
+		UpdateTime:        vResp.UpdateTime,
+		ExpireTime:        vResp.ExpireTime,
+		UsageMetadata:     vResp.UsageMetadata,
+	}, latency, nil
+}
+
+// CachedContentUpdate changes a cached content's ttl or expire_time (exactly one), trying each key in order.
+func (provider *VertexProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	if request.Name == "" {
+		return nil, providerUtils.NewBifrostOperationError("name is required for cached content update", nil)
+	}
+	if mutexErr := validateVertexTTLExpireMutex(request.TTL, request.ExpireTime); mutexErr != nil {
+		return nil, mutexErr
+	}
+	if !(request.TTL != nil && *request.TTL != "") && !(request.ExpireTime != nil && *request.ExpireTime != "") {
+		return nil, providerUtils.NewBifrostOperationError("either ttl or expire_time must be set for cached content update", nil)
+	}
+	if len(keys) == 0 {
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content update", nil)
+	}
+	var lastErr *schemas.BifrostError
+	for _, key := range keys {
+		updated, elapsed, keyErr := provider.cachedContentUpdateByKey(ctx, key, request)
+		if lastErr = keyErr; keyErr != nil {
+			continue
+		}
+		updated.ExtraFields = schemas.BifrostResponseExtraFields{Latency: elapsed.Milliseconds()}
+		return updated, nil
+	}
+	return nil, lastErr
+}
+
+func (provider *VertexProvider) cachedContentDeleteByKey(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, time.Duration, *schemas.BifrostError) {
+	projectID := key.VertexKeyConfig.ProjectID.GetValue()
+	if projectID == "" {
+		return nil, 0, providerUtils.NewConfigurationError("project_id is not set in vertex key config")
+	}
+	region := key.VertexKeyConfig.Region.GetValue()
+	if region == "" {
+		return nil, 0, providerUtils.NewConfigurationError("region is not set in vertex key config")
+	}
+
+	req := fasthttp.AcquireRequest()
+	resp := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseRequest(req)
+	defer fasthttp.ReleaseResponse(resp)
+
+	name := expandVertexCachedContentName(request.Name, projectID, region)
+	requestURL := fmt.Sprintf("%s/%s", getVertexAPIBaseURL(region, "v1"), name)
+
+	providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil)
+	req.SetRequestURI(requestURL)
+	req.Header.SetMethod(http.MethodDelete)
+	if authErr := vertexAuthHeaders(req, key); authErr != nil {
+		return nil, 0, authErr
+	}
+
+	latency, bifrostErr, wait := providerUtils.MakeRequestWithContext(ctx, provider.client, req, resp)
+	defer wait() // deferred last, so it runs before the Release* defers above (LIFO)
+	if bifrostErr != nil {
+		return nil, latency, bifrostErr
+	}
+	if resp.StatusCode() != fasthttp.StatusOK { // any status other than 200 is reported as a provider API error
+		return nil, latency, parseVertexCachedContentError(resp)
+	}
+
+	return &schemas.BifrostCachedContentDeleteResponse{ // the response body is not read; success is inferred from the 200
+		Name:    name, // the expanded resource path, not necessarily the caller's original input
+		Deleted: true,
+	}, latency, nil
+}
+
+// CachedContentDelete deletes one cached content by name, trying each key in order until one succeeds.
+func (provider *VertexProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	switch {
+	case request.Name == "":
+		return nil, providerUtils.NewBifrostOperationError("name is required for cached content delete", nil)
+	case len(keys) == 0:
+		return nil, providerUtils.NewBifrostOperationError("no keys provided for cached content delete", nil)
+	}
+	var lastErr *schemas.BifrostError
+	for _, key := range keys {
+		deleted, elapsed, keyErr := provider.cachedContentDeleteByKey(ctx, key, request)
+		if lastErr = keyErr; keyErr != nil {
+			continue
+		}
+		deleted.ExtraFields = schemas.BifrostResponseExtraFields{Latency: elapsed.Milliseconds()}
+		return deleted, nil
+	}
+	return nil, lastErr
+}
+
+// parseVertexCachedContentError turns a non-OK Vertex response into a BifrostError. The message is the
+// API's error.message when the body parses as a VertexError with one, otherwise the raw body text.
+func parseVertexCachedContentError(resp *fasthttp.Response) *schemas.BifrostError {
+	raw, status := resp.Body(), resp.StatusCode()
+	message := string(raw)
+	var parsed VertexError
+	if sonic.Unmarshal(raw, &parsed) == nil && parsed.Error.Message != "" {
+		message = parsed.Error.Message
+	}
+	return providerUtils.NewProviderAPIError(message, nil, status, nil, nil)
+}
diff --git a/core/providers/vertex/vertex.go b/core/providers/vertex/vertex.go
index 5cf1fc77de..27e1e915fb 100644
--- a/core/providers/vertex/vertex.go
+++ b/core/providers/vertex/vertex.go
@@ -379,6 +379,40 @@ func (provider *VertexProvider) TextCompletionStream(ctx *schemas.BifrostContext
 	return nil, providerUtils.NewUnsupportedOperationError(schemas.TextCompletionStreamRequest, provider.GetProviderKey())
 }
 
+// inlineDocumentURLs rewrites every file content block that still points at a
+// remote URL so that it carries the fetched, encoded payload inline instead.
+// It exists for the Anthropic-on-Vertex path, which (unlike direct Anthropic)
+// rejects URL-source documents. The request is modified in place: FileData is
+// set, FileType is filled from the fetched media type only when unset, and
+// FileURL is cleared. ctx is passed to each fetch; the first error is returned.
+func inlineDocumentURLs(ctx context.Context, request *schemas.BifrostChatRequest) error {
+	if request == nil {
+		return nil
+	}
+	for i := range request.Input {
+		content := request.Input[i].Content
+		if content == nil {
+			continue
+		}
+		for j := range content.ContentBlocks {
+			file := content.ContentBlocks[j].File
+			if file == nil || file.FileURL == nil || *file.FileURL == "" {
+				continue
+			}
+			fetchedType, payload, err := providerUtils.FetchAndEncodeURL(ctx, *file.FileURL)
+			if err != nil {
+				return err
+			}
+			file.FileData = &payload
+			if file.FileType == nil && fetchedType != "" {
+				file.FileType = &fetchedType
+			}
+			file.FileURL = nil
+		}
+	}
+	return nil
+}
+
 // ChatCompletion performs a chat completion request to the Vertex API.
 // It supports both text and image content in messages.
 // Returns a BifrostResponse containing the completion results or an error if the request fails.
@@ -393,6 +427,11 @@ func (provider *VertexProvider) ChatCompletion(ctx *schemas.BifrostContext, key
 			var err error
 
 			if schemas.IsAnthropicModel(request.Model) {
+				// Anthropic-on-Vertex doesn't accept URL-source document blocks.
+				// Inline any URL documents to base64 before the converter runs.
+				if err := inlineDocumentURLs(ctx, request); err != nil {
+					return nil, fmt.Errorf("failed to inline document URLs for vertex/claude: %w", err)
+				}
 				// Use centralized Anthropic converter
 				reqBody, convErr := anthropic.ToAnthropicChatRequest(ctx, request)
 				if convErr != nil {
@@ -700,6 +739,11 @@ func (provider *VertexProvider) ChatCompletionStream(ctx *schemas.BifrostContext
 			request,
 			func() (providerUtils.RequestBodyWithExtraParams, error) {
 				var extraParams map[string]interface{}
+				// Anthropic-on-Vertex doesn't accept URL-source document blocks.
+				// Inline any URL documents to base64 before the converter runs.
+				if err := inlineDocumentURLs(ctx, request); err != nil {
+					return nil, fmt.Errorf("failed to inline document URLs for vertex/claude: %w", err)
+				}
 				reqBody, convErr := anthropic.ToAnthropicChatRequest(ctx, request)
 				if convErr != nil {
 					return nil, convErr
diff --git a/core/providers/vllm/cachedcontents.go b/core/providers/vllm/cachedcontents.go
new file mode 100644
index 0000000000..5c8131bf39
--- /dev/null
+++ b/core/providers/vllm/cachedcontents.go
@@ -0,0 +1,34 @@
+package vllm
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is not supported by VLLMProvider and always returns an
+// unsupported-operation error for this provider's key. Only Gemini and Vertex
+// AI implement the named cached-content lifecycle; other providers either have
+// no named cache management or cache implicitly via cache_control markers.
+func (provider *VLLMProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList always returns an unsupported-operation error on VLLMProvider (see CachedContentCreate).
+func (provider *VLLMProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve always returns an unsupported-operation error on VLLMProvider (see CachedContentCreate).
+func (provider *VLLMProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate always returns an unsupported-operation error on VLLMProvider (see CachedContentCreate).
+func (provider *VLLMProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete always returns an unsupported-operation error on VLLMProvider (see CachedContentCreate).
+func (provider *VLLMProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/providers/xai/cachedcontents.go b/core/providers/xai/cachedcontents.go
new file mode 100644
index 0000000000..0ca6a486d0
--- /dev/null
+++ b/core/providers/xai/cachedcontents.go
@@ -0,0 +1,34 @@
+package xai
+
+import (
+	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// CachedContentCreate is not supported by XAIProvider and always returns an
+// unsupported-operation error for this provider's key. Only Gemini and Vertex
+// AI implement the named cached-content lifecycle; other providers either have
+// no named cache management or cache implicitly via cache_control markers.
+func (provider *XAIProvider) CachedContentCreate(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostCachedContentCreateRequest) (*schemas.BifrostCachedContentCreateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentCreateRequest, provider.GetProviderKey())
+}
+
+// CachedContentList always returns an unsupported-operation error on XAIProvider (see CachedContentCreate).
+func (provider *XAIProvider) CachedContentList(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentListRequest) (*schemas.BifrostCachedContentListResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentListRequest, provider.GetProviderKey())
+}
+
+// CachedContentRetrieve always returns an unsupported-operation error on XAIProvider (see CachedContentCreate).
+func (provider *XAIProvider) CachedContentRetrieve(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentRetrieveRequest) (*schemas.BifrostCachedContentRetrieveResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentRetrieveRequest, provider.GetProviderKey())
+}
+
+// CachedContentUpdate always returns an unsupported-operation error on XAIProvider (see CachedContentCreate).
+func (provider *XAIProvider) CachedContentUpdate(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentUpdateRequest) (*schemas.BifrostCachedContentUpdateResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentUpdateRequest, provider.GetProviderKey())
+}
+
+// CachedContentDelete always returns an unsupported-operation error on XAIProvider (see CachedContentCreate).
+func (provider *XAIProvider) CachedContentDelete(ctx *schemas.BifrostContext, keys []schemas.Key, request *schemas.BifrostCachedContentDeleteRequest) (*schemas.BifrostCachedContentDeleteResponse, *schemas.BifrostError) {
+	return nil, providerUtils.NewUnsupportedOperationError(schemas.CachedContentDeleteRequest, provider.GetProviderKey())
+}
diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go
index 2fbc95f5c4..6b0acbe40f 100644
--- a/core/schemas/bifrost.go
+++ b/core/schemas/bifrost.go
@@ -135,6 +135,11 @@ const (
 	FileListRequest              RequestType = "file_list"
 	FileRetrieveRequest          RequestType = "file_retrieve"
 	FileDeleteRequest            RequestType = "file_delete"
+	CachedContentCreateRequest   RequestType = "cached_content_create"
+	CachedContentListRequest     RequestType = "cached_content_list"
+	CachedContentRetrieveRequest RequestType = "cached_content_retrieve"
+	CachedContentUpdateRequest   RequestType = "cached_content_update"
+	CachedContentDeleteRequest   RequestType = "cached_content_delete"
 	FileContentRequest           RequestType = "file_content"
 	ContainerCreateRequest       RequestType = "container_create"
 	ContainerListRequest         RequestType = "container_list"
@@ -411,6 +416,11 @@ type BifrostRequest struct {
 	FileRetrieveRequest          *BifrostFileRetrieveRequest
 	FileDeleteRequest            *BifrostFileDeleteRequest
 	FileContentRequest           *BifrostFileContentRequest
+	CachedContentCreateRequest   *BifrostCachedContentCreateRequest
+	CachedContentListRequest     *BifrostCachedContentListRequest
+	CachedContentRetrieveRequest *BifrostCachedContentRetrieveRequest
+	CachedContentUpdateRequest   *BifrostCachedContentUpdateRequest
+	CachedContentDeleteRequest   *BifrostCachedContentDeleteRequest
 	BatchCreateRequest           *BifrostBatchCreateRequest
 	BatchListRequest             *BifrostBatchListRequest
 	BatchRetrieveRequest         *BifrostBatchRetrieveRequest
@@ -495,6 +505,28 @@ func (br *BifrostRequest) GetRequestFields() (provider ModelProvider, model stri
 			return br.FileContentRequest.Provider, *br.FileContentRequest.Model, nil
 		}
 		return br.FileContentRequest.Provider, "", nil
+	case br.CachedContentCreateRequest != nil:
+		return br.CachedContentCreateRequest.Provider, br.CachedContentCreateRequest.Model, nil
+	case br.CachedContentListRequest != nil:
+		if br.CachedContentListRequest.Model != nil {
+			return br.CachedContentListRequest.Provider, *br.CachedContentListRequest.Model, nil
+		}
+		return br.CachedContentListRequest.Provider, "", nil
+	case br.CachedContentRetrieveRequest != nil:
+		if br.CachedContentRetrieveRequest.Model != nil {
+			return br.CachedContentRetrieveRequest.Provider, *br.CachedContentRetrieveRequest.Model, nil
+		}
+		return br.CachedContentRetrieveRequest.Provider, "", nil
+	case br.CachedContentUpdateRequest != nil:
+		if br.CachedContentUpdateRequest.Model != nil {
+			return br.CachedContentUpdateRequest.Provider, *br.CachedContentUpdateRequest.Model, nil
+		}
+		return br.CachedContentUpdateRequest.Provider, "", nil
+	case br.CachedContentDeleteRequest != nil:
+		if br.CachedContentDeleteRequest.Model != nil {
+			return br.CachedContentDeleteRequest.Provider, *br.CachedContentDeleteRequest.Model, nil
+		}
+		return br.CachedContentDeleteRequest.Provider, "", nil
 	case br.BatchCreateRequest != nil:
 		if br.BatchCreateRequest.Model != nil {
 			return br.BatchCreateRequest.Provider, *br.BatchCreateRequest.Model, nil
@@ -589,6 +621,16 @@ func (br *BifrostRequest) SetProvider(provider ModelProvider) {
 		br.VideoDeleteRequest.Provider = provider
 	case br.VideoRemixRequest != nil:
 		br.VideoRemixRequest.Provider = provider
+	case br.CachedContentCreateRequest != nil:
+		br.CachedContentCreateRequest.Provider = provider
+	case br.CachedContentListRequest != nil:
+		br.CachedContentListRequest.Provider = provider
+	case br.CachedContentRetrieveRequest != nil:
+		br.CachedContentRetrieveRequest.Provider = provider
+	case br.CachedContentUpdateRequest != nil:
+		br.CachedContentUpdateRequest.Provider = provider
+	case br.CachedContentDeleteRequest != nil:
+		br.CachedContentDeleteRequest.Provider = provider
 	}
 }
 
@@ -624,6 +666,24 @@ func (br *BifrostRequest) SetModel(model string) {
 		if br.BatchCreateRequest.Model != nil {
 			br.BatchCreateRequest.Model = new(model)
 		}
+	case br.CachedContentCreateRequest != nil:
+		br.CachedContentCreateRequest.Model = model
+	case br.CachedContentListRequest != nil:
+		if br.CachedContentListRequest.Model != nil {
+			br.CachedContentListRequest.Model = new(model)
+		}
+	case br.CachedContentRetrieveRequest != nil:
+		if br.CachedContentRetrieveRequest.Model != nil {
+			br.CachedContentRetrieveRequest.Model = new(model)
+		}
+	case br.CachedContentUpdateRequest != nil:
+		if br.CachedContentUpdateRequest.Model != nil {
+			br.CachedContentUpdateRequest.Model = new(model)
+		}
+	case br.CachedContentDeleteRequest != nil:
+		if br.CachedContentDeleteRequest.Model != nil {
+			br.CachedContentDeleteRequest.Model = new(model)
+		}
 	}
 }
 
@@ -688,6 +748,16 @@ func (br *BifrostRequest) SetRawRequestBody(rawRequestBody []byte) {
 		br.VideoGenerationRequest.RawRequestBody = rawRequestBody
 	case br.VideoRemixRequest != nil:
 		br.VideoRemixRequest.RawRequestBody = rawRequestBody
+	case br.CachedContentCreateRequest != nil:
+		br.CachedContentCreateRequest.RawRequestBody = rawRequestBody
+	case br.CachedContentListRequest != nil:
+		br.CachedContentListRequest.RawRequestBody = rawRequestBody
+	case br.CachedContentRetrieveRequest != nil:
+		br.CachedContentRetrieveRequest.RawRequestBody = rawRequestBody
+	case br.CachedContentUpdateRequest != nil:
+		br.CachedContentUpdateRequest.RawRequestBody = rawRequestBody
+	case br.CachedContentDeleteRequest != nil:
+		br.CachedContentDeleteRequest.RawRequestBody = rawRequestBody
 	}
 }
 
@@ -761,6 +831,11 @@ type BifrostResponse struct {
 	FileRetrieveResponse          *BifrostFileRetrieveResponse
 	FileDeleteResponse            *BifrostFileDeleteResponse
 	FileContentResponse           *BifrostFileContentResponse
+	CachedContentCreateResponse   *BifrostCachedContentCreateResponse
+	CachedContentListResponse     *BifrostCachedContentListResponse
+	CachedContentRetrieveResponse *BifrostCachedContentRetrieveResponse
+	CachedContentUpdateResponse   *BifrostCachedContentUpdateResponse
+	CachedContentDeleteResponse   *BifrostCachedContentDeleteResponse
 	BatchCreateResponse           *BifrostBatchCreateResponse
 	BatchListResponse             *BifrostBatchListResponse
 	BatchRetrieveResponse         *BifrostBatchRetrieveResponse
@@ -861,6 +936,16 @@ func (r *BifrostResponse) GetExtraFields() *BifrostResponseExtraFields {
 		return &r.ContainerFileDeleteResponse.ExtraFields
 	case r.PassthroughResponse != nil:
 		return &r.PassthroughResponse.ExtraFields
+	case r.CachedContentCreateResponse != nil:
+		return &r.CachedContentCreateResponse.ExtraFields
+	case r.CachedContentListResponse != nil:
+		return &r.CachedContentListResponse.ExtraFields
+	case r.CachedContentRetrieveResponse != nil:
+		return &r.CachedContentRetrieveResponse.ExtraFields
+	case r.CachedContentUpdateResponse != nil:
+		return &r.CachedContentUpdateResponse.ExtraFields
+	case r.CachedContentDeleteResponse != nil:
+		return &r.CachedContentDeleteResponse.ExtraFields
 	}
 
 	return &BifrostResponseExtraFields{}
@@ -1078,6 +1163,31 @@ func (r *BifrostResponse) PopulateExtraFields(requestType RequestType, provider
 		r.PassthroughResponse.ExtraFields.Provider = provider
 		r.PassthroughResponse.ExtraFields.OriginalModelRequested = originalModelRequested
 		r.PassthroughResponse.ExtraFields.ResolvedModelUsed = resolvedModel
+	case r.CachedContentCreateResponse != nil:
+		r.CachedContentCreateResponse.ExtraFields.RequestType = requestType
+		r.CachedContentCreateResponse.ExtraFields.Provider = provider
+		r.CachedContentCreateResponse.ExtraFields.OriginalModelRequested = originalModelRequested
+		r.CachedContentCreateResponse.ExtraFields.ResolvedModelUsed = resolvedModel
+	case r.CachedContentListResponse != nil:
+		r.CachedContentListResponse.ExtraFields.RequestType = requestType
+		r.CachedContentListResponse.ExtraFields.Provider = provider
+		r.CachedContentListResponse.ExtraFields.OriginalModelRequested = originalModelRequested
+		r.CachedContentListResponse.ExtraFields.ResolvedModelUsed = resolvedModel
+	case r.CachedContentRetrieveResponse != nil:
+		r.CachedContentRetrieveResponse.ExtraFields.RequestType = requestType
+		r.CachedContentRetrieveResponse.ExtraFields.Provider = provider
+		r.CachedContentRetrieveResponse.ExtraFields.OriginalModelRequested = originalModelRequested
+		r.CachedContentRetrieveResponse.ExtraFields.ResolvedModelUsed = resolvedModel
+	case r.CachedContentUpdateResponse != nil:
+		r.CachedContentUpdateResponse.ExtraFields.RequestType = requestType
+		r.CachedContentUpdateResponse.ExtraFields.Provider = provider
+		r.CachedContentUpdateResponse.ExtraFields.OriginalModelRequested = originalModelRequested
+		r.CachedContentUpdateResponse.ExtraFields.ResolvedModelUsed = resolvedModel
+	case r.CachedContentDeleteResponse != nil:
+		r.CachedContentDeleteResponse.ExtraFields.RequestType = requestType
+		r.CachedContentDeleteResponse.ExtraFields.Provider = provider
+		r.CachedContentDeleteResponse.ExtraFields.OriginalModelRequested = originalModelRequested
+		r.CachedContentDeleteResponse.ExtraFields.ResolvedModelUsed = resolvedModel
 	}
 }
 
diff --git a/core/schemas/cachedcontents.go b/core/schemas/cachedcontents.go
new file mode 100644
index 0000000000..b309467734
--- /dev/null
+++ b/core/schemas/cachedcontents.go
@@ -0,0 +1,176 @@
+// Package schemas defines the core schemas and types used by the Bifrost system.
+package schemas
+
+// CachedContentObject is a single cached-content resource (Gemini / Vertex AI)
+// and the element type of list responses. Name is the canonical identifier:
+//   - Google AI Studio: "cachedContents/{id}"
+//   - Vertex AI:        "projects/{p}/locations/{l}/cachedContents/{id}"
+type CachedContentObject struct {
+	Name              string         `json:"name"`
+	DisplayName       string         `json:"display_name,omitempty"`
+	Model             string         `json:"model"`
+	SystemInstruction any            `json:"system_instruction,omitempty"`
+	Contents          []any          `json:"contents,omitempty"`
+	Tools             []any          `json:"tools,omitempty"`
+	ToolConfig        any            `json:"tool_config,omitempty"`
+	CreateTime        string         `json:"create_time,omitempty"`
+	UpdateTime        string         `json:"update_time,omitempty"`
+	ExpireTime        string         `json:"expire_time,omitempty"`
+	UsageMetadata     map[string]any `json:"usage_metadata,omitempty"`
+}
+
+// BifrostCachedContentCreateRequest describes a cached content to create. TTL and ExpireTime are
+// mutually exclusive (providers must error if both are set); RawRequestBody and ExtraParams are never serialized.
+type BifrostCachedContentCreateRequest struct {
+	Provider          ModelProvider `json:"provider"`
+	Model             string        `json:"model"`
+	DisplayName       *string       `json:"display_name,omitempty"`
+	SystemInstruction any           `json:"system_instruction,omitempty"`
+	Contents          []any         `json:"contents,omitempty"`
+	Tools             []any         `json:"tools,omitempty"`
+	ToolConfig        any           `json:"tool_config,omitempty"`
+	TTL               *string       `json:"ttl,omitempty"`         // duration like "3600s"
+	ExpireTime        *string       `json:"expire_time,omitempty"` // RFC3339 timestamp
+
+	RawRequestBody []byte         `json:"-"`
+	ExtraParams    map[string]any `json:"-"`
+}
+
+// GetRawRequestBody returns the request's RawRequestBody field unchanged (nil if it was never set).
+func (r *BifrostCachedContentCreateRequest) GetRawRequestBody() []byte { return r.RawRequestBody }
+
+// BifrostCachedContentCreateResponse holds the created cached content's fields plus Bifrost ExtraFields metadata.
+type BifrostCachedContentCreateResponse struct {
+	Name              string         `json:"name"`
+	DisplayName       string         `json:"display_name,omitempty"`
+	Model             string         `json:"model"`
+	SystemInstruction any            `json:"system_instruction,omitempty"`
+	Contents          []any          `json:"contents,omitempty"`
+	Tools             []any          `json:"tools,omitempty"`
+	ToolConfig        any            `json:"tool_config,omitempty"`
+	CreateTime        string         `json:"create_time,omitempty"`
+	UpdateTime        string         `json:"update_time,omitempty"`
+	ExpireTime        string         `json:"expire_time,omitempty"`
+	UsageMetadata     map[string]any `json:"usage_metadata,omitempty"`
+
+	ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
+}
+
+// BifrostCachedContentListRequest lists cached contents for a provider; Model is optional and may be nil.
+type BifrostCachedContentListRequest struct {
+	Provider ModelProvider `json:"provider"`
+	Model    *string       `json:"model"`
+
+	// Pagination: both fields are omitted from JSON when zero / nil.
+	PageSize  int     `json:"page_size,omitempty"`
+	PageToken *string `json:"page_token,omitempty"`
+
+	RawRequestBody []byte         `json:"-"`
+	ExtraParams    map[string]any `json:"-"`
+}
+
+// GetRawRequestBody returns the request's RawRequestBody field unchanged (nil if it was never set).
+func (r *BifrostCachedContentListRequest) GetRawRequestBody() []byte { return r.RawRequestBody }
+
+// BifrostCachedContentListResponse carries the listed cached contents and, when set, a NextPageToken.
+type BifrostCachedContentListResponse struct {
+	CachedContents []CachedContentObject `json:"cached_contents"`
+	NextPageToken  string                `json:"next_page_token,omitempty"`
+
+	ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
+}
+
+// BifrostCachedContentRetrieveRequest identifies one cached content to fetch by Name; Model is optional.
+type BifrostCachedContentRetrieveRequest struct {
+	Provider ModelProvider `json:"provider"`
+	Model    *string       `json:"model"`
+
+	// Name is the identifier of the cached content, in full or short form:
+	//   - Google AI Studio: "cachedContents/{id}" or just "{id}"
+	//   - Vertex AI:        "projects/{p}/locations/{l}/cachedContents/{id}" or just "{id}"
+	Name string `json:"name"`
+
+	RawRequestBody []byte         `json:"-"`
+	ExtraParams    map[string]any `json:"-"`
+}
+
+// GetRawRequestBody returns the request's RawRequestBody field unchanged (nil if it was never set).
+func (r *BifrostCachedContentRetrieveRequest) GetRawRequestBody() []byte { return r.RawRequestBody }
+
+// BifrostCachedContentRetrieveResponse holds the retrieved cached content's fields plus Bifrost ExtraFields metadata.
+type BifrostCachedContentRetrieveResponse struct {
+	Name              string         `json:"name"`
+	DisplayName       string         `json:"display_name,omitempty"`
+	Model             string         `json:"model"`
+	SystemInstruction any            `json:"system_instruction,omitempty"`
+	Contents          []any          `json:"contents,omitempty"`
+	Tools             []any          `json:"tools,omitempty"`
+	ToolConfig        any            `json:"tool_config,omitempty"`
+	CreateTime        string         `json:"create_time,omitempty"`
+	UpdateTime        string         `json:"update_time,omitempty"`
+	ExpireTime        string         `json:"expire_time,omitempty"`
+	UsageMetadata     map[string]any `json:"usage_metadata,omitempty"`
+
+	ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
+}
+
+// BifrostCachedContentUpdateRequest changes the expiration of the cached content
+// identified by Name. Set either TTL or ExpireTime, never both; Model is optional.
+type BifrostCachedContentUpdateRequest struct {
+	Provider ModelProvider `json:"provider"`
+	Model    *string       `json:"model"`
+
+	// Name is the identifier of the cached content to update (see Retrieve.Name).
+	Name string `json:"name"`
+
+	TTL        *string `json:"ttl,omitempty"`
+	ExpireTime *string `json:"expire_time,omitempty"`
+
+	RawRequestBody []byte         `json:"-"`
+	ExtraParams    map[string]any `json:"-"`
+}
+
+// GetRawRequestBody returns the request's RawRequestBody field unchanged (nil if it was never set).
+func (r *BifrostCachedContentUpdateRequest) GetRawRequestBody() []byte { return r.RawRequestBody }
+
+// BifrostCachedContentUpdateResponse holds the updated cached content's fields plus Bifrost ExtraFields metadata.
+type BifrostCachedContentUpdateResponse struct {
+	Name              string         `json:"name"`
+	DisplayName       string         `json:"display_name,omitempty"`
+	Model             string         `json:"model"`
+	SystemInstruction any            `json:"system_instruction,omitempty"`
+	Contents          []any          `json:"contents,omitempty"`
+	Tools             []any          `json:"tools,omitempty"`
+	ToolConfig        any            `json:"tool_config,omitempty"`
+	CreateTime        string         `json:"create_time,omitempty"`
+	UpdateTime        string         `json:"update_time,omitempty"`
+	ExpireTime        string         `json:"expire_time,omitempty"`
+	UsageMetadata     map[string]any `json:"usage_metadata,omitempty"`
+
+	ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
+}
+
+// BifrostCachedContentDeleteRequest identifies one cached content to delete by Name; Model is optional.
+type BifrostCachedContentDeleteRequest struct {
+	Provider ModelProvider `json:"provider"`
+	Model    *string       `json:"model"`
+
+	// Name is the identifier of the cached content to delete (see Retrieve.Name).
+	Name string `json:"name"`
+
+	RawRequestBody []byte         `json:"-"`
+	ExtraParams    map[string]any `json:"-"`
+}
+
+// GetRawRequestBody returns the request's RawRequestBody field unchanged (nil if it was never set).
+func (r *BifrostCachedContentDeleteRequest) GetRawRequestBody() []byte { return r.RawRequestBody }
+
+// BifrostCachedContentDeleteResponse is the result of deleting a cached content.
+// Providers typically return an empty body on success, so Name and Deleted are
+// filled in by bifrost; ExtraFields carries diagnostics about the request.
+type BifrostCachedContentDeleteResponse struct {
+	Name    string `json:"name,omitempty"`
+	Deleted bool   `json:"deleted"`
+
+	ExtraFields BifrostResponseExtraFields `json:"extra_fields"`
+}
diff --git a/core/schemas/chatcompletions.go b/core/schemas/chatcompletions.go
index 89bd5b7615..6bb4ef4bb4 100644
--- a/core/schemas/chatcompletions.go
+++ b/core/schemas/chatcompletions.go
@@ -5,6 +5,9 @@ import (
 	"encoding/json"
 	"fmt"
 	"time"
+
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )
 
 // BifrostChatRequest is the request struct for chat completion requests
@@ -1116,6 +1119,102 @@ type ChatContentBlock struct {
 	CachePoint *CachePoint `json:"cachePoint,omitempty"`
 }
 
+// UnmarshalJSON decodes a content block, first rewriting Anthropic-style
+// document blocks (`{"type":"document","source":{...}}`) into the canonical
+// file shape (`{"type":"file","file":{...}}`) via rewriteDocumentBlock. Doing
+// the rewrite at decode time means any JSON entry point yields the same file
+// block, so provider converters only need to handle the file form. A block
+// whose "type" is not the string "document" is decoded as-is.
+//
+// How `source` is mapped, by source.type:
+//   - base64, text: source.data -> file.file_data, source.media_type -> file.file_type
+//   - url:          source.url -> file.file_url
+//   - file:         source.file_id -> file.file_id
+//   - content or anything else: rejected with an error
+//
+// A `title` is additionally copied to file.filename; other sibling keys are not
+// touched by the rewrite.
+func (c *ChatContentBlock) UnmarshalJSON(data []byte) error {
+	// plain has the same fields but no methods, so decoding into it cannot re-enter UnmarshalJSON.
+	type plain ChatContentBlock
+	payload := data
+	kind := gjson.GetBytes(data, "type")
+	if kind.Type == gjson.String && kind.String() == "document" {
+		converted, err := rewriteDocumentBlock(data)
+		if err != nil {
+			return err
+		}
+		payload = converted
+	}
+	var decoded plain
+	if err := Unmarshal(payload, &decoded); err != nil {
+		return err
+	}
+	*c = ChatContentBlock(decoded)
+	return nil
+}
+
+// rewriteDocumentBlock converts an Anthropic-style document content block into
+// the canonical {type:"file", file:{...}} shape: "type" is set to "file", "source"
+// is dropped, and its payload lands in file.file_data, file.file_url or file.file_id.
+// Other keys are kept; it errors on a missing/unsupported source.type or an empty required field.
+func rewriteDocumentBlock(data []byte) ([]byte, error) {
+	srcType := gjson.GetBytes(data, "source.type").String()
+	out, err := sjson.SetBytes(data, "type", "file")
+	if err != nil {
+		return nil, fmt.Errorf("document rewrite: set type: %w", err)
+	}
+	out, err = sjson.DeleteBytes(out, "source")
+	if err != nil {
+		return nil, fmt.Errorf("document rewrite: drop source: %w", err)
+	}
+
+	switch srcType {
+	case "base64", "text":
+		mediaType := gjson.GetBytes(data, "source.media_type").String()
+		dataField := gjson.GetBytes(data, "source.data").String()
+		if dataField == "" {
+			return nil, fmt.Errorf("document rewrite: source.data is required for source.type=%q", srcType)
+		}
+		if out, err = sjson.SetBytes(out, "file.file_data", dataField); err != nil {
+			return nil, fmt.Errorf("document rewrite: set file_data: %w", err)
+		}
+		if mediaType != "" {
+			if out, err = sjson.SetBytes(out, "file.file_type", mediaType); err != nil {
+				return nil, fmt.Errorf("document rewrite: set file_type: %w", err)
+			}
+		}
+	case "url":
+		urlField := gjson.GetBytes(data, "source.url").String()
+		if urlField == "" {
+			return nil, fmt.Errorf("document rewrite: source.url is required for source.type=url")
+		}
+		if out, err = sjson.SetBytes(out, "file.file_url", urlField); err != nil {
+			return nil, fmt.Errorf("document rewrite: set file_url: %w", err)
+		}
+	case "file":
+		fileID := gjson.GetBytes(data, "source.file_id").String()
+		if fileID == "" {
+			return nil, fmt.Errorf("document rewrite: source.file_id is required for source.type=file")
+		}
+		if out, err = sjson.SetBytes(out, "file.file_id", fileID); err != nil {
+			return nil, fmt.Errorf("document rewrite: set file_id: %w", err)
+		}
+	case "content":
+		return nil, fmt.Errorf("document rewrite: source.type=content (Anthropic inline content array) is not supported; use base64/text/url/file")
+	default:
+		return nil, fmt.Errorf("document rewrite: unsupported source.type %q", srcType)
+	}
+	// Only a JSON string title becomes the filename. Exists() is also true for an explicit
+	// null (or a number/object), which String() would coerce, e.g. null -> "" stored as filename.
+	if name := gjson.GetBytes(data, "title"); name.Type == gjson.String {
+		if out, err = sjson.SetBytes(out, "file.filename", name.String()); err != nil {
+			return nil, fmt.Errorf("document rewrite: set filename: %w", err)
+		}
+	}
+	return out, nil
+}
+
 // CachePoint represents a cache point marker (Bedrock-specific)
 type CachePoint struct {
 	Type string `json:"type"` // "default"
diff --git a/core/schemas/provider.go b/core/schemas/provider.go
index 3ab0d099c9..80f6bf3d91 100644
--- a/core/schemas/provider.go
+++ b/core/schemas/provider.go
@@ -349,6 +349,11 @@ type AllowedRequests struct {
 	PassthroughStream     bool `json:"passthrough_stream"`
 	WebSocketResponses    bool `json:"websocket_responses"`
 	Realtime              bool `json:"realtime"`
+	CachedContentCreate   bool `json:"cached_content_create"`
+	CachedContentList     bool `json:"cached_content_list"`
+	CachedContentRetrieve bool `json:"cached_content_retrieve"`
+	CachedContentUpdate   bool `json:"cached_content_update"`
+	CachedContentDelete   bool `json:"cached_content_delete"`
 }
 
 // IsOperationAllowed checks if a specific operation is allowed
@@ -458,6 +463,16 @@ func (ar *AllowedRequests) IsOperationAllowed(operation RequestType) bool {
 		return ar.WebSocketResponses
 	case RealtimeRequest:
 		return ar.Realtime
+	case CachedContentCreateRequest:
+		return ar.CachedContentCreate
+	case CachedContentListRequest:
+		return ar.CachedContentList
+	case CachedContentRetrieveRequest:
+		return ar.CachedContentRetrieve
+	case CachedContentUpdateRequest:
+		return ar.CachedContentUpdate
+	case CachedContentDeleteRequest:
+		return ar.CachedContentDelete
 	default:
 		return false // Default to not allowed for unknown operations
 	}
@@ -636,6 +651,16 @@ type Provider interface {
 	FileDelete(ctx *BifrostContext, keys []Key, request *BifrostFileDeleteRequest) (*BifrostFileDeleteResponse, *BifrostError)
 	// FileContent downloads file content from the provider
 	FileContent(ctx *BifrostContext, keys []Key, request *BifrostFileContentRequest) (*BifrostFileContentResponse, *BifrostError)
+	// CachedContentCreate creates a new cached content (Gemini / Vertex AI named cache lifecycle)
+	CachedContentCreate(ctx *BifrostContext, key Key, request *BifrostCachedContentCreateRequest) (*BifrostCachedContentCreateResponse, *BifrostError)
+	// CachedContentList lists cached contents
+	CachedContentList(ctx *BifrostContext, keys []Key, request *BifrostCachedContentListRequest) (*BifrostCachedContentListResponse, *BifrostError)
+	// CachedContentRetrieve retrieves a single cached content by name
+	CachedContentRetrieve(ctx *BifrostContext, keys []Key, request *BifrostCachedContentRetrieveRequest) (*BifrostCachedContentRetrieveResponse, *BifrostError)
+	// CachedContentUpdate updates a cached content's expiration (TTL or expireTime)
+	CachedContentUpdate(ctx *BifrostContext, keys []Key, request *BifrostCachedContentUpdateRequest) (*BifrostCachedContentUpdateResponse, *BifrostError)
+	// CachedContentDelete deletes a cached content by name
+	CachedContentDelete(ctx *BifrostContext, keys []Key, request *BifrostCachedContentDeleteRequest) (*BifrostCachedContentDeleteResponse, *BifrostError)
 	// ContainerCreate creates a new container
 	ContainerCreate(ctx *BifrostContext, key Key, request *BifrostContainerCreateRequest) (*BifrostContainerCreateResponse, *BifrostError)
 	// ContainerList lists containers
diff --git a/core/utils.go b/core/utils.go
index bc0301e55a..b3bf8fff95 100644
--- a/core/utils.go
+++ b/core/utils.go
@@ -342,6 +342,13 @@ func isContainerRequestType(reqType schemas.RequestType) bool {
 		reqType == schemas.ContainerFileDeleteRequest
 }
 
+// isCachedContentRequestType reports whether reqType is one of the five cached-content operations: create, list, retrieve, update, or delete.
+func isCachedContentRequestType(reqType schemas.RequestType) bool {
+	return reqType == schemas.CachedContentCreateRequest || reqType == schemas.CachedContentListRequest ||
+		reqType == schemas.CachedContentRetrieveRequest || reqType == schemas.CachedContentUpdateRequest ||
+		reqType == schemas.CachedContentDeleteRequest
+}
+
 // isModellessVideoRequestType returns true if the given request type is a video request that does not require a model.
 func isModellessVideoRequestType(reqType schemas.RequestType) bool {
 	switch reqType {
diff --git a/docs/providers/test-harness-coverage.mdx b/docs/providers/test-harness-coverage.mdx
index 475491e7c4..464b92c733 100644
--- a/docs/providers/test-harness-coverage.mdx
+++ b/docs/providers/test-harness-coverage.mdx
@@ -8,7 +8,9 @@ icon: "list-check"
 
 Bifrost ships a comprehensive end-to-end test harness (`tests/e2e/api/collections/provider-harness.json`) that exercises every provider's translation surface. This page documents which features are covered for each provider, sourced from each upstream's official docs and verified against what's in the harness collection today.
 
-**Total: 329 requests across 13 folders** covering native API, drop-in routes, cross-model routing, passthrough endpoints, feature variations, and explicit backlog coverage.
+**Total: 321 requests across 12 folders** covering native API, drop-in routes, cross-model routing, passthrough endpoints, feature variations, and explicit backlog coverage.
+
+**`[PREVIEW]` tag**: requests prefixed `[PREVIEW]` are gated behind `INCLUDE_PREVIEW=1` (default-skipped) because they target account/region-scoped resources — preview-model deployments, vector stores, cached content references, MCP servers — that the harness can't provision in arbitrary environments. Run `make run-provider-harness-test INCLUDE_PREVIEW=1` to include them.
 
 Run it with:
 
@@ -21,143 +23,262 @@ Output:
 - `tmp/harness-failures.md` — categorized failures + coverage matrices
 - Interactive viewer at `http://localhost:8090` with Resend + Copy curl
 
-## Cell legend
-
-- `✅` — feature is exercised by the harness
-- `❌` — feature is **not** exercised yet (gap; PRs welcome)
-- `N/A` — feature is not part of that provider's API surface
-
-## Provider × Feature coverage
-
-### Core conversation
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Basic chat | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| System message | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Multi-turn conversation | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Streaming (SSE) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Stop sequences | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Sampling params (temperature/top_p/top_k) | ✅ | N/A | ✅ | ✅ | ✅ | ✅ |
-| Logprobs / top_logprobs | ✅ | N/A | N/A | ✅ | ✅ | ❌ |
-| Seed (deterministic outputs) | ✅ | N/A | N/A | N/A | N/A | ❌ |
-| Predicted outputs | ✅ | N/A | N/A | N/A | N/A | ❌ |
-
-### Tools
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Function calling (custom tool) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Tool choice forced (`required` / `any`) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Tool choice specific function | ✅ | ✅ | ✅ | ✅ | N/A | ❌ |
-| Parallel tool calls | ✅ | ✅ | ✅ | ✅ | N/A | ✅ |
-| Strict tool input | ✅ | ✅ | N/A | N/A | N/A | ❌ |
-| Tool input examples | N/A | ✅ | N/A | N/A | N/A | N/A |
-| Defer loading | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Allowed callers | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Eager input streaming | N/A | ✅ | N/A | N/A | N/A | N/A |
-| Tool search (BM25 / regex) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| MCP toolset | ✅ | ✅ | ✅ | N/A | ✅ | N/A |
-
-### Server-side tools
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Web search (basic) | ✅ | ✅ | ✅ | ✅ | ✅ | N/A |
-| Web search (dynamic filtering) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Web search (domain filter) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Web search (user location) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Web fetch | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Code execution | N/A | ✅ | ✅ | ✅ | ✅ | N/A |
-| Code interpreter (OpenAI Responses) | ✅ | N/A | N/A | N/A | N/A | ✅ |
-| File search (vector store) | ✅* | N/A | N/A | N/A | N/A | ✅* |
-| Computer use | ✅ | ✅ | ✅ | N/A | ✅ | N/A |
-| Text editor tool | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Bash tool | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Memory tool | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| URL context tool | N/A | N/A | N/A | ✅ | N/A | N/A |
-| Google search grounding | N/A | N/A | N/A | ✅ | ✅ | N/A |
-
-`*` = harness sends a placeholder `vs_REPLACE_ME` ID; passes only with a real OpenAI vector store.
-
-### Multimodal
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Vision (image input) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| PDF input | ✅* | ✅ | ✅ | ✅ | ✅ | N/A |
-| Audio input | ✅* | N/A | N/A | ✅* | ✅* | ✅* |
-| YouTube URL input | N/A | N/A | N/A | ✅ | N/A | N/A |
-| Citations | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-
-`*` = uses a `REPLACE_BASE64` / `file_REPLACE_ME` placeholder; exercises Bifrost's routing but needs real bytes/IDs to succeed end-to-end.
-
-### Reasoning / thinking
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Reasoning effort (`reasoning_effort`) | ✅ | N/A | N/A | N/A | N/A | ✅ |
-| Extended thinking (`thinking.budget_tokens`) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Adaptive thinking (`thinking.adaptive`) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Thinking budget (Gemini `thinkingConfig`) | N/A | N/A | N/A | ✅ | ✅ | N/A |
-| Interleaved thinking (Anthropic beta) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Reasoning summary (Responses API) | ✅ | N/A | N/A | N/A | N/A | N/A |
-
-### Output controls
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Structured output (`json_schema`) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Response format JSON object | ✅ | N/A | N/A | N/A | N/A | ❌ |
-| Response MIME type (Gemini) | N/A | N/A | N/A | ✅ | ✅ | N/A |
-| Output config / effort (Anthropic) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-
-### Caching / efficiency
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Prompt caching (ephemeral) | ✅ | ✅ | ✅ | ✅* | ✅ | N/A |
-| Prompt caching (1-hour TTL) | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Cached content reference (Gemini) | N/A | N/A | N/A | ✅* | N/A | N/A |
-| Stream options w/ usage | ✅ | N/A | N/A | N/A | N/A | N/A |
-
-`*` = uses a placeholder `cachedContents/PLACEHOLDER` ID; needs a real cache reference.
-
-### Provider-specific advanced
-
-| Feature | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Service tier (`scale` / `auto` / `priority`) | ✅ | ✅ | ✅ | N/A | ✅ | ✅ |
-| Background mode (Responses async) | ✅ | N/A | N/A | N/A | N/A | N/A |
-| Truncation strategy (`auto`) | ✅ | N/A | N/A | N/A | N/A | N/A |
-| Include array (Responses) | ✅ | N/A | N/A | N/A | N/A | N/A |
-| Custom tool (Responses) | ✅ | N/A | N/A | N/A | N/A | N/A |
-| Anthropic beta header | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Skills / container | ✅ | ✅ | N/A | N/A | N/A | ✅ |
-| Context management / 1M context | N/A | ✅ | ✅ | N/A | ✅ | N/A |
-| Compaction beta | N/A | ✅ | N/A | N/A | N/A | N/A |
-| Token-efficient tools beta | N/A | ✅ | N/A | N/A | N/A | N/A |
-| Fine-grained tool streaming | N/A | ✅ | N/A | N/A | N/A | N/A |
-| Fast mode (Opus 4.6) | N/A | ✅ | N/A | N/A | N/A | N/A |
-| Redact thinking beta | N/A | ✅ | N/A | N/A | N/A | N/A |
-| Safety settings (Gemini) | N/A | N/A | N/A | ✅ | ✅ | N/A |
-| Azure On Your Data (`azure_search`) | N/A | N/A | N/A | N/A | N/A | ✅ |
-| Bedrock cross-region inference (`global.`) | N/A | N/A | ✅ | N/A | N/A | N/A |
-| Bedrock geo profiles (`us.` / `eu.` / `apac.` / `jp.` / `au.`) | N/A | N/A | ✅ | N/A | N/A | N/A |
-| Vertex Model Garden (Llama / Mistral) | N/A | N/A | N/A | N/A | ✅ | N/A |
-
-### Endpoints (beyond chat completions)
-
-| Endpoint | OpenAI | Anthropic | Bedrock | Gemini | Vertex | Azure |
-|---|---|---|---|---|---|---|
-| Token counting | ✅ | ✅ | N/A | ✅ | N/A | N/A |
-| Batch API (create / list) | ✅ | ✅ | ✅ | N/A | N/A | N/A |
-| Files API (list) | ✅ | ✅ | N/A | ✅ | N/A | N/A |
-| Models list | ✅ | ✅ | N/A | ✅ | N/A | N/A |
-| Cached contents list (Gemini) | N/A | N/A | N/A | ✅ | N/A | N/A |
-| Native Bedrock Converse | N/A | N/A | ✅ | N/A | N/A | N/A |
-| Native Bedrock InvokeModel | N/A | N/A | ✅ | N/A | N/A | N/A |
-| Bedrock model invocation jobs | N/A | N/A | ✅ | N/A | N/A | N/A |
+## Status legend
+
+- `✅` — exercised by the harness, expected to pass against a properly-configured upstream
+- `✅*` — exercised, but needs an environment-side resource the harness can't manufacture (vector store, cached content reference, real audio bytes, MCP server, preview deployment, etc.). These rows are typically `[PREVIEW]`-tagged in the collection so they default-skip; opt in with `make run-provider-harness-test INCLUDE_PREVIEW=1`
+- `❌` — provider supports this feature but the harness doesn't yet exercise it (gap; PRs welcome)
+
+Features a provider doesn't natively support are simply omitted from that provider's table — there's no `N/A` row, since each table only lists features within that provider's own API surface.
+
+## Per-provider coverage
+
+### OpenAI
+
+| Feature | Status |
+|---|---|
+| Basic chat | ✅ |
+| System message | ✅ |
+| Multi-turn conversation | ✅ |
+| Streaming (SSE) | ✅ |
+| Stop sequences | ✅ |
+| Sampling params (temperature / top_p) | ✅ |
+| Logprobs / top_logprobs | ✅ |
+| Seed (deterministic outputs) | ✅ |
+| Predicted outputs | ✅ |
+| Function calling (custom tool) | ✅ |
+| Tool choice forced (`required`) | ✅ |
+| Tool choice specific function | ✅ |
+| Parallel tool calls | ✅ |
+| Strict tool input | ✅ |
+| MCP toolset | ✅* |
+| Web search (basic) | ✅ |
+| Code interpreter (Responses API) | ✅ |
+| File search (vector store) | ✅* |
+| Computer use (Responses API) | ✅* |
+| Vision (image input) | ✅ |
+| PDF input | ✅* |
+| Audio input | ✅* |
+| Reasoning effort (`reasoning_effort`) | ✅ |
+| Reasoning summary (Responses API) | ✅ |
+| Structured output (`json_schema`) | ✅ |
+| Response format JSON object | ✅ |
+| Prompt caching (ephemeral) | ✅ |
+| Stream options w/ usage | ✅ |
+| Service tier (`auto` / `flex` / `priority`) | ✅ |
+| Background mode (Responses async) | ✅ |
+| Truncation strategy (`auto`) | ✅ |
+| Include array (Responses) | ✅ |
+| Custom tool (Responses) | ✅ |
+| Skills / container | ✅ |
+| Token counting (`/v1/responses/input_tokens`) | ✅ |
+| Batch API (create / list) | ✅ |
+| Files API (list) | ✅ |
+| Models list | ✅ |
+
+### Anthropic
+
+| Feature | Status |
+|---|---|
+| Basic chat | ✅ |
+| System message | ✅ |
+| Multi-turn conversation | ✅ |
+| Streaming (SSE) | ✅ |
+| Stop sequences | ✅ |
+| Function calling (custom tool) | ✅ |
+| Tool choice forced (`any`) | ✅ |
+| Tool choice specific function | ✅ |
+| Parallel tool calls | ✅ |
+| Strict tool input | ✅ |
+| Tool input examples | ✅ |
+| Defer loading | ✅ |
+| Allowed callers | ✅ |
+| Eager input streaming | ✅ |
+| Tool search (BM25 / regex) | ✅ |
+| MCP toolset | ✅* |
+| Web search (basic / dynamic filtering / domain filter / user location) | ✅ |
+| Web fetch | ✅ |
+| Code execution | ✅ |
+| Computer use | ✅ |
+| Text editor tool | ✅ |
+| Bash tool | ✅ |
+| Memory tool | ✅ |
+| Vision (image input) | ✅ |
+| PDF input (URL or base64 source) | ✅ |
+| Citations on document blocks | ✅ |
+| Extended thinking (`thinking.budget_tokens`) | ✅ |
+| Adaptive thinking (`thinking.adaptive`) | ✅ |
+| Interleaved thinking (beta) | ✅ |
+| Structured output (`json_schema`) | ✅ |
+| Output config / effort | ✅ |
+| Prompt caching (ephemeral) | ✅ |
+| Prompt caching (1-hour TTL) | ✅ |
+| Service tier | ✅ |
+| `anthropic-beta` header | ✅ |
+| Skills / container | ✅ |
+| Context management / 1M context | ✅ |
+| Compaction beta | ✅ |
+| Token-efficient tools beta | ✅ |
+| Fine-grained tool streaming | ✅ |
+| Fast mode (Opus 4.6) | ✅* |
+| Redact thinking beta | ✅ |
+| Token counting (`/v1/messages/count_tokens`) | ✅ |
+| Batch API (create / list) | ✅ |
+| Files API (list) | ✅ |
+| Models list | ✅ |
+
+### Bedrock
+
+| Feature | Status |
+|---|---|
+| Basic chat | ✅ |
+| System message | ✅ |
+| Multi-turn conversation | ✅ |
+| Streaming (SSE / AWS event-stream) | ✅ |
+| Stop sequences | ✅ |
+| Sampling params (temperature OR top_p, not both on Anthropic models) | ✅ |
+| Function calling | ✅ |
+| Tool choice forced | ✅ |
+| Tool choice specific function | ✅ |
+| Parallel tool calls | ✅ |
+| MCP toolset | ✅ |
+| Web search (basic / dynamic filtering / domain filter / user location) | ✅ |
+| Web fetch | ✅ |
+| Code execution | ✅ |
+| Computer use | ✅ |
+| Text editor tool | ✅ |
+| Bash tool | ✅ |
+| Memory tool | ✅ |
+| Vision (image; URLs auto-fetched + inlined by Bifrost) | ✅ |
+| PDF input (URLs auto-fetched + inlined) | ✅ |
+| Citations on document blocks | ✅ |
+| Extended thinking (`thinking.budget_tokens`) | ✅ |
+| Interleaved thinking (beta) | ✅ |
+| Structured output (`json_schema` via tool-mode workaround) | ✅ |
+| Output config / effort | ✅ |
+| Prompt caching (ephemeral) | ✅ |
+| Prompt caching (1-hour TTL) | ✅ |
+| `anthropic-beta` header passthrough | ✅ |
+| Context management / 1M context | ✅ |
+| Cross-region inference (`global.` prefix) | ✅ |
+| Cross-region inference (`us.` prefix) | ✅ |
+| Native Converse (`/bedrock/model/{m}/converse`) | ✅ |
+| Native InvokeModel (`/bedrock/model/{m}/invoke`) | ✅ |
+| Model invocation jobs (Batch API equivalent) | ✅ |
+| Batch API | ✅ |
+
+### Gemini (Google AI Studio)
+
+| Feature | Status |
+|---|---|
+| Basic chat | ✅ |
+| System message | ✅ |
+| Multi-turn conversation | ✅ |
+| Streaming (SSE) | ✅ |
+| Stop sequences | ✅ |
+| Sampling params (temperature / top_p / top_k) | ✅ |
+| Logprobs | ✅ |
+| Function calling | ✅ |
+| Tool choice forced (`any`) | ✅ |
+| Tool choice specific function | ✅ |
+| Parallel tool calls | ✅ |
+| Web search (basic) | ✅ |
+| Code execution | ✅ |
+| URL context tool | ✅ |
+| Google search grounding | ✅ |
+| Vision (image input) | ✅ |
+| PDF input | ✅* |
+| Audio input | ✅ |
+| YouTube URL input | ✅ |
+| Thinking budget (`thinkingConfig`) | ✅ |
+| Structured output (`json_schema`) | ✅ |
+| Response MIME type | ✅ |
+| Prompt caching (implicit, via cached content) | ✅ |
+| Cached content reference (`cachedContent: "cachedContents/{id}"`) | ✅* |
+| Cached contents create / retrieve / update / delete (`POST /v1beta/cachedContents`; `GET/PATCH/DELETE /v1beta/cachedContents/{name}`) | ❌ |
+| Cached contents list (`GET /v1beta/cachedContents`) | ✅ |
+| Safety settings | ✅ |
+| Token counting (`:countTokens`) | ✅ |
+| Files API (list) | ✅ |
+| Models list | ✅ |
+
+### Vertex AI
+
+| Feature | Status |
+|---|---|
+| Basic chat | ✅ |
+| System message | ✅ |
+| Multi-turn conversation | ✅ |
+| Streaming (SSE) | ✅ |
+| Sampling params (temperature / top_p) | ✅ |
+| Function calling | ✅ |
+| Tool choice forced | ✅ |
+| Defer loading (Anthropic) | ✅ |
+| Allowed callers (Anthropic) | ✅ |
+| Tool search (Anthropic BM25) | ✅ |
+| MCP toolset | ✅* |
+| Web search (basic / dynamic filtering / domain filter / user location) | ✅ |
+| Web fetch | ✅ |
+| Code execution | ✅ |
+| Google search grounding | ✅ |
+| Computer use (Anthropic) | ✅ |
+| Text editor tool (Anthropic) | ✅ |
+| Bash tool (Anthropic) | ✅ |
+| Memory tool (Anthropic) | ✅ |
+| Vision (image input) | ✅ |
+| PDF input (URLs auto-fetched + inlined for Vertex-Anthropic) | ✅ |
+| Audio input (Gemini) | ✅* |
+| Citations on document blocks | ✅ |
+| Extended thinking (`thinking.budget_tokens`) | ✅ |
+| Adaptive thinking | ✅ |
+| Thinking budget (Gemini `thinkingConfig`) | ✅ |
+| Interleaved thinking (Anthropic beta) | ✅ |
+| Structured output (`json_schema`) | ✅ |
+| Response MIME type (Gemini) | ✅ |
+| Output config / effort (Anthropic) | ✅ |
+| Prompt caching (ephemeral) | ✅ |
+| Prompt caching (1-hour TTL) | ✅ |
+| Cached contents lifecycle (`POST/GET/PATCH/DELETE /v1/projects/.../cachedContents`) | ❌ |
+| `anthropic-beta` header passthrough | ✅ |
+| Safety settings (Gemini) | ✅ |
+| Context management / 1M context (Anthropic) | ✅ |
+| Token counting | ✅ |
+| Model Garden (Llama / Mistral) | ✅* |
+
+### Azure OpenAI
+
+| Feature | Status |
+|---|---|
+| Basic chat | ✅ |
+| System message | ✅ |
+| Multi-turn conversation | ✅ |
+| Streaming (SSE) | ✅ |
+| Stop sequences | ✅ |
+| Sampling params (temperature / top_p) | ✅ |
+| Logprobs / top_logprobs | ❌ |
+| Seed (deterministic outputs) | ❌ |
+| Predicted outputs | ❌ |
+| Function calling | ✅ |
+| Tool choice forced | ✅ |
+| Tool choice specific function | ❌ |
+| Parallel tool calls | ✅ |
+| Strict tool input | ❌ |
+| Code interpreter (Responses preview) | ✅ |
+| File search (Responses preview) | ✅* |
+| Vision (image input) | ✅ |
+| Audio input (`gpt-4o-audio-preview`) | ✅* |
+| Reasoning effort (`o3` deployment) | ✅* |
+| Structured output (`json_schema`) | ✅ |
+| Response format JSON object | ❌ |
+| Service tier (`auto` / `flex` / `priority`) | ✅ |
+| Skills / container | ✅ |
+| Azure On Your Data (`azure_search`) | ✅ |
+
+**Bifrost-side normalizations applied automatically** (don't appear as separate rows):
+- Vision URL images on Bedrock — fetched and inlined as base64 (Bedrock Converse only accepts inline bytes)
+- PDF URL documents on Bedrock — same fetch+inline path
+- PDF URL documents on Vertex Claude — same (Vertex-Anthropic doesn't accept URL document sources)
+- Anthropic-style `{type:"document",source:{...}}` blocks on `/v1/chat/completions` — normalized to `{type:"file",file:{...}}` at JSON unmarshal so every provider's converter sees the same shape
 
 ## Cross-cutting (Bifrost-specific)
 
@@ -184,25 +305,23 @@ These exercise Bifrost's translation layer between provider shapes — every che
 
 ## Passthrough surface (`*_passthrough/*`)
 
-Catch-all forwarding routes that strip incoming auth headers and inject Bifrost's configured provider key (or, for Bedrock, re-sign with Bifrost's AWS credentials via SigV4).
+Catch-all forwarding routes that strip incoming auth headers and inject Bifrost's configured provider key.
 
 | Feature | OpenAI | Anthropic | Bedrock | Azure | Gemini |
 |---|---|---|---|---|---|
-| Basic chat | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Streaming | ✅ | N/A | ✅ | N/A | N/A |
+| Basic chat | ✅ | ✅ | N/A | ✅ | ✅ |
+| Streaming | ✅ | N/A | N/A | N/A | N/A |
 | Vision | ✅ | N/A | N/A | N/A | N/A |
-| Web search | ✅ | ✅ | ✅ | N/A | ✅ |
+| Web search | ✅ | ✅ | N/A | N/A | ✅ |
 | Code execution / code interpreter | ✅ | N/A | N/A | N/A | ✅ |
-| Function calling / tool use | N/A | N/A | ✅ | ✅ | N/A |
+| Function calling / tool use | N/A | N/A | N/A | ✅ | N/A |
 | Computer use | N/A | ✅ | N/A | N/A | N/A |
-| Extended thinking | N/A | ✅ | ✅ | N/A | N/A |
-| Prompt caching | N/A | ✅ | ✅ | N/A | N/A |
-| System message | N/A | N/A | ✅ | N/A | N/A |
-| Native InvokeModel | N/A | N/A | ✅ | N/A | N/A |
+| Extended thinking | N/A | ✅ | N/A | N/A | N/A |
+| Prompt caching | N/A | ✅ | N/A | N/A | N/A |
 
-**Bedrock passthrough route**: `/bedrock_passthrough/model/{modelId}/converse` (or `/converse-stream`, or `/invoke`). Unlike the HTTP-header-auth providers, Bedrock requires AWS SigV4 signing — the underlying Bedrock provider re-signs the forwarded request with Bifrost's configured AWS credentials.
+**Bedrock**: passthrough is **not supported by design**. AWS SigV4 signing requires Bifrost to sign the request with its own credentials, which fundamentally conflicts with byte-for-byte forwarding. Use the typed `/bedrock/model/{modelId}/converse`, `/converse-stream`, or `/invoke` routes instead — those go through Bifrost's typed Bedrock provider with proper SigV4 handling.
 
-**Vertex** still has no passthrough variant — Google OAuth bearer tokens are rotated per-request and can't be bridged through a byte-for-byte forward.
+**Vertex**: no passthrough variant. Google OAuth bearer tokens are rotated per-request and can't be bridged through a byte-for-byte forward.
 
 ## Coverage by transport route
 
@@ -216,13 +335,15 @@ Catch-all forwarding routes that strip incoming auth headers and inject Bifrost'
 
 These cells stay `❌` even after running because they require provider-side state the harness can't manufacture:
 
-- **OpenAI File Search** — needs a real `vs_*` vector store
-- **OpenAI / Azure Audio Input** — needs real base64 audio bytes
+- **OpenAI File Search** — needs a real `vs_*` vector store (`[PREVIEW]`-tagged)
+- **OpenAI / Azure Audio Input** — needs real base64 audio bytes (`[PREVIEW]`-tagged)
 - **OpenAI / Anthropic Batch creation** — needs a real input file ID
-- **Gemini Cached Content** — needs a real `cachedContents/*` reference
-- **Anthropic Skills / Container** — needs container provisioning
+- **Gemini Cached Content reference** — the lifecycle endpoints (create/list/retrieve/update/delete) work end-to-end; only the *referencing* tests (passing `cachedContents/{id}` to `generateContent`) are `[PREVIEW]`-tagged, because they need a pre-provisioned cache holding at least 32k tokens of content (Gemini's minimum)
 - **Vertex Anthropic features in `us-central1`** — region-restricted unless `GOOGLE_LOCATION=global`
-- **Bedrock geo profiles outside your region** — requires multi-region capacity
+- **Vertex preview-model deployments** (Gemini-3.x, etc.) — `[PREVIEW]`-tagged; require account access
+- **Vertex-Anthropic URL document sources** — Vertex doesn't accept URL document sources upstream; Bifrost auto-fetches and inlines them via `inlineDocumentURLs` for parity with direct Anthropic
+- **Azure preview deployments** (`o3` / `gpt-4o-audio-preview` / `computer-use-preview`) — `[PREVIEW]`-tagged; require deployment provisioning
+- **MCP toolset tests** — `[PREVIEW]`-tagged; need a reachable MCP server
 
 ## Coverage report layout
 
diff --git a/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md b/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md
index 10332a227b..63b179dba6 100644
--- a/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md
+++ b/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md
@@ -265,7 +265,7 @@ Sources:
 - [ ] **Count tokens** (`POST /v1beta/models/{model}:countTokens`)
 - [ ] **Embed content** (`POST /v1beta/models/{model}:embedContent`)
 - [ ] **Batch embed** (`POST /v1beta/models/{model}:batchEmbedContents`)
-- [ ] **Cached content CRUD** (`POST /v1beta/cachedContents`, list, get, update, delete)
+- [~] **Cached content CRUD** (`POST /v1beta/cachedContents`, list, get, update, delete): typed lifecycle implemented for both Gemini and Vertex; harness `Gemini: list cached contents` runs against real upstream (list only; create/retrieve/update/delete not yet exercised)
 - [ ] **Files API** (`POST /v1beta/files` upload, list, get, delete)
 - [ ] **Models list** (`GET /v1beta/models`)
 - [ ] **Tuned models** (`POST /v1beta/tunedModels`)
diff --git a/tests/e2e/api/collections/provider-harness.json b/tests/e2e/api/collections/provider-harness.json
index 22d7f6e835..ac991368b9 100644
--- a/tests/e2e/api/collections/provider-harness.json
+++ b/tests/e2e/api/collections/provider-harness.json
@@ -80,6 +80,9 @@
           "    } else if (Array.isArray(j.files)) {",
           "        shape = 'gemini-list-files';",
           "        hasContent = j.files.length >= 0;",
+          "    } else if (Array.isArray(j.cachedContents)) {",
+          "        shape = 'gemini-list-cached-contents';",
+          "        hasContent = j.cachedContents.length >= 0;",
           "    } else if (Array.isArray(j.invocationJobSummaries)) {",
           "        shape = 'bedrock-list-invocation-jobs';",
           "        hasContent = j.invocationJobSummaries.length >= 0;",
@@ -1317,12 +1320,12 @@
             { "name": "Anthropic: citations on document", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n  \"model\": \"claude-haiku-4-5\",\n  \"max_tokens\": 512,\n  \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"document\",\"source\":{\"type\":\"text\",\"media_type\":\"text/plain\",\"data\":\"The sky is blue. Grass is green.\"},\"citations\":{\"enabled\":true}},{\"type\":\"text\",\"text\":\"What color is the sky?\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic","v1","messages"]}}},
             { "name": "Anthropic: eager input streaming + beta", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"},{"key":"anthropic-beta","value":"eager-input-streaming-2025-10-29"}], "body": {"mode":"raw","raw":"{\n  \"model\": \"claude-haiku-4-5\",\n  \"max_tokens\": 256,\n  \"stream\": true,\n  \"tools\": [{\"name\":\"f\",\"input_schema\":{\"type\":\"object\"},\"eager_input_streaming\":true}],\n  \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic","v1","messages"]}}},
             { "name": "Anthropic: allowed_callers + advanced beta", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"},{"key":"anthropic-beta","value":"advanced-tool-use-2025-09-15"}], "body": {"mode":"raw","raw":"{\n  \"model\": \"claude-opus-4-7\",\n  \"max_tokens\": 256,\n  \"tools\": [{\"name\":\"f\",\"input_schema\":{\"type\":\"object\"},\"allowed_callers\":[\"direct\"]}],\n  \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic","v1","messages"]}}},
-            { "name": "[PREVIEW] Anthropic: skills/container", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"},{"key":"anthropic-beta","value":"skills-2025-10-29"}], "body": {"mode":"raw","raw":"{\n  \"model\": \"claude-opus-4-7\",\n  \"max_tokens\": 1024,\n  \"container\": {\"skills\":[\"data-analysis\"]},\n  \"messages\": [{\"role\":\"user\",\"content\":\"Analyze\"}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic","v1","messages"]}}},
+            { "name": "Anthropic: skills/container", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"},{"key":"anthropic-beta","value":"skills-2025-10-29"}], "body": {"mode":"raw","raw":"{\n  \"model\": \"claude-opus-4-7\",\n  \"max_tokens\": 1024,\n  \"container\": {\"skills\":[{\"skill_id\":\"data-analysis\",\"type\":\"anthropic\"}]},\n  \"messages\": [{\"role\":\"user\",\"content\":\"Analyze\"}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic","v1","messages"]}}},
             { "name": "Gemini: parallel function calls", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n  \"contents\": [{\"parts\":[{\"text\":\"Weather in NYC and SF?\"}]}],\n  \"tools\": [{\"functionDeclarations\":[{\"name\":\"get_weather\",\"parameters\":{\"type\":\"OBJECT\",\"properties\":{\"city\":{\"type\":\"STRING\"}}}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{genaiModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{genaiModel}}:generateContent"]}}},
             { "name": "Gemini: structured output via /v1/chat (json_schema)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n  \"model\": \"gemini/gemini-2.5-flash\",\n  \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n  \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}},
             { "name": "[PREVIEW] Gemini: cached content reference", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n  \"contents\": [{\"parts\":[{\"text\":\"Use cache\"}]}],\n  \"cachedContent\": \"cachedContents/REPLACE_ME\"\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{genaiModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{genaiModel}}:generateContent"]}}},
             { "name": "Gemini: audio input (inline)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n  \"contents\": [{\"parts\":[{\"text\":\"Transcribe\"},{\"inlineData\":{\"mimeType\":\"audio/wav\",\"data\":\"REPLACE_BASE64\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{genaiModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{genaiModel}}:generateContent"]}}},
-            { "name": "[PREVIEW] Gemini: list cached contents", "request": { "method": "GET", "header": [{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "url": {"raw":"{{baseUrl}}/genai/v1beta/cachedContents","host":["{{baseUrl}}"],"path":["genai","v1beta","cachedContents"]}}},
+            { "name": "Gemini: list cached contents", "request": { "method": "GET", "header": [{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "url": {"raw":"{{baseUrl}}/genai/v1beta/cachedContents","host":["{{baseUrl}}"],"path":["genai","v1beta","cachedContents"]}}},
             { "name": "Gemini: list files", "request": { "method": "GET", "header": [{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "url": {"raw":"{{baseUrl}}/genai/v1beta/files","host":["{{baseUrl}}"],"path":["genai","v1beta","files"]}}}
           ]
         },
diff --git a/transports/bifrost-http/integrations/genai.go b/transports/bifrost-http/integrations/genai.go
index 88229393fe..46cb6fe659 100644
--- a/transports/bifrost-http/integrations/genai.go
+++ b/transports/bifrost-http/integrations/genai.go
@@ -847,11 +847,272 @@ func createGenAIRerankRouteConfig(pathPrefix string) RouteConfig {
 	}
 }
 
+// GeminiCachedContentCreateBody is the JSON request body decoded from POST /v1beta/cachedContents by the create route.
+// Mirrors https://ai.google.dev/api/caching#CachedContent (camelCase keys).
+type GeminiCachedContentCreateBody struct {
+	Model             string  `json:"model"` // the create route strips a leading "models/" before use
+	DisplayName       *string `json:"displayName,omitempty"`
+	SystemInstruction any     `json:"systemInstruction,omitempty"`
+	Contents          []any   `json:"contents,omitempty"`
+	Tools             []any   `json:"tools,omitempty"`
+	ToolConfig        any     `json:"toolConfig,omitempty"`
+	TTL               *string `json:"ttl,omitempty"`
+	ExpireTime        *string `json:"expireTime,omitempty"`
+}
+
+// GeminiCachedContentUpdateBody is the JSON request body decoded from PATCH /v1beta/cachedContents/{cached_id}.
+// Only ttl and expireTime are decoded; the update route copies both onto the Bifrost update request.
+type GeminiCachedContentUpdateBody struct {
+	TTL        *string `json:"ttl,omitempty"`
+	ExpireTime *string `json:"expireTime,omitempty"`
+}
+
+// extractGeminiCachedContentNameFromPath copies the "cached_id" route value into Name, and the provider returned by getProviderFromHeader into Provider, on retrieve/update/delete requests; bifrostCtx is unused.
+func extractGeminiCachedContentNameFromPath(ctx *fasthttp.RequestCtx, bifrostCtx *schemas.BifrostContext, req interface{}) error {
+	provider := getProviderFromHeader(ctx, schemas.Gemini)
+	nameVal := ctx.UserValue("cached_id") // route parameter from ".../cachedContents/{cached_id}"
+	if nameVal == nil {
+		return errors.New("cached content name is required")
+	}
+	nameStr, ok := nameVal.(string)
+	if !ok || nameStr == "" {
+		return errors.New("cached content name must be a non-empty string")
+	}
+
+	switch r := req.(type) { // any other request type is left unmodified and nil is returned
+	case *schemas.BifrostCachedContentRetrieveRequest:
+		r.Name = nameStr
+		r.Provider = provider
+	case *schemas.BifrostCachedContentUpdateRequest:
+		r.Name = nameStr
+		r.Provider = provider
+	case *schemas.BifrostCachedContentDeleteRequest:
+		r.Name = nameStr
+		r.Provider = provider
+	}
+	return nil
+}
+
+// setGeminiCachedContentCreateProvider resolves the provider from the
+// x-model-provider header (defaulting to Gemini) and stamps it on the typed
+// create request so Vertex callers route to the Vertex provider. It always returns nil; bifrostCtx is unused.
+func setGeminiCachedContentCreateProvider(ctx *fasthttp.RequestCtx, bifrostCtx *schemas.BifrostContext, req interface{}) error {
+	provider := getProviderFromHeader(ctx, schemas.Gemini)
+	if createReq, ok := req.(*schemas.BifrostCachedContentCreateRequest); ok { // other request types are ignored
+		createReq.Provider = provider
+	}
+	return nil
+}
+
+// extractGeminiCachedContentListQueryParams sets the provider and copies the pageSize/pageToken query args onto the list request; it always returns nil.
+func extractGeminiCachedContentListQueryParams(ctx *fasthttp.RequestCtx, bifrostCtx *schemas.BifrostContext, req interface{}) error {
+	provider := getProviderFromHeader(ctx, schemas.Gemini)
+	if listReq, ok := req.(*schemas.BifrostCachedContentListRequest); ok {
+		listReq.Provider = provider
+		if pageSizeStr := string(ctx.QueryArgs().Peek("pageSize")); pageSizeStr != "" {
+			if pageSize, err := strconv.Atoi(pageSizeStr); err == nil { // a non-numeric pageSize is silently ignored
+				listReq.PageSize = pageSize
+			}
+		}
+		if pageToken := string(ctx.QueryArgs().Peek("pageToken")); pageToken != "" {
+			listReq.PageToken = &pageToken
+		}
+	}
+	return nil
+}
+
+// CreateGenAICachedContentRouteConfigs returns the five cached content routes (create, list, retrieve, update, delete) rooted at pathPrefix + "/v1beta/cachedContents"; handlerStore is not used in this function.
+func CreateGenAICachedContentRouteConfigs(pathPrefix string, handlerStore lib.HandlerStore) []RouteConfig {
+	var routes []RouteConfig
+
+	// POST /v1beta/cachedContents — create (body decoded by RequestParser, provider stamped by the PreCallback)
+	routes = append(routes, RouteConfig{
+		Type:   RouteConfigTypeGenAI,
+		Path:   pathPrefix + "/v1beta/cachedContents",
+		Method: "POST",
+		GetHTTPRequestType: func(ctx *fasthttp.RequestCtx) schemas.RequestType {
+			return schemas.CachedContentCreateRequest
+		},
+		GetRequestTypeInstance: func(ctx context.Context) interface{} {
+			return &schemas.BifrostCachedContentCreateRequest{}
+		},
+		RequestParser: func(ctx *fasthttp.RequestCtx, req interface{}) error {
+			createReq, ok := req.(*schemas.BifrostCachedContentCreateRequest)
+			if !ok {
+				return errors.New("invalid cached content create request type")
+			}
+			if body := ctx.Request.Body(); len(body) > 0 { // an empty body leaves createReq unpopulated
+				var wire GeminiCachedContentCreateBody
+				if err := sonic.Unmarshal(body, &wire); err != nil {
+					return err
+				}
+				createReq.Model = strings.TrimPrefix(wire.Model, "models/") // "models/foo" and "foo" both yield "foo"
+				createReq.DisplayName = wire.DisplayName
+				createReq.SystemInstruction = wire.SystemInstruction
+				createReq.Contents = wire.Contents
+				createReq.Tools = wire.Tools
+				createReq.ToolConfig = wire.ToolConfig
+				createReq.TTL = wire.TTL
+				createReq.ExpireTime = wire.ExpireTime
+			}
+			return nil
+		},
+		CachedContentRequestConverter: func(ctx *schemas.BifrostContext, req interface{}) (*CachedContentRequest, error) {
+			createReq, ok := req.(*schemas.BifrostCachedContentCreateRequest)
+			if !ok {
+				return nil, errors.New("invalid cached content create request type")
+			}
+			// Provider is set via PreCallback (setGeminiCachedContentCreateProvider); fall back to Gemini if it is still empty.
+			if createReq.Provider == "" {
+				createReq.Provider = schemas.Gemini
+			}
+			return &CachedContentRequest{Type: schemas.CachedContentCreateRequest, CreateRequest: createReq}, nil
+		},
+		CachedContentCreateResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentCreateResponse) (interface{}, error) {
+			return gemini.ToGeminiCachedContentCreateResponse(resp), nil
+		},
+		ErrorConverter: func(ctx *schemas.BifrostContext, err *schemas.BifrostError) interface{} {
+			return gemini.ToGeminiError(err)
+		},
+		PreCallback: setGeminiCachedContentCreateProvider,
+	})
+
+	// GET /v1beta/cachedContents — list (provider, pageSize and pageToken are filled in by the PreCallback)
+	routes = append(routes, RouteConfig{
+		Type:   RouteConfigTypeGenAI,
+		Path:   pathPrefix + "/v1beta/cachedContents",
+		Method: "GET",
+		GetHTTPRequestType: func(ctx *fasthttp.RequestCtx) schemas.RequestType {
+			return schemas.CachedContentListRequest
+		},
+		GetRequestTypeInstance: func(ctx context.Context) interface{} {
+			return &schemas.BifrostCachedContentListRequest{}
+		},
+		CachedContentRequestConverter: func(ctx *schemas.BifrostContext, req interface{}) (*CachedContentRequest, error) {
+			listReq, ok := req.(*schemas.BifrostCachedContentListRequest)
+			if !ok {
+				return nil, errors.New("invalid cached content list request type")
+			}
+			return &CachedContentRequest{Type: schemas.CachedContentListRequest, ListRequest: listReq}, nil
+		},
+		CachedContentListResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentListResponse) (interface{}, error) {
+			return gemini.ToGeminiCachedContentListResponse(resp), nil
+		},
+		ErrorConverter: func(ctx *schemas.BifrostContext, err *schemas.BifrostError) interface{} {
+			return gemini.ToGeminiError(err)
+		},
+		PreCallback: extractGeminiCachedContentListQueryParams,
+	})
+
+	// GET /v1beta/cachedContents/{cached_id} — retrieve (Name and Provider are filled in by the PreCallback)
+	routes = append(routes, RouteConfig{
+		Type:   RouteConfigTypeGenAI,
+		Path:   pathPrefix + "/v1beta/cachedContents/{cached_id}",
+		Method: "GET",
+		GetHTTPRequestType: func(ctx *fasthttp.RequestCtx) schemas.RequestType {
+			return schemas.CachedContentRetrieveRequest
+		},
+		GetRequestTypeInstance: func(ctx context.Context) interface{} {
+			return &schemas.BifrostCachedContentRetrieveRequest{}
+		},
+		CachedContentRequestConverter: func(ctx *schemas.BifrostContext, req interface{}) (*CachedContentRequest, error) {
+			retrieveReq, ok := req.(*schemas.BifrostCachedContentRetrieveRequest)
+			if !ok {
+				return nil, errors.New("invalid cached content retrieve request type")
+			}
+			return &CachedContentRequest{Type: schemas.CachedContentRetrieveRequest, RetrieveRequest: retrieveReq}, nil
+		},
+		CachedContentRetrieveResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentRetrieveResponse) (interface{}, error) {
+			return gemini.ToGeminiCachedContentRetrieveResponse(resp), nil
+		},
+		ErrorConverter: func(ctx *schemas.BifrostContext, err *schemas.BifrostError) interface{} {
+			return gemini.ToGeminiError(err)
+		},
+		PreCallback: extractGeminiCachedContentNameFromPath,
+	})
+
+	// PATCH /v1beta/cachedContents/{cached_id} — update (ttl/expireTime decoded by RequestParser)
+	routes = append(routes, RouteConfig{
+		Type:   RouteConfigTypeGenAI,
+		Path:   pathPrefix + "/v1beta/cachedContents/{cached_id}",
+		Method: "PATCH",
+		GetHTTPRequestType: func(ctx *fasthttp.RequestCtx) schemas.RequestType {
+			return schemas.CachedContentUpdateRequest
+		},
+		GetRequestTypeInstance: func(ctx context.Context) interface{} {
+			return &schemas.BifrostCachedContentUpdateRequest{}
+		},
+		RequestParser: func(ctx *fasthttp.RequestCtx, req interface{}) error {
+			updateReq, ok := req.(*schemas.BifrostCachedContentUpdateRequest)
+			if !ok {
+				return errors.New("invalid cached content update request type")
+			}
+			if body := ctx.Request.Body(); len(body) > 0 { // an empty body leaves TTL and ExpireTime untouched
+				var wire GeminiCachedContentUpdateBody
+				if err := sonic.Unmarshal(body, &wire); err != nil {
+					return err
+				}
+				updateReq.TTL = wire.TTL
+				updateReq.ExpireTime = wire.ExpireTime
+			}
+			return nil
+		},
+		CachedContentRequestConverter: func(ctx *schemas.BifrostContext, req interface{}) (*CachedContentRequest, error) {
+			updateReq, ok := req.(*schemas.BifrostCachedContentUpdateRequest)
+			if !ok {
+				return nil, errors.New("invalid cached content update request type")
+			}
+			// Name and Provider are set via PreCallback (extractGeminiCachedContentNameFromPath); fall back to Gemini if Provider is still empty.
+			if updateReq.Provider == "" {
+				updateReq.Provider = schemas.Gemini
+			}
+			return &CachedContentRequest{Type: schemas.CachedContentUpdateRequest, UpdateRequest: updateReq}, nil
+		},
+		CachedContentUpdateResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentUpdateResponse) (interface{}, error) {
+			return gemini.ToGeminiCachedContentUpdateResponse(resp), nil
+		},
+		ErrorConverter: func(ctx *schemas.BifrostContext, err *schemas.BifrostError) interface{} {
+			return gemini.ToGeminiError(err)
+		},
+		PreCallback: extractGeminiCachedContentNameFromPath,
+	})
+
+	// DELETE /v1beta/cachedContents/{cached_id} — delete (Name and Provider are filled in by the PreCallback)
+	routes = append(routes, RouteConfig{
+		Type:   RouteConfigTypeGenAI,
+		Path:   pathPrefix + "/v1beta/cachedContents/{cached_id}",
+		Method: "DELETE",
+		GetHTTPRequestType: func(ctx *fasthttp.RequestCtx) schemas.RequestType {
+			return schemas.CachedContentDeleteRequest
+		},
+		GetRequestTypeInstance: func(ctx context.Context) interface{} {
+			return &schemas.BifrostCachedContentDeleteRequest{}
+		},
+		CachedContentRequestConverter: func(ctx *schemas.BifrostContext, req interface{}) (*CachedContentRequest, error) {
+			deleteReq, ok := req.(*schemas.BifrostCachedContentDeleteRequest)
+			if !ok {
+				return nil, errors.New("invalid cached content delete request type")
+			}
+			return &CachedContentRequest{Type: schemas.CachedContentDeleteRequest, DeleteRequest: deleteReq}, nil
+		},
+		CachedContentDeleteResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentDeleteResponse) (interface{}, error) {
+			return gemini.ToGeminiCachedContentDeleteResponse(resp), nil
+		},
+		ErrorConverter: func(ctx *schemas.BifrostContext, err *schemas.BifrostError) interface{} {
+			return gemini.ToGeminiError(err)
+		},
+		PreCallback: extractGeminiCachedContentNameFromPath,
+	})
+
+	return routes
+}
+
 // NewGenAIRouter creates a new GenAIRouter with the given bifrost client.
 func NewGenAIRouter(client *bifrost.Bifrost, handlerStore lib.HandlerStore, logger schemas.Logger) *GenAIRouter {
 	routes := CreateGenAIRouteConfigs("/genai")
 	routes = append(routes, CreateGenAIFileRouteConfigs("/genai", handlerStore)...)
 	routes = append(routes, CreateGenAIBatchRouteConfigs("/genai", handlerStore)...)
+	routes = append(routes, CreateGenAICachedContentRouteConfigs("/genai", handlerStore)...)
 
 	return &GenAIRouter{
 		GenericRouter: NewGenericRouter(client, handlerStore, routes, nil, logger),
diff --git a/transports/bifrost-http/integrations/openai.go b/transports/bifrost-http/integrations/openai.go
index 262119f925..950ce337a4 100644
--- a/transports/bifrost-http/integrations/openai.go
+++ b/transports/bifrost-http/integrations/openai.go
@@ -352,6 +352,12 @@ func CreateOpenAIRouteConfigs(pathPrefix string, handlerStore lib.HandlerStore)
 			case strings.HasSuffix(path, "/chat/completions"):
 				return schemas.ChatCompletionRequest
 
+			case strings.HasSuffix(path, "/responses/input_tokens"):
+				return schemas.CountTokensRequest
+
+			case strings.HasSuffix(path, "/responses"):
+				return schemas.ResponsesRequest
+
 			case strings.HasSuffix(path, "/completions"):
 				return schemas.TextCompletionRequest
 
@@ -381,6 +387,8 @@ func CreateOpenAIRouteConfigs(pathPrefix string, handlerStore lib.HandlerStore)
 				switch requestType {
 				case schemas.ChatCompletionRequest:
 					return &openai.OpenAIChatRequest{}
+				case schemas.ResponsesRequest, schemas.CountTokensRequest:
+					return &openai.OpenAIResponsesRequest{}
 				case schemas.TextCompletionRequest:
 					return &openai.OpenAITextCompletionRequest{}
 				case schemas.EmbeddingRequest:
@@ -427,6 +435,15 @@ func CreateOpenAIRouteConfigs(pathPrefix string, handlerStore lib.HandlerStore)
 				return &schemas.BifrostRequest{
 					ChatRequest: openaiReq.ToBifrostChatRequest(ctx),
 				}, nil
+			} else if openaiReq, ok := req.(*openai.OpenAIResponsesRequest); ok {
+				if reqType, _ := ctx.Value(schemas.BifrostContextKeyHTTPRequestType).(schemas.RequestType); reqType == schemas.CountTokensRequest {
+					return &schemas.BifrostRequest{
+						CountTokensRequest: openaiReq.ToBifrostResponsesRequest(ctx),
+					}, nil
+				}
+				return &schemas.BifrostRequest{
+					ResponsesRequest: openaiReq.ToBifrostResponsesRequest(ctx),
+				}, nil
 			} else if openaiReq, ok := req.(*openai.OpenAITextCompletionRequest); ok {
 				return &schemas.BifrostRequest{
 					TextCompletionRequest: openaiReq.ToBifrostTextCompletionRequest(ctx),
@@ -509,6 +526,14 @@ func CreateOpenAIRouteConfigs(pathPrefix string, handlerStore lib.HandlerStore)
 			}
 			return resp, nil
 		},
+		ResponsesResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostResponsesResponse) (interface{}, error) {
+			if resp.ExtraFields.Provider == schemas.OpenAI {
+				if resp.ExtraFields.RawResponse != nil {
+					return resp.ExtraFields.RawResponse, nil
+				}
+			}
+			return resp.WithDefaults(), nil
+		},
 		StreamConfig: &StreamConfig{
 			ChatStreamResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostChatResponse) (string, interface{}, error) {
 				if resp.ExtraFields.Provider == schemas.OpenAI {
@@ -550,6 +575,18 @@ func CreateOpenAIRouteConfigs(pathPrefix string, handlerStore lib.HandlerStore)
 				}
 				return "", resp, nil
 			},
+			ResponsesStreamResponseConverter: func(ctx *schemas.BifrostContext, resp *schemas.BifrostResponsesStreamResponse) (string, interface{}, error) {
+				if resp.ExtraFields.Provider == schemas.OpenAI {
+					if resp.ExtraFields.RawResponse != nil {
+						return string(resp.Type), resp.ExtraFields.RawResponse, nil
+					}
+				}
+				converted := resp.WithDefaults()
+				if converted == nil {
+					return "", nil, nil
+				}
+				return string(resp.Type), converted, nil
+			},
 			ErrorConverter: func(ctx *schemas.BifrostContext, err *schemas.BifrostError) interface{} {
 				return err
 			},
diff --git a/transports/bifrost-http/integrations/router.go b/transports/bifrost-http/integrations/router.go
index a91e9153b9..55f701070e 100644
--- a/transports/bifrost-http/integrations/router.go
+++ b/transports/bifrost-http/integrations/router.go
@@ -129,6 +129,17 @@ type ContainerFileRequest struct {
 	DeleteRequest   *schemas.BifrostContainerFileDeleteRequest
 }
 
+// CachedContentRequest wraps a Bifrost cached content request with its type information.
+// Used by Gemini and Vertex AI integrations; Type selects which one of the request pointers below the cached content handler reads.
+type CachedContentRequest struct {
+	Type            schemas.RequestType
+	CreateRequest   *schemas.BifrostCachedContentCreateRequest
+	ListRequest     *schemas.BifrostCachedContentListRequest
+	RetrieveRequest *schemas.BifrostCachedContentRetrieveRequest
+	UpdateRequest   *schemas.BifrostCachedContentUpdateRequest
+	DeleteRequest   *schemas.BifrostCachedContentDeleteRequest
+}
+
 // BatchRequestConverter is a function that converts integration-specific batch requests to Bifrost format.
 type BatchRequestConverter func(ctx *schemas.BifrostContext, req interface{}) (*BatchRequest, error)
 
@@ -141,6 +152,24 @@ type ContainerRequestConverter func(ctx *schemas.BifrostContext, req interface{}
 // ContainerFileRequestConverter is a function that converts integration-specific container file requests to Bifrost format.
 type ContainerFileRequestConverter func(ctx *schemas.BifrostContext, req interface{}) (*ContainerFileRequest, error)
 
+// CachedContentRequestConverter converts the parsed integration-specific request into a CachedContentRequest; a route with this set is treated as a cached content route.
+type CachedContentRequestConverter func(ctx *schemas.BifrostContext, req interface{}) (*CachedContentRequest, error)
+
+// CachedContentCreateResponseConverter converts BifrostCachedContentCreateResponse to integration format; when nil, the Bifrost response is sent as-is.
+type CachedContentCreateResponseConverter func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentCreateResponse) (interface{}, error)
+
+// CachedContentListResponseConverter converts BifrostCachedContentListResponse to integration format; when nil, the Bifrost response is sent as-is.
+type CachedContentListResponseConverter func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentListResponse) (interface{}, error)
+
+// CachedContentRetrieveResponseConverter converts BifrostCachedContentRetrieveResponse to integration format; when nil, the Bifrost response is sent as-is.
+type CachedContentRetrieveResponseConverter func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentRetrieveResponse) (interface{}, error)
+
+// CachedContentUpdateResponseConverter converts BifrostCachedContentUpdateResponse to integration format; when nil, the Bifrost response is sent as-is.
+type CachedContentUpdateResponseConverter func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentUpdateResponse) (interface{}, error)
+
+// CachedContentDeleteResponseConverter converts BifrostCachedContentDeleteResponse to integration format; when nil, the Bifrost response is sent as-is.
+type CachedContentDeleteResponseConverter func(ctx *schemas.BifrostContext, resp *schemas.BifrostCachedContentDeleteResponse) (interface{}, error)
+
 // RequestConverter is a function that converts integration-specific requests to Bifrost format.
 // It takes the parsed request object and returns a BifrostRequest ready for processing.
 type RequestConverter func(ctx *schemas.BifrostContext, req interface{}) (*schemas.BifrostRequest, error)
@@ -425,6 +454,12 @@ type RouteConfig struct {
 	FileRequestConverter                   FileRequestConverter                   // Function to convert request to FileRequest (for file operations)
 	ContainerRequestConverter              ContainerRequestConverter              // Function to convert request to ContainerRequest (for container operations)
 	ContainerFileRequestConverter          ContainerFileRequestConverter          // Function to convert request to ContainerFileRequest (for container file operations)
+	CachedContentRequestConverter          CachedContentRequestConverter          // Function to convert request to CachedContentRequest (for cached content lifecycle)
+	CachedContentCreateResponseConverter   CachedContentCreateResponseConverter   // Optional response converter for cached content create
+	CachedContentListResponseConverter     CachedContentListResponseConverter     // Optional response converter for cached content list
+	CachedContentRetrieveResponseConverter CachedContentRetrieveResponseConverter // Optional response converter for cached content retrieve
+	CachedContentUpdateResponseConverter   CachedContentUpdateResponseConverter   // Optional response converter for cached content update
+	CachedContentDeleteResponseConverter   CachedContentDeleteResponseConverter   // Optional response converter for cached content delete
 	ListModelsResponseConverter            ListModelsResponseConverter            // Function to convert BifrostListModelsResponse to integration format (SHOULD NOT BE NIL)
 	TextResponseConverter                  TextResponseConverter                  // Function to convert BifrostTextCompletionResponse to integration format (SHOULD NOT BE NIL)
 	ChatResponseConverter                  ChatResponseConverter                  // Function to convert BifrostChatResponse to integration format (SHOULD NOT BE NIL)
@@ -553,12 +588,13 @@ func (g *GenericRouter) RegisterRoutes(r *router.Router, middlewares ...schemas.
 			continue
 		}
 
-		// Determine route type: inference, batch, file, container, or container file
+		// Determine route type: inference, batch, file, container, container file, or cached content
 		isBatchRoute := route.BatchRequestConverter != nil
 		isFileRoute := route.FileRequestConverter != nil
 		isContainerRoute := route.ContainerRequestConverter != nil
 		isContainerFileRoute := route.ContainerFileRequestConverter != nil
-		isInferenceRoute := !isBatchRoute && !isFileRoute && !isContainerRoute && !isContainerFileRoute
+		isCachedContentRoute := route.CachedContentRequestConverter != nil
+		isInferenceRoute := !isBatchRoute && !isFileRoute && !isContainerRoute && !isContainerFileRoute && !isCachedContentRoute
 
 		// For inference routes, require RequestConverter
 		if isInferenceRoute && route.RequestConverter == nil {
@@ -845,6 +881,22 @@ func (g *GenericRouter) createHandler(config RouteConfig) fasthttp.RequestHandle
 			return
 		}
 
+		// Handle cached content requests if CachedContentRequestConverter is set
+		if config.CachedContentRequestConverter != nil {
+			defer cancel()
+			cachedContentReq, err := config.CachedContentRequestConverter(bifrostCtx, req)
+			if err != nil {
+				g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(err, "failed to convert cached content request"))
+				return
+			}
+			if cachedContentReq == nil {
+				g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(nil, "invalid cached content request"))
+				return
+			}
+			g.handleCachedContentRequest(ctx, config, req, cachedContentReq, bifrostCtx)
+			return
+		}
+
 		// Convert the integration-specific request to Bifrost format (inference requests)
 		bifrostReq, err := config.RequestConverter(bifrostCtx, req)
 		if err != nil {
@@ -2223,6 +2275,136 @@ func (g *GenericRouter) handleContainerFileRequest(ctx *fasthttp.RequestCtx, con
 	g.sendSuccess(ctx, bifrostCtx, config.ErrorConverter, response, nil)
 }
 
+// handleCachedContentRequest dispatches on cachedReq.Type (create, list, retrieve, update, delete) for Gemini and Vertex AI: it calls the
+// matching client method, runs config.PostCallback if set, applies the per-operation response converter if set, and sends the result or the first error.
+func (g *GenericRouter) handleCachedContentRequest(ctx *fasthttp.RequestCtx, config RouteConfig, req interface{}, cachedReq *CachedContentRequest, bifrostCtx *schemas.BifrostContext) {
+	var response interface{}
+	var err error
+
+	switch cachedReq.Type {
+	case schemas.CachedContentCreateRequest:
+		if cachedReq.CreateRequest == nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(nil, "invalid cached content create request"))
+			return
+		}
+		bifrostResp, bifrostErr := g.client.CachedContentCreateRequest(bifrostCtx, cachedReq.CreateRequest)
+		if bifrostErr != nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, bifrostErr)
+			return
+		}
+		if config.PostCallback != nil { // optional hook; an error from it is sent instead of the response
+			if perr := config.PostCallback(ctx, req, bifrostResp); perr != nil {
+				g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(perr, "failed to execute post-request callback"))
+				return
+			}
+		}
+		if config.CachedContentCreateResponseConverter != nil {
+			response, err = config.CachedContentCreateResponseConverter(bifrostCtx, bifrostResp)
+		} else { // no converter configured: send the Bifrost response as-is
+			response = bifrostResp
+		}
+
+	case schemas.CachedContentListRequest:
+		if cachedReq.ListRequest == nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(nil, "invalid cached content list request"))
+			return
+		}
+		bifrostResp, bifrostErr := g.client.CachedContentListRequest(bifrostCtx, cachedReq.ListRequest)
+		if bifrostErr != nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, bifrostErr)
+			return
+		}
+		if config.PostCallback != nil {
+			if perr := config.PostCallback(ctx, req, bifrostResp); perr != nil {
+				g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(perr, "failed to execute post-request callback"))
+				return
+			}
+		}
+		if config.CachedContentListResponseConverter != nil {
+			response, err = config.CachedContentListResponseConverter(bifrostCtx, bifrostResp)
+		} else {
+			response = bifrostResp
+		}
+
+	case schemas.CachedContentRetrieveRequest:
+		if cachedReq.RetrieveRequest == nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(nil, "invalid cached content retrieve request"))
+			return
+		}
+		bifrostResp, bifrostErr := g.client.CachedContentRetrieveRequest(bifrostCtx, cachedReq.RetrieveRequest)
+		if bifrostErr != nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, bifrostErr)
+			return
+		}
+		if config.PostCallback != nil {
+			if perr := config.PostCallback(ctx, req, bifrostResp); perr != nil {
+				g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(perr, "failed to execute post-request callback"))
+				return
+			}
+		}
+		if config.CachedContentRetrieveResponseConverter != nil {
+			response, err = config.CachedContentRetrieveResponseConverter(bifrostCtx, bifrostResp)
+		} else {
+			response = bifrostResp
+		}
+
+	case schemas.CachedContentUpdateRequest:
+		if cachedReq.UpdateRequest == nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(nil, "invalid cached content update request"))
+			return
+		}
+		bifrostResp, bifrostErr := g.client.CachedContentUpdateRequest(bifrostCtx, cachedReq.UpdateRequest)
+		if bifrostErr != nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, bifrostErr)
+			return
+		}
+		if config.PostCallback != nil {
+			if perr := config.PostCallback(ctx, req, bifrostResp); perr != nil {
+				g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(perr, "failed to execute post-request callback"))
+				return
+			}
+		}
+		if config.CachedContentUpdateResponseConverter != nil {
+			response, err = config.CachedContentUpdateResponseConverter(bifrostCtx, bifrostResp)
+		} else {
+			response = bifrostResp
+		}
+
+	case schemas.CachedContentDeleteRequest:
+		if cachedReq.DeleteRequest == nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(nil, "invalid cached content delete request"))
+			return
+		}
+		bifrostResp, bifrostErr := g.client.CachedContentDeleteRequest(bifrostCtx, cachedReq.DeleteRequest)
+		if bifrostErr != nil {
+			g.sendError(ctx, bifrostCtx, config.ErrorConverter, bifrostErr)
+			return
+		}
+		if config.PostCallback != nil {
+			if perr := config.PostCallback(ctx, req, bifrostResp); perr != nil {
+				g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(perr, "failed to execute post-request callback"))
+				return
+			}
+		}
+		if config.CachedContentDeleteResponseConverter != nil {
+			response, err = config.CachedContentDeleteResponseConverter(bifrostCtx, bifrostResp)
+		} else {
+			response = bifrostResp
+		}
+
+	default: // Type is not one of the five cached content request types
+		g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(nil, "unsupported cached content request type"))
+		return
+	}
+
+	if err != nil { // err is only ever assigned by a response converter above
+		g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(err, "failed to convert cached content response"))
+		return
+	}
+
+	g.sendSuccess(ctx, bifrostCtx, config.ErrorConverter, response, nil)
+}
+
 // handleStreamingRequest handles streaming requests using Server-Sent Events (SSE)
 func (g *GenericRouter) handleStreamingRequest(ctx *fasthttp.RequestCtx, config RouteConfig, bifrostReq *schemas.BifrostRequest, bifrostCtx *schemas.BifrostContext, cancel context.CancelFunc) {
 	// Use the cancellable context from ConvertToBifrostContext
diff --git a/ui/app/workspace/governance/virtual-keys/page.tsx b/ui/app/workspace/governance/virtual-keys/page.tsx
index ffd6774906..e057e00263 100644
--- a/ui/app/workspace/governance/virtual-keys/page.tsx
+++ b/ui/app/workspace/governance/virtual-keys/page.tsx
@@ -1,7 +1,12 @@
 import VirtualKeysTable from "@/app/workspace/virtual-keys/views/virtualKeysTable";
 import FullPageLoader from "@/components/fullPageLoader";
 import { useDebouncedValue } from "@/hooks/useDebounce";
-import { getErrorMessage, useGetCustomersQuery, useGetTeamsQuery, useGetVirtualKeysQuery } from "@/lib/store";
+import {
+  getErrorMessage,
+  useGetCustomersQuery,
+  useGetTeamsQuery,
+  useGetVirtualKeysQuery,
+} from "@/lib/store";
 import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib";
 import { parseAsInteger, parseAsString, useQueryStates } from "nuqs";
 import { useEffect, useRef } from "react";
@@ -11,135 +16,136 @@ const POLLING_INTERVAL = 5000;
 const PAGE_SIZE = 25;
 
 export default function GovernanceVirtualKeysPage() {
-	const hasVirtualKeysAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.View);
-	const hasTeamsAccess = useRbac(RbacResource.Teams, RbacOperation.View);
-	const hasCustomersAccess = useRbac(RbacResource.Customers, RbacOperation.View);
-	const shownErrorsRef = useRef(new Set<string>());
-
-	const [urlState, setUrlState] = useQueryStates(
-		{
-			search: parseAsString.withDefault(""),
-			customer_id: parseAsString.withDefault(""),
-			team_id: parseAsString.withDefault(""),
-			offset: parseAsInteger.withDefault(0),
-			sort_by: parseAsString.withDefault(""),
-			order: parseAsString.withDefault(""),
-		},
-		{ history: "push" },
-	);
-
-	const debouncedSearch = useDebouncedValue(urlState.search, 300);
-
-	const {
-		data: virtualKeysData,
-		error: vkError,
-		isLoading: vkLoading,
-	} = useGetVirtualKeysQuery(
-		{
-			limit: PAGE_SIZE,
-			offset: urlState.offset,
-			search: debouncedSearch || undefined,
-			customer_id: urlState.customer_id || undefined,
-			team_id: urlState.team_id || undefined,
-			sort_by: (urlState.sort_by as "name" | "budget_spent" | "created_at" | "status") || undefined,
-			order: (urlState.order as "asc" | "desc") || undefined,
-		},
-		{
-			skip: !hasVirtualKeysAccess,
-			pollingInterval: POLLING_INTERVAL,
-		},
-	);
-
-	const {
-		data: teamsData,
-		error: teamsError,
-		isLoading: teamsLoading,
-	} = useGetTeamsQuery(undefined, {
-		skip: !hasTeamsAccess,
-		pollingInterval: POLLING_INTERVAL,
-	});
-
-	const {
-		data: customersData,
-		error: customersError,
-		isLoading: customersLoading,
-	} = useGetCustomersQuery(undefined, {
-		skip: !hasCustomersAccess,
-		pollingInterval: POLLING_INTERVAL,
-	});
-
-	const vkTotal = virtualKeysData?.total_count ?? 0;
-
-	// Snap offset back when total shrinks past current page (e.g. delete last item on last page)
-	useEffect(() => {
-		if (!virtualKeysData || urlState.offset < vkTotal) return;
-		setUrlState({ offset: vkTotal === 0 ? 0 : Math.floor((vkTotal - 1) / PAGE_SIZE) * PAGE_SIZE });
-	}, [vkTotal, urlState.offset]);
-
-	const isLoading = vkLoading || teamsLoading || customersLoading;
-
-	useEffect(() => {
-		if (!vkError && !teamsError && !customersError) {
-			shownErrorsRef.current.clear();
-			return;
-		}
-		const errorKey = `${!!vkError}-${!!teamsError}-${!!customersError}`;
-		if (shownErrorsRef.current.has(errorKey)) return;
-		shownErrorsRef.current.add(errorKey);
-		if (vkError && teamsError && customersError) {
-			toast.error("Failed to load governance data.");
-		} else {
-			if (vkError) toast.error(`Failed to load virtual keys: ${getErrorMessage(vkError)}`);
-			if (teamsError) toast.error(`Failed to load teams: ${getErrorMessage(teamsError)}`);
-			if (customersError) toast.error(`Failed to load customers: ${getErrorMessage(customersError)}`);
-		}
-	}, [vkError, teamsError, customersError]);
-
-	if (isLoading) {
-		return <FullPageLoader />;
-	}
-
-	const handleSearchChange = (value: string) => {
-		setUrlState({ search: value || null, offset: 0 });
-	};
-
-	const handleCustomerFilterChange = (value: string) => {
-		setUrlState({ customer_id: value || null, offset: 0 });
-	};
-
-	const handleTeamFilterChange = (value: string) => {
-		setUrlState({ team_id: value || null, offset: 0 });
-	};
-
-	const handleOffsetChange = (newOffset: number) => {
-		setUrlState({ offset: newOffset });
-	};
-
-	const handleSortChange = (newSortBy: string, newOrder: string) => {
-		setUrlState({ sort_by: newSortBy || null, order: newOrder || null, offset: 0 });
-	};
-
-	return (
-		<div className="mx-auto w-full max-w-7xl">
-			<VirtualKeysTable
-				virtualKeys={virtualKeysData?.virtual_keys || []}
-				totalCount={virtualKeysData?.total_count || 0}
-				teams={teamsData?.teams || []}
-				customers={customersData?.customers || []}
-				search={urlState.search}
-				debouncedSearch={debouncedSearch}
-				onSearchChange={handleSearchChange}
-				customerFilter={urlState.customer_id}
-				onCustomerFilterChange={handleCustomerFilterChange}
-				teamFilter={urlState.team_id}
-				onTeamFilterChange={handleTeamFilterChange}
-				offset={urlState.offset}
-				limit={PAGE_SIZE}
-				onOffsetChange={handleOffsetChange}
-				sortBy={urlState.sort_by}
-				order={urlState.order}
-				onSortChange={handleSortChange}
-			/>
-		</div>
-	);
+  const hasVirtualKeysAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.View);
+  const hasTeamsAccess = useRbac(RbacResource.Teams, RbacOperation.View);
+  const hasCustomersAccess = useRbac(RbacResource.Customers, RbacOperation.View);
+  const shownErrorsRef = useRef(new Set<string>());
+  // Search, filter, sort and pagination state is read from and written to the URL query string.
+  const [urlState, setUrlState] = useQueryStates(
+    {
+      search: parseAsString.withDefault(""),
+      customer_id: parseAsString.withDefault(""),
+      team_id: parseAsString.withDefault(""),
+      offset: parseAsInteger.withDefault(0),
+      sort_by: parseAsString.withDefault(""),
+      order: parseAsString.withDefault(""),
+    },
+    { history: "push" },
+  );
+  // Debounced copy of the search text; this value, not the raw input, is what the virtual-keys query receives.
+  const debouncedSearch = useDebouncedValue(urlState.search, 300);
+  // Each query below is skipped when its RBAC view permission is missing and polls every POLLING_INTERVAL.
+  const {
+    data: virtualKeysData,
+    error: vkError,
+    isLoading: vkLoading,
+  } = useGetVirtualKeysQuery(
+    {
+      limit: PAGE_SIZE,
+      offset: urlState.offset,
+      search: debouncedSearch || undefined,
+      customer_id: urlState.customer_id || undefined,
+      team_id: urlState.team_id || undefined,
+      sort_by: (urlState.sort_by as "name" | "budget_spent" | "created_at" | "status") || undefined,
+      order: (urlState.order as "asc" | "desc") || undefined,
+    },
+    {
+      skip: !hasVirtualKeysAccess,
+      pollingInterval: POLLING_INTERVAL,
+    },
+  );
+
+  const {
+    data: teamsData,
+    error: teamsError,
+    isLoading: teamsLoading,
+  } = useGetTeamsQuery(undefined, {
+    skip: !hasTeamsAccess,
+    pollingInterval: POLLING_INTERVAL,
+  });
+
+  const {
+    data: customersData,
+    error: customersError,
+    isLoading: customersLoading,
+  } = useGetCustomersQuery(undefined, {
+    skip: !hasCustomersAccess,
+    pollingInterval: POLLING_INTERVAL,
+  });
+
+  const vkTotal = virtualKeysData?.total_count ?? 0;
+  // Snap offset back to the last valid page when the total shrinks past it (e.g. delete last item on last page).
+  // Depends on virtualKeysData so it re-runs when data first arrives; offset 0 is always valid and is skipped.
+  useEffect(() => {
+    if (!virtualKeysData || urlState.offset === 0 || urlState.offset < vkTotal) return;
+    setUrlState({ offset: vkTotal === 0 ? 0 : Math.floor((vkTotal - 1) / PAGE_SIZE) * PAGE_SIZE });
+  }, [virtualKeysData, vkTotal, urlState.offset, setUrlState]);
+
+  const isLoading = vkLoading || teamsLoading || customersLoading;
+  // Toast load errors once per combination of failing queries; the dedupe set resets once no query is failing.
+  useEffect(() => {
+    if (!vkError && !teamsError && !customersError) {
+      shownErrorsRef.current.clear();
+      return;
+    }
+    const errorKey = `${!!vkError}-${!!teamsError}-${!!customersError}`;
+    if (shownErrorsRef.current.has(errorKey)) return;
+    shownErrorsRef.current.add(errorKey);
+    if (vkError && teamsError && customersError) {
+      toast.error("Failed to load governance data.");
+    } else {
+      if (vkError) toast.error(`Failed to load virtual keys: ${getErrorMessage(vkError)}`);
+      if (teamsError) toast.error(`Failed to load teams: ${getErrorMessage(teamsError)}`);
+      if (customersError)
+        toast.error(`Failed to load customers: ${getErrorMessage(customersError)}`);
+    }
+  }, [vkError, teamsError, customersError]);
+
+  if (isLoading) {
+    return <FullPageLoader />;
+  }
+  // Changing the search text, a filter or the sort order resets pagination to the first page.
+  const handleSearchChange = (value: string) => {
+    setUrlState({ search: value || null, offset: 0 });
+  };
+
+  const handleCustomerFilterChange = (value: string) => {
+    setUrlState({ customer_id: value || null, offset: 0 });
+  };
+
+  const handleTeamFilterChange = (value: string) => {
+    setUrlState({ team_id: value || null, offset: 0 });
+  };
+
+  const handleOffsetChange = (newOffset: number) => {
+    setUrlState({ offset: newOffset });
+  };
+
+  const handleSortChange = (newSortBy: string, newOrder: string) => {
+    setUrlState({ sort_by: newSortBy || null, order: newOrder || null, offset: 0 });
+  };
+
+  return (
+    <div className="mx-auto w-full">
+      <VirtualKeysTable
+        virtualKeys={virtualKeysData?.virtual_keys || []}
+        totalCount={virtualKeysData?.total_count || 0}
+        teams={teamsData?.teams || []}
+        customers={customersData?.customers || []}
+        search={urlState.search}
+        debouncedSearch={debouncedSearch}
+        onSearchChange={handleSearchChange}
+        customerFilter={urlState.customer_id}
+        onCustomerFilterChange={handleCustomerFilterChange}
+        teamFilter={urlState.team_id}
+        onTeamFilterChange={handleTeamFilterChange}
+        offset={urlState.offset}
+        limit={PAGE_SIZE}
+        onOffsetChange={handleOffsetChange}
+        sortBy={urlState.sort_by}
+        order={urlState.order}
+        onSortChange={handleSortChange}
+      />
+    </div>
+  );
 }
\ No newline at end of file