Skip to content

Commit 5a85fc2

Browse files
feat(event_handler): add support for File field in OpenAPI utility
1 parent 7323c05 commit 5a85fc2

File tree

9 files changed

+697
-9
lines changed

9 files changed

+697
-9
lines changed

aws_lambda_powertools/event_handler/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from aws_lambda_powertools.event_handler.lambda_function_url import (
2222
LambdaFunctionUrlResolver,
2323
)
24+
from aws_lambda_powertools.event_handler.openapi.params import File, UploadFile
2425
from aws_lambda_powertools.event_handler.request import Request
2526
from aws_lambda_powertools.event_handler.vpc_lattice import VPCLatticeResolver, VPCLatticeV2Resolver
2627

@@ -36,10 +37,12 @@
3637
"BedrockResponse",
3738
"BedrockFunctionResponse",
3839
"CORSConfig",
40+
"File",
3941
"HttpResolverLocal",
4042
"LambdaFunctionUrlResolver",
4143
"Request",
4244
"Response",
45+
"UploadFile",
4346
"VPCLatticeResolver",
4447
"VPCLatticeV2Resolver",
4548
]

aws_lambda_powertools/event_handler/middlewares/openapi_validation.py

Lines changed: 166 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from __future__ import annotations
22

3+
import base64
34
import dataclasses
45
import json
56
import logging
7+
import warnings
68
from typing import TYPE_CHECKING, Any, Callable, Mapping, MutableMapping, Sequence, Union, cast
79
from urllib.parse import parse_qs
810

@@ -25,7 +27,7 @@
2527
RequestValidationError,
2628
ResponseValidationError,
2729
)
28-
from aws_lambda_powertools.event_handler.openapi.params import Param
30+
from aws_lambda_powertools.event_handler.openapi.params import Param, UploadFile
2931
from aws_lambda_powertools.event_handler.openapi.types import UnionType
3032

3133
if TYPE_CHECKING:
@@ -44,6 +46,7 @@
4446
CONTENT_DISPOSITION_NAME_PARAM = "name="
4547
APPLICATION_JSON_CONTENT_TYPE = "application/json"
4648
APPLICATION_FORM_CONTENT_TYPE = "application/x-www-form-urlencoded"
49+
MULTIPART_FORM_DATA_CONTENT_TYPE = "multipart/form-data"
4750

4851

4952
class OpenAPIRequestValidationMiddleware(BaseMiddlewareHandler):
@@ -134,14 +137,18 @@ def _get_body(self, app: EventHandlerInstance) -> dict[str, Any]:
134137
elif content_type.startswith(APPLICATION_FORM_CONTENT_TYPE):
135138
return self._parse_form_data(app)
136139

140+
# Handle multipart/form-data (file uploads)
141+
elif content_type.startswith(MULTIPART_FORM_DATA_CONTENT_TYPE):
142+
return self._parse_multipart_data(app, content_type)
143+
137144
else:
138145
raise RequestUnsupportedContentType(
139-
"Only JSON body or Form() are supported",
146+
"Unsupported content type",
140147
errors=[
141148
{
142149
"type": "unsupported_content_type",
143150
"loc": ("body",),
144-
"msg": "Only JSON body or Form() are supported",
151+
"msg": f"Unsupported content type: {content_type}",
145152
"input": {},
146153
"ctx": {},
147154
},
@@ -188,6 +195,49 @@ def _parse_form_data(self, app: EventHandlerInstance) -> dict[str, Any]:
188195
],
189196
) from e
190197

198+
def _parse_multipart_data(self, app: EventHandlerInstance, content_type: str) -> dict[str, Any]:
    """
    Turn a multipart/form-data request body into a dict of parsed fields.

    The boundary is read from ``content_type``; the event body is converted to bytes
    (base64-decoded when the event is flagged as base64 encoded, latin-1 encoded
    otherwise) and handed to ``_parse_multipart_body``.

    Raises
    ------
    ValueError
        When the content-type header carries no boundary. Any other ``ValueError``
        raised while decoding or parsing is propagated unchanged as well.
    RequestValidationError
        When decoding or parsing fails with any exception that is not a ``ValueError``.
    """
    try:
        multipart_boundary = _extract_multipart_boundary(content_type)
        if not multipart_boundary:
            raise ValueError("Missing boundary in multipart/form-data content-type header")

        payload = app.current_event.body or ""

        if not app.current_event.is_base64_encoded:
            warnings.warn(
                "Received multipart/form-data without base64 encoding. "
                "Binary file uploads may be corrupted. "
                "If using API Gateway REST API (v1), configure Binary Media Types "
                "to include 'multipart/form-data'. "
                "See: https://docs.aws.amazon.com/apigateway/latest/developerguide/"
                "api-gateway-payload-encodings.html",
                stacklevel=2,
            )
            # latin-1 maps code points 0-255 one-to-one onto bytes, so a body that
            # carries raw binary data (not valid UTF-8) keeps every byte value.
            payload_bytes = payload.encode("latin-1")
        else:
            payload_bytes = base64.b64decode(payload)

        return _parse_multipart_body(payload_bytes, multipart_boundary)

    except Exception as exc:
        # NOTE(review): ValueError is deliberately let through untouched. That also
        # covers its subclasses (binascii.Error from b64decode, UnicodeError from
        # encode/decode) - confirm callers expect those unwrapped.
        if isinstance(exc, ValueError):
            raise
        raise RequestValidationError(
            [
                {
                    "type": "multipart_invalid",
                    "loc": ("body",),
                    "msg": "Multipart form data parsing error",
                    "input": {},
                    "ctx": {"error": str(exc)},
                },
            ],
        ) from exc
240+
191241

192242
class OpenAPIResponseValidationMiddleware(BaseMiddlewareHandler):
193243
"""
@@ -391,7 +441,12 @@ def _request_body_to_args(
391441
continue
392442

393443
value = _normalize_field_value(value=value, field_info=field.field_info)
394-
values[field.name] = _validate_field(field=field, value=value, loc=loc, existing_errors=errors)
444+
445+
# UploadFile objects bypass Pydantic validation — they're already constructed
446+
if isinstance(value, UploadFile):
447+
values[field.name] = value
448+
else:
449+
values[field.name] = _validate_field(field=field, value=value, loc=loc, existing_errors=errors)
395450

396451
return values, errors
397452

@@ -467,6 +522,10 @@ def _is_or_contains_sequence(annotation: Any) -> bool:
467522

468523
def _normalize_field_value(value: Any, field_info: FieldInfo) -> Any:
469524
"""Normalize field value, converting lists to single values for non-sequence fields."""
525+
# When annotation is bytes but value is UploadFile, extract raw content
526+
if isinstance(value, UploadFile) and field_info.annotation is bytes:
527+
return value.content
528+
470529
if _is_or_contains_sequence(field_info.annotation):
471530
return value
472531
elif isinstance(value, list) and value:
@@ -580,3 +639,106 @@ def _get_param_value(
580639
value = input_dict.get(field_name)
581640

582641
return value
642+
643+
644+
def _extract_multipart_boundary(content_type: str) -> str | None:
645+
"""Extract the boundary string from a multipart/form-data content-type header."""
646+
for segment in content_type.split(";"):
647+
stripped = segment.strip()
648+
if stripped.startswith("boundary="):
649+
boundary = stripped[len("boundary=") :]
650+
# Remove optional quotes around boundary
651+
if boundary.startswith('"') and boundary.endswith('"'):
652+
boundary = boundary[1:-1]
653+
return boundary
654+
return None
655+
656+
657+
def _parse_multipart_body(body: bytes, boundary: str) -> dict[str, Any]:
658+
"""
659+
Parse a multipart/form-data body into a dict of field names to values.
660+
661+
File fields get bytes values; regular form fields get string values.
662+
Multiple values for the same field name are collected into lists.
663+
"""
664+
delimiter = f"--{boundary}".encode()
665+
end_delimiter = f"--{boundary}--".encode()
666+
667+
result: dict[str, Any] = {}
668+
669+
# Split body by the boundary delimiter
670+
raw_parts = body.split(delimiter)
671+
672+
for raw_part in raw_parts:
673+
# Skip the preamble (before first boundary) and epilogue (after closing boundary)
674+
if not raw_part or raw_part.strip() == b"" or raw_part.strip() == b"--":
675+
continue
676+
677+
# Remove the end delimiter marker if present
678+
chunk = raw_part
679+
if chunk.endswith(end_delimiter):
680+
chunk = chunk[: -len(end_delimiter)]
681+
682+
# Strip leading \r\n
683+
if chunk.startswith(b"\r\n"):
684+
chunk = chunk[2:]
685+
686+
# Strip trailing \r\n
687+
if chunk.endswith(b"\r\n"):
688+
chunk = chunk[:-2]
689+
690+
# Split headers from body at the double CRLF
691+
header_end = chunk.find(b"\r\n\r\n")
692+
if header_end == -1:
693+
continue
694+
695+
header_section = chunk[:header_end].decode("utf-8")
696+
body_section = chunk[header_end + 4 :]
697+
698+
# Parse Content-Disposition to get the field name and optional filename
699+
field_name = None
700+
filename = None
701+
content_type_header = None
702+
703+
for header_line in header_section.split("\r\n"):
704+
header_lower = header_line.lower()
705+
if header_lower.startswith("content-disposition:"):
706+
field_name = _extract_header_param(header_line, "name")
707+
filename = _extract_header_param(header_line, "filename")
708+
elif header_lower.startswith("content-type:"):
709+
content_type_header = header_line.split(":", 1)[1].strip()
710+
711+
if field_name is None:
712+
continue
713+
714+
# If it has a filename, it's a file upload — wrap as UploadFile
715+
# Otherwise it's a regular form field — decode to string
716+
if filename is not None:
717+
value: Any = UploadFile(content=body_section, filename=filename, content_type=content_type_header)
718+
else:
719+
value = body_section.decode("utf-8")
720+
721+
# Collect multiple values for same field name into a list
722+
if field_name in result:
723+
existing = result[field_name]
724+
if isinstance(existing, list):
725+
existing.append(value)
726+
else:
727+
result[field_name] = [existing, value]
728+
else:
729+
result[field_name] = value
730+
731+
return result
732+
733+
734+
def _extract_header_param(header_line: str, param_name: str) -> str | None:
735+
"""Extract a parameter value from a header line (e.g., name="file" from Content-Disposition)."""
736+
search = f'{param_name}="'
737+
idx = header_line.find(search)
738+
if idx == -1:
739+
return None
740+
start = idx + len(search)
741+
end = header_line.find('"', start)
742+
if end == -1:
743+
return None
744+
return header_line[start:end]

aws_lambda_powertools/event_handler/openapi/dependant.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
from aws_lambda_powertools.event_handler.openapi.params import (
1414
Body,
1515
Dependant,
16+
File,
1617
Form,
1718
Param,
1819
ParamTypes,
19-
_File,
2020
analyze_param,
2121
create_response_field,
2222
get_flat_dependant,
@@ -370,9 +370,9 @@ def get_body_field_info(
370370
if not required:
371371
body_field_info_kwargs["default"] = None
372372

373-
if any(isinstance(f.field_info, _File) for f in flat_dependant.body_params):
374-
# MAINTENANCE: body_field_info: type[Body] = _File
375-
raise NotImplementedError("_File fields are not supported in request bodies")
373+
if any(isinstance(f.field_info, File) for f in flat_dependant.body_params):
374+
body_field_info = Body
375+
body_field_info_kwargs["media_type"] = "multipart/form-data"
376376
elif any(isinstance(f.field_info, Form) for f in flat_dependant.body_params):
377377
body_field_info = Body
378378
body_field_info_kwargs["media_type"] = "application/x-www-form-urlencoded"

aws_lambda_powertools/event_handler/openapi/params.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,57 @@ def __init__(
829829
)
830830

831831

832-
class _File(Form): # type: ignore[misc]
832+
class UploadFile:
    """
    An uploaded file held in memory together with the metadata sent alongside it.

    Annotate a parameter as ``Annotated[UploadFile, File()]`` to receive the file
    content plus its filename and content type. When only the raw bytes are needed,
    annotate it as ``Annotated[bytes, File()]`` instead.

    ``len()`` of an instance is the size of ``content`` in bytes.

    Attributes
    ----------
    filename : str | None
        The original filename from the upload.
    content_type : str | None
        The MIME type declared by the client (e.g. ``image/jpeg``).
    content : bytes
        The raw file content.
    """

    __slots__ = ("content", "content_type", "filename")

    def __init__(self, *, content: bytes, filename: str | None = None, content_type: str | None = None):
        self.filename = filename
        self.content_type = content_type
        self.content = content

    def __len__(self) -> int:
        return len(self.content)

    def __repr__(self) -> str:
        size = len(self.content)
        return f"UploadFile(filename={self.filename!r}, content_type={self.content_type!r}, size={size})"

    @classmethod
    def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: Any) -> Any:
        # Imported lazily so pydantic_core is only needed when a schema is requested.
        from pydantic_core import core_schema

        # Instances are passed through unchanged on serialization.
        passthrough = core_schema.plain_serializer_function_ser_schema(lambda v: v, info_arg=False)
        return core_schema.no_info_plain_validator_function(cls._validate, serialization=passthrough)

    @classmethod
    def _validate(cls, v: Any) -> UploadFile:
        # Only already-constructed instances are accepted; nothing is coerced.
        if not isinstance(v, cls):
            raise ValueError(f"Expected UploadFile, got {type(v).__name__}")
        return v

    @classmethod
    def __get_pydantic_json_schema__(cls, _schema: Any, handler: Any) -> dict[str, Any]:
        # Advertised in JSON schema as a binary string.
        binary_string_schema: dict[str, Any] = {"type": "string", "format": "binary"}
        return binary_string_schema
880+
881+
882+
class File(Form): # type: ignore[misc]
833883
"""
834884
A class used to represent a file parameter in a path operation.
835885
"""

docs/core/event_handler/api_gateway.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,57 @@ You can use the `Form` type to tell the Event Handler that a parameter expects f
605605
--8<-- "examples/event_handler_rest/src/working_with_form_data.py"
606606
```
607607

608+
#### Handling file uploads
609+
610+
!!! info "You must set `enable_validation=True` to handle file uploads via type annotation."
611+
612+
You can use the `File` type to accept `multipart/form-data` file uploads. This automatically sets the correct OpenAPI schema, and Swagger UI will render a file picker for each `File()` parameter.
613+
614+
There are two ways to receive uploaded files:
615+
616+
* **`bytes`** — receive raw file content only
617+
* **`UploadFile`** — receive file content along with metadata (filename, content type)
618+
619+
=== "working_with_file_uploads.py"
620+
621+
```python hl_lines="4 12"
622+
--8<-- "examples/event_handler_rest/src/working_with_file_uploads.py"
623+
```
624+
625+
1. `File` is a special OpenAPI type for `multipart/form-data` file uploads. When annotated as `bytes`, you receive the raw file content.
626+
627+
=== "working_with_file_uploads_metadata.py"
628+
629+
```python hl_lines="4 11 15-16"
630+
--8<-- "examples/event_handler_rest/src/working_with_file_uploads_metadata.py"
631+
```
632+
633+
1. Using `UploadFile` instead of `bytes` gives you access to file metadata.
634+
2. `filename` and `content_type` come from the multipart headers sent by the client.
635+
636+
=== "working_with_file_uploads_mixed.py"
637+
638+
You can combine `File()` and `Form()` parameters in the same route to accept file uploads with additional form fields.
639+
640+
```python hl_lines="6 14-15"
641+
--8<-- "examples/event_handler_rest/src/working_with_file_uploads_mixed.py"
642+
```
643+
644+
1. File upload parameter — receives the uploaded file with metadata.
645+
2. Regular form field — receives a string value from the same multipart request.
646+
647+
!!! warning "API Gateway REST API (v1) requires Binary Media Types configuration"
648+
When using API Gateway REST API (v1), you must configure Binary Media Types to include `multipart/form-data`, otherwise binary file content will be corrupted.
649+
650+
```yaml title="SAM template.yaml"
651+
Globals:
652+
Api:
653+
BinaryMediaTypes:
654+
- "multipart~1form-data"
655+
```
656+
657+
API Gateway HTTP API (v2), Lambda Function URL, and ALB handle binary encoding automatically — no extra configuration needed.
658+
608659
#### Supported types for response serialization
609660

610661
With data validation enabled, we natively support serializing the following data types to JSON:

0 commit comments

Comments
 (0)