Skip to content

Commit dbb6cc6

Browse files
authored
feat: large file uploads (#1255)
Ports: - microsoft/playwright@a8d8062 (feat(chromium): large file uploads (#12860)) - microsoft/playwright@b010356 (fix(addInitScript): tolerate trailing comments (#13275)) Fixes #1211
1 parent 2c59e6b commit dbb6cc6

12 files changed

+290
-37
lines changed

playwright/_impl/_element_handle.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
from playwright._impl._api_structures import FilePayload, FloatRect, Position
2121
from playwright._impl._connection import ChannelOwner, from_nullable_channel
22-
from playwright._impl._file_chooser import normalize_file_payloads
2322
from playwright._impl._helper import (
23+
Error,
2424
KeyboardModifier,
2525
MouseButton,
2626
async_writefile,
@@ -33,6 +33,7 @@
3333
parse_result,
3434
serialize_argument,
3535
)
36+
from playwright._impl._set_input_files_helpers import convert_input_files
3637

3738
if sys.version_info >= (3, 8): # pragma: no cover
3839
from typing import Literal
@@ -190,8 +191,19 @@ async def set_input_files(
190191
noWaitAfter: bool = None,
191192
) -> None:
192193
params = locals_to_params(locals())
193-
params["files"] = await normalize_file_payloads(files)
194-
await self._channel.send("setInputFiles", params)
194+
frame = await self.owner_frame()
195+
if not frame:
196+
raise Error("Cannot set input files to detached element")
197+
converted = await convert_input_files(files, frame.page.context)
198+
if converted["files"] is not None:
199+
await self._channel.send(
200+
"setInputFiles", {**params, "files": converted["files"]}
201+
)
202+
else:
203+
await self._channel.send(
204+
"setInputFilePaths",
205+
locals_to_params({**params, **converted, "files": None}),
206+
)
195207

196208
async def focus(self) -> None:
197209
await self._channel.send("focus")

playwright/_impl/_file_chooser.py

-28
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,10 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import base64
16-
import os
1715
from pathlib import Path
1816
from typing import TYPE_CHECKING, List, Union
1917

2018
from playwright._impl._api_structures import FilePayload
21-
from playwright._impl._helper import async_readfile
2219

2320
if TYPE_CHECKING: # pragma: no cover
2421
from playwright._impl._element_handle import ElementHandle
@@ -56,28 +53,3 @@ async def set_files(
5653
noWaitAfter: bool = None,
5754
) -> None:
5855
await self._element_handle.set_input_files(files, timeout, noWaitAfter)
59-
60-
61-
async def normalize_file_payloads(
    files: Union[str, Path, FilePayload, List[Union[str, Path]], List[FilePayload]]
) -> List:
    """Turn paths and in-memory payloads into base64-encoded upload entries.

    A single item is treated as a one-element list. Path-like items are read
    from disk and named after their basename; payload dicts keep their
    ``name`` and ``mimeType`` and get their ``buffer`` base64-encoded.
    """
    entries = files if isinstance(files, list) else [files]
    payloads: List = []
    for entry in entries:
        if not isinstance(entry, (str, Path)):
            # In-memory payload: forward the metadata, encode the bytes.
            payloads.append(
                {
                    "name": entry["name"],
                    "mimeType": entry["mimeType"],
                    "buffer": base64.b64encode(entry["buffer"]).decode(),
                }
            )
            continue
        # Path on disk: only name and content are sent, no mimeType.
        file_name = os.path.basename(entry)
        content = await async_readfile(entry)
        payloads.append(
            {"name": file_name, "buffer": base64.b64encode(content).decode()}
        )
    return payloads

playwright/_impl/_frame.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
)
2929
from playwright._impl._element_handle import ElementHandle, convert_select_option_values
3030
from playwright._impl._event_context_manager import EventContextManagerImpl
31-
from playwright._impl._file_chooser import normalize_file_payloads
3231
from playwright._impl._helper import (
3332
DocumentLoadState,
3433
FrameNavigatedEvent,
@@ -48,6 +47,7 @@
4847
)
4948
from playwright._impl._locator import FrameLocator, Locator
5049
from playwright._impl._network import Response
50+
from playwright._impl._set_input_files_helpers import convert_input_files
5151
from playwright._impl._wait_helper import WaitHelper
5252

5353
if sys.version_info >= (3, 8): # pragma: no cover
@@ -598,8 +598,16 @@ async def set_input_files(
598598
noWaitAfter: bool = None,
599599
) -> None:
600600
params = locals_to_params(locals())
601-
params["files"] = await normalize_file_payloads(files)
602-
await self._channel.send("setInputFiles", params)
601+
converted = await convert_input_files(files, self.page.context)
602+
if converted["files"] is not None:
603+
await self._channel.send(
604+
"setInputFiles", {**params, "files": converted["files"]}
605+
)
606+
else:
607+
await self._channel.send(
608+
"setInputFilePaths",
609+
locals_to_params({**params, **converted, "files": None}),
610+
)
603611

604612
async def type(
605613
self,

playwright/_impl/_object_factory.py

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from playwright._impl._selectors import SelectorsOwner
3434
from playwright._impl._stream import Stream
3535
from playwright._impl._tracing import Tracing
36+
from playwright._impl._writable_stream import WritableStream
3637

3738

3839
class DummyObject(ChannelOwner):
@@ -89,6 +90,8 @@ def create_remote_object(
8990
return WebSocket(parent, type, guid, initializer)
9091
if type == "Worker":
9192
return Worker(parent, type, guid, initializer)
93+
if type == "WritableStream":
94+
return WritableStream(parent, type, guid, initializer)
9295
if type == "Selectors":
9396
return SelectorsOwner(parent, type, guid, initializer)
9497
return DummyObject(parent, type, guid, initializer)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import base64
2+
import os
3+
import sys
4+
from pathlib import Path
5+
from typing import TYPE_CHECKING, List, Optional, Union
6+
7+
if sys.version_info >= (3, 8): # pragma: no cover
8+
from typing import TypedDict
9+
else: # pragma: no cover
10+
from typing_extensions import TypedDict
11+
12+
from playwright._impl._connection import Channel, from_channel
13+
from playwright._impl._helper import Error, async_readfile
14+
from playwright._impl._writable_stream import WritableStream
15+
16+
if TYPE_CHECKING: # pragma: no cover
17+
from playwright._impl._browser_context import BrowserContext
18+
19+
from playwright._impl._api_structures import FilePayload
20+
21+
# Threshold (50 MiB) compared against in-memory buffer lengths and on-disk
# file sizes when deciding how input files are handed to the driver.
SIZE_LIMIT_IN_BYTES = 50 * 1024 * 1024
22+
23+
24+
class InputFilesList(TypedDict):
    """Result of ``convert_input_files``; exactly one field is non-``None``."""

    # Channels of WritableStream objects the files were copied into.
    streams: Optional[List[Channel]]
    # Absolute paths of the files on the local machine.
    localPaths: Optional[List[str]]
    # Inline payloads carrying base64-encoded file content.
    files: Optional[List[FilePayload]]
28+
29+
30+
async def convert_input_files(
    files: Union[str, Path, FilePayload, List[Union[str, Path]], List[FilePayload]],
    context: "BrowserContext",
) -> InputFilesList:
    """Convert user-supplied files into the form sent with ``setInputFiles``.

    A single item is treated as a one-element list. The result has exactly
    one non-``None`` field:

    * ``files`` - base64 payloads, used when no path points at a file larger
      than ``SIZE_LIMIT_IN_BYTES``;
    * ``streams`` - channels of temp-file streams the files were copied into,
      used for large files when the connection is remote;
    * ``localPaths`` - absolute paths, used for large files otherwise.

    Args:
        files: Path(s) and/or in-memory ``FilePayload`` dict(s).
        context: Browser context whose channel is used to create temp files.

    Raises:
        Error: If an in-memory buffer exceeds ``SIZE_LIMIT_IN_BYTES``, or if a
            large file is passed together with in-memory payloads.
        OSError: If a given path cannot be stat'ed.
    """
    file_list = files if isinstance(files, list) else [files]

    has_large_buffer = any(
        len(f.get("buffer", "")) > SIZE_LIMIT_IN_BYTES
        for f in file_list
        if not isinstance(f, (str, Path))
    )
    if has_large_buffer:
        raise Error(
            "Cannot set buffer larger than 50Mb, please write it to a file and pass its path instead."
        )

    paths = [f for f in file_list if isinstance(f, (str, Path))]
    # Stat every path eagerly (no short-circuit) so that a missing file is
    # reported here regardless of its position in the list.
    sizes = [os.stat(p).st_size for p in paths]
    if not any(size > SIZE_LIMIT_IN_BYTES for size in sizes):
        return InputFilesList(
            streams=None, localPaths=None, files=await _normalize_file_payloads(files)
        )

    # The large-file transports only carry paths. Validate explicitly instead
    # of with ``assert`` (which is stripped under ``python -O``), and do it
    # before any temp file is created on the remote side.
    if len(paths) != len(file_list):
        raise Error("File paths cannot be mixed with buffers")

    if context._channel._connection.is_remote:
        streams = []
        for file in paths:
            stream: WritableStream = from_channel(
                await context._channel.send(
                    "createTempFile", {"name": os.path.basename(file)}
                )
            )
            await stream.copy(file)
            streams.append(stream._channel)
        return InputFilesList(streams=streams, localPaths=None, files=None)

    # The driver is handed absolute, symlink-resolved paths.
    local_paths = [str(Path(p).absolute().resolve()) for p in paths]
    return InputFilesList(streams=None, localPaths=local_paths, files=None)
77+
78+
79+
async def _normalize_file_payloads(
    files: Union[str, Path, FilePayload, List[Union[str, Path]], List[FilePayload]]
) -> List:
    """Build the inline, base64-encoded payload list for ``setInputFiles``.

    A single item is treated as a one-element list. Path-like items are read
    from disk and named after their basename; payload dicts keep their
    ``name`` and ``mimeType`` and get their ``buffer`` base64-encoded.
    """
    entries = files if isinstance(files, list) else [files]
    payloads: List = []
    for entry in entries:
        if not isinstance(entry, (str, Path)):
            # In-memory payload: forward the metadata, encode the bytes.
            payloads.append(
                {
                    "name": entry["name"],
                    "mimeType": entry["mimeType"],
                    "buffer": base64.b64encode(entry["buffer"]).decode(),
                }
            )
            continue
        # Path on disk: only name and content are sent, no mimeType.
        file_name = os.path.basename(entry)
        content = await async_readfile(entry)
        payloads.append(
            {"name": file_name, "buffer": base64.b64encode(content).decode()}
        )
    return payloads

playwright/_impl/_writable_stream.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright (c) Microsoft Corporation.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import base64
16+
import os
17+
from pathlib import Path
18+
from typing import Dict, Union
19+
20+
from playwright._impl._connection import ChannelOwner
21+
22+
# Chunk size used when streaming a file to the driver. The values mirror
# COPY_BUFSIZE from shutil.py in the standard library: 1 MiB on Windows,
# 64 KiB everywhere else.
_WINDOWS = os.name == "nt"
COPY_BUFSIZE = (1024 * 1024) if _WINDOWS else (64 * 1024)
25+
26+
27+
class WritableStream(ChannelOwner):
    """Channel owner used to copy a local file to the driver in chunks."""

    def __init__(
        self, parent: ChannelOwner, type: str, guid: str, initializer: Dict
    ) -> None:
        super().__init__(parent, type, guid, initializer)

    async def copy(self, path: Union[str, Path]) -> None:
        """Send the file at *path* as base64 ``write`` messages, then ``close``.

        The file is read ``COPY_BUFSIZE`` bytes at a time so it is never held
        in memory as a whole.
        """
        with open(path, "rb") as source:
            # iter() with a sentinel stops at the first empty read, i.e. EOF.
            for chunk in iter(lambda: source.read(COPY_BUFSIZE), b""):
                encoded = base64.b64encode(chunk).decode()
                await self._channel.send("write", {"binary": encoded})
        await self._channel.send("close")

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
InWheel = None
3131
from wheel.bdist_wheel import bdist_wheel as BDistWheelCommand
3232

33-
driver_version = "1.21.0-beta-1649712128000"
33+
driver_version = "1.21.0"
3434

3535

3636
def extractall(zip: zipfile.ZipFile, path: str) -> None:

tests/assets/input/fileupload.html

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
<title>File upload test</title>
55
</head>
66
<body>
7-
<form action="/input/fileupload.html">
8-
<input type="file">
7+
<form action="/upload" method="post" enctype="multipart/form-data">
8+
<input type="file" name="file1">
99
<input type="submit">
1010
</form>
1111
</body>

tests/async/test_add_init_script.py

+7
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,10 @@ async def test_add_init_script_support_multiple_scripts(page):
7474
await page.goto("data:text/html,<script>window.result = window.injected</script>")
7575
assert await page.evaluate("window.script1") == 1
7676
assert await page.evaluate("window.script2") == 2
77+
78+
79+
async def test_should_work_with_trailing_comments(page):
    """An init script ending in a line comment must not break later scripts."""
    for script in ("// comment", "window.secret = 42;"):
        await page.add_init_script(script)
    await page.goto("data:text/html,<html></html>")
    secret = await page.evaluate("secret")
    assert secret == 42

tests/async/test_browsertype_connect.py

+57
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import asyncio
16+
import re
1517
from typing import Callable
1618

1719
import pytest
@@ -241,3 +243,58 @@ async def handle_request(route: Route) -> None:
241243
assert await response.json() == {"foo": "bar"}
242244

243245
remote.kill()
246+
247+
248+
@pytest.mark.only_browser("chromium")
async def test_should_upload_large_file(
    browser_type: BrowserType,
    launch_server: Callable[[], RemoteServer],
    playwright: Playwright,
    server: Server,
    tmp_path,
):
    """Upload a 200 MiB file through a remote connection and verify the result.

    Checks the file name seen by the page, the input/change events fired on
    the element, and the multipart body received by the test server.
    """
    remote = launch_server()

    browser = await browser_type.connect(remote.ws_endpoint)
    context = await browser.new_context()
    page = await context.new_page()

    await page.goto(server.PREFIX + "/input/fileupload.html")

    # Build the 200 MiB fixture from 1 KiB chunks.
    large_file_path = tmp_path / "200MB.zip"
    data = b"A" * 1024
    total_size = 200 * 1024 * 1024
    with large_file_path.open("wb") as f:
        for _ in range(total_size // len(data)):
            f.write(data)

    file_input = page.locator('input[type="file"]')
    # Record the DOM events in a page-side array that can be read back later.
    events = await file_input.evaluate_handle(
        """
        e => {
            const events = [];
            e.addEventListener('input', () => events.push('input'));
            e.addEventListener('change', () => events.push('change'));
            return events;
        }
        """
    )

    await file_input.set_input_files(large_file_path)
    uploaded_name = await file_input.evaluate("e => e.files[0].name")
    assert uploaded_name == "200MB.zip"
    fired = await events.evaluate("e => e")
    assert fired == ["input", "change"]

    [request, _] = await asyncio.gather(
        server.wait_for_request("/upload"),
        page.click("input[type=submit]"),
    )

    contents = request.args[b"file1"][0]
    assert len(contents) == 200 * 1024 * 1024
    assert contents[:1024] == data
    # flake8: noqa: E203
    assert contents[len(contents) - 1024 :] == data

    disposition = re.search(
        rb'^.*Content-Disposition: form-data; name="(?P<name>.*)"; filename="(?P<filename>.*)".*$',
        request.post_body,
        re.MULTILINE,
    )
    assert disposition.group("name") == b"file1"
    assert disposition.group("filename") == b"200MB.zip"

0 commit comments

Comments
 (0)