Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions google/cloud/storage/_media/_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,29 @@ def _process_upload_response(self, response):

.. _sans-I/O: https://sans-io.readthedocs.io/
"""
# Data corruption errors shouldn't be treated as invalid responses,
# so we handle them before the call to `_helpers.require_status_code`.
# If the response status is 400, we check for data corruption errors.
if response.status_code == 400:
    # A 400 body is not guaranteed to be a well-formed XML error document
    # (it may be empty, missing, or not XML at all). Parsing failures must
    # not escape from here: in that case we simply fall through so the
    # status-code check below can handle the response.
    try:
        root = ElementTree.fromstring(response.text)
    except (ElementTree.ParseError, TypeError):
        root = None

    if root is not None:
        # `findtext` returns None for a missing child instead of raising
        # AttributeError the way `root.find(tag).text` would.
        error_code = root.findtext("Code")
        error_message = root.findtext("Message")
        error_details = root.findtext("Details")
        # Only these error codes indicate a client/server checksum mismatch.
        if error_code in ("InvalidDigest", "BadDigest", "CrcMismatch"):
            raise DataCorruption(
                response,
                (
                    "Checksum mismatch: checksum calculated by client and"
                    " server did not match. Error code: {error_code},"
                    " Error message: {error_message},"
                    " Error details: {error_details}"
                ).format(
                    error_code=error_code,
                    error_message=error_message,
                    error_details=error_details,
                ),
            )

_helpers.require_status_code(
response,
(http.client.OK,),
Expand Down
9 changes: 9 additions & 0 deletions google/cloud/storage/_media/requests/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from google.cloud.storage._media import _upload
from google.cloud.storage._media.requests import _request_helpers
from google.cloud.storage._media import _helpers


class SimpleUpload(_request_helpers.RequestsMixin, _upload.SimpleUpload):
Expand Down Expand Up @@ -757,6 +758,14 @@ def upload(
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_upload_request()
if self._checksum_object is not None:
checksum_digest_in_base64 = _helpers.prepare_checksum_digest(
self._checksum_object.digest()
)
if self._checksum_type == "crc32c":
headers["X-Goog-Hash"] = f"crc32c={checksum_digest_in_base64}"
elif self._checksum_type == "md5":
headers["X-Goog-Hash"] = f"md5={checksum_digest_in_base64}"

# Wrap the request business logic in a function to be retried.
def retriable_request():
Expand Down
65 changes: 65 additions & 0 deletions tests/resumable_media/unit/requests/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@
UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"
PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"}
FILE_DATA = b"testdata" * 128
_HASH_HEADER = "x-goog-hash"
CRC32C_HASH_OF_FIRST_PART = "8hVqVQ=="
MD5_HASH_OF_FIRST_PART = "gfVZ4+0LdooJwGAkxLrCcg=="
DEFAULT_CONNECT_TIMEOUT = 61
DEFAULT_READ_TIMEOUT = 60


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -402,6 +407,66 @@ def test_mpu_part(filename):
assert part.etag == PARTS[1]


def test_mpu_part_with_md5_enabled(filename):
    """Uploading a part with ``checksum="md5"`` sends the MD5 in ``X-Goog-Hash``."""
    part = upload_mod.XMLMPUPart(
        EXAMPLE_XML_UPLOAD_URL,
        UPLOAD_ID,
        filename,
        start=0,
        end=128,
        part_number=1,
        checksum="md5",
    )

    # Fake transport whose response echoes the etag and the matching hash.
    response_headers = {
        "etag": PARTS[1],
        _HASH_HEADER: f"md5={MD5_HASH_OF_FIRST_PART}",
    }
    transport = mock.Mock(spec=["request"])
    transport.request.return_value = _make_response(headers=response_headers)

    part.upload(transport)

    # Exactly one PUT, carrying the part's bytes and the checksum header.
    expected_url = (
        f"{part.upload_url}?partNumber={part.part_number}&uploadId={UPLOAD_ID}"
    )
    expected_payload = FILE_DATA[part.start : part.end]
    expected_headers = {"X-Goog-Hash": f"md5={MD5_HASH_OF_FIRST_PART}"}
    transport.request.assert_called_once_with(
        "PUT",
        expected_url,
        data=expected_payload,
        headers=expected_headers,
        timeout=(DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT),
    )

    assert part.finished
    assert part.etag == PARTS[1]


def test_mpu_part_with_crc32c_enabled(filename):
    """Uploading a part with ``checksum="crc32c"`` sends the CRC32C in ``X-Goog-Hash``."""
    part = upload_mod.XMLMPUPart(
        EXAMPLE_XML_UPLOAD_URL,
        UPLOAD_ID,
        filename,
        start=0,
        end=128,
        part_number=1,
        checksum="crc32c",
    )

    # Fake transport whose response echoes the etag and the matching hash.
    response_headers = {
        "etag": PARTS[1],
        _HASH_HEADER: f"crc32c={CRC32C_HASH_OF_FIRST_PART}",
    }
    transport = mock.Mock(spec=["request"])
    transport.request.return_value = _make_response(headers=response_headers)

    part.upload(transport)

    # Exactly one PUT, carrying the part's bytes and the checksum header.
    expected_url = (
        f"{part.upload_url}?partNumber={part.part_number}&uploadId={UPLOAD_ID}"
    )
    expected_payload = FILE_DATA[part.start : part.end]
    expected_headers = {"X-Goog-Hash": f"crc32c={CRC32C_HASH_OF_FIRST_PART}"}
    transport.request.assert_called_once_with(
        "PUT",
        expected_url,
        data=expected_payload,
        headers=expected_headers,
        timeout=(DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT),
    )

    assert part.finished
    assert part.etag == PARTS[1]


def _make_response(status_code=http.client.OK, headers=None, text=None):
headers = headers or {}
return mock.Mock(
Expand Down
31 changes: 28 additions & 3 deletions tests/resumable_media/unit/test__upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@
UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"
PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"}
FILE_DATA = b"testdata" * 128
CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE = """<?xml version='1.0' encoding='UTF-8'?>
<Error>
<Code>{ERROR_CODE}</Code>
<Message>The MD5 you specified in Content-MD5 or x-goog-hash was invalid.</Message>
<Details>Invalid MD5 value: dfdfdfd==</Details>
</Error>"""
INVALID_MD5_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
ERROR_CODE="InvalidDigest"
)
INVALID_CRC32C_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
ERROR_CODE="BadDigest"
)
INCORRECT_LENGTH_CRC32C_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
ERROR_CODE="CrcMismatch"
)


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -1471,7 +1486,15 @@ def test_xml_mpu_part_invalid_response(filename):
part._process_upload_response(response)


def test_xml_mpu_part_checksum_failure(filename):
@pytest.mark.parametrize(
"error_scenarios",
[
INVALID_MD5_XML_RESPONSE,
INVALID_CRC32C_XML_RESPONSE,
INCORRECT_LENGTH_CRC32C_XML_RESPONSE,
],
)
def test_xml_mpu_part_checksum_failure(filename, error_scenarios):
PART_NUMBER = 1
START = 0
END = 256
Expand All @@ -1490,7 +1513,9 @@ def test_xml_mpu_part_checksum_failure(filename):
_fix_up_virtual(part)
part._prepare_upload_request()
response = _make_xml_response(
headers={"etag": ETAG, "x-goog-hash": "md5=Ojk9c3dhfxgoKVVHYwFbHQ=="}
status_code=http.client.BAD_REQUEST,
headers={"etag": ETAG, "x-goog-hash": "md5=Ojk9c3dhfxgoKVVHYwFbHQ=="},
text=error_scenarios,
) # Example md5 checksum but not the correct one
with pytest.raises(DataCorruption):
part._process_upload_response(response)
Expand Down Expand Up @@ -1555,7 +1580,7 @@ def _make_xml_response(status_code=http.client.OK, headers=None, text=None):
headers=headers,
status_code=status_code,
text=text,
spec=["headers", "status_code"],
spec=["headers", "status_code", "text"],
)


Expand Down