Skip to content

Commit 8dba957

Browse files
authored
Crc32c checksum validation and retry of archive downloads (#3546)
1 parent 27da43e commit 8dba957

File tree

10 files changed

+615
-44
lines changed

10 files changed

+615
-44
lines changed

lib/src/crc32c.dart

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// Computes a CRC32C checksum incrementally.
///
/// Feed bytes with [update] (as many times as needed) and read the resulting
/// checksum with [finalize].
class Crc32c {
  /// All 32 low bits set.
  ///
  /// Used both as the initial register value and as the value the register is
  /// XOR-ed (and truncated) with when finalizing.
  static const int mask = 0xFFFFFFFF;

  /// The running (not yet inverted) CRC register.
  int _current = mask;

  /// Folds every byte of [data] into the running checksum.
  ///
  /// Only the low 8 bits of each element are used.
  // Algorithm based on https://en.wikipedia.org/wiki/Cyclic_redundancy_check
  void update(List<int> data) {
    for (final byte in data) {
      final lookupIndex = (_current ^ byte) & 0xff;
      _current = (_current >> 8) ^ _crcTable[lookupIndex];
    }
  }

  /// Returns the checksum of all data passed to [update] so far.
  ///
  /// This only reads the running state, so it can be called more than once.
  int finalize() {
    // Finalize the CRC-32 value by inverting all the bits. The parentheses
    // matter: `&` binds tighter than `^`, so without them the truncation to
    // 32 bits would be applied to `mask` instead of to the inverted value.
    return (_current ^ mask) & mask;
  }

  /// Consumes the entirety of [stream] and returns the CRC32C checksum of its
  /// data once the stream is finished.
  static Future<int> computeByConsumingStream(Stream<List<int>> stream) async {
    final checksumComputer = Crc32c();

    await for (final chunk in stream) {
      checksumComputer.update(chunk);
    }

    return checksumComputer.finalize();
  }
}

// Lookup table indexed by `(register ^ byte) & 0xff` in [Crc32c.update].
//
// Generated by ./pycrc.py --algorithm=table-driven --model=crc-32c --generate=c
// See: https://pycrc.org/
const _crcTable = [
  0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, //
  0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
  0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
  0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
  0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
  0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
  0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
  0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
  0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
  0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
  0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
  0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
  0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
  0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
  0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
  0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
  0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
  0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
  0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
  0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
  0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
  0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
  0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
  0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
  0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
  0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
  0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
  0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
  0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
  0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
  0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
  0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
  0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
  0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
  0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
  0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
  0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
  0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
  0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
  0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
  0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
  0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
  0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
  0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
  0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
  0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
  0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
  0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
  0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
  0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
  0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
  0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
  0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
  0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
  0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
  0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
  0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
  0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
  0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
  0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
  0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
  0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
  0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
  0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351
];

lib/src/exceptions.dart

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@ class PackageNotFoundException extends WrappedException {
104104
String toString() => 'Package not available ($message).';
105105
}
106106

107+
/// Thrown when the integrity of a downloaded package cannot be established,
/// i.e. its checksum could not be validated.
///
/// [innerError] and [innerTrace] optionally carry the underlying failure and
/// are forwarded, together with [message], to [WrappedException].
class PackageIntegrityException extends WrappedException {
  PackageIntegrityException(String message,
      {Object? innerError, StackTrace? innerTrace})
      : super(message, innerError, innerTrace);
}
115+
107116
/// Returns whether [error] is a user-facing error object.
108117
///
109118
/// This includes both [ApplicationException] and any dart:io errors.

lib/src/io.dart

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ List<int> readBinaryFile(String file) {
172172
}
173173

174174
/// Reads the contents of the binary file [file] as a [Stream].
175-
Stream<List<int>> readBinaryFileAsSream(String file) {
175+
Stream<List<int>> readBinaryFileAsStream(String file) {
176176
log.io('Reading binary file $file.');
177177
var contents = File(file).openRead();
178178
return contents;

lib/src/source/hosted.dart

Lines changed: 127 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,19 @@
55
import 'dart:async';
66
import 'dart:convert';
77
import 'dart:io' as io;
8+
import 'dart:math' as math;
9+
import 'dart:typed_data';
810

911
import 'package:collection/collection.dart'
1012
show maxBy, IterableNullableExtension;
1113
import 'package:http/http.dart' as http;
14+
import 'package:meta/meta.dart';
1215
import 'package:path/path.dart' as p;
1316
import 'package:pub_semver/pub_semver.dart';
1417
import 'package:stack_trace/stack_trace.dart';
1518

1619
import '../authentication/client.dart';
20+
import '../crc32c.dart';
1721
import '../exceptions.dart';
1822
import '../http.dart';
1923
import '../io.dart';
@@ -872,27 +876,53 @@ class HostedSource extends CachedSource {
872876
'Package $packageName has no version $version');
873877
}
874878

875-
var url = versionInfo.archiveUrl;
876-
log.io('Get package from $url.');
879+
final archiveUrl = versionInfo.archiveUrl;
880+
log.io('Get package from $archiveUrl.');
877881
log.message('Downloading ${log.bold(id.name)} ${id.version}...');
878882

879883
// Download and extract the archive to a temp directory.
880884
await withTempDir((tempDirForArchive) async {
881-
var archivePath =
882-
p.join(tempDirForArchive, '$packageName-$version.tar.gz');
883-
var response = await withAuthenticatedClient(
884-
cache,
885-
Uri.parse(description.url),
886-
(client) => client.send(http.Request('GET', url)));
887-
888-
// We download the archive to disk instead of streaming it directly into
889-
// the tar unpacking. This simplifies stream handling.
890-
// Package:tar cancels the stream when it reaches end-of-archive, and
891-
// cancelling a http stream makes it not reusable.
892-
// There are ways around this, and we might revisit this later.
893-
await createFileFromStream(response.stream, archivePath);
885+
var fileName = '$packageName-$version.tar.gz';
886+
var archivePath = p.join(tempDirForArchive, fileName);
887+
888+
// The client from `withAuthenticatedClient` will retry HTTP requests.
889+
// This wrapper is one layer up and will retry checksum validation errors.
890+
await retry(
891+
// Attempt to download archive and validate its checksum.
892+
() async {
893+
final request = http.Request('GET', archiveUrl);
894+
final response = await withAuthenticatedClient(cache,
895+
Uri.parse(description.url), (client) => client.send(request));
896+
final expectedChecksum = _parseCrc32c(response.headers, fileName);
897+
898+
Stream<List<int>> stream = response.stream;
899+
if (expectedChecksum != null) {
900+
stream = _validateStream(
901+
response.stream, expectedChecksum, id, archiveUrl);
902+
}
903+
904+
// We download the archive to disk instead of streaming it directly
905+
// into the tar unpacking. This simplifies stream handling.
906+
// Package:tar cancels the stream when it reaches end-of-archive, and
907+
// cancelling a http stream makes it not reusable.
908+
// There are ways around this, and we might revisit this later.
909+
await createFileFromStream(stream, archivePath);
910+
},
911+
// Retry if the checksum response header was malformed or the actual
912+
// checksum did not match the expected checksum.
913+
retryIf: (e) => e is PackageIntegrityException,
914+
onRetry: (e, retryCount) => log
915+
.io('Retry #${retryCount + 1} because of checksum error with GET '
916+
'$archiveUrl...'),
917+
maxAttempts: math.max(
918+
1, // Having less than 1 attempt doesn't make sense.
919+
int.tryParse(io.Platform.environment['PUB_MAX_HTTP_RETRIES'] ?? '') ??
920+
7,
921+
),
922+
);
923+
894924
var tempDir = cache.createTempDir();
895-
await extractTarGz(readBinaryFileAsSream(archivePath), tempDir);
925+
await extractTarGz(readBinaryFileAsStream(archivePath), tempDir);
896926

897927
// Now that the get has succeeded, move it to the real location in the
898928
// cache.
@@ -1121,3 +1151,84 @@ class _RefAndCache {
11211151
@override
11221152
bool operator ==(Object other) => other is _RefAndCache && other.ref == ref;
11231153
}
1154+
1155+
/// Name of the response header that is looked up to find the archive's
/// checksum(s).
@visibleForTesting
const String checksumHeaderName = 'x-goog-hash';
1157+
1158+
/// Wraps [stream] in a pass-through stream that verifies a CRC32C checksum.
///
/// Every chunk received from [stream] is folded into a running CRC32C and
/// then re-emitted unchanged, so the returned stream is what callers should
/// consume. When [stream] is exhausted, the computed checksum is logged and
/// compared against [expectedChecksum].
///
/// [id] and [archiveUrl] are only used for log and error messages.
///
/// Throws [PackageIntegrityException] (as an error on the returned stream,
/// after all chunks have been emitted) if there is a checksum mismatch.
Stream<List<int>> _validateStream(Stream<List<int>> stream,
    int expectedChecksum, PackageId id, Uri archiveUrl) async* {
  final runningChecksum = Crc32c();

  await for (final bytes in stream) {
    runningChecksum.update(bytes);
    yield bytes;
  }

  final actualChecksum = runningChecksum.finalize();

  log.fine(
      'Computed checksum $actualChecksum for ${id.name} ${id.version} with '
      'expected CRC32C of $expectedChecksum.');

  // Nothing more to do when the download is intact.
  if (actualChecksum == expectedChecksum) return;

  throw PackageIntegrityException(
      'Package archive for ${id.name} ${id.version} downloaded from '
      '"$archiveUrl" has "x-goog-hash: crc32c=$expectedChecksum", which '
      'doesn\'t match the checksum of the archive downloaded.');
}
1188+
1189+
/// Parses response [headers] and returns the archive's CRC32C checksum, or
/// `null` if the headers do not contain one.
///
/// In most cases, GCS provides both MD5 and CRC32C checksums in its response
/// headers. It uses the header name "x-goog-hash" for these values. It has
/// been documented and observed that GCS will send multiple response headers
/// with the same "x-goog-hash" token as the key.
/// https://cloud.google.com/storage/docs/xml-api/reference-headers#xgooghash
///
/// Additionally, when the Dart http client encounters multiple response
/// headers with the same key, it concatenates their values with a comma
/// before inserting a single item with that key and concatenated value into
/// its response "headers" Map.
/// See https://github.com/dart-lang/http/issues/24
/// https://github.com/dart-lang/http/blob/06649afbb5847dbb0293816ba8348766b116e419/pkgs/http/lib/src/base_response.dart#L29
///
/// Whitespace around the comma-separated items is ignored, so a value such
/// as `md5=..., crc32c=...` is handled as well.
///
/// [fileName] is only used in the error message.
///
/// Throws [PackageIntegrityException] if the CRC32C checksum cannot be parsed
/// (invalid base64 or not exactly 4 bytes long).
int? _parseCrc32c(Map<String, String> headers, String fileName) {
  final checksumHeader = headers[checksumHeaderName];
  if (checksumHeader == null) return null;

  const prefix = 'crc32c=';

  for (final rawPart in checksumHeader.split(',')) {
    // List-valued headers may carry optional whitespace around each item;
    // without trimming, the checksum would be silently ignored (leading
    // space) or fail to decode (trailing space).
    final part = rawPart.trim();
    if (!part.startsWith(prefix)) continue;

    final undecoded = part.substring(prefix.length);

    try {
      final bytes = base64Decode(undecoded);

      // CRC32C must be 32 bits, or 4 bytes.
      if (bytes.length != 4) {
        throw FormatException('CRC32C checksum has invalid length', bytes);
      }

      // `sublistView` respects the offset/length of `bytes` within its
      // backing buffer. The value is read big-endian (the default).
      return ByteData.sublistView(bytes).getUint32(0);
    } on FormatException catch (e, s) {
      throw PackageIntegrityException(
          'Package archive "$fileName" has a malformed CRC32C checksum in '
          'its response headers',
          innerError: e,
          innerTrace: s);
    }
  }

  return null;
}

lib/src/utils.dart

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,3 +638,63 @@ Map<K2, V2> mapMap<K1, V1, K2, V2>(
638638
key(entry.key, entry.value): value(entry.key, entry.value),
639639
};
640640
}
641+
642+
/// Calls [fn], retrying when it throws an [Exception] for which [retryIf]
/// returns `true`, making at most [maxAttempts] attempts in total.
///
/// Between attempts this sleeps for
/// `delayFactor * 2^attempt * (1 +/- randomizationFactor)`, capped at
/// [maxDelay]. With the defaults (8 attempts) that means sleeping as follows
/// after the 1st, 2nd, 3rd, ..., 7th attempt:
///  1. 400 ms +/- 25%
///  2. 800 ms +/- 25%
///  3. 1600 ms +/- 25%
///  4. 3200 ms +/- 25%
///  5. 6400 ms +/- 25%
///  6. 12800 ms +/- 25%
///  7. 25600 ms +/- 25%
///
/// ```dart
/// final response = await retry(
///   // Make a GET request
///   () => http.get('https://google.com').timeout(Duration(seconds: 5)),
///   // Retry on SocketException or TimeoutException
///   retryIf: (e) => e is SocketException || e is TimeoutException,
/// );
/// print(response.body);
/// ```
///
/// If no [retryIf] function is given this will retry for any [Exception]
/// thrown. To retry on an [Error], the error must be caught and _rethrown_
/// as an [Exception].
///
/// If given, [onRetry] is invoked (and awaited) before each sleep with the
/// exception that triggered the retry and the number of attempts made so far.
///
/// Once the attempts are exhausted, or [retryIf] returns `false`, the last
/// exception is rethrown.
///
/// See https://github.com/google/dart-neats/blob/master/retry/lib/retry.dart
Future<T> retry<T>(
  FutureOr<T> Function() fn, {
  Duration delayFactor = const Duration(milliseconds: 200),
  double randomizationFactor = 0.25,
  Duration maxDelay = const Duration(seconds: 30),
  int maxAttempts = 8,
  FutureOr<bool> Function(Exception)? retryIf,
  FutureOr<void> Function(Exception, int retryCount)? onRetry,
}) async {
  // The first invocation counts as attempt number 1.
  for (var attempt = 1;; attempt++) {
    try {
      return await fn();
    } on Exception catch (error) {
      // Out of attempts: give up without consulting [retryIf].
      if (attempt >= maxAttempts) rethrow;
      // The caller does not consider this exception retryable.
      if (retryIf != null && !(await retryIf(error))) rethrow;

      if (onRetry != null) {
        await onRetry(error, attempt);
      }
    }

    // Exponential back-off with jitter, never longer than [maxDelay].
    final jitter = randomizationFactor * (random.nextDouble() * 2 - 1) + 1;
    final exponent = math.min(attempt, 31); // prevent overflows.
    final backoff = delayFactor * math.pow(2.0, exponent) * jitter;
    await Future.delayed(backoff < maxDelay ? backoff : maxDelay);
  }
}

test/embedding/embedding_test.dart

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,18 @@ String _filter(String input) {
357357
RegExp(r'Writing \d+ characters', multiLine: true),
358358
r'Writing $N characters',
359359
)
360+
.replaceAll(
361+
RegExp(r'x-goog-hash(.*)$', multiLine: true),
362+
r'x-goog-hash: $CHECKSUM_HEADER',
363+
)
364+
.replaceAll(
365+
RegExp(
366+
r'Computed checksum \d+ for foo 1.0.0 with expected CRC32C of '
367+
r'\d+\.',
368+
multiLine: true),
369+
r'Computed checksum $CRC32C for foo 1.0.0 with expected CRC32C of '
370+
r'$CRC32C.',
371+
)
360372

361373
/// TODO(sigurdm): This hack suppresses differences in stack-traces
362374
/// between dart 2.17 and 2.18. Remove when 2.18 is stable.

0 commit comments

Comments
 (0)