Skip to content

Commit dc97530

Browse files
authored
Speedup package:crypto (with a focus on md5) (#892)
1 parent 66348be commit dc97530

File tree

5 files changed

+281
-79
lines changed

5 files changed

+281
-79
lines changed

pkgs/crypto/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
## 3.0.7-wip
22

33
- Run `dart format` with the new style.
4+
- Performance improvements.
45

56
## 3.0.6
67

pkgs/crypto/benchmark/benchmark.dart

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:io' show exit, stderr;
6+
import 'dart:typed_data';
7+
8+
import 'package:convert/convert.dart';
9+
import 'package:crypto/crypto.dart';
10+
11+
/// Runs the hashing benchmark from the command line.
///
/// Recognized arguments:
///
/// * A hash name (`md5`, `sha1`, `sha224`, `sha256`, `sha384`, `sha512`,
///   `sha512224` or `sha512256`) selects the function to benchmark. At most
///   one may be given; `md5` is used when none is.
/// * `--custom=<mb>` hashes a single buffer of `<mb>` megabytes once, prints
///   the result and skips the standard suite.
///
/// An unknown argument, a second hash name, or a `--custom` value that is not
/// a non-negative integer writes a message to stderr and exits with status 1.
void main(List<String> args) {
  Hash? function;
  int? customSize;

  // Records the selected hash function, refusing a second selection.
  void setFunction(Hash newFunction, String message) {
    if (function != null) {
      stderr.writeln('Hash function already set.');
      exit(1);
    }
    function = newFunction;
    print('Using hash function $message');
  }

  for (var arg in args) {
    if (arg == 'md5') {
      setFunction(md5, 'md5');
    } else if (arg == 'sha1') {
      setFunction(sha1, 'sha1');
    } else if (arg == 'sha256') {
      setFunction(sha256, 'sha256');
    } else if (arg == 'sha224') {
      setFunction(sha224, 'sha224');
    } else if (arg == 'sha384') {
      setFunction(sha384, 'sha384');
    } else if (arg == 'sha512') {
      setFunction(sha512, 'sha512');
    } else if (arg == 'sha512224') {
      setFunction(sha512224, 'sha512/224');
    } else if (arg == 'sha512256') {
      setFunction(sha512256, 'sha512/256');
    } else if (arg.startsWith('--custom=')) {
      // Validate here so that a typo is reported like any other bad argument
      // instead of surfacing as an uncaught FormatException (non-numeric
      // input) or a failure while allocating the buffer (negative size).
      var parsedSize = int.tryParse(arg.substring('--custom='.length));
      if (parsedSize == null || parsedSize < 0) {
        stderr.writeln(
            'Invalid size in "$arg": expected a non-negative integer '
            'number of megabytes.');
        exit(1);
      }
      customSize = parsedSize;
    } else {
      stderr.writeln('Unknown argument: $arg');
      exit(1);
    }
  }
  if (function == null) {
    setFunction(md5, 'md5');
  }

  // `function` is assigned inside the `setFunction` closure, so the analyzer
  // cannot promote it; assert non-null once instead of at every call below.
  final hash = function!;

  if (customSize != null) {
    doIterationsChunk(hash, mb: customSize, iterations: 1, doPrint: true);
    return;
  }

  // Warmup.
  doIterationsChunk(hash, mb: 1, iterations: 100, doPrint: false);

  // Benchmarks.
  print('One chunk input');
  doIterationsChunk(hash, mb: 1, iterations: 1000, doPrint: true);
  doIterationsChunk(hash, mb: 10, iterations: 100, doPrint: true);
  doIterationsChunk(hash, mb: 100, iterations: 10, doPrint: true);
  doIterationsChunk(hash, mb: 1000, iterations: 1, doPrint: true);

  print('');
  print('Add in 1024 byte chunks:');
  doIterationsSmallChunks(hash,
      chunkSize: 1024, mb: 1, iterations: 1000, doPrint: true);

  print('');
  print('Add in 100 byte chunks:');
  doIterationsSmallChunks(hash,
      chunkSize: 100, mb: 1, iterations: 1000, doPrint: true);

  print('');
  print('Add in 4 byte chunks:');
  doIterationsSmallChunks(hash,
      chunkSize: 4, mb: 1, iterations: 1000, doPrint: true);
}
82+
83+
/// Times hashing of one zero-filled buffer of [mb] megabytes.
///
/// The same buffer is passed to [hashChunk] [iterations] times. When
/// [doPrint] is true the collected timings are handed to [printStats];
/// otherwise they are discarded (useful for warmup runs).
void doIterationsChunk(Hash function,
    {required int mb, required int iterations, required bool doPrint}) {
  final payload = Uint8List(1024 * 1024 * mb);
  final timings = <double>[
    for (var run = 0; run < iterations; run++) hashChunk(payload, function),
  ];
  if (!doPrint) return;
  printStats(timings, payload.length, iterations);
}
94+
95+
/// Times hashing of roughly [mb] megabytes fed in pieces of [chunkSize] bytes.
///
/// A single zero-filled buffer of [chunkSize] bytes is added repeatedly (as
/// many whole times as fit into [mb] megabytes) by [hashSmallChunks], and the
/// whole run is repeated [iterations] times. When [doPrint] is true the
/// timings are handed to [printStats] together with the number of bytes
/// actually hashed per run.
void doIterationsSmallChunks(Hash function,
    {required int chunkSize,
    required int mb,
    required int iterations,
    required bool doPrint}) {
  final piece = Uint8List(chunkSize);
  // Whole pieces per run; any remainder below one piece is dropped.
  final addsPerRun = mb * 1024 * 1024 ~/ chunkSize;
  final timings = <double>[
    for (var run = 0; run < iterations; run++)
      hashSmallChunks(piece, addsPerRun, function),
  ];
  if (!doPrint) return;
  printStats(timings, piece.length * addsPerRun, iterations);
}
110+
111+
/// Hashes [data] in one call and returns the elapsed time in milliseconds.
///
/// Only the call to `convert` is timed. The digest is inspected afterwards so
/// that the result is actually used.
double hashChunk(Uint8List data, Hash function) {
  final timer = Stopwatch();
  timer.start();
  final digest = function.convert(data);
  timer.stop();

  if (digest.bytes.isEmpty) {
    throw StateError('This should never happen');
  }
  return timer.elapsedMicroseconds / 1000;
}
118+
119+
/// Hashes [data] added [addTimes] times through a chunked conversion and
/// returns the elapsed time in milliseconds.
///
/// The timed region covers creating the sinks, every `add`, the `close` and
/// reading the single resulting digest.
double hashSmallChunks(Uint8List data, int addTimes, Hash function) {
  final timer = Stopwatch();
  timer.start();

  final digests = AccumulatorSink<Digest>();
  final sink = function.startChunkedConversion(digests);
  var remaining = addTimes;
  while (remaining > 0) {
    sink.add(data);
    remaining--;
  }
  sink.close();
  final digest = digests.events.single;

  timer.stop();

  if (digest.bytes.isEmpty) {
    throw StateError('This should never happen');
  }
  return timer.elapsedMicroseconds / 1000;
}
135+
136+
/// Prints the average and median throughput of a series of timed runs.
///
/// [runtimesInMs] holds one duration in milliseconds per run and [dataLength]
/// is the number of bytes hashed in each run. Throughput is reported in
/// kilobytes per millisecond. The list is not modified.
///
/// [iterations] is kept for call-site compatibility; the number of runs is
/// taken from the length of [runtimesInMs].
///
/// Prints a short notice instead of statistics when [runtimesInMs] is empty.
void printStats(List<double> runtimesInMs, int dataLength, int iterations) {
  if (runtimesInMs.isEmpty) {
    // Nothing to average; `reduce` would throw on an empty list.
    print('No runs recorded; nothing to report.');
    return;
  }

  var mb = dataLength / 1024 / 1024;

  // Sort a copy so the caller's list keeps its original run order.
  var sortedRuntimes = [...runtimesInMs]..sort();

  var sum = sortedRuntimes.reduce((value, element) => value + element);
  var averageRuntimeInMs = sum / sortedRuntimes.length;
  var averageKbPerMs = dataLength / 1024 / averageRuntimeInMs;

  // With an even number of runs the median is the mean of the two middle
  // values; taking only the upper one would bias the result towards slower
  // runs.
  var middle = sortedRuntimes.length ~/ 2;
  var medianRuntimeInMs = sortedRuntimes.length.isOdd
      ? sortedRuntimes[middle]
      : (sortedRuntimes[middle - 1] + sortedRuntimes[middle]) / 2;
  var medianKbPerMs = dataLength / 1024 / medianRuntimeInMs;

  print(
      'Processed ${mb.toStringAsFixed(2)} mb of data with an average/median of '
      '${averageKbPerMs.toStringAsFixed(2)} / '
      '${medianKbPerMs.toStringAsFixed(2)} '
      'kb per ms.');
}

pkgs/crypto/lib/src/hash_sink.dart

Lines changed: 72 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
import 'dart:typed_data';
66

7-
import 'package:typed_data/typed_data.dart';
8-
97
import 'digest.dart';
108
import 'utils.dart';
119

@@ -19,11 +17,24 @@ abstract class HashSink implements Sink<List<int>> {
1917
/// Whether the hash function operates on big-endian words.
2018
final Endian _endian;
2119

22-
/// The words in the current chunk.
20+
/// A [ByteData] view of the current chunk of data.
21+
///
22+
/// This is an instance variable to avoid re-allocating.
23+
ByteData? _byteDataView;
24+
25+
/// The actual chunk of bytes currently accumulating.
26+
///
27+
/// The same allocation will be reused over and over again; once full it is
28+
/// passed to the underlying hashing algorithm for processing.
29+
final Uint8List _chunk;
30+
31+
/// The index of the next insertion into the chunk.
32+
int _chunkNextIndex;
33+
34+
/// A [Uint32List] (in specified endian) copy of the chunk.
2335
///
24-
/// This is an instance variable to avoid re-allocating, but its data isn't
25-
/// used across invocations of [_iterate].
26-
final Uint32List _currentChunk;
36+
/// This is an instance variable to avoid re-allocating.
37+
final Uint32List _chunk32;
2738

2839
/// Messages with more than 2^53-1 bits are not supported.
2940
///
@@ -35,9 +46,6 @@ abstract class HashSink implements Sink<List<int>> {
3546
/// The length of the input data so far, in bytes.
3647
int _lengthInBytes = 0;
3748

38-
/// Data that has yet to be processed by the hash function.
39-
final _pendingData = Uint8Buffer();
40-
4149
/// Whether [close] has been called.
4250
bool _isClosed = false;
4351

@@ -66,7 +74,9 @@ abstract class HashSink implements Sink<List<int>> {
6674
}) : _endian = endian,
6775
assert(signatureBytes >= 8),
6876
_signatureBytes = signatureBytes,
69-
_currentChunk = Uint32List(chunkSizeInWords);
77+
_chunk = Uint8List(chunkSizeInWords * bytesPerWord),
78+
_chunkNextIndex = 0,
79+
_chunk32 = Uint32List(chunkSizeInWords);
7080

7181
/// Runs a single iteration of the hash computation, updating [digest] with
7282
/// the result.
@@ -79,18 +89,47 @@ abstract class HashSink implements Sink<List<int>> {
7989
void add(List<int> data) {
8090
if (_isClosed) throw StateError('Hash.add() called after close().');
8191
_lengthInBytes += data.length;
82-
_pendingData.addAll(data);
83-
_iterate();
92+
_addData(data);
93+
}
94+
95+
/// Copies [data] into the chunk buffer, hashing every chunk that fills up.
///
/// Bytes are appended to [_chunk] starting at [_chunkNextIndex]. Each time
/// the buffer becomes full it is converted into words of the configured
/// endianness in [_chunk32] and passed to [updateHash]. Bytes left over after
/// the last full chunk stay in [_chunk], and [_chunkNextIndex] is updated to
/// point just past them.
void _addData(List<int> data) {
  // Create the view lazily and keep it in a local for the conversion loop.
  final view = _byteDataView ??= _chunk.buffer.asByteData();
  final chunkLength = _chunk.length;
  final wordCount = _chunk32.length;

  var consumed = 0;
  var writeIndex = _chunkNextIndex;

  // Where the write position would land if all remaining input were appended.
  var end = writeIndex + data.length - consumed;
  while (end >= chunkLength) {
    // Enough input to complete the current chunk: top it up.
    _chunk.setRange(writeIndex, chunkLength, data, consumed);
    consumed += chunkLength - writeIndex;

    // Convert the filled bytes into words of the requested endianness.
    var word = 0;
    do {
      _chunk32[word] = view.getUint32(word * bytesPerWord, _endian);
      word++;
    } while (word < wordCount);

    updateHash(_chunk32);

    // The next chunk starts empty.
    writeIndex = 0;
    end = data.length - consumed;
  }

  // Too little input left for a full chunk; keep it for the next call.
  _chunk.setRange(writeIndex, end, data, consumed);
  _chunkNextIndex = end;
}
85125

86126
@override
87127
void close() {
88128
if (_isClosed) return;
89129
_isClosed = true;
90130

91-
_finalizeData();
92-
_iterate();
93-
assert(_pendingData.isEmpty);
131+
_finalizeAndProcessData();
132+
assert(_chunkNextIndex == 0);
94133
_sink.add(Digest(_byteDigest()));
95134
_sink.close();
96135
}
@@ -108,65 +147,38 @@ abstract class HashSink implements Sink<List<int>> {
108147
return byteDigest;
109148
}
110149

111-
/// Iterates through [_pendingData], updating the hash computation for each
112-
/// chunk.
113-
void _iterate() {
114-
var pendingDataBytes = _pendingData.buffer.asByteData();
115-
var pendingDataChunks = _pendingData.length ~/ _currentChunk.lengthInBytes;
116-
for (var i = 0; i < pendingDataChunks; i++) {
117-
// Copy words from the pending data buffer into the current chunk buffer.
118-
for (var j = 0; j < _currentChunk.length; j++) {
119-
_currentChunk[j] = pendingDataBytes.getUint32(
120-
i * _currentChunk.lengthInBytes + j * bytesPerWord,
121-
_endian,
122-
);
123-
}
124-
125-
// Run the hash function on the current chunk.
126-
updateHash(_currentChunk);
127-
}
128-
129-
// Remove all pending data up to the last clean chunk break.
130-
_pendingData.removeRange(
131-
0,
132-
pendingDataChunks * _currentChunk.lengthInBytes,
133-
);
134-
}
135-
136-
/// Finalizes [_pendingData].
150+
/// Finalizes the data and finishes the hash.
137151
///
138152
/// This adds a 1 bit to the end of the message, and expands it with 0 bits to
139153
/// pad it out.
140-
void _finalizeData() {
141-
// Pad out the data with 0x80, eight or sixteen 0s, and as many more 0s
142-
// as we need to land cleanly on a chunk boundary.
143-
_pendingData.add(0x80);
154+
void _finalizeAndProcessData() {
155+
if (_lengthInBytes > _maxMessageLengthInBytes) {
156+
throw UnsupportedError(
157+
'Hashing is unsupported for messages with more than 2^53 bits.',
158+
);
159+
}
144160

145161
final contentsLength = _lengthInBytes + 1 /* 0x80 */ + _signatureBytes;
146162
final finalizedLength = _roundUp(
147163
contentsLength,
148-
_currentChunk.lengthInBytes,
164+
_chunk.lengthInBytes,
149165
);
150166

151-
for (var i = 0; i < finalizedLength - contentsLength; i++) {
152-
_pendingData.add(0);
153-
}
167+
// Prepare the finalization data.
168+
var padding = Uint8List(finalizedLength - _lengthInBytes);
169+
// Pad out the data with 0x80, eight or sixteen 0s, and as many more 0s
170+
// as we need to land cleanly on a chunk boundary.
171+
padding[0] = 0x80;
154172

155-
if (_lengthInBytes > _maxMessageLengthInBytes) {
156-
throw UnsupportedError(
157-
'Hashing is unsupported for messages with more than 2^53 bits.',
158-
);
159-
}
173+
// The rest is already 0-bytes.
160174

161175
var lengthInBits = _lengthInBytes * bitsPerByte;
162176

163177
// Add the full length of the input data as a 64-bit value at the end of the
164178
// hash. Note: we're only writing out 64 bits, so skip ahead 8 if the
165179
// signature is 128-bit.
166-
final offset = _pendingData.length + (_signatureBytes - 8);
167-
168-
_pendingData.addAll(Uint8List(_signatureBytes));
169-
var byteData = _pendingData.buffer.asByteData();
180+
final offset = padding.length - 8;
181+
var byteData = padding.buffer.asByteData();
170182

171183
// We're essentially doing byteData.setUint64(offset, lengthInBits, _endian)
172184
// here, but that method isn't supported on dart2js so we implement it
@@ -180,6 +192,8 @@ abstract class HashSink implements Sink<List<int>> {
180192
byteData.setUint32(offset, lowBits, _endian);
181193
byteData.setUint32(offset + bytesPerWord, highBits, _endian);
182194
}
195+
196+
_addData(padding);
183197
}
184198

185199
/// Rounds [val] up to the next multiple of [n], as long as [n] is a power of

0 commit comments

Comments
 (0)