@@ -164,17 +164,32 @@ pub fn small_table_crc(data: &[u8]) -> u32 {
164
164
/// compile-time.
165
165
///
166
166
pub fn barret_crc ( data : & [ u8 ] ) -> u32 {
167
- const BARRET_CONSTANT : p64 = {
168
- p64 ( p128 ( 0x10000000000000000 )
169
- . naive_div ( p128 ( POLYNOMIAL . 0 as u128 ) ) . 0 as u64 )
167
+ // Normally this would be 0x10000000000000000 / __polynomial, but
168
+ // we eagerly do one step of division so we avoid needing a 4x wide
169
+ // type. We can also drop the highest bit if we add the high bits
170
+ // manually when we use this constant.
171
+ //
172
+ // = x % p
173
+ // = 0xffffffff & (x + p*(((x >> 32) * [0x10000000000000000/p]) >> 32))
174
+ // = 0xffffffff & (x + p*(((x >> 32) * [(p << 32)/p + 0x100000000]) >> 32))
175
+ // = 0xffffffff & (x + p*((((x >> 32) * [(p << 32)/p]) >> 32) + (x >> 32)))
176
+ // \-----+-----/
177
+ // '-- Barret constant
178
+ //
179
+ // Note that the shifts and masks can go away if we operate on u32s,
180
+ // leaving 2 xmuls and 2 xors.
181
+ //
182
+ const BARRET_CONSTANT : p32 = {
183
+ p32 ( p64 ( POLYNOMIAL . 0 << 32 ) . naive_div ( POLYNOMIAL ) . 0 as u32 )
170
184
} ;
171
185
172
186
let mut crc = p32 ( 0xffffffff ) ;
173
187
174
188
for b in data {
175
189
crc = crc ^ ( p32:: from ( b. reverse_bits ( ) ) << 24 ) ;
176
- let q = ( p64:: from ( crc >> 24 ) * BARRET_CONSTANT ) >> 32 ;
177
- crc = p32:: from_lossy ( q* POLYNOMIAL ) + ( crc << 8 ) ;
190
+ crc = ( crc << 8 )
191
+ + ( ( crc >> 24u32 ) . widening_mul ( BARRET_CONSTANT ) . 1 + ( crc >> 24u32 ) )
192
+ . wrapping_mul ( p32:: from_lossy ( POLYNOMIAL ) ) ;
178
193
}
179
194
180
195
u32:: from ( crc) . reverse_bits ( ) ^ 0xffffffff
@@ -184,9 +199,23 @@ pub fn barret_crc(data: &[u8]) -> u32 {
184
199
/// barret_crc, but operating on a 32-bit word at a time
185
200
///
186
201
pub fn word_barret_crc ( data : & [ u8 ] ) -> u32 {
187
- const BARRET_CONSTANT : p64 = {
188
- p64 ( p128 ( 0x10000000000000000 )
189
- . naive_div ( p128 ( POLYNOMIAL . 0 as u128 ) ) . 0 as u64 )
202
+ // Normally this would be 0x10000000000000000 / __polynomial, but
203
+ // we eagerly do one step of division so we avoid needing a 4x wide
204
+ // type. We can also drop the highest bit if we add the high bits
205
+ // manually when we use this constant.
206
+ //
207
+ // = x % p
208
+ // = 0xffffffff & (x + p*(((x >> 32) * [0x10000000000000000/p]) >> 32))
209
+ // = 0xffffffff & (x + p*(((x >> 32) * [(p << 32)/p + 0x100000000]) >> 32))
210
+ // = 0xffffffff & (x + p*((((x >> 32) * [(p << 32)/p]) >> 32) + (x >> 32)))
211
+ // \-----+-----/
212
+ // '-- Barret constant
213
+ //
214
+ // Note that the shifts and masks can go away if we operate on u32s,
215
+ // leaving 2 xmuls and 2 xors.
216
+ //
217
+ const BARRET_CONSTANT : p32 = {
218
+ p32 ( p64 ( POLYNOMIAL . 0 << 32 ) . naive_div ( POLYNOMIAL ) . 0 as u32 )
190
219
} ;
191
220
192
221
let mut crc = p32 ( 0xffffffff ) ;
@@ -196,19 +225,118 @@ pub fn word_barret_crc(data: &[u8]) -> u32 {
196
225
for word in & mut words {
197
226
let word = <[ u8 ; 4 ] >:: try_from ( word) . unwrap ( ) ;
198
227
crc = crc ^ p32:: from_le_bytes ( word) . reverse_bits ( ) ;
199
- let q = ( p64 :: from ( crc) * BARRET_CONSTANT ) >> 32 ;
200
- crc = p32:: from_lossy ( q * POLYNOMIAL ) ;
228
+ crc = ( crc. widening_mul ( BARRET_CONSTANT ) . 1 + crc )
229
+ . wrapping_mul ( p32:: from_lossy ( POLYNOMIAL ) ) ;
201
230
}
202
231
203
232
for b in words. remainder ( ) {
204
233
crc = crc ^ ( p32:: from ( b. reverse_bits ( ) ) << 24 ) ;
205
- let q = ( p64:: from ( crc >> 24 ) * BARRET_CONSTANT ) >> 32 ;
206
- crc = p32:: from_lossy ( q* POLYNOMIAL ) + ( crc << 8 ) ;
234
+ crc = ( crc << 8 )
235
+ + ( ( crc >> 24u32 ) . widening_mul ( BARRET_CONSTANT ) . 1 + ( crc >> 24u32 ) )
236
+ . wrapping_mul ( p32:: from_lossy ( POLYNOMIAL ) ) ;
207
237
}
208
238
209
239
u32:: from ( crc) . reverse_bits ( ) ^ 0xffffffff
210
240
}
211
241
242
+ /// A hardware-accelerated CRC implementation using Barrett reduction without
243
+ /// needing to bit-reverse the internal representation.
244
+ ///
245
+ /// CRC32 and polynomial multiplication instructions unfortunately are defined
246
+ /// with different bit-endianness. This would normally mean we need to
247
+ /// bit-reverse the incoming data before we can use polynomial multiplication.
248
+ ///
249
+ /// However, polynomial multiplication has the odd property that it is
250
+ /// symmetric, brev(a) * brev(b) = brev((a * b) << 1)
251
+ ///
252
+ /// This means we can rewrite our Barrett reduction CRC to operate entirely
253
+ /// on a bit-reversed representation, shaving off several instructions.
254
+ ///
255
+ /// In theory this should be faster, but measurements show this as actually
256
+ /// being slightly slower, perhaps the extra 1-bit shift costs more on
257
+ /// machines with bit-reverse instructions?
258
+ /// Returns the checksum of `data`: the final register value XORed with 0xffffffff.
259
+ pub fn reversed_barret_crc ( data : & [ u8 ] ) -> u32 {
260
+ // Normally this would be 0x10000000000000000 / __polynomial, but
261
+ // we eagerly do one step of division so we avoid needing a 4x wide
262
+ // type. We can also drop the highest bit if we add the high bits
263
+ // manually when we use this constant.
264
+ //
265
+ // = x % p
266
+ // = 0xffffffff & (x + p*(((x >> 32) * [0x10000000000000000/p]) >> 32))
267
+ // = 0xffffffff & (x + p*(((x >> 32) * [(p << 32)/p + 0x100000000]) >> 32))
268
+ // = 0xffffffff & (x + p*((((x >> 32) * [(p << 32)/p]) >> 32) + (x >> 32)))
269
+ // \-----+-----/
270
+ // '-- Barret constant
271
+ //
272
+ // Note that the shifts and masks can go away if we operate on u32s,
273
+ // leaving 2 xmuls and 2 xors.
274
+ // BARRET_CONSTANT is (p << 32)/p with the quotient cast down to u32.
275
+ const BARRET_CONSTANT : p32 = {
276
+ p32 ( p64 ( POLYNOMIAL . 0 << 32 ) . naive_div ( POLYNOMIAL ) . 0 as u32 )
277
+ } ;
278
+ const POLYNOMIAL_REV : p32 = p32 ( POLYNOMIAL . 0 as u32 ) . reverse_bits ( ) ; // POLYNOMIAL cast to u32, then bit-reversed
279
+ const BARRET_CONSTANT_REV : p32 = BARRET_CONSTANT . reverse_bits ( ) ; // bit-reversed form of the constant above
280
+ // the CRC register starts with all 32 bits set
281
+ let mut crc = p32 ( 0xffffffff ) ;
282
+ // consume one input byte per iteration; the byte is xored in as-is, with no reverse_bits call
283
+ for b in data {
284
+ crc = crc ^ p32:: from ( * b) ;
285
+ let ( lo, _) = ( crc << 24u32 ) . widening_mul ( BARRET_CONSTANT_REV ) ; // only the first (presumably low) half of the product is kept
286
+ let ( lo, hi) = ( ( lo << 1u32 ) + ( crc << 24u32 ) ) . widening_mul ( POLYNOMIAL_REV ) ; // NOTE(review): the `<< 1` looks like the 1-bit correction from the brev identity -- confirm
287
+ crc = ( crc >> 8u32 ) + ( ( hi << 1u32 ) | ( lo >> 31u32 ) ) ; // (hi << 1) | (lo >> 31) is the top 32 bits of the hi:lo product shifted left by one
288
+ }
289
+ // unlike barret_crc, the register is not bit-reversed before the final xor
290
+ u32:: from ( crc) ^ 0xffffffff
291
+ }
292
+
293
+ /// A hardware-accelerated CRC implementation using the same technique as
294
+ /// reversed_barret_crc, but operating on a 32-bit word at a time
295
+ /// (4-byte chunks first, then any leftover bytes one at a time).
296
+ pub fn word_reversed_barret_crc ( data : & [ u8 ] ) -> u32 {
297
+ // Normally this would be 0x10000000000000000 / __polynomial, but
298
+ // we eagerly do one step of division so we avoid needing a 4x wide
299
+ // type. We can also drop the highest bit if we add the high bits
300
+ // manually when we use this constant.
301
+ //
302
+ // = x % p
303
+ // = 0xffffffff & (x + p*(((x >> 32) * [0x10000000000000000/p]) >> 32))
304
+ // = 0xffffffff & (x + p*(((x >> 32) * [(p << 32)/p + 0x100000000]) >> 32))
305
+ // = 0xffffffff & (x + p*((((x >> 32) * [(p << 32)/p]) >> 32) + (x >> 32)))
306
+ // \-----+-----/
307
+ // '-- Barret constant
308
+ //
309
+ // Note that the shifts and masks can go away if we operate on u32s,
310
+ // leaving 2 xmuls and 2 xors.
311
+ // BARRET_CONSTANT is (p << 32)/p with the quotient cast down to u32.
312
+ const BARRET_CONSTANT : p32 = {
313
+ p32 ( p64 ( POLYNOMIAL . 0 << 32 ) . naive_div ( POLYNOMIAL ) . 0 as u32 )
314
+ } ;
315
+ const POLYNOMIAL_REV : p32 = p32 ( POLYNOMIAL . 0 as u32 ) . reverse_bits ( ) ; // POLYNOMIAL cast to u32, then bit-reversed
316
+ const BARRET_CONSTANT_REV : p32 = BARRET_CONSTANT . reverse_bits ( ) ; // bit-reversed form of the constant above
317
+ // the CRC register starts with all 32 bits set
318
+ let mut crc = p32 ( 0xffffffff ) ;
319
+
320
+ // iterate over 4-byte words
321
+ let mut words = data. chunks_exact ( 4 ) ;
322
+ for word in & mut words {
323
+ let word = <[ u8 ; 4 ] >:: try_from ( word) . unwrap ( ) ; // cannot fail: chunks_exact(4) only yields slices of length 4
324
+ crc = crc ^ p32:: from_le_bytes ( word) ; // the word is xored in as-is, with no reverse_bits call
325
+ let ( lo, _) = crc. widening_mul ( BARRET_CONSTANT_REV ) ; // only the first (presumably low) half of the product is kept
326
+ let ( lo, hi) = ( ( lo << 1u32 ) + crc) . widening_mul ( POLYNOMIAL_REV ) ; // NOTE(review): the `<< 1` looks like the 1-bit correction for bit-reversed multiplication -- confirm
327
+ crc = ( hi << 1u32 ) | ( lo >> 31u32 ) ; // top 32 bits of the hi:lo product shifted left by one
328
+ }
329
+ // leftover bytes (fewer than 4) are folded in one at a time, with the same steps as reversed_barret_crc
330
+ for b in words. remainder ( ) {
331
+ crc = crc ^ p32:: from ( * b) ;
332
+ let ( lo, _) = ( crc << 24u32 ) . widening_mul ( BARRET_CONSTANT_REV ) ;
333
+ let ( lo, hi) = ( ( lo << 1u32 ) + ( crc << 24u32 ) ) . widening_mul ( POLYNOMIAL_REV ) ;
334
+ crc = ( crc >> 8u32 ) + ( ( hi << 1u32 ) | ( lo >> 31u32 ) ) ;
335
+ }
336
+ // the register is not bit-reversed before the final xor
337
+ u32:: from ( crc) ^ 0xffffffff
338
+ }
339
+
212
340
213
341
fn main ( ) {
214
342
let input = b"Hello World!" ;
@@ -217,31 +345,39 @@ fn main() {
217
345
println ! ( "testing crc({:?})" , String :: from_utf8_lossy( input) ) ;
218
346
219
347
let output = naive_crc ( input) ;
220
- println ! ( "{:<19 } => 0x{:08x}" , "naive_crc" , output) ;
348
+ println ! ( "{:<24 } => 0x{:08x}" , "naive_crc" , output) ;
221
349
assert_eq ! ( output, expected) ;
222
350
223
- let output = naive_crc ( input) ;
224
- println ! ( "{:<19 } => 0x{:08x}" , "less_naive_crc" , output) ;
351
+ let output = less_naive_crc ( input) ;
352
+ println ! ( "{:<24 } => 0x{:08x}" , "less_naive_crc" , output) ;
225
353
assert_eq ! ( output, expected) ;
226
354
227
- let output = naive_crc ( input) ;
228
- println ! ( "{:<19 } => 0x{:08x}" , "word_less_naive_crc" , output) ;
355
+ let output = word_less_naive_crc ( input) ;
356
+ println ! ( "{:<24 } => 0x{:08x}" , "word_less_naive_crc" , output) ;
229
357
assert_eq ! ( output, expected) ;
230
358
231
- let output = naive_crc ( input) ;
232
- println ! ( "{:<19 } => 0x{:08x}" , "table_crc" , output) ;
359
+ let output = table_crc ( input) ;
360
+ println ! ( "{:<24 } => 0x{:08x}" , "table_crc" , output) ;
233
361
assert_eq ! ( output, expected) ;
234
362
235
- let output = naive_crc ( input) ;
236
- println ! ( "{:<19 } => 0x{:08x}" , "small_table_crc" , output) ;
363
+ let output = small_table_crc ( input) ;
364
+ println ! ( "{:<24 } => 0x{:08x}" , "small_table_crc" , output) ;
237
365
assert_eq ! ( output, expected) ;
238
366
239
- let output = naive_crc ( input) ;
240
- println ! ( "{:<19 } => 0x{:08x}" , "barret_crc" , output) ;
367
+ let output = barret_crc ( input) ;
368
+ println ! ( "{:<24 } => 0x{:08x}" , "barret_crc" , output) ;
241
369
assert_eq ! ( output, expected) ;
242
370
243
- let output = naive_crc ( input) ;
244
- println ! ( "{:<19} => 0x{:08x}" , "word_barret_crc" , output) ;
371
+ let output = word_barret_crc ( input) ;
372
+ println ! ( "{:<24} => 0x{:08x}" , "word_barret_crc" , output) ;
373
+ assert_eq ! ( output, expected) ;
374
+
375
+ let output = reversed_barret_crc ( input) ;
376
+ println ! ( "{:<24} => 0x{:08x}" , "reversed_barret_crc" , output) ;
377
+ assert_eq ! ( output, expected) ;
378
+
379
+ let output = word_reversed_barret_crc ( input) ;
380
+ println ! ( "{:<24} => 0x{:08x}" , "word_reversed_barret_crc" , output) ;
245
381
assert_eq ! ( output, expected) ;
246
382
247
383
println ! ( ) ;
0 commit comments