Skip to content

Commit 2c5409b

Browse files
committed
cksum: Use crc_fast::Digest on the fly to improve performance
1 parent b3d71e3 commit 2c5409b

File tree

2 files changed

+45
-52
lines changed

2 files changed

+45
-52
lines changed

src/uucore/src/lib/features/checksum.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1155,7 +1155,7 @@ where
11551155

11561156
pub fn digest_reader<T: Read>(
11571157
digest: &mut Box<dyn Digest>,
1158-
reader: &mut BufReader<T>,
1158+
reader: &mut T,
11591159
binary: bool,
11601160
output_bits: usize,
11611161
) -> io::Result<(String, usize)> {

src/uucore/src/lib/features/sum.rs

Lines changed: 44 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,6 @@ use hex::encode;
1818
#[cfg(windows)]
1919
use memchr::memmem;
2020

21-
use crc_fast::{CrcParams, checksum_with_params};
22-
23-
// POSIX cksum SIMD configuration for crc-fast
24-
// This uses SIMD instructions (PCLMULQDQ) for fast CRC computation
25-
fn get_posix_cksum_params() -> CrcParams {
26-
CrcParams::new(
27-
"CRC-32/CKSUM", // Name
28-
32, // Width
29-
0x04c11db7, // Polynomial
30-
0x00000000, // Initial CRC value: 0 (not 0xffffffff)
31-
false, // No input reflection (refin)
32-
0xffffffff, // XOR output with 0xffffffff (xorout)
33-
0, // Check value (not used)
34-
)
35-
}
36-
3721
pub trait Digest {
3822
fn new() -> Self
3923
where
@@ -139,50 +123,59 @@ impl Digest for Sm3 {
139123
}
140124

141125
pub struct Crc {
142-
state: u32,
126+
digest: crc_fast::Digest,
143127
size: usize,
144-
// Store data for SIMD processing
145-
data_buffer: Vec<u8>,
128+
}
129+
130+
impl Crc {
131+
/// POSIX cksum SIMD configuration for crc-fast
132+
/// This uses SIMD instructions (PCLMULQDQ) for fast CRC computation
133+
fn get_posix_cksum_params() -> crc_fast::CrcParams {
134+
crc_fast::CrcParams::new(
135+
"CRC-32/CKSUM", // Name
136+
32, // Width
137+
0x04c11db7, // Polynomial
138+
0x00000000, // Initial CRC value: 0 (not 0xffffffff)
139+
false, // No input reflection (refin)
140+
0xffffffff, // XOR output with 0xffffffff (xorout)
141+
0, // Check value (not used)
142+
)
143+
}
146144
}
147145

148146
impl Digest for Crc {
149147
fn new() -> Self {
150148
Self {
151-
state: 0,
149+
digest: crc_fast::Digest::new_with_params(Self::get_posix_cksum_params()),
152150
size: 0,
153-
data_buffer: Vec::with_capacity(8192),
154151
}
155152
}
156153

157154
fn hash_update(&mut self, input: &[u8]) {
155+
self.digest.update(input);
158156
self.size += input.len();
159-
// Store data for SIMD processing
160-
self.data_buffer.extend_from_slice(input);
161157
}
162158

163159
fn hash_finalize(&mut self, out: &mut [u8]) {
164-
// Add the size bytes to the data buffer
160+
// Feed the total input length into the digest, least-significant byte first.
165161
let mut sz = self.size;
166-
while sz != 0 {
167-
self.data_buffer.push(sz as u8);
162+
while sz > 0 {
163+
self.digest.update(&[sz as u8]);
168164
sz >>= 8;
169165
}
170166

171-
// Use SIMD-accelerated CRC computation
172-
self.state = checksum_with_params(get_posix_cksum_params(), &self.data_buffer) as u32;
173-
out.copy_from_slice(&self.state.to_ne_bytes());
167+
out.copy_from_slice(&self.digest.finalize().to_ne_bytes());
174168
}
175169

176170
fn result_str(&mut self) -> String {
177-
let mut _out: Vec<u8> = vec![0; 4];
178-
self.hash_finalize(&mut _out);
179-
format!("{}", self.state)
171+
let mut out: [u8; 8] = [0; 8];
172+
self.hash_finalize(&mut out);
173+
u64::from_ne_bytes(out).to_string()
180174
}
181175

182176
fn reset(&mut self) {
183-
self.state = 0;
177+
self.digest.reset();
184178
self.size = 0;
185-
self.data_buffer.clear();
186179
}
187180

188181
fn output_bits(&self) -> usize {
@@ -525,8 +518,8 @@ mod tests {
525518
crc1.hash_update(b"test");
526519
crc2.hash_update(b"test");
527520

528-
let mut out1 = [0u8; 4];
529-
let mut out2 = [0u8; 4];
521+
let mut out1 = [0u8; 8];
522+
let mut out2 = [0u8; 8];
530523
crc1.hash_finalize(&mut out1);
531524
crc2.hash_finalize(&mut out2);
532525

@@ -538,15 +531,15 @@ mod tests {
538531
let mut crc = Crc::new();
539532

540533
// Test empty input
541-
let mut output = [0u8; 4];
534+
let mut output = [0u8; 8];
542535
crc.hash_finalize(&mut output);
543-
let empty_result = u32::from_ne_bytes(output);
536+
let empty_result = u64::from_ne_bytes(output);
544537

545538
// Start from a fresh Crc and test with "test" string
546-
crc.reset();
539+
let mut crc = Crc::new();
547540
crc.hash_update(b"test");
548541
crc.hash_finalize(&mut output);
549-
let test_result = u32::from_ne_bytes(output);
542+
let test_result = u64::from_ne_bytes(output);
550543

551544
// The result should be different for different inputs
552545
assert_ne!(empty_result, test_result);
@@ -565,14 +558,14 @@ mod tests {
565558

566559
// Process all at once
567560
crc1.hash_update(data);
568-
let mut output1 = [0u8; 4];
561+
let mut output1 = [0u8; 8];
569562
crc1.hash_finalize(&mut output1);
570563

571564
// Process in chunks
572565
crc2.hash_update(&data[0..10]);
573566
crc2.hash_update(&data[10..30]);
574567
crc2.hash_update(&data[30..]);
575-
let mut output2 = [0u8; 4];
568+
let mut output2 = [0u8; 8];
576569
crc2.hash_finalize(&mut output2);
577570

578571
assert_eq!(output1, output2);
@@ -585,15 +578,15 @@ mod tests {
585578

586579
let mut crc_optimized = Crc::new();
587580
crc_optimized.hash_update(test_data);
588-
let mut output_opt = [0u8; 4];
581+
let mut output_opt = [0u8; 8];
589582
crc_optimized.hash_finalize(&mut output_opt);
590583

591584
// Create a reference implementation using hash_update
592585
let mut crc_reference = Crc::new();
593586
for &byte in test_data {
594587
crc_reference.hash_update(&[byte]);
595588
}
596-
let mut output_ref = [0u8; 4];
589+
let mut output_ref = [0u8; 8];
597590
crc_reference.hash_finalize(&mut output_ref);
598591

599592
assert_eq!(output_opt, output_ref);
@@ -604,17 +597,17 @@ mod tests {
604597
// Test against our CRC implementation values
605598
// Note: These are the correct values for our POSIX cksum implementation
606599
let test_cases = [
607-
("", 4294967295u32),
608-
("a", 1220704766u32),
609-
("abc", 1219131554u32),
600+
("", 4294967295_u64),
601+
("a", 1220704766_u64),
602+
("abc", 1219131554_u64),
610603
];
611604

612605
for (input, expected) in test_cases {
613606
let mut crc = Crc::new();
614607
crc.hash_update(input.as_bytes());
615-
let mut output = [0u8; 4];
608+
let mut output = [0u8; 8];
616609
crc.hash_finalize(&mut output);
617-
let result = u32::from_ne_bytes(output);
610+
let result = u64::from_ne_bytes(output);
618611

619612
assert_eq!(result, expected, "CRC mismatch for input: '{}'", input);
620613
}
@@ -633,8 +626,8 @@ mod tests {
633626
crc2.hash_update(data9);
634627

635628
// Should not panic and should produce valid results
636-
let mut out1 = [0u8; 4];
637-
let mut out2 = [0u8; 4];
629+
let mut out1 = [0u8; 8];
630+
let mut out2 = [0u8; 8];
638631
crc.hash_finalize(&mut out1);
639632
crc2.hash_finalize(&mut out2);
640633

0 commit comments

Comments
 (0)