Skip to content

Commit a083dd6

Browse files
committed
optimize Hash for Path
Hashing does not need the whole `Components` parsing machinery; it only has to match the normalizations that `Components` performs:

* stripping redundant separators -> skipping separators
* stripping redundant '.' directories -> skipping a '.' that follows a separator

That's all it takes. Instead of hashing an individual slice for each component, we feed the bytes directly into the hasher, which avoids hashing the length of each component in addition to its contents.
1 parent 82b4544 commit a083dd6

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

library/std/src/path.rs

+28-2
Original file line numberDiff line numberDiff line change
@@ -2873,9 +2873,35 @@ impl cmp::PartialEq for Path {
28732873
#[stable(feature = "rust1", since = "1.0.0")]
28742874
impl Hash for Path {
28752875
fn hash<H: Hasher>(&self, h: &mut H) {
2876-
for component in self.components() {
2877-
component.hash(h);
2876+
let bytes = self.as_u8_slice();
2877+
2878+
let mut component_start = 0;
2879+
let mut bytes_hashed = 0;
2880+
2881+
for i in 0..bytes.len() {
2882+
if is_sep_byte(bytes[i]) {
2883+
if i > component_start {
2884+
let to_hash = &bytes[component_start..i];
2885+
h.write(to_hash);
2886+
bytes_hashed += to_hash.len();
2887+
}
2888+
2889+
// skip over separator and optionally a following CurDir item
2890+
// since components() would normalize these away
2891+
component_start = i + match bytes[i..] {
2892+
[_, b'.', b'/', ..] | [_, b'.'] => 2,
2893+
_ => 1,
2894+
};
2895+
}
2896+
}
2897+
2898+
if component_start < bytes.len() {
2899+
let to_hash = &bytes[component_start..];
2900+
h.write(to_hash);
2901+
bytes_hashed += to_hash.len();
28782902
}
2903+
2904+
h.write_usize(bytes_hashed);
28792905
}
28802906
}
28812907

0 commit comments

Comments
 (0)