From e9b7de0d4eeafed7d2ed5434bce823685a1e3c01 Mon Sep 17 00:00:00 2001
From: Michael Mior
Date: Tue, 25 Feb 2025 12:52:31 -0500
Subject: [PATCH] Add the first 31 bytes to the hash of long strings

Long strings now reuse the perfect-hash layout for their first 31 bytes
and store a checksum of the length, first and last characters in the low
byte of `hash.a`. Since `is_perfect` now treats a zero low byte as the
marker of a perfect hash, the checksum is kept in the range [1, 255] so
that a long-string hash can never be mistaken for a perfect one.

Signed-off-by: Michael Mior
---
 src/core/json/include/sourcemeta/core/json_hash.h | 17 +++++++++--------
 test/json/json_hash_test.cc                       | 16 ++++++++--------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/core/json/include/sourcemeta/core/json_hash.h b/src/core/json/include/sourcemeta/core/json_hash.h
index 87d79fd33..826aeb798 100644
--- a/src/core/json/include/sourcemeta/core/json_hash.h
+++ b/src/core/json/include/sourcemeta/core/json_hash.h
@@ -49,7 +49,6 @@ template struct PropertyHashJSON {
       -> hash_type {
     hash_type result;
     assert(!value.empty());
-    assert(value.size() <= 31);
     // Copy starting a byte 2
     std::memcpy(reinterpret_cast(&result) + 1, value.data(), size);
     return result;
@@ -126,17 +125,19 @@ template struct PropertyHashJSON {
         // This case is specifically designed to be constant with regards to
         // string length, and to exploit the fact that most JSON objects don't
         // have a lot of entries, so hash collision is not as common
-        return {(size + static_cast(value.front()) +
-                 static_cast(value.back())) %
-                // Make sure the property hash can never exceed 8 bits
-                256};
+        auto hash = this->perfect(value, 31);
+        hash.a |= 1 + (size + static_cast(value.front()) +
+                       static_cast(value.back())) %
+                          // Keep the checksum in [1, 255]: 8 bits, never zero
+                          255;
+        return hash;
       }
     }
 
   inline auto is_perfect(const hash_type &hash) const noexcept -> bool {
-    // If there is anything written past the first byte,
-    // then it is a perfect hash
-    return hash.a > 255;
+    // A perfect hash leaves the low byte of `a` at zero, while
+    // long-string hashes always store a non-zero checksum there
+    return (hash.a & 255) == 0;
   }
 };
 
diff --git a/test/json/json_hash_test.cc b/test/json/json_hash_test.cc
index a3fcf908b..56baa6b10 100644
--- a/test/json/json_hash_test.cc
+++ b/test/json/json_hash_test.cc
@@ -7,7 +7,7 @@ TEST(JSON_key_hash, hash_empty) {
       hasher;
   const sourcemeta::core::JSON::String value{""};
   const auto hash{hasher(value)};
-  EXPECT_FALSE(hasher.is_perfect(hash));
+  EXPECT_TRUE(hasher.is_perfect(hash));
 #if defined(__SIZEOF_INT128__)
   EXPECT_EQ(hash.a,
             (__uint128_t{0x0000000000000000} << 64) | 0x0000000000000000);
@@ -619,14 +619,14 @@ TEST(JSON_key_hash, hash_fooooooooooooooooooooooooooooooo) {
   EXPECT_FALSE(hasher.is_perfect(hash));
 #if defined(__SIZEOF_INT128__)
   EXPECT_EQ(hash.a,
-            (__uint128_t{0x0000000000000000} << 64) | 0x00000000000000f5);
+            (__uint128_t{0x6f6f6f6f6f6f6f6f} << 64) | 0x6f6f6f6f6f6f66f6);
   EXPECT_EQ(hash.b,
-            (__uint128_t{0x0000000000000000} << 64) | 0x0000000000000000);
+            (__uint128_t{0x6f6f6f6f6f6f6f6f} << 64) | 0x6f6f6f6f6f6f6f6f);
 #else
-  // 0x20 (length) + 0x66 (f) + 0x6f (o)
-  EXPECT_EQ(hash.a, 0x00000000000000f5);
-  EXPECT_EQ(hash.b, 0x0000000000000000);
-  EXPECT_EQ(hash.c, 0x0000000000000000);
-  EXPECT_EQ(hash.d, 0x0000000000000000);
+  // Low byte: 1 + (0x20 (length) + 0x66 (f) + 0x6f (o)) % 255 = 0xf6
+  EXPECT_EQ(hash.a, 0x6f6f6f6f6f6f66f6);
+  EXPECT_EQ(hash.b, 0x6f6f6f6f6f6f6f6f);
+  EXPECT_EQ(hash.c, 0x6f6f6f6f6f6f6f6f);
+  EXPECT_EQ(hash.d, 0x6f6f6f6f6f6f6f6f);
 #endif
 }