Skip to content

[Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS #96282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/lib/AST/ODRHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ unsigned ODRHash::CalculateHash() {

assert(I == Bools.rend());
Bools.clear();
return ID.ComputeHash();
return ID.computeStableHash();
}

namespace {
Expand Down
10 changes: 6 additions & 4 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ unsigned DeclarationNameKey::getHash() const {
break;
}

return ID.ComputeHash();
return ID.computeStableHash();
}

ModuleFile *
Expand Down Expand Up @@ -2033,7 +2033,10 @@ const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
}

unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
return llvm::hash_combine(ikey.Size, ikey.ModTime);
uint8_t buf[sizeof(ikey.Size) + sizeof(ikey.ModTime)];
memcpy(buf, &ikey.Size, sizeof(ikey.Size));
memcpy(buf + sizeof(ikey.Size), &ikey.ModTime, sizeof(ikey.ModTime));
return llvm::xxh3_64bits(buf);
}

HeaderFileInfoTrait::internal_key_type
Expand Down Expand Up @@ -2640,8 +2643,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
return OriginalChange;
}

// FIXME: hash_value is not guaranteed to be stable!
auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
auto ContentHash = xxh3_64bits(MemBuffOrError.get()->getBuffer());
if (StoredContentHash == static_cast<uint64_t>(ContentHash))
return Change{Change::None};

Expand Down
7 changes: 5 additions & 2 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1782,7 +1782,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
.ValidateASTInputFilesContent) {
auto MemBuff = Cache->getBufferIfLoaded();
if (MemBuff)
ContentHash = hash_value(MemBuff->getBuffer());
ContentHash = xxh3_64bits(MemBuff->getBuffer());
else
PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
<< Entry.File.getName();
Expand Down Expand Up @@ -1987,7 +1987,10 @@ namespace {
// The hash is based only on size/time of the file, so that the reader can
// match even when symlinking or excess path elements ("foo/../", "../")
// change the form of the name. However, complete path is still the key.
return llvm::hash_combine(key.Size, key.ModTime);
uint8_t buf[sizeof(key.Size) + sizeof(key.ModTime)];
memcpy(buf, &key.Size, sizeof(key.Size));
memcpy(buf + sizeof(key.Size), &key.ModTime, sizeof(key.ModTime));
return llvm::xxh3_64bits(buf);
}

std::pair<unsigned, unsigned>
Expand Down
23 changes: 19 additions & 4 deletions llvm/include/llvm/ADT/FoldingSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/xxhash.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -294,12 +295,19 @@ class FoldingSetNodeIDRef {
FoldingSetNodeIDRef() = default;
FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {}

/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
/// used to lookup the node in the FoldingSetBase.
// Hash the referenced profile data for node lookup in the FoldingSetBase.
// This is a strong hash, but its value is not guaranteed to be deterministic
// across processes; see computeStableHash() for anything that is serialized.
unsigned ComputeHash() const {
  const unsigned *First = Data;
  const unsigned *Last = Data + Size;
  const auto Hash = hash_combine_range(First, Last);
  // Narrow the hash code to the unsigned width used by the callers.
  return static_cast<unsigned>(Hash);
}

// Hash the referenced profile data with xxh3_64bits so that the result is
// deterministic across processes and therefore suitable for on-disk
// serialization. The 64-bit digest is narrowed to unsigned.
// NOTE(review): the data is hashed as raw bytes in host byte order, so the
// value presumably differs between little- and big-endian hosts -- confirm
// that is acceptable for every serialized format relying on it.
unsigned computeStableHash() const {
  const auto *Bytes = reinterpret_cast<const uint8_t *>(Data);
  const size_t NumBytes = sizeof(unsigned) * Size;
  return static_cast<unsigned>(xxh3_64bits(ArrayRef(Bytes, NumBytes)));
}

bool operator==(FoldingSetNodeIDRef) const;

bool operator!=(FoldingSetNodeIDRef RHS) const { return !(*this == RHS); }
Expand Down Expand Up @@ -366,12 +374,19 @@ class FoldingSetNodeID {
/// object to be used to compute a new profile.
inline void clear() { Bits.clear(); }

/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used
/// to lookup the node in the FoldingSetBase.
// Compute a strong hash value for this FoldingSetNodeID, used to lookup the
// node in the FoldingSetBase. The hash value is not guaranteed to be
// deterministic across processes.
unsigned ComputeHash() const {
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
}

// Compute a deterministic hash value across processes that is suitable for
// on-disk serialization.
unsigned computeStableHash() const {
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).computeStableHash();
}

/// operator== - Used to compare two nodes to each other.
bool operator==(const FoldingSetNodeID &RHS) const;
bool operator==(const FoldingSetNodeIDRef RHS) const;
Expand Down
45 changes: 11 additions & 34 deletions llvm/include/llvm/ADT/Hashing.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,23 +126,6 @@ hash_code hash_value(const std::basic_string<T> &arg);
/// Compute a hash_code for a std::optional.
template <typename T> hash_code hash_value(const std::optional<T> &arg);

/// Override the execution seed with a fixed value.
///
/// This hashing library uses a per-execution seed designed to change on each
/// run with high probability in order to ensure that the hash codes are not
/// attackable and to ensure that output which is intended to be stable does
/// not rely on the particulars of the hash codes produced.
///
/// That said, there are use cases where it is important to be able to
/// reproduce *exactly* a specific behavior. To that end, we provide a function
/// which will forcibly set the seed to a fixed value. This must be done at the
/// start of the program, before any hashes are computed. Also, it cannot be
/// undone. This makes it thread-hostile and very hard to use outside of
/// immediately on start of a simple program designed for reproducible
/// behavior.
void set_fixed_execution_hash_seed(uint64_t fixed_value);


// All of the implementation details of actually computing the various hash
// code values are held within this namespace. These routines are included in
// the header file mainly to allow inlining and constant propagation.
Expand Down Expand Up @@ -322,24 +305,18 @@ struct hash_state {
}
};


/// A global, fixed seed-override variable.
///
/// This variable can be set using the \see llvm::set_fixed_execution_seed
/// function. See that function for details. Do not, under any circumstances,
/// set or read this variable.
extern uint64_t fixed_seed_override;

/// The seed is non-deterministic (address of a variable) to prevent having
/// users depend on the particular hash values. On platforms without ASLR, this
/// is still likely non-deterministic per build.
inline uint64_t get_execution_seed() {
// FIXME: This needs to be a per-execution seed. This is just a placeholder
// implementation. Switching to a per-execution seed is likely to flush out
// instability bugs and so will happen as its own commit.
//
// However, if there is a fixed seed override set the first time this is
// called, return that instead of the per-execution seed.
const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
return seed;
static const char seed = 0;
// Work around x86-64 negative offset folding for old Clang -fno-pic
// https://reviews.llvm.org/D93931
#if !defined(__clang__) || __clang_major__ > 11
return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&seed));
#else
return 0xff51afd7ed558ccdULL;
#endif
}


Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ add_llvm_component_library(LLVMSupport
FormatVariadic.cpp
GlobPattern.cpp
GraphWriter.cpp
Hashing.cpp
HexagonAttributeParser.cpp
HexagonAttributes.cpp
InitLLVM.cpp
Expand Down
28 changes: 0 additions & 28 deletions llvm/lib/Support/Hashing.cpp

This file was deleted.

72 changes: 0 additions & 72 deletions llvm/unittests/ADT/HashingTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,78 +235,6 @@ TEST(HashingTest, HashCombineRangeLengthDiff) {
}
}

TEST(HashingTest, HashCombineRangeGoldenTest) {
struct { const char *s; uint64_t hash; } golden_data[] = {
#if SIZE_MAX == UINT64_MAX || SIZE_MAX == UINT32_MAX
{ "a", 0xaeb6f9d5517c61f8ULL },
{ "ab", 0x7ab1edb96be496b4ULL },
{ "abc", 0xe38e60bf19c71a3fULL },
{ "abcde", 0xd24461a66de97f6eULL },
{ "abcdefgh", 0x4ef872ec411dec9dULL },
{ "abcdefghijklm", 0xe8a865539f4eadfeULL },
{ "abcdefghijklmnopqrstu", 0x261cdf85faaf4e79ULL },
{ "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL },
{ "abcdefghijklmnopqrstuvwxyzabcdef"
"abcdefghijklmnopqrstuvwxyzghijkl"
"abcdefghijklmnopqrstuvwxyzmnopqr"
"abcdefghijklmnopqrstuvwxyzstuvwx"
"abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL },
{ "a", 0xaeb6f9d5517c61f8ULL },
{ "aa", 0xf2b3b69a9736a1ebULL },
{ "aaa", 0xf752eb6f07b1cafeULL },
{ "aaaaa", 0x812bd21e1236954cULL },
{ "aaaaaaaa", 0xff07a2cff08ac587ULL },
{ "aaaaaaaaaaaaa", 0x84ac949d54d704ecULL },
{ "aaaaaaaaaaaaaaaaaaaaa", 0xcb2c8fb6be8f5648ULL },
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL },
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL },
{ "z", 0x1ba160d7e8f8785cULL },
{ "zz", 0x2c5c03172f1285d7ULL },
{ "zzz", 0x9d2c4f4b507a2ac3ULL },
{ "zzzzz", 0x0f03b9031735693aULL },
{ "zzzzzzzz", 0xe674147c8582c08eULL },
{ "zzzzzzzzzzzzz", 0x3162d9fa6938db83ULL },
{ "zzzzzzzzzzzzzzzzzzzzz", 0x37b9a549e013620cULL },
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL },
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL },
{ "a", 0xaeb6f9d5517c61f8ULL },
{ "ab", 0x7ab1edb96be496b4ULL },
{ "aba", 0x3edb049950884d0aULL },
{ "ababa", 0x8f2de9e73a97714bULL },
{ "abababab", 0xee14a29ddf0ce54cULL },
{ "ababababababa", 0x38b3ddaada2d52b4ULL },
{ "ababababababababababa", 0xd3665364219f2b85ULL },
{ "abababababababababababababababab", 0xa75cd6afbf1bc972ULL },
{ "abababababababababababababababab"
"abababababababababababababababab"
"abababababababababababababababab"
"abababababababababababababababab"
"abababababababababababababababab", 0x840192d129f7a22bULL }
#else
#error This test only supports 64-bit and 32-bit systems.
#endif
};
for (unsigned i = 0; i < sizeof(golden_data)/sizeof(*golden_data); ++i) {
StringRef str = golden_data[i].s;
hash_code hash = hash_combine_range(str.begin(), str.end());
#if 0 // Enable this to generate paste-able text for the above structure.
std::string member_str = "\"" + str.str() + "\",";
fprintf(stderr, " { %-35s 0x%016llxULL },\n",
member_str.c_str(), static_cast<uint64_t>(hash));
#endif
EXPECT_EQ(static_cast<size_t>(golden_data[i].hash),
static_cast<size_t>(hash));
}
}

TEST(HashingTest, HashCombineBasicTest) {
// Hashing a sequence of homogenous types matches range hashing.
const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79;
Expand Down
1 change: 0 additions & 1 deletion llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ static_library("Support") {
"FormattedStream.cpp",
"GlobPattern.cpp",
"GraphWriter.cpp",
"Hashing.cpp",
"HexagonAttributeParser.cpp",
"HexagonAttributes.cpp",
"InitLLVM.cpp",
Expand Down
Loading