Skip to content

Commit ce80c80

Browse files
authored
[Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS
Hashing.h provides hash_value/hash_combine/hash_combine_range, which are primarily used by `DenseMap<StringRef, X>` Users shouldn't rely on specific hash values due to size_t differences on 32-bit/64-bit platforms and potential algorithm changes. `set_fixed_execution_hash_seed` is provided but it has never been used. In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, take the the address of a static storage duration variable as the seed like absl/hash/internal/hash.h `kSeed`. (See https://reviews.llvm.org/D93931 for workaround for older Clang. Mach-O x86-64 forces PIC, so absl's `__apple_build_version__` check is unnecessary.) LLVM_ENABLE_ABI_BREAKING_CHECKS defaults to `WITH_ASSERTS` and is enabled in an assertion build. In a non-assertion build, `get_execution_seed` returns the fixed value regardless of `NDEBUG`. Removing a variable load yields noticeable size/performance improvement. A few users relying on the iteration order of `DenseMap<StringRef, X>` have been fixed (e.g., f8f4235 c025bd1 89e8e63 86eb6bf eb8d036 0ea6b8e 58d7a6e 8ea31db 592abf2 6644975). From my experience fixing [`StringMap`](https://discourse.llvm.org/t/reverse-iteration-bots/72224) iteration order issues, the scale of issues is similar. Pull Request: #96282
1 parent 982c547 commit ce80c80

File tree

5 files changed

+14
-136
lines changed

5 files changed

+14
-136
lines changed

llvm/include/llvm/ADT/Hashing.h

Lines changed: 14 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#ifndef LLVM_ADT_HASHING_H
4545
#define LLVM_ADT_HASHING_H
4646

47+
#include "llvm/Config/abi-breaking.h"
4748
#include "llvm/Support/DataTypes.h"
4849
#include "llvm/Support/ErrorHandling.h"
4950
#include "llvm/Support/SwapByteOrder.h"
@@ -126,23 +127,6 @@ hash_code hash_value(const std::basic_string<T> &arg);
126127
/// Compute a hash_code for a standard string.
127128
template <typename T> hash_code hash_value(const std::optional<T> &arg);
128129

129-
/// Override the execution seed with a fixed value.
130-
///
131-
/// This hashing library uses a per-execution seed designed to change on each
132-
/// run with high probability in order to ensure that the hash codes are not
133-
/// attackable and to ensure that output which is intended to be stable does
134-
/// not rely on the particulars of the hash codes produced.
135-
///
136-
/// That said, there are use cases where it is important to be able to
137-
/// reproduce *exactly* a specific behavior. To that end, we provide a function
138-
/// which will forcibly set the seed to a fixed value. This must be done at the
139-
/// start of the program, before any hashes are computed. Also, it cannot be
140-
/// undone. This makes it thread-hostile and very hard to use outside of
141-
/// immediately on start of a simple program designed for reproducible
142-
/// behavior.
143-
void set_fixed_execution_hash_seed(uint64_t fixed_value);
144-
145-
146130
// All of the implementation details of actually computing the various hash
147131
// code values are held within this namespace. These routines are included in
148132
// the header file mainly to allow inlining and constant propagation.
@@ -322,24 +306,20 @@ struct hash_state {
322306
}
323307
};
324308

325-
326-
/// A global, fixed seed-override variable.
327-
///
328-
/// This variable can be set using the \see llvm::set_fixed_execution_seed
329-
/// function. See that function for details. Do not, under any circumstances,
330-
/// set or read this variable.
331-
extern uint64_t fixed_seed_override;
332-
309+
/// In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic
310+
/// (address of a variable) to prevent having users depend on the particular
311+
/// hash values. On platforms without ASLR, this is still likely
312+
/// non-deterministic per build.
333313
inline uint64_t get_execution_seed() {
334-
// FIXME: This needs to be a per-execution seed. This is just a placeholder
335-
// implementation. Switching to a per-execution seed is likely to flush out
336-
// instability bugs and so will happen as its own commit.
337-
//
338-
// However, if there is a fixed seed override set the first time this is
339-
// called, return that instead of the per-execution seed.
340-
const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
341-
static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
342-
return seed;
314+
// Work around x86-64 negative offset folding for old Clang -fno-pic
315+
// https://reviews.llvm.org/D93931
316+
#if LLVM_ENABLE_ABI_BREAKING_CHECKS && \
317+
(!defined(__clang__) || __clang_major__ > 11)
318+
static const char seed = 0;
319+
return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&seed));
320+
#else
321+
return 0xff51afd7ed558ccdULL;
322+
#endif
343323
}
344324

345325

llvm/lib/Support/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,6 @@ add_llvm_component_library(LLVMSupport
187187
FormatVariadic.cpp
188188
GlobPattern.cpp
189189
GraphWriter.cpp
190-
Hashing.cpp
191190
HexagonAttributeParser.cpp
192191
HexagonAttributes.cpp
193192
InitLLVM.cpp

llvm/lib/Support/Hashing.cpp

Lines changed: 0 additions & 28 deletions
This file was deleted.

llvm/unittests/ADT/HashingTest.cpp

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -235,78 +235,6 @@ TEST(HashingTest, HashCombineRangeLengthDiff) {
235235
}
236236
}
237237

238-
TEST(HashingTest, HashCombineRangeGoldenTest) {
239-
struct { const char *s; uint64_t hash; } golden_data[] = {
240-
#if SIZE_MAX == UINT64_MAX || SIZE_MAX == UINT32_MAX
241-
{ "a", 0xaeb6f9d5517c61f8ULL },
242-
{ "ab", 0x7ab1edb96be496b4ULL },
243-
{ "abc", 0xe38e60bf19c71a3fULL },
244-
{ "abcde", 0xd24461a66de97f6eULL },
245-
{ "abcdefgh", 0x4ef872ec411dec9dULL },
246-
{ "abcdefghijklm", 0xe8a865539f4eadfeULL },
247-
{ "abcdefghijklmnopqrstu", 0x261cdf85faaf4e79ULL },
248-
{ "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL },
249-
{ "abcdefghijklmnopqrstuvwxyzabcdef"
250-
"abcdefghijklmnopqrstuvwxyzghijkl"
251-
"abcdefghijklmnopqrstuvwxyzmnopqr"
252-
"abcdefghijklmnopqrstuvwxyzstuvwx"
253-
"abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL },
254-
{ "a", 0xaeb6f9d5517c61f8ULL },
255-
{ "aa", 0xf2b3b69a9736a1ebULL },
256-
{ "aaa", 0xf752eb6f07b1cafeULL },
257-
{ "aaaaa", 0x812bd21e1236954cULL },
258-
{ "aaaaaaaa", 0xff07a2cff08ac587ULL },
259-
{ "aaaaaaaaaaaaa", 0x84ac949d54d704ecULL },
260-
{ "aaaaaaaaaaaaaaaaaaaaa", 0xcb2c8fb6be8f5648ULL },
261-
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL },
262-
{ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
263-
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
264-
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
265-
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
266-
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL },
267-
{ "z", 0x1ba160d7e8f8785cULL },
268-
{ "zz", 0x2c5c03172f1285d7ULL },
269-
{ "zzz", 0x9d2c4f4b507a2ac3ULL },
270-
{ "zzzzz", 0x0f03b9031735693aULL },
271-
{ "zzzzzzzz", 0xe674147c8582c08eULL },
272-
{ "zzzzzzzzzzzzz", 0x3162d9fa6938db83ULL },
273-
{ "zzzzzzzzzzzzzzzzzzzzz", 0x37b9a549e013620cULL },
274-
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL },
275-
{ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
276-
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
277-
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
278-
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
279-
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL },
280-
{ "a", 0xaeb6f9d5517c61f8ULL },
281-
{ "ab", 0x7ab1edb96be496b4ULL },
282-
{ "aba", 0x3edb049950884d0aULL },
283-
{ "ababa", 0x8f2de9e73a97714bULL },
284-
{ "abababab", 0xee14a29ddf0ce54cULL },
285-
{ "ababababababa", 0x38b3ddaada2d52b4ULL },
286-
{ "ababababababababababa", 0xd3665364219f2b85ULL },
287-
{ "abababababababababababababababab", 0xa75cd6afbf1bc972ULL },
288-
{ "abababababababababababababababab"
289-
"abababababababababababababababab"
290-
"abababababababababababababababab"
291-
"abababababababababababababababab"
292-
"abababababababababababababababab", 0x840192d129f7a22bULL }
293-
#else
294-
#error This test only supports 64-bit and 32-bit systems.
295-
#endif
296-
};
297-
for (unsigned i = 0; i < sizeof(golden_data)/sizeof(*golden_data); ++i) {
298-
StringRef str = golden_data[i].s;
299-
hash_code hash = hash_combine_range(str.begin(), str.end());
300-
#if 0 // Enable this to generate paste-able text for the above structure.
301-
std::string member_str = "\"" + str.str() + "\",";
302-
fprintf(stderr, " { %-35s 0x%016llxULL },\n",
303-
member_str.c_str(), static_cast<uint64_t>(hash));
304-
#endif
305-
EXPECT_EQ(static_cast<size_t>(golden_data[i].hash),
306-
static_cast<size_t>(hash));
307-
}
308-
}
309-
310238
TEST(HashingTest, HashCombineBasicTest) {
311239
// Hashing a sequence of homogenous types matches range hashing.
312240
const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79;

llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ static_library("Support") {
9090
"FormattedStream.cpp",
9191
"GlobPattern.cpp",
9292
"GraphWriter.cpp",
93-
"Hashing.cpp",
9493
"HexagonAttributeParser.cpp",
9594
"HexagonAttributes.cpp",
9695
"InitLLVM.cpp",

0 commit comments

Comments
 (0)