Skip to content

Commit 37bacd7

Browse files
authored
Merge pull request #8621 from benlangmuir/flocks-of-races
[llvm][cas] Fix thread safety issues with MappedFileRegionBumpPtr
2 parents e2063e1 + 15a76e0 commit 37bacd7

8 files changed

+156
-86
lines changed

llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,6 @@ class MappedFileRegionBumpPtr {
5050
create(const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset,
5151
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor);
5252

53-
/// Create a \c MappedFileRegionBumpPtr., shared across the process via a
54-
/// singleton map.
55-
///
56-
/// FIXME: Singleton map should be based on sys::fs::UniqueID, but currently
57-
/// it is just based on \p Path.
58-
///
59-
/// \param Path the path to open the mapped region.
60-
/// \param Capacity the maximum size for the mapped file region.
61-
/// \param BumpPtrOffset the offset at which to store the bump pointer.
62-
/// \param NewFileConstructor is for constructing new files. It has exclusive
63-
/// access to the file. Must call \c initializeBumpPtr.
64-
static Expected<std::shared_ptr<MappedFileRegionBumpPtr>> createShared(
65-
const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset,
66-
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor);
67-
6853
/// Finish initializing the bump pointer. Must be called by
6954
/// \c NewFileConstructor.
7055
void initializeBumpPtr(int64_t BumpPtrOffset);

llvm/lib/CAS/MappedFileRegionBumpPtr.cpp

Lines changed: 9 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,12 @@
5252
//===----------------------------------------------------------------------===//
5353

5454
#include "llvm/CAS/MappedFileRegionBumpPtr.h"
55-
#include "llvm/ADT/ScopeExit.h"
55+
#include "OnDiskCommon.h"
5656
#include "llvm/ADT/StringMap.h"
57-
#include <mutex>
5857

5958
using namespace llvm;
6059
using namespace llvm::cas;
60+
using namespace llvm::cas::ondisk;
6161

6262
namespace {
6363
struct FileLockRAII {
@@ -70,7 +70,7 @@ struct FileLockRAII {
7070
~FileLockRAII() { consumeError(unlock()); }
7171

7272
Error lock(LockKind LK) {
73-
if (std::error_code EC = sys::fs::lockFile(FD, LK == Exclusive))
73+
if (std::error_code EC = lockFileThreadSafe(FD, LK == Exclusive))
7474
return createFileError(Path, EC);
7575
Locked = LK;
7676
return Error::success();
@@ -79,7 +79,7 @@ struct FileLockRAII {
7979
Error unlock() {
8080
if (Locked) {
8181
Locked = std::nullopt;
82-
if (std::error_code EC = sys::fs::unlockFile(FD))
82+
if (std::error_code EC = unlockFileThreadSafe(FD))
8383
return createFileError(Path, EC);
8484
}
8585
return Error::success();
@@ -111,7 +111,8 @@ Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
111111

112112
// Take shared/reader lock that will be held until we close the file; unlocked
113113
// by destroyImpl.
114-
if (std::error_code EC = sys::fs::lockFile(SharedLockFD, /*Exclusive=*/false))
114+
if (std::error_code EC =
115+
lockFileThreadSafe(SharedLockFD, /*Exclusive=*/false))
115116
return createFileError(Path, EC);
116117

117118
// Take shared/reader lock for initialization.
@@ -172,73 +173,22 @@ Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
172173
return Result;
173174
}
174175

175-
Expected<std::shared_ptr<MappedFileRegionBumpPtr>>
176-
MappedFileRegionBumpPtr::createShared(
177-
const Twine &PathTwine, uint64_t Capacity, int64_t BumpPtrOffset,
178-
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor) {
179-
struct MapNode {
180-
std::mutex Mutex;
181-
std::weak_ptr<MappedFileRegionBumpPtr> MFR;
182-
};
183-
static std::mutex Mutex;
184-
185-
// FIXME: Map should be by sys::fs::UniqueID instead of by path. Here's how
186-
// it should work:
187-
//
188-
// 1. Open the file.
189-
// 2. Stat the file descriptor to get the UniqueID.
190-
// 3. Check the map.
191-
// 4. If new, pass the open file descriptor to a helper extracted from
192-
// MappedFileRegionBumpPtr::create().
193-
static StringMap<MapNode> Regions;
194-
195-
SmallString<128> PathStorage;
196-
const StringRef Path = PathTwine.toStringRef(PathStorage);
197-
198-
MapNode *Node;
199-
{
200-
std::lock_guard<std::mutex> Lock(Mutex);
201-
Node = &Regions[Path];
202-
}
203-
204-
if (std::shared_ptr<MappedFileRegionBumpPtr> MFR = Node->MFR.lock())
205-
return MFR;
206-
207-
// Construct a new region. Use a fine-grained lock to allow other regions to
208-
// be opened concurrently.
209-
std::lock_guard<std::mutex> Lock(Node->Mutex);
210-
211-
// Open / create / initialize files on disk.
212-
Expected<MappedFileRegionBumpPtr> ExpectedMFR =
213-
MappedFileRegionBumpPtr::create(Path, Capacity, BumpPtrOffset,
214-
NewFileConstructor);
215-
if (!ExpectedMFR)
216-
return ExpectedMFR.takeError();
217-
218-
auto SharedMFR =
219-
std::make_shared<MappedFileRegionBumpPtr>(std::move(*ExpectedMFR));
220-
221-
// Success.
222-
Node->MFR = SharedMFR;
223-
return std::move(SharedMFR);
224-
}
225-
226176
void MappedFileRegionBumpPtr::destroyImpl() {
227177
if (!FD)
228178
return;
229179

230180
// Drop the shared lock indicating we are no longer accessing the file.
231181
if (SharedLockFD)
232-
(void)sys::fs::unlockFile(*SharedLockFD);
182+
(void)unlockFileThreadSafe(*SharedLockFD);
233183

234184
// Attempt to truncate the file if we can get exclusive access. Ignore any
235185
// errors.
236186
if (BumpPtr) {
237187
assert(SharedLockFD && "Must have shared lock file open");
238-
if (sys::fs::tryLockFile(*SharedLockFD) == std::error_code()) {
188+
if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
239189
assert(size() <= capacity());
240190
(void)sys::fs::resize_file(*FD, size());
241-
(void)sys::fs::unlockFile(*SharedLockFD);
191+
(void)unlockFileThreadSafe(*SharedLockFD);
242192
}
243193
}
244194

llvm/lib/CAS/OnDiskCommon.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@
1212
#include "llvm/Support/Process.h"
1313
#include <mutex>
1414
#include <optional>
15+
#include <thread>
16+
17+
#if __has_include(<sys/file.h>)
18+
#include <sys/file.h>
19+
#ifdef LOCK_SH
20+
#define HAVE_FLOCK 1
21+
#else
22+
#define HAVE_FLOCK 0
23+
#endif
24+
#endif
1525

1626
using namespace llvm;
1727

@@ -46,3 +56,55 @@ Expected<std::optional<uint64_t>> cas::ondisk::getOverriddenMaxMappingSize() {
4656
void cas::ondisk::setMaxMappingSize(uint64_t Size) {
4757
OnDiskCASMaxMappingSize = Size;
4858
}
59+
60+
std::error_code cas::ondisk::lockFileThreadSafe(int FD, bool Exclusive) {
61+
#if HAVE_FLOCK
62+
if (flock(FD, Exclusive ? LOCK_EX : LOCK_SH) == 0)
63+
return std::error_code();
64+
return std::error_code(errno, std::generic_category());
65+
#elif defined(_WIN32)
66+
// On Windows this implementation is thread-safe.
67+
return sys::fs::lockFile(FD, Exclusive);
68+
#else
69+
return make_error_code(std::errc::no_lock_available);
70+
#endif
71+
}
72+
73+
std::error_code cas::ondisk::unlockFileThreadSafe(int FD) {
74+
#if HAVE_FLOCK
75+
if (flock(FD, LOCK_UN) == 0)
76+
return std::error_code();
77+
return std::error_code(errno, std::generic_category());
78+
#elif defined(_WIN32)
79+
// On Windows this implementation is thread-safe.
80+
return sys::fs::unlockFile(FD);
81+
#else
82+
return make_error_code(std::errc::no_lock_available);
83+
#endif
84+
}
85+
86+
std::error_code
87+
cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout,
88+
bool Exclusive) {
89+
#if HAVE_FLOCK
90+
auto Start = std::chrono::steady_clock::now();
91+
auto End = Start + Timeout;
92+
do {
93+
if (flock(FD, (Exclusive ? LOCK_EX : LOCK_SH) | LOCK_NB) == 0)
94+
return std::error_code();
95+
int Error = errno;
96+
if (Error == EWOULDBLOCK) {
97+
// Match sys::fs::tryLockFile, which sleeps for 1 ms per attempt.
98+
std::this_thread::sleep_for(std::chrono::milliseconds(1));
99+
continue;
100+
}
101+
return std::error_code(Error, std::generic_category());
102+
} while (std::chrono::steady_clock::now() < End);
103+
return make_error_code(std::errc::no_lock_available);
104+
#elif defined(_WIN32)
105+
// On Windows this implementation is thread-safe.
106+
return sys::fs::tryLockFile(FD, Timeout, Exclusive);
107+
#else
108+
return make_error_code(std::errc::no_lock_available);
109+
#endif
110+
}

llvm/lib/CAS/OnDiskCommon.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_LIB_CAS_ONDISKCOMMON_H
1111

1212
#include "llvm/Support/Error.h"
13+
#include <chrono>
1314
#include <optional>
1415

1516
namespace llvm::cas::ondisk {
@@ -25,6 +26,23 @@ Expected<std::optional<uint64_t>> getOverriddenMaxMappingSize();
2526
/// created. Set value 0 to use default size.
2627
void setMaxMappingSize(uint64_t Size);
2728

29+
/// Thread-safe alternative to \c sys::fs::lockFile. This does not support all
30+
/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library
31+
/// for now.
32+
std::error_code lockFileThreadSafe(int FD, bool Exclusive = true);
33+
34+
/// Thread-safe alternative to \c sys::fs::unlockFile. This does not support all
35+
/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library
36+
/// for now.
37+
std::error_code unlockFileThreadSafe(int FD);
38+
39+
/// Thread-safe alternative to \c sys::fs::tryLockFile. This does not support
40+
/// all the platforms that \c sys::fs::lockFile does, so keep it in the CAS
41+
/// library for now.
42+
std::error_code tryLockFileThreadSafe(
43+
int FD, std::chrono::milliseconds Timeout = std::chrono::milliseconds(0),
44+
bool Exclusive = true);
45+
2846
} // namespace llvm::cas::ondisk
2947

3048
#endif // LLVM_LIB_CAS_ONDISKCOMMON_H

llvm/lib/CAS/OnDiskHashMappedTrie.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ class DatabaseFile {
138138

139139
private:
140140
static Expected<DatabaseFile>
141-
get(std::shared_ptr<MappedFileRegionBumpPtr> Alloc) {
141+
get(std::unique_ptr<MappedFileRegionBumpPtr> Alloc) {
142142
if (Error E = validate(Alloc->getRegion()))
143143
return std::move(E);
144144
return DatabaseFile(std::move(Alloc));
@@ -148,14 +148,14 @@ class DatabaseFile {
148148

149149
DatabaseFile(MappedFileRegionBumpPtr &Alloc)
150150
: H(reinterpret_cast<Header *>(Alloc.data())), Alloc(Alloc) {}
151-
DatabaseFile(std::shared_ptr<MappedFileRegionBumpPtr> Alloc)
151+
DatabaseFile(std::unique_ptr<MappedFileRegionBumpPtr> Alloc)
152152
: DatabaseFile(*Alloc) {
153153
OwnedAlloc = std::move(Alloc);
154154
}
155155

156156
Header *H = nullptr;
157157
MappedFileRegionBumpPtr &Alloc;
158-
std::shared_ptr<MappedFileRegionBumpPtr> OwnedAlloc;
158+
std::unique_ptr<MappedFileRegionBumpPtr> OwnedAlloc;
159159
};
160160

161161
} // end anonymous namespace
@@ -173,14 +173,15 @@ DatabaseFile::create(const Twine &Path, uint64_t Capacity,
173173
};
174174

175175
// Get or create the file.
176-
std::shared_ptr<MappedFileRegionBumpPtr> Alloc;
177-
if (Error E = MappedFileRegionBumpPtr::createShared(Path, Capacity,
178-
offsetof(Header, BumpPtr),
179-
NewFileConstructor)
176+
MappedFileRegionBumpPtr Alloc;
177+
if (Error E = MappedFileRegionBumpPtr::create(Path, Capacity,
178+
offsetof(Header, BumpPtr),
179+
NewFileConstructor)
180180
.moveInto(Alloc))
181181
return std::move(E);
182182

183-
return DatabaseFile::get(std::move(Alloc));
183+
return DatabaseFile::get(
184+
std::make_unique<MappedFileRegionBumpPtr>(std::move(Alloc)));
184185
}
185186

186187
void DatabaseFile::addTable(TableHandle Table) {

llvm/lib/CAS/UnifiedOnDiskCache.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
//===----------------------------------------------------------------------===//
5151

5252
#include "llvm/CAS/UnifiedOnDiskCache.h"
53+
#include "OnDiskCommon.h"
5354
#include "llvm/ADT/ScopeExit.h"
5455
#include "llvm/CAS/OnDiskKeyValueDB.h"
5556
#include "llvm/Support/Errc.h"
@@ -182,7 +183,7 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
182183
// from creating a new chain (essentially while a \p UnifiedOnDiskCache
183184
// instance holds a shared lock the storage for the primary directory will
184185
// grow unrestricted).
185-
if (std::error_code EC = sys::fs::lockFile(LockFD, /*Exclusive=*/false))
186+
if (std::error_code EC = lockFileThreadSafe(LockFD, /*Exclusive=*/false))
186187
return createFileError(PathBuf, EC);
187188

188189
SmallVector<std::string, 4> DBDirs;
@@ -295,7 +296,7 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
295296
UpstreamKVDB.reset();
296297
PrimaryGraphDB.reset();
297298
UpstreamGraphDB = nullptr;
298-
if (std::error_code EC = sys::fs::unlockFile(LockFD))
299+
if (std::error_code EC = unlockFileThreadSafe(LockFD))
299300
return createFileError(RootPath, EC);
300301

301302
if (!ExceededSizeLimit)
@@ -305,13 +306,13 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
305306
// exclusive lock in order to create a new primary directory for next time
306307
// this \p UnifiedOnDiskCache path is opened.
307308

308-
if (std::error_code EC = sys::fs::tryLockFile(
309+
if (std::error_code EC = tryLockFileThreadSafe(
309310
LockFD, std::chrono::milliseconds(0), /*Exclusive=*/true)) {
310311
if (EC == errc::no_lock_available)
311312
return Error::success(); // couldn't get exclusive lock, give up.
312313
return createFileError(RootPath, EC);
313314
}
314-
auto _2 = make_scope_exit([&]() { sys::fs::unlockFile(LockFD); });
315+
auto _2 = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
315316

316317
// Managed to get an exclusive lock which means there are no other open
317318
// \p UnifiedOnDiskCache instances for the same path, so we can safely start a

llvm/test/check-lock-files.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# REQUIRES: ondisk_cas
2+
3+
# Multi-threaded test that CAS lock files protecting the shared data are working.
4+
5+
# RUN: rm -rf %t/cas
6+
# RUN: llvm-cas -cas %t/cas -check-lock-files

0 commit comments

Comments
 (0)