Skip to content

Commit 15a76e0

Browse files
committed
[llvm][cas] Fix thread safety issues with MappedFileRegionBumpPtr
llvm::sys::fs::lockFile and friends, which are implemented with fcntl F_SETLK, are only safe to use across processes and not across threads. We were attempting to workaround that using createShared, which tries to unique the mapped file per process, but (a) that only works if you have a single copy of llvm in your process, which may not always be true, and (b) we had a race in the implementation between closing one CAS and opening a new one at the same path. Replace these with an flock-based implementation. Since this is not available on all platforms LLVM supports we use a separate implementation for the on-disk CAS rather than change llvm::sys::fs::lockFile. rdar://126882843
1 parent 27d0673 commit 15a76e0

8 files changed

+156
-86
lines changed

llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,6 @@ class MappedFileRegionBumpPtr {
5050
create(const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset,
5151
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor);
5252

53-
/// Create a \c MappedFileRegionBumpPtr., shared across the process via a
54-
/// singleton map.
55-
///
56-
/// FIXME: Singleton map should be based on sys::fs::UniqueID, but currently
57-
/// it is just based on \p Path.
58-
///
59-
/// \param Path the path to open the mapped region.
60-
/// \param Capacity the maximum size for the mapped file region.
61-
/// \param BumpPtrOffset the offset at which to store the bump pointer.
62-
/// \param NewFileConstructor is for constructing new files. It has exclusive
63-
/// access to the file. Must call \c initializeBumpPtr.
64-
static Expected<std::shared_ptr<MappedFileRegionBumpPtr>> createShared(
65-
const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset,
66-
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor);
67-
6853
/// Finish initializing the bump pointer. Must be called by
6954
/// \c NewFileConstructor.
7055
void initializeBumpPtr(int64_t BumpPtrOffset);

llvm/lib/CAS/MappedFileRegionBumpPtr.cpp

Lines changed: 9 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,12 @@
5252
//===----------------------------------------------------------------------===//
5353

5454
#include "llvm/CAS/MappedFileRegionBumpPtr.h"
55-
#include "llvm/ADT/ScopeExit.h"
55+
#include "OnDiskCommon.h"
5656
#include "llvm/ADT/StringMap.h"
57-
#include <mutex>
5857

5958
using namespace llvm;
6059
using namespace llvm::cas;
60+
using namespace llvm::cas::ondisk;
6161

6262
namespace {
6363
struct FileLockRAII {
@@ -70,7 +70,7 @@ struct FileLockRAII {
7070
~FileLockRAII() { consumeError(unlock()); }
7171

7272
Error lock(LockKind LK) {
73-
if (std::error_code EC = sys::fs::lockFile(FD, LK == Exclusive))
73+
if (std::error_code EC = lockFileThreadSafe(FD, LK == Exclusive))
7474
return createFileError(Path, EC);
7575
Locked = LK;
7676
return Error::success();
@@ -79,7 +79,7 @@ struct FileLockRAII {
7979
Error unlock() {
8080
if (Locked) {
8181
Locked = std::nullopt;
82-
if (std::error_code EC = sys::fs::unlockFile(FD))
82+
if (std::error_code EC = unlockFileThreadSafe(FD))
8383
return createFileError(Path, EC);
8484
}
8585
return Error::success();
@@ -111,7 +111,8 @@ Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
111111

112112
// Take shared/reader lock that will be held until we close the file; unlocked
113113
// by destroyImpl.
114-
if (std::error_code EC = sys::fs::lockFile(SharedLockFD, /*Exclusive=*/false))
114+
if (std::error_code EC =
115+
lockFileThreadSafe(SharedLockFD, /*Exclusive=*/false))
115116
return createFileError(Path, EC);
116117

117118
// Take shared/reader lock for initialization.
@@ -172,73 +173,22 @@ Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
172173
return Result;
173174
}
174175

175-
Expected<std::shared_ptr<MappedFileRegionBumpPtr>>
176-
MappedFileRegionBumpPtr::createShared(
177-
const Twine &PathTwine, uint64_t Capacity, int64_t BumpPtrOffset,
178-
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor) {
179-
struct MapNode {
180-
std::mutex Mutex;
181-
std::weak_ptr<MappedFileRegionBumpPtr> MFR;
182-
};
183-
static std::mutex Mutex;
184-
185-
// FIXME: Map should be by sys::fs::UniqueID instead of by path. Here's how
186-
// it should work:
187-
//
188-
// 1. Open the file.
189-
// 2. Stat the file descriptor to get the UniqueID.
190-
// 3. Check the map.
191-
// 4. If new, pass the open file descriptor to a helper extracted from
192-
// MappedFileRegionBumpPtr::create().
193-
static StringMap<MapNode> Regions;
194-
195-
SmallString<128> PathStorage;
196-
const StringRef Path = PathTwine.toStringRef(PathStorage);
197-
198-
MapNode *Node;
199-
{
200-
std::lock_guard<std::mutex> Lock(Mutex);
201-
Node = &Regions[Path];
202-
}
203-
204-
if (std::shared_ptr<MappedFileRegionBumpPtr> MFR = Node->MFR.lock())
205-
return MFR;
206-
207-
// Construct a new region. Use a fine-grained lock to allow other regions to
208-
// be opened concurrently.
209-
std::lock_guard<std::mutex> Lock(Node->Mutex);
210-
211-
// Open / create / initialize files on disk.
212-
Expected<MappedFileRegionBumpPtr> ExpectedMFR =
213-
MappedFileRegionBumpPtr::create(Path, Capacity, BumpPtrOffset,
214-
NewFileConstructor);
215-
if (!ExpectedMFR)
216-
return ExpectedMFR.takeError();
217-
218-
auto SharedMFR =
219-
std::make_shared<MappedFileRegionBumpPtr>(std::move(*ExpectedMFR));
220-
221-
// Success.
222-
Node->MFR = SharedMFR;
223-
return std::move(SharedMFR);
224-
}
225-
226176
void MappedFileRegionBumpPtr::destroyImpl() {
227177
if (!FD)
228178
return;
229179

230180
// Drop the shared lock indicating we are no longer accessing the file.
231181
if (SharedLockFD)
232-
(void)sys::fs::unlockFile(*SharedLockFD);
182+
(void)unlockFileThreadSafe(*SharedLockFD);
233183

234184
// Attempt to truncate the file if we can get exclusive access. Ignore any
235185
// errors.
236186
if (BumpPtr) {
237187
assert(SharedLockFD && "Must have shared lock file open");
238-
if (sys::fs::tryLockFile(*SharedLockFD) == std::error_code()) {
188+
if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
239189
assert(size() <= capacity());
240190
(void)sys::fs::resize_file(*FD, size());
241-
(void)sys::fs::unlockFile(*SharedLockFD);
191+
(void)unlockFileThreadSafe(*SharedLockFD);
242192
}
243193
}
244194

llvm/lib/CAS/OnDiskCommon.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@
1212
#include "llvm/Support/Process.h"
1313
#include <mutex>
1414
#include <optional>
15+
#include <thread>
16+
17+
#if __has_include(<sys/file.h>)
18+
#include <sys/file.h>
19+
#ifdef LOCK_SH
20+
#define HAVE_FLOCK 1
21+
#else
22+
#define HAVE_FLOCK 0
23+
#endif
24+
#endif
1525

1626
using namespace llvm;
1727

@@ -46,3 +56,55 @@ Expected<std::optional<uint64_t>> cas::ondisk::getOverriddenMaxMappingSize() {
4656
void cas::ondisk::setMaxMappingSize(uint64_t Size) {
4757
OnDiskCASMaxMappingSize = Size;
4858
}
59+
60+
std::error_code cas::ondisk::lockFileThreadSafe(int FD, bool Exclusive) {
61+
#if HAVE_FLOCK
62+
if (flock(FD, Exclusive ? LOCK_EX : LOCK_SH) == 0)
63+
return std::error_code();
64+
return std::error_code(errno, std::generic_category());
65+
#elif defined(_WIN32)
66+
// On Windows this implementation is thread-safe.
67+
return sys::fs::lockFile(FD, Exclusive);
68+
#else
69+
return make_error_code(std::errc::no_lock_available);
70+
#endif
71+
}
72+
73+
std::error_code cas::ondisk::unlockFileThreadSafe(int FD) {
74+
#if HAVE_FLOCK
75+
if (flock(FD, LOCK_UN) == 0)
76+
return std::error_code();
77+
return std::error_code(errno, std::generic_category());
78+
#elif defined(_WIN32)
79+
// On Windows this implementation is thread-safe.
80+
return sys::fs::unlockFile(FD);
81+
#else
82+
return make_error_code(std::errc::no_lock_available);
83+
#endif
84+
}
85+
86+
std::error_code
87+
cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout,
88+
bool Exclusive) {
89+
#if HAVE_FLOCK
90+
auto Start = std::chrono::steady_clock::now();
91+
auto End = Start + Timeout;
92+
do {
93+
if (flock(FD, (Exclusive ? LOCK_EX : LOCK_SH) | LOCK_NB) == 0)
94+
return std::error_code();
95+
int Error = errno;
96+
if (Error == EWOULDBLOCK) {
97+
// Match sys::fs::tryLockFile, which sleeps for 1 ms per attempt.
98+
std::this_thread::sleep_for(std::chrono::milliseconds(1));
99+
continue;
100+
}
101+
return std::error_code(Error, std::generic_category());
102+
} while (std::chrono::steady_clock::now() < End);
103+
return make_error_code(std::errc::no_lock_available);
104+
#elif defined(_WIN32)
105+
// On Windows this implementation is thread-safe.
106+
return sys::fs::tryLockFile(FD, Timeout, Exclusive);
107+
#else
108+
return make_error_code(std::errc::no_lock_available);
109+
#endif
110+
}

llvm/lib/CAS/OnDiskCommon.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_LIB_CAS_ONDISKCOMMON_H
1111

1212
#include "llvm/Support/Error.h"
13+
#include <chrono>
1314
#include <optional>
1415

1516
namespace llvm::cas::ondisk {
@@ -25,6 +26,23 @@ Expected<std::optional<uint64_t>> getOverriddenMaxMappingSize();
2526
/// created. Set value 0 to use default size.
2627
void setMaxMappingSize(uint64_t Size);
2728

29+
/// Thread-safe alternative to \c sys::fs::lockFile. This does not support all
30+
/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library
31+
/// for now.
32+
std::error_code lockFileThreadSafe(int FD, bool Exclusive = true);
33+
34+
/// Thread-safe alternative to \c sys::fs::unlockFile. This does not support all
35+
/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library
36+
/// for now.
37+
std::error_code unlockFileThreadSafe(int FD);
38+
39+
/// Thread-safe alternative to \c sys::fs::tryLockFile. This does not support
40+
/// all the platforms that \c sys::fs::lockFile does, so keep it in the CAS
41+
/// library for now.
42+
std::error_code tryLockFileThreadSafe(
43+
int FD, std::chrono::milliseconds Timeout = std::chrono::milliseconds(0),
44+
bool Exclusive = true);
45+
2846
} // namespace llvm::cas::ondisk
2947

3048
#endif // LLVM_LIB_CAS_ONDISKCOMMON_H

llvm/lib/CAS/OnDiskHashMappedTrie.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ class DatabaseFile {
138138

139139
private:
140140
static Expected<DatabaseFile>
141-
get(std::shared_ptr<MappedFileRegionBumpPtr> Alloc) {
141+
get(std::unique_ptr<MappedFileRegionBumpPtr> Alloc) {
142142
if (Error E = validate(Alloc->getRegion()))
143143
return std::move(E);
144144
return DatabaseFile(std::move(Alloc));
@@ -148,14 +148,14 @@ class DatabaseFile {
148148

149149
DatabaseFile(MappedFileRegionBumpPtr &Alloc)
150150
: H(reinterpret_cast<Header *>(Alloc.data())), Alloc(Alloc) {}
151-
DatabaseFile(std::shared_ptr<MappedFileRegionBumpPtr> Alloc)
151+
DatabaseFile(std::unique_ptr<MappedFileRegionBumpPtr> Alloc)
152152
: DatabaseFile(*Alloc) {
153153
OwnedAlloc = std::move(Alloc);
154154
}
155155

156156
Header *H = nullptr;
157157
MappedFileRegionBumpPtr &Alloc;
158-
std::shared_ptr<MappedFileRegionBumpPtr> OwnedAlloc;
158+
std::unique_ptr<MappedFileRegionBumpPtr> OwnedAlloc;
159159
};
160160

161161
} // end anonymous namespace
@@ -173,14 +173,15 @@ DatabaseFile::create(const Twine &Path, uint64_t Capacity,
173173
};
174174

175175
// Get or create the file.
176-
std::shared_ptr<MappedFileRegionBumpPtr> Alloc;
177-
if (Error E = MappedFileRegionBumpPtr::createShared(Path, Capacity,
178-
offsetof(Header, BumpPtr),
179-
NewFileConstructor)
176+
MappedFileRegionBumpPtr Alloc;
177+
if (Error E = MappedFileRegionBumpPtr::create(Path, Capacity,
178+
offsetof(Header, BumpPtr),
179+
NewFileConstructor)
180180
.moveInto(Alloc))
181181
return std::move(E);
182182

183-
return DatabaseFile::get(std::move(Alloc));
183+
return DatabaseFile::get(
184+
std::make_unique<MappedFileRegionBumpPtr>(std::move(Alloc)));
184185
}
185186

186187
void DatabaseFile::addTable(TableHandle Table) {

llvm/lib/CAS/UnifiedOnDiskCache.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
//===----------------------------------------------------------------------===//
5151

5252
#include "llvm/CAS/UnifiedOnDiskCache.h"
53+
#include "OnDiskCommon.h"
5354
#include "llvm/ADT/ScopeExit.h"
5455
#include "llvm/CAS/OnDiskKeyValueDB.h"
5556
#include "llvm/Support/Errc.h"
@@ -182,7 +183,7 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
182183
// from creating a new chain (essentially while a \p UnifiedOnDiskCache
183184
// instance holds a shared lock the storage for the primary directory will
184185
// grow unrestricted).
185-
if (std::error_code EC = sys::fs::lockFile(LockFD, /*Exclusive=*/false))
186+
if (std::error_code EC = lockFileThreadSafe(LockFD, /*Exclusive=*/false))
186187
return createFileError(PathBuf, EC);
187188

188189
SmallVector<std::string, 4> DBDirs;
@@ -295,7 +296,7 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
295296
UpstreamKVDB.reset();
296297
PrimaryGraphDB.reset();
297298
UpstreamGraphDB = nullptr;
298-
if (std::error_code EC = sys::fs::unlockFile(LockFD))
299+
if (std::error_code EC = unlockFileThreadSafe(LockFD))
299300
return createFileError(RootPath, EC);
300301

301302
if (!ExceededSizeLimit)
@@ -305,13 +306,13 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
305306
// exclusive lock in order to create a new primary directory for next time
306307
// this \p UnifiedOnDiskCache path is opened.
307308

308-
if (std::error_code EC = sys::fs::tryLockFile(
309+
if (std::error_code EC = tryLockFileThreadSafe(
309310
LockFD, std::chrono::milliseconds(0), /*Exclusive=*/true)) {
310311
if (EC == errc::no_lock_available)
311312
return Error::success(); // couldn't get exclusive lock, give up.
312313
return createFileError(RootPath, EC);
313314
}
314-
auto _2 = make_scope_exit([&]() { sys::fs::unlockFile(LockFD); });
315+
auto _2 = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
315316

316317
// Managed to get an exclusive lock which means there are no other open
317318
// \p UnifiedOnDiskCache instances for the same path, so we can safely start a

llvm/test/check-lock-files.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# REQUIRES: ondisk_cas
2+
3+
# Multi-threaded test that CAS lock files protecting the shared data are working.
4+
5+
# RUN: rm -rf %t/cas
6+
# RUN: llvm-cas -cas %t/cas -check-lock-files

0 commit comments

Comments
 (0)