From d5ae9dbbc735fdc8aa16d18b725614a421a2d186 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Mon, 22 Apr 2024 13:46:28 -0700 Subject: [PATCH] [llvm][cas] Fix thread safety issues with MappedFileRegionBumpPtr llvm::sys::fs::lockFile and friends, which are implemented with fcntl F_SETLK, are only safe to use across processes and not across threads. We were attempting to workaround that using createShared, which tries to unique the mapped file per process, but (a) that only works if you have a single copy of llvm in your process, which may not always be true, and (b) we had a race in the implementation between closing one CAS and opening a new one at the same path. Replace these with an flock-based implementation. Since this is not available on all platforms LLVM supports we use a separate implementation for the on-disk CAS rather than change llvm::sys::fs::lockFile. rdar://126882843 (cherry picked from commit 15a76e048eb9d7bf62fc36865cd749b161dc5b86) --- .../llvm/CAS/MappedFileRegionBumpPtr.h | 15 ---- llvm/lib/CAS/MappedFileRegionBumpPtr.cpp | 68 +++---------------- llvm/lib/CAS/OnDiskCommon.cpp | 62 +++++++++++++++++ llvm/lib/CAS/OnDiskCommon.h | 18 +++++ llvm/lib/CAS/OnDiskHashMappedTrie.cpp | 17 ++--- llvm/lib/CAS/UnifiedOnDiskCache.cpp | 9 +-- llvm/test/check-lock-files.ll | 6 ++ llvm/tools/llvm-cas/llvm-cas.cpp | 47 +++++++++++++ 8 files changed, 156 insertions(+), 86 deletions(-) create mode 100644 llvm/test/check-lock-files.ll diff --git a/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h b/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h index ac97158d48ddb..85abe645d596f 100644 --- a/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h +++ b/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h @@ -50,21 +50,6 @@ class MappedFileRegionBumpPtr { create(const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset, function_ref NewFileConstructor); - /// Create a \c MappedFileRegionBumpPtr., shared across the process via a - /// singleton map. - /// - /// FIXME: Singleton map should be based on sys::fs::UniqueID, but currently - /// it is just based on \p Path. - /// - /// \param Path the path to open the mapped region. - /// \param Capacity the maximum size for the mapped file region. - /// \param BumpPtrOffset the offset at which to store the bump pointer. - /// \param NewFileConstructor is for constructing new files. It has exclusive - /// access to the file. Must call \c initializeBumpPtr. - static Expected> createShared( - const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset, - function_ref NewFileConstructor); - /// Finish initializing the bump pointer. Must be called by /// \c NewFileConstructor. void initializeBumpPtr(int64_t BumpPtrOffset); diff --git a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp index 157871f2dab71..fa48957b4255d 100644 --- a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp +++ b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp @@ -52,12 +52,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CAS/MappedFileRegionBumpPtr.h" -#include "llvm/ADT/ScopeExit.h" +#include "OnDiskCommon.h" #include "llvm/ADT/StringMap.h" -#include using namespace llvm; using namespace llvm::cas; +using namespace llvm::cas::ondisk; namespace { struct FileLockRAII { @@ -70,7 +70,7 @@ struct FileLockRAII { ~FileLockRAII() { consumeError(unlock()); } Error lock(LockKind LK) { - if (std::error_code EC = sys::fs::lockFile(FD, LK == Exclusive)) + if (std::error_code EC = lockFileThreadSafe(FD, LK == Exclusive)) return createFileError(Path, EC); Locked = LK; return Error::success(); @@ -79,7 +79,7 @@ struct FileLockRAII { Error unlock() { if (Locked) { Locked = std::nullopt; - if (std::error_code EC = sys::fs::unlockFile(FD)) + if (std::error_code EC = unlockFileThreadSafe(FD)) return createFileError(Path, EC); } return Error::success(); @@ -111,7 +111,8 @@ Expected MappedFileRegionBumpPtr::create( // Take shared/reader lock that will be held until we close the file; unlocked // by destroyImpl. - if (std::error_code EC = sys::fs::lockFile(SharedLockFD, /*Exclusive=*/false)) + if (std::error_code EC = + lockFileThreadSafe(SharedLockFD, /*Exclusive=*/false)) return createFileError(Path, EC); // Take shared/reader lock for initialization. @@ -172,73 +173,22 @@ Expected MappedFileRegionBumpPtr::create( return Result; } -Expected> -MappedFileRegionBumpPtr::createShared( - const Twine &PathTwine, uint64_t Capacity, int64_t BumpPtrOffset, - function_ref NewFileConstructor) { - struct MapNode { - std::mutex Mutex; - std::weak_ptr MFR; - }; - static std::mutex Mutex; - - // FIXME: Map should be by sys::fs::UniqueID instead of by path. Here's how - // it should work: - // - // 1. Open the file. - // 2. Stat the file descriptor to get the UniqueID. - // 3. Check the map. - // 4. If new, pass the open file descriptor to a helper extracted from - // MappedFileRegionBumpPtr::create(). - static StringMap Regions; - - SmallString<128> PathStorage; - const StringRef Path = PathTwine.toStringRef(PathStorage); - - MapNode *Node; - { - std::lock_guard Lock(Mutex); - Node = &Regions[Path]; - } - - if (std::shared_ptr MFR = Node->MFR.lock()) - return MFR; - - // Construct a new region. Use a fine-grained lock to allow other regions to - // be opened concurrently. - std::lock_guard Lock(Node->Mutex); - - // Open / create / initialize files on disk. - Expected ExpectedMFR = - MappedFileRegionBumpPtr::create(Path, Capacity, BumpPtrOffset, - NewFileConstructor); - if (!ExpectedMFR) - return ExpectedMFR.takeError(); - - auto SharedMFR = - std::make_shared(std::move(*ExpectedMFR)); - - // Success. - Node->MFR = SharedMFR; - return std::move(SharedMFR); -} - void MappedFileRegionBumpPtr::destroyImpl() { if (!FD) return; // Drop the shared lock indicating we are no longer accessing the file. if (SharedLockFD) - (void)sys::fs::unlockFile(*SharedLockFD); + (void)unlockFileThreadSafe(*SharedLockFD); // Attempt to truncate the file if we can get exclusive access. Ignore any // errors. if (BumpPtr) { assert(SharedLockFD && "Must have shared lock file open"); - if (sys::fs::tryLockFile(*SharedLockFD) == std::error_code()) { + if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) { assert(size() <= capacity()); (void)sys::fs::resize_file(*FD, size()); - (void)sys::fs::unlockFile(*SharedLockFD); + (void)unlockFileThreadSafe(*SharedLockFD); } } diff --git a/llvm/lib/CAS/OnDiskCommon.cpp b/llvm/lib/CAS/OnDiskCommon.cpp index 4d5a7e1fbbe25..77eb73f810bb3 100644 --- a/llvm/lib/CAS/OnDiskCommon.cpp +++ b/llvm/lib/CAS/OnDiskCommon.cpp @@ -12,6 +12,16 @@ #include "llvm/Support/Process.h" #include #include +#include + +#if __has_include() +#include +#ifdef LOCK_SH +#define HAVE_FLOCK 1 +#else +#define HAVE_FLOCK 0 +#endif +#endif using namespace llvm; @@ -46,3 +56,55 @@ Expected> cas::ondisk::getOverriddenMaxMappingSize() { void cas::ondisk::setMaxMappingSize(uint64_t Size) { OnDiskCASMaxMappingSize = Size; } + +std::error_code cas::ondisk::lockFileThreadSafe(int FD, bool Exclusive) { +#if HAVE_FLOCK + if (flock(FD, Exclusive ? LOCK_EX : LOCK_SH) == 0) + return std::error_code(); + return std::error_code(errno, std::generic_category()); +#elif defined(_WIN32) + // On Windows this implementation is thread-safe. + return sys::fs::lockFile(FD, Exclusive); +#else + return make_error_code(std::errc::no_lock_available); +#endif +} + +std::error_code cas::ondisk::unlockFileThreadSafe(int FD) { +#if HAVE_FLOCK + if (flock(FD, LOCK_UN) == 0) + return std::error_code(); + return std::error_code(errno, std::generic_category()); +#elif defined(_WIN32) + // On Windows this implementation is thread-safe. + return sys::fs::unlockFile(FD); +#else + return make_error_code(std::errc::no_lock_available); +#endif +} + +std::error_code +cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout, + bool Exclusive) { +#if HAVE_FLOCK + auto Start = std::chrono::steady_clock::now(); + auto End = Start + Timeout; + do { + if (flock(FD, (Exclusive ? LOCK_EX : LOCK_SH) | LOCK_NB) == 0) + return std::error_code(); + int Error = errno; + if (Error == EWOULDBLOCK) { + // Match sys::fs::tryLockFile, which sleeps for 1 ms per attempt. + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } + return std::error_code(Error, std::generic_category()); + } while (std::chrono::steady_clock::now() < End); + return make_error_code(std::errc::no_lock_available); +#elif defined(_WIN32) + // On Windows this implementation is thread-safe. + return sys::fs::tryLockFile(FD, Timeout, Exclusive); +#else + return make_error_code(std::errc::no_lock_available); +#endif +} \ No newline at end of file diff --git a/llvm/lib/CAS/OnDiskCommon.h b/llvm/lib/CAS/OnDiskCommon.h index 87ec2d1cf45fc..c8dc0c998a95e 100644 --- a/llvm/lib/CAS/OnDiskCommon.h +++ b/llvm/lib/CAS/OnDiskCommon.h @@ -10,6 +10,7 @@ #define LLVM_LIB_CAS_ONDISKCOMMON_H #include "llvm/Support/Error.h" +#include #include namespace llvm::cas::ondisk { @@ -25,6 +26,23 @@ Expected> getOverriddenMaxMappingSize(); /// created. Set value 0 to use default size. void setMaxMappingSize(uint64_t Size); +/// Thread-safe alternative to \c sys::fs::lockFile. This does not support all +/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library +/// for now. +std::error_code lockFileThreadSafe(int FD, bool Exclusive = true); + +/// Thread-safe alternative to \c sys::fs::unlockFile. This does not support all +/// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library +/// for now. +std::error_code unlockFileThreadSafe(int FD); + +/// Thread-safe alternative to \c sys::fs::tryLockFile. This does not support +/// all the platforms that \c sys::fs::lockFile does, so keep it in the CAS +/// library for now. +std::error_code tryLockFileThreadSafe( + int FD, std::chrono::milliseconds Timeout = std::chrono::milliseconds(0), + bool Exclusive = true); + } // namespace llvm::cas::ondisk #endif // LLVM_LIB_CAS_ONDISKCOMMON_H diff --git a/llvm/lib/CAS/OnDiskHashMappedTrie.cpp b/llvm/lib/CAS/OnDiskHashMappedTrie.cpp index 9621cb94fe453..4d4d3fc81e241 100644 --- a/llvm/lib/CAS/OnDiskHashMappedTrie.cpp +++ b/llvm/lib/CAS/OnDiskHashMappedTrie.cpp @@ -138,7 +138,7 @@ class DatabaseFile { private: static Expected - get(std::shared_ptr Alloc) { + get(std::unique_ptr Alloc) { if (Error E = validate(Alloc->getRegion())) return std::move(E); return DatabaseFile(std::move(Alloc)); @@ -148,14 +148,14 @@ class DatabaseFile { DatabaseFile(MappedFileRegionBumpPtr &Alloc) : H(reinterpret_cast
(Alloc.data())), Alloc(Alloc) {} - DatabaseFile(std::shared_ptr Alloc) + DatabaseFile(std::unique_ptr Alloc) : DatabaseFile(*Alloc) { OwnedAlloc = std::move(Alloc); } Header *H = nullptr; MappedFileRegionBumpPtr &Alloc; - std::shared_ptr OwnedAlloc; + std::unique_ptr OwnedAlloc; }; } // end anonymous namespace @@ -173,14 +173,15 @@ DatabaseFile::create(const Twine &Path, uint64_t Capacity, }; // Get or create the file. - std::shared_ptr Alloc; - if (Error E = MappedFileRegionBumpPtr::createShared(Path, Capacity, - offsetof(Header, BumpPtr), - NewFileConstructor) + MappedFileRegionBumpPtr Alloc; + if (Error E = MappedFileRegionBumpPtr::create(Path, Capacity, + offsetof(Header, BumpPtr), + NewFileConstructor) .moveInto(Alloc)) return std::move(E); - return DatabaseFile::get(std::move(Alloc)); + return DatabaseFile::get( + std::make_unique(std::move(Alloc))); } void DatabaseFile::addTable(TableHandle Table) { diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index 3ed56131cd8dd..951b6d9940a10 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -50,6 +50,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CAS/UnifiedOnDiskCache.h" +#include "OnDiskCommon.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/Support/Errc.h" @@ -182,7 +183,7 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, // from creating a new chain (essentially while a \p UnifiedOnDiskCache // instance holds a shared lock the storage for the primary directory will // grow unrestricted). - if (std::error_code EC = sys::fs::lockFile(LockFD, /*Exclusive=*/false)) + if (std::error_code EC = lockFileThreadSafe(LockFD, /*Exclusive=*/false)) return createFileError(PathBuf, EC); SmallVector DBDirs; @@ -295,7 +296,7 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { UpstreamKVDB.reset(); PrimaryGraphDB.reset(); UpstreamGraphDB = nullptr; - if (std::error_code EC = sys::fs::unlockFile(LockFD)) + if (std::error_code EC = unlockFileThreadSafe(LockFD)) return createFileError(RootPath, EC); if (!ExceededSizeLimit) @@ -305,13 +306,13 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { // exclusive lock in order to create a new primary directory for next time // this \p UnifiedOnDiskCache path is opened. - if (std::error_code EC = sys::fs::tryLockFile( + if (std::error_code EC = tryLockFileThreadSafe( LockFD, std::chrono::milliseconds(0), /*Exclusive=*/true)) { if (EC == errc::no_lock_available) return Error::success(); // couldn't get exclusive lock, give up. return createFileError(RootPath, EC); } - auto _2 = make_scope_exit([&]() { sys::fs::unlockFile(LockFD); }); + auto _2 = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); // Managed to get an exclusive lock which means there are no other open // \p UnifiedOnDiskCache instances for the same path, so we can safely start a diff --git a/llvm/test/check-lock-files.ll b/llvm/test/check-lock-files.ll new file mode 100644 index 0000000000000..f8740a9aa4006 --- /dev/null +++ b/llvm/test/check-lock-files.ll @@ -0,0 +1,6 @@ +# REQUIRES: ondisk_cas + +# Multi-threaded test that CAS lock files protecting the shared data are working. + +# RUN: rm -rf %t/cas +# RUN: llvm-cas -cas %t/cas -check-lock-files \ No newline at end of file diff --git a/llvm/tools/llvm-cas/llvm-cas.cpp b/llvm/tools/llvm-cas/llvm-cas.cpp index 34db8b2c1cb54..0aaae970373a0 100644 --- a/llvm/tools/llvm-cas/llvm-cas.cpp +++ b/llvm/tools/llvm-cas/llvm-cas.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/PrefixMapper.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -64,6 +65,7 @@ static int putCacheKey(ObjectStore &CAS, ActionCache &AC, static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID); static int validateObject(ObjectStore &CAS, const CASID &ID); static int ingestCasIDFile(cas::ObjectStore &CAS, ArrayRef CASIDs); +static int checkLockFiles(StringRef CASPath); int main(int Argc, char **Argv) { InitLLVM X(Argc, Argv); @@ -101,6 +103,7 @@ int main(int Argc, char **Argv) { Import, PutCacheKey, GetCacheResult, + CheckLockFiles, Validate, }; cl::opt Command( @@ -126,6 +129,8 @@ int main(int Argc, char **Argv) { "set a value for a cache key"), clEnumValN(GetCacheResult, "get-cache-result", "get the result value from a cache key"), + clEnumValN(CheckLockFiles, "check-lock-files", + "Test file locking behaviour of on-disk CAS"), clEnumValN(Validate, "validate", "validate the object for CASID")), cl::init(CommandKind::Invalid)); @@ -141,6 +146,9 @@ int main(int Argc, char **Argv) { ExitOnErr( createStringError(inconvertibleErrorCode(), "missing --cas=")); + if (Command == CheckLockFiles) + return checkLockFiles(CASPath); + std::shared_ptr CAS; std::shared_ptr AC; std::optional CASFilePath; @@ -659,6 +667,45 @@ static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID) { return 0; } +static int checkLockFiles(StringRef CASPath) { + ExitOnError ExitOnErr("llvm-cas: check-lock-files: "); + + SmallString<128> DataPoolPath(CASPath); + sys::path::append(DataPoolPath, "v1.1/v8.data"); + + auto OpenCASAndGetDataPoolSize = [&]() -> Expected { + auto Result = createOnDiskUnifiedCASDatabases(CASPath); + if (!Result) + return Result.takeError(); + + sys::fs::file_status DataStat; + if (std::error_code EC = sys::fs::status(DataPoolPath, DataStat)) + ExitOnErr(createFileError(DataPoolPath, EC)); + return DataStat.getSize(); + }; + + // Get the normal size of an open CAS data pool to compare against later. + uint64_t OpenSize = ExitOnErr(OpenCASAndGetDataPoolSize()); + + ThreadPool Pool; + for (int i = 0; i < 1000; ++i) { + Pool.async([&, i] { + uint64_t DataPoolSize = ExitOnErr(OpenCASAndGetDataPoolSize()); + if (DataPoolSize < OpenSize) + ExitOnErr(createStringError( + inconvertibleErrorCode(), + StringRef("CAS data file size (" + std::to_string(DataPoolSize) + + ") is smaller than expected (" + + std::to_string(OpenSize) + ") in iteration " + + std::to_string(i)))); + }); + } + + Pool.wait(); + + return 0; +} + int validateObject(ObjectStore &CAS, const CASID &ID) { ExitOnError ExitOnErr("llvm-cas: validate: "); ExitOnErr(CAS.validate(ID));