-
Notifications
You must be signed in to change notification settings - Fork 13.7k
release/20.x: [clangd] [C++20] [Modules] Add scanning cache (#125988) #128841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Previously, everytime we want to get a source file declaring a specific module, we need to scan the whole projects again and again. The performance is super bad. This patch tries to improve this by introducing a simple cache. (cherry picked from commit ae839b0)
@kadircet What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-clang-tools-extra @llvm/pr-subscribers-clangd Author: None (llvmbot) ChangesBackport ae839b0 Requested by: @ChuanqiXu9 Full diff: https://github.com/llvm/llvm-project/pull/128841.diff 4 Files Affected:
diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index bee31fe51555e..08a7b250a8119 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -357,10 +357,80 @@ void ModuleFileCache::remove(StringRef ModuleName) {
ModuleFiles.erase(ModuleName);
}
+class ModuleNameToSourceCache {
+public:
+ std::string getSourceForModuleName(llvm::StringRef ModuleName) {
+ std::lock_guard<std::mutex> Lock(CacheMutex);
+ auto Iter = ModuleNameToSourceCache.find(ModuleName);
+ if (Iter != ModuleNameToSourceCache.end())
+ return Iter->second;
+ return "";
+ }
+
+ void addEntry(llvm::StringRef ModuleName, PathRef Source) {
+ std::lock_guard<std::mutex> Lock(CacheMutex);
+ ModuleNameToSourceCache[ModuleName] = Source.str();
+ }
+
+ void eraseEntry(llvm::StringRef ModuleName) {
+ std::lock_guard<std::mutex> Lock(CacheMutex);
+ ModuleNameToSourceCache.erase(ModuleName);
+ }
+
+private:
+ std::mutex CacheMutex;
+ llvm::StringMap<std::string> ModuleNameToSourceCache;
+};
+
+class CachingProjectModules : public ProjectModules {
+public:
+ CachingProjectModules(std::unique_ptr<ProjectModules> MDB,
+ ModuleNameToSourceCache &Cache)
+ : MDB(std::move(MDB)), Cache(Cache) {
+ assert(this->MDB && "CachingProjectModules should only be created with a "
+ "valid underlying ProjectModules");
+ }
+
+ std::vector<std::string> getRequiredModules(PathRef File) override {
+ return MDB->getRequiredModules(File);
+ }
+
+ std::string getModuleNameForSource(PathRef File) override {
+ return MDB->getModuleNameForSource(File);
+ }
+
+ std::string getSourceForModuleName(llvm::StringRef ModuleName,
+ PathRef RequiredSrcFile) override {
+ std::string CachedResult = Cache.getSourceForModuleName(ModuleName);
+
+ // Verify Cached Result by seeing if the source declaring the same module
+ // as we query.
+ if (!CachedResult.empty()) {
+ std::string ModuleNameOfCachedSource =
+ MDB->getModuleNameForSource(CachedResult);
+ if (ModuleNameOfCachedSource == ModuleName)
+ return CachedResult;
+
+ // Cached Result is invalid. Clear it.
+ Cache.eraseEntry(ModuleName);
+ }
+
+ auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile);
+ Cache.addEntry(ModuleName, Result);
+
+ return Result;
+ }
+
+private:
+ std::unique_ptr<ProjectModules> MDB;
+ ModuleNameToSourceCache &Cache;
+};
+
/// Collect the directly and indirectly required module names for \param
/// ModuleName in topological order. The \param ModuleName is guaranteed to
/// be the last element in \param ModuleNames.
-llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
+llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource,
+ CachingProjectModules &MDB,
StringRef ModuleName) {
llvm::SmallVector<llvm::StringRef> ModuleNames;
llvm::StringSet<> ModuleNamesSet;
@@ -368,8 +438,8 @@ llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
ModuleNamesSet.insert(ModuleName);
- for (StringRef RequiredModuleName :
- MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
+ for (StringRef RequiredModuleName : MDB.getRequiredModules(
+ MDB.getSourceForModuleName(ModuleName, RequiredSource)))
if (ModuleNamesSet.insert(RequiredModuleName).second)
Visitor(RequiredModuleName, Visitor);
@@ -386,24 +456,29 @@ class ModulesBuilder::ModulesBuilderImpl {
public:
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
+ ModuleNameToSourceCache &getProjectModulesCache() {
+ return ProjectModulesCache;
+ }
const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
llvm::Error
- getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
- ProjectModules &MDB,
+ getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName,
+ const ThreadsafeFS &TFS, CachingProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles);
private:
ModuleFileCache Cache;
+ ModuleNameToSourceCache ProjectModulesCache;
};
llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
- StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
- ReusablePrerequisiteModules &BuiltModuleFiles) {
+ PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS,
+ CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
return llvm::Error::success();
- PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
+ std::string ModuleUnitFileName =
+ MDB.getSourceForModuleName(ModuleName, RequiredSource);
/// It is possible that we're meeting third party modules (modules whose
/// source are not in the project. e.g, the std module may be a third-party
/// module for most project) or something wrong with the implementation of
@@ -416,7 +491,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
llvm::formatv("Don't get the module unit for module {0}", ModuleName));
// Get Required modules in topological order.
- auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
+ auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName);
for (llvm::StringRef ReqModuleName : ReqModuleNames) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
continue;
@@ -454,8 +529,11 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
elog("Failed to get Project Modules information for {0}", File);
return std::make_unique<FailedPrerequisiteModules>();
}
+ CachingProjectModules CachedMDB(std::move(MDB),
+ Impl->getProjectModulesCache());
- std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
+ std::vector<std::string> RequiredModuleNames =
+ CachedMDB.getRequiredModules(File);
if (RequiredModuleNames.empty())
return std::make_unique<ReusablePrerequisiteModules>();
@@ -463,7 +541,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
// Return early if there is any error.
if (llvm::Error Err = Impl->getOrBuildModuleFile(
- RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
+ File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) {
elog("Failed to build module {0}; due to {1}", RequiredModuleName,
toString(std::move(Err)));
return std::make_unique<FailedPrerequisiteModules>();
diff --git a/clang-tools-extra/clangd/ProjectModules.h b/clang-tools-extra/clangd/ProjectModules.h
index 48d52ac9deb89..41812674f12f4 100644
--- a/clang-tools-extra/clangd/ProjectModules.h
+++ b/clang-tools-extra/clangd/ProjectModules.h
@@ -42,9 +42,9 @@ class ProjectModules {
llvm::unique_function<void(tooling::CompileCommand &, PathRef) const>;
virtual std::vector<std::string> getRequiredModules(PathRef File) = 0;
- virtual PathRef
- getSourceForModuleName(llvm::StringRef ModuleName,
- PathRef RequiredSrcFile = PathRef()) = 0;
+ virtual std::string getModuleNameForSource(PathRef File) = 0;
+ virtual std::string getSourceForModuleName(llvm::StringRef ModuleName,
+ PathRef RequiredSrcFile) = 0;
virtual void setCommandMangler(CommandMangler Mangler) {}
diff --git a/clang-tools-extra/clangd/ScanningProjectModules.cpp b/clang-tools-extra/clangd/ScanningProjectModules.cpp
index e4dc11c1c2895..e561513fe687f 100644
--- a/clang-tools-extra/clangd/ScanningProjectModules.cpp
+++ b/clang-tools-extra/clangd/ScanningProjectModules.cpp
@@ -134,6 +134,9 @@ ModuleDependencyScanner::scan(PathRef FilePath,
void ModuleDependencyScanner::globalScan(
const ProjectModules::CommandMangler &Mangler) {
+ if (GlobalScanned)
+ return;
+
for (auto &File : CDB->getAllFiles())
scan(File, Mangler);
@@ -189,11 +192,18 @@ class ScanningAllProjectModules : public ProjectModules {
/// RequiredSourceFile is not used intentionally. See the comments of
/// ModuleDependencyScanner for detail.
- PathRef
- getSourceForModuleName(llvm::StringRef ModuleName,
- PathRef RequiredSourceFile = PathRef()) override {
+ std::string getSourceForModuleName(llvm::StringRef ModuleName,
+ PathRef RequiredSourceFile) override {
Scanner.globalScan(Mangler);
- return Scanner.getSourceForModuleName(ModuleName);
+ return Scanner.getSourceForModuleName(ModuleName).str();
+ }
+
+ std::string getModuleNameForSource(PathRef File) override {
+ auto ScanningResult = Scanner.scan(File, Mangler);
+ if (!ScanningResult || !ScanningResult->ModuleName)
+ return {};
+
+ return *ScanningResult->ModuleName;
}
private:
diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
index 51723c797eabc..27f4c817a8ff3 100644
--- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
+++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
@@ -27,12 +27,41 @@
namespace clang::clangd {
namespace {
+class GlobalScanningCounterProjectModules : public ProjectModules {
+public:
+ GlobalScanningCounterProjectModules(
+ std::unique_ptr<ProjectModules> Underlying, std::atomic<unsigned> &Count)
+ : Underlying(std::move(Underlying)), Count(Count) {}
+
+ std::vector<std::string> getRequiredModules(PathRef File) override {
+ return Underlying->getRequiredModules(File);
+ }
+
+ std::string getModuleNameForSource(PathRef File) override {
+ return Underlying->getModuleNameForSource(File);
+ }
+
+ void setCommandMangler(CommandMangler Mangler) override {
+ Underlying->setCommandMangler(std::move(Mangler));
+ }
+
+ std::string getSourceForModuleName(llvm::StringRef ModuleName,
+ PathRef RequiredSrcFile) override {
+ Count++;
+ return Underlying->getSourceForModuleName(ModuleName, RequiredSrcFile);
+ }
+
+private:
+ std::unique_ptr<ProjectModules> Underlying;
+ std::atomic<unsigned> &Count;
+};
+
class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
public:
MockDirectoryCompilationDatabase(StringRef TestDir, const ThreadsafeFS &TFS)
: MockCompilationDatabase(TestDir),
MockedCDBPtr(std::make_shared<MockClangCompilationDatabase>(*this)),
- TFS(TFS) {
+ TFS(TFS), GlobalScanningCount(0) {
this->ExtraClangFlags.push_back("-std=c++20");
this->ExtraClangFlags.push_back("-c");
}
@@ -40,9 +69,12 @@ class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
void addFile(llvm::StringRef Path, llvm::StringRef Contents);
std::unique_ptr<ProjectModules> getProjectModules(PathRef) const override {
- return scanningProjectModules(MockedCDBPtr, TFS);
+ return std::make_unique<GlobalScanningCounterProjectModules>(
+ scanningProjectModules(MockedCDBPtr, TFS), GlobalScanningCount);
}
+ unsigned getGlobalScanningCount() const { return GlobalScanningCount; }
+
private:
class MockClangCompilationDatabase : public tooling::CompilationDatabase {
public:
@@ -68,6 +100,8 @@ class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
std::shared_ptr<MockClangCompilationDatabase> MockedCDBPtr;
const ThreadsafeFS &TFS;
+
+ mutable std::atomic<unsigned> GlobalScanningCount;
};
// Add files to the working testing directory and the compilation database.
@@ -590,6 +624,28 @@ export constexpr int M = 43;
EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles);
}
+TEST_F(PrerequisiteModulesTests, ScanningCacheTest) {
+ MockDirectoryCompilationDatabase CDB(TestDir, FS);
+
+ CDB.addFile("M.cppm", R"cpp(
+export module M;
+ )cpp");
+ CDB.addFile("A.cppm", R"cpp(
+export module A;
+import M;
+ )cpp");
+ CDB.addFile("B.cppm", R"cpp(
+export module B;
+import M;
+ )cpp");
+
+ ModulesBuilder Builder(CDB);
+
+ Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
+ Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS);
+ EXPECT_EQ(CDB.getGlobalScanningCount(), 1u);
+}
+
} // namespace
} // namespace clang::clangd
|
I don't really feel comfortable backporting this change. it's a significant change with implications on stability, and doesn't really bring any functional improvements to clangd's module support. i.e. in absence of this patch, we know that module support "works" (well at least doesn't crash). with this patch, nothing will change in terms of functionality, we'll just have "faster" startup times, at the risk of rendering clangd less stable. i feel like people wanting such optimizations can actually build clangd from head, or use non-official release snapshots (e.g. https://github.com/clangd/clangd/releases). All of that being said, @ChuanqiXu9 if you still feel like we should backport this; can you test this version of clangd for a while and ensure we don't have crashes/stability concerns? I'd be OK with merging in such scenario. Also cc @HighCommander4 in case I am not responsive. |
Fine, it makes sense to backport functionality patch only. |
Backport ae839b0
Requested by: @ChuanqiXu9