Skip to content

[clangd] [C++20] [Modules] Add scanning cache #125988

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 89 additions & 11 deletions clang-tools-extra/clangd/ModulesBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,19 +357,89 @@ void ModuleFileCache::remove(StringRef ModuleName) {
ModuleFiles.erase(ModuleName);
}

class ModuleNameToSourceCache {
public:
std::string getSourceForModuleName(llvm::StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(CacheMutex);
auto Iter = ModuleNameToSourceCache.find(ModuleName);
if (Iter != ModuleNameToSourceCache.end())
return Iter->second;
return "";
}

void addEntry(llvm::StringRef ModuleName, PathRef Source) {
std::lock_guard<std::mutex> Lock(CacheMutex);
ModuleNameToSourceCache[ModuleName] = Source.str();
}

void eraseEntry(llvm::StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(CacheMutex);
ModuleNameToSourceCache.erase(ModuleName);
}

private:
std::mutex CacheMutex;
llvm::StringMap<std::string> ModuleNameToSourceCache;
};

class CachingProjectModules : public ProjectModules {
public:
CachingProjectModules(std::unique_ptr<ProjectModules> MDB,
ModuleNameToSourceCache &Cache)
: MDB(std::move(MDB)), Cache(Cache) {
assert(this->MDB && "CachingProjectModules should only be created with a "
"valid underlying ProjectModules");
}

std::vector<std::string> getRequiredModules(PathRef File) override {
return MDB->getRequiredModules(File);
}

std::string getModuleNameForSource(PathRef File) override {
return MDB->getModuleNameForSource(File);
}

std::string getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSrcFile) override {
std::string CachedResult = Cache.getSourceForModuleName(ModuleName);

// Verify Cached Result by seeing if the source declaring the same module
// as we query.
if (!CachedResult.empty()) {
std::string ModuleNameOfCachedSource =
MDB->getModuleNameForSource(CachedResult);
if (ModuleNameOfCachedSource == ModuleName)
return CachedResult;

// Cached Result is invalid. Clear it.
Cache.eraseEntry(ModuleName);
}

auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile);
Cache.addEntry(ModuleName, Result);

return Result;
}

private:
std::unique_ptr<ProjectModules> MDB;
ModuleNameToSourceCache &Cache;
};

/// Collect the directly and indirectly required module names for \param
/// ModuleName in topological order. The \param ModuleName is guaranteed to
/// be the last element in \param ModuleNames.
llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource,
CachingProjectModules &MDB,
StringRef ModuleName) {
llvm::SmallVector<llvm::StringRef> ModuleNames;
llvm::StringSet<> ModuleNamesSet;

auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
ModuleNamesSet.insert(ModuleName);

for (StringRef RequiredModuleName :
MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
for (StringRef RequiredModuleName : MDB.getRequiredModules(
MDB.getSourceForModuleName(ModuleName, RequiredSource)))
if (ModuleNamesSet.insert(RequiredModuleName).second)
Visitor(RequiredModuleName, Visitor);

Expand All @@ -386,24 +456,29 @@ class ModulesBuilder::ModulesBuilderImpl {
public:
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}

ModuleNameToSourceCache &getProjectModulesCache() {
return ProjectModulesCache;
}
const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }

llvm::Error
getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
ProjectModules &MDB,
getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName,
const ThreadsafeFS &TFS, CachingProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles);

private:
ModuleFileCache Cache;
ModuleNameToSourceCache ProjectModulesCache;
};

llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles) {
PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS,
CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
return llvm::Error::success();

PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
std::string ModuleUnitFileName =
MDB.getSourceForModuleName(ModuleName, RequiredSource);
/// It is possible that we're meeting third-party modules (modules whose
/// sources are not in the project; e.g., the std module may be a third-party
/// module for most projects) or something wrong with the implementation of
Expand All @@ -416,7 +491,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
llvm::formatv("Don't get the module unit for module {0}", ModuleName));

// Get Required modules in topological order.
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName);
for (llvm::StringRef ReqModuleName : ReqModuleNames) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
continue;
Expand Down Expand Up @@ -454,16 +529,19 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
elog("Failed to get Project Modules information for {0}", File);
return std::make_unique<FailedPrerequisiteModules>();
}
CachingProjectModules CachedMDB(std::move(MDB),
Impl->getProjectModulesCache());

std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
std::vector<std::string> RequiredModuleNames =
CachedMDB.getRequiredModules(File);
if (RequiredModuleNames.empty())
return std::make_unique<ReusablePrerequisiteModules>();

auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
// Return early if there is any error.
if (llvm::Error Err = Impl->getOrBuildModuleFile(
RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) {
elog("Failed to build module {0}; due to {1}", RequiredModuleName,
toString(std::move(Err)));
return std::make_unique<FailedPrerequisiteModules>();
Expand Down
6 changes: 3 additions & 3 deletions clang-tools-extra/clangd/ProjectModules.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ class ProjectModules {
llvm::unique_function<void(tooling::CompileCommand &, PathRef) const>;

virtual std::vector<std::string> getRequiredModules(PathRef File) = 0;
virtual PathRef
getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSrcFile = PathRef()) = 0;
virtual std::string getModuleNameForSource(PathRef File) = 0;
virtual std::string getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSrcFile) = 0;

virtual void setCommandMangler(CommandMangler Mangler) {}

Expand Down
18 changes: 14 additions & 4 deletions clang-tools-extra/clangd/ScanningProjectModules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ ModuleDependencyScanner::scan(PathRef FilePath,

void ModuleDependencyScanner::globalScan(
const ProjectModules::CommandMangler &Mangler) {
if (GlobalScanned)
return;

for (auto &File : CDB->getAllFiles())
scan(File, Mangler);

Expand Down Expand Up @@ -189,11 +192,18 @@ class ScanningAllProjectModules : public ProjectModules {

/// RequiredSourceFile is not used intentionally. See the comments of
/// ModuleDependencyScanner for detail.
PathRef
getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSourceFile = PathRef()) override {
std::string getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSourceFile) override {
Scanner.globalScan(Mangler);
return Scanner.getSourceForModuleName(ModuleName);
return Scanner.getSourceForModuleName(ModuleName).str();
}

/// Scans \p File and returns the module name carried by the scanning result.
/// Returns an empty string when the scan yields no result or the result has
/// no module name.
std::string getModuleNameForSource(PathRef File) override {
  auto Scanned = Scanner.scan(File, Mangler);
  if (Scanned && Scanned->ModuleName)
    return *Scanned->ModuleName;

  return {};
}

private:
Expand Down
60 changes: 58 additions & 2 deletions clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,54 @@
namespace clang::clangd {
namespace {

/// Test helper: a ProjectModules decorator that forwards every call to the
/// wrapped instance and increments an externally owned counter each time
/// getSourceForModuleName() is invoked.
class GlobalScanningCounterProjectModules : public ProjectModules {
public:
  /// \param Underlying The ProjectModules that actually answers the queries.
  /// \param Count      Counter to increment; held by reference.
  GlobalScanningCounterProjectModules(
      std::unique_ptr<ProjectModules> Underlying, std::atomic<unsigned> &Count)
      : Wrapped(std::move(Underlying)), Calls(Count) {}

  /// Counts the call, then forwards it.
  std::string getSourceForModuleName(llvm::StringRef ModuleName,
                                     PathRef RequiredSrcFile) override {
    ++Calls;
    return Wrapped->getSourceForModuleName(ModuleName, RequiredSrcFile);
  }

  // The remaining overrides are plain pass-throughs and are not counted.

  std::vector<std::string> getRequiredModules(PathRef File) override {
    return Wrapped->getRequiredModules(File);
  }

  std::string getModuleNameForSource(PathRef File) override {
    return Wrapped->getModuleNameForSource(File);
  }

  void setCommandMangler(CommandMangler Mangler) override {
    Wrapped->setCommandMangler(std::move(Mangler));
  }

private:
  std::unique_ptr<ProjectModules> Wrapped;
  std::atomic<unsigned> &Calls;
};

class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
public:
MockDirectoryCompilationDatabase(StringRef TestDir, const ThreadsafeFS &TFS)
: MockCompilationDatabase(TestDir),
MockedCDBPtr(std::make_shared<MockClangCompilationDatabase>(*this)),
TFS(TFS) {
TFS(TFS), GlobalScanningCount(0) {
this->ExtraClangFlags.push_back("-std=c++20");
this->ExtraClangFlags.push_back("-c");
}

void addFile(llvm::StringRef Path, llvm::StringRef Contents);

std::unique_ptr<ProjectModules> getProjectModules(PathRef) const override {
return scanningProjectModules(MockedCDBPtr, TFS);
return std::make_unique<GlobalScanningCounterProjectModules>(
scanningProjectModules(MockedCDBPtr, TFS), GlobalScanningCount);
}

unsigned getGlobalScanningCount() const { return GlobalScanningCount; }

private:
class MockClangCompilationDatabase : public tooling::CompilationDatabase {
public:
Expand All @@ -68,6 +100,8 @@ class MockDirectoryCompilationDatabase : public MockCompilationDatabase {

std::shared_ptr<MockClangCompilationDatabase> MockedCDBPtr;
const ThreadsafeFS &TFS;

mutable std::atomic<unsigned> GlobalScanningCount;
};

// Add files to the working testing directory and the compilation database.
Expand Down Expand Up @@ -590,6 +624,28 @@ export constexpr int M = 43;
EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles);
}

// Modules A and B both import M. After building the prerequisite modules of
// A and then of B, the mock database's counter of getSourceForModuleName()
// calls must be exactly 1; the second build is expected to be answered from
// the cache.
TEST_F(PrerequisiteModulesTests, ScanningCacheTest) {
  MockDirectoryCompilationDatabase CDB(TestDir, FS);

  // The shared dependency.
  CDB.addFile("M.cppm", R"cpp(
export module M;
)cpp");
  // Two independent importers of M.
  CDB.addFile("A.cppm", R"cpp(
export module A;
import M;
)cpp");
  CDB.addFile("B.cppm", R"cpp(
export module B;
import M;
)cpp");

  ModulesBuilder Builder(CDB);

  const auto SourceA = getFullPath("A.cppm");
  Builder.buildPrerequisiteModulesFor(SourceA, FS);

  const auto SourceB = getFullPath("B.cppm");
  Builder.buildPrerequisiteModulesFor(SourceB, FS);

  EXPECT_EQ(CDB.getGlobalScanningCount(), 1u);
}

} // namespace
} // namespace clang::clangd

Expand Down