Skip to content

[clang][modules] Move SLocEntry search into ASTReader #66966

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 6, 2023
20 changes: 20 additions & 0 deletions clang/include/clang/Basic/SourceManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,14 @@ class SLocEntry {
return Expansion;
}

/// Creates an incomplete SLocEntry that is only able to report its offset.
static SLocEntry getOffsetOnly(SourceLocation::UIntTy Offset) {
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
SLocEntry E;
E.Offset = Offset;
return E;
}

static SLocEntry get(SourceLocation::UIntTy Offset, const FileInfo &FI) {
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
SLocEntry E;
Expand Down Expand Up @@ -533,6 +541,12 @@ class ExternalSLocEntrySource {
/// entry from being loaded.
virtual bool ReadSLocEntry(int ID) = 0;

/// Get the index ID for the loaded SourceLocation offset.
///
/// \returns Invalid index ID (0) if an error occurred that prevented the
/// SLocEntry from being loaded.
virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;

/// Retrieve the module import location and name for the given ID, if
/// in fact it was loaded from a module (rather than, say, a precompiled
/// header).
Expand Down Expand Up @@ -723,6 +737,12 @@ class SourceManager : public RefCountedBase<SourceManager> {
/// Same indexing as LoadedSLocEntryTable.
llvm::BitVector SLocEntryLoaded;

/// A bitmap that indicates whether the entries of LoadedSLocEntryTable
/// have already had their offset loaded from the external source.
///
/// Superset of SLocEntryLoaded. Same indexing as SLocEntryLoaded.
llvm::BitVector SLocEntryOffsetLoaded;

/// An external source for source location entries.
ExternalSLocEntrySource *ExternalSLocEntries = nullptr;

Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -2153,6 +2153,12 @@ class ASTReader

/// Read the source location entry with index ID.
bool ReadSLocEntry(int ID) override;
/// Get the index ID for the loaded SourceLocation offset.
int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
/// Try to read the offset of the SLocEntry at the given index in the given
/// module file.
llvm::Expected<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
unsigned Index);

/// Retrieve the module import location and module name for the
/// given source manager entry ID.
Expand Down
70 changes: 5 additions & 65 deletions clang/lib/Basic/SourceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ void SourceManager::clearIDTables() {
LocalSLocEntryTable.clear();
LoadedSLocEntryTable.clear();
SLocEntryLoaded.clear();
SLocEntryOffsetLoaded.clear();
LastLineNoFileIDQuery = FileID();
LastLineNoContentCache = nullptr;
LastFileIDLookup = FileID();
Expand Down Expand Up @@ -460,6 +461,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
}
LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
SLocEntryOffsetLoaded.resize(LoadedSLocEntryTable.size());
CurrentLoadedOffset -= TotalSize;
int ID = LoadedSLocEntryTable.size();
return std::make_pair(-ID - 1, CurrentLoadedOffset);
Expand Down Expand Up @@ -608,7 +610,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
LoadedSLocEntryTable[Index] = SLocEntry::get(
LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
SLocEntryLoaded[Index] = true;
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
return FileID::get(LoadedID);
}
unsigned FileSize = File.getSize();
Expand Down Expand Up @@ -668,7 +670,7 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
SLocEntryLoaded[Index] = true;
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
return SourceLocation::getMacroLoc(LoadedOffset);
}
LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
Expand Down Expand Up @@ -869,69 +871,7 @@ FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
return FileID();
}

// Essentially the same as the local case, but the loaded array is sorted
// in the other direction (decreasing order).
// GreaterIndex is the one where the offset is greater, which is actually a
// lower index!
unsigned GreaterIndex = 0;
unsigned LessIndex = LoadedSLocEntryTable.size();
if (LastFileIDLookup.ID < 0) {
// Prune the search space.
int LastID = LastFileIDLookup.ID;
if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
GreaterIndex =
(-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache
else
LessIndex = -LastID - 2;
}

// First do a linear scan from the last lookup position, if possible.
unsigned NumProbes;
bool Invalid = false;
for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
// Make sure the entry is loaded!
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
if (Invalid)
return FileID(); // invalid entry.
if (E.getOffset() <= SLocOffset) {
FileID Res = FileID::get(-int(GreaterIndex) - 2);
LastFileIDLookup = Res;
NumLinearScans += NumProbes + 1;
return Res;
}
}

// Linear scan failed. Do the binary search.
NumProbes = 0;
while (true) {
++NumProbes;
unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
if (Invalid)
return FileID(); // invalid entry.

if (E.getOffset() > SLocOffset) {
if (GreaterIndex == MiddleIndex) {
assert(0 && "binary search missed the entry");
return FileID();
}
GreaterIndex = MiddleIndex;
continue;
}

if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
FileID Res = FileID::get(-int(MiddleIndex) - 2);
LastFileIDLookup = Res;
NumBinaryProbes += NumProbes;
return Res;
}

if (LessIndex == MiddleIndex) {
assert(0 && "binary search missed the entry");
return FileID();
}
LessIndex = MiddleIndex;
}
return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset));
}

SourceLocation SourceManager::
Expand Down
73 changes: 73 additions & 0 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,79 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
}
}

llvm::Expected<SourceLocation::UIntTy>
ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
BitstreamCursor &Cursor = F->SLocEntryCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
F->SLocEntryOffsets[Index]))
return Err;

Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry)
return MaybeEntry.takeError();

llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record)
return llvm::createStringError(
std::errc::illegal_byte_sequence,
"incorrectly-formatted source location entry in AST file");

RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
if (!MaybeSLOC)
return MaybeSLOC.takeError();

switch (MaybeSLOC.get()) {
default:
return llvm::createStringError(
std::errc::illegal_byte_sequence,
"incorrectly-formatted source location entry in AST file");
case SM_SLOC_FILE_ENTRY:
case SM_SLOC_BUFFER_ENTRY:
case SM_SLOC_EXPANSION_ENTRY:
return F->SLocEntryBaseOffset + Record[0];
}
}

int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
auto SLocMapI =
GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1);
assert(SLocMapI != GlobalSLocOffsetMap.end() &&
"Corrupted global sloc offset map");
ModuleFile *F = SLocMapI->second;

bool Invalid = false;

auto It = llvm::upper_bound(
llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset,
[&](SourceLocation::UIntTy Offset, std::size_t LocalIndex) {
int ID = F->SLocEntryBaseID + LocalIndex;
std::size_t Index = -ID - 2;
if (!SourceMgr.SLocEntryOffsetLoaded[Index]) {
assert(!SourceMgr.SLocEntryLoaded[Index]);
auto MaybeEntryOffset = readSLocOffset(F, LocalIndex);
if (!MaybeEntryOffset) {
Error(MaybeEntryOffset.takeError());
Invalid = true;
return true;
}
SourceMgr.LoadedSLocEntryTable[Index] =
SrcMgr::SLocEntry::getOffsetOnly(*MaybeEntryOffset);
SourceMgr.SLocEntryOffsetLoaded[Index] = true;
}
return Offset < SourceMgr.LoadedSLocEntryTable[Index].getOffset();
});

if (Invalid)
return 0;

// The iterator points to the first entry with start offset greater than the
// offset of interest. The previous entry must contain the offset of interest.
return F->SLocEntryBaseID + *std::prev(It);
}

bool ASTReader::ReadSLocEntry(int ID) {
if (ID == 0)
return false;
Expand Down
1 change: 1 addition & 0 deletions clang/test/Modules/explicit-build-missing-files.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ int y = a2<int>;
// CHECK: In module 'a':
// CHECK-NEXT: a.h:1:45: error:

int z = b<int>;
// MISSING-B: could not find file '{{.*}}b.h'
// MISSING-B-NOT: please delete the module cache
#endif
Expand Down
76 changes: 50 additions & 26 deletions llvm/include/llvm/ADT/STLExtras.h
Original file line number Diff line number Diff line change
Expand Up @@ -2261,43 +2261,67 @@ template <typename... Refs> struct enumerator_result<std::size_t, Refs...> {
mutable range_reference_tuple Storage;
};

/// Infinite stream of increasing 0-based `size_t` indices.
struct index_stream {
struct iterator : iterator_facade_base<iterator, std::forward_iterator_tag,
const iterator> {
iterator &operator++() {
assert(Index != std::numeric_limits<std::size_t>::max() &&
"Attempting to increment end iterator");
++Index;
return *this;
}
struct index_iterator
: llvm::iterator_facade_base<index_iterator,
std::random_access_iterator_tag, std::size_t> {
index_iterator(std::size_t Index) : Index(Index) {}

// Note: This dereference operator returns a value instead of a reference
// and does not strictly conform to the C++17's definition of forward
// iterator. However, it satisfies all the forward_iterator requirements
// that the `zip_common` depends on and fully conforms to the C++20
// definition of forward iterator.
std::size_t operator*() const { return Index; }
index_iterator &operator+=(std::ptrdiff_t N) {
Index += N;
return *this;
}

friend bool operator==(const iterator &Lhs, const iterator &Rhs) {
return Lhs.Index == Rhs.Index;
}
index_iterator &operator-=(std::ptrdiff_t N) {
Index -= N;
return *this;
}

std::size_t Index = 0;
};
std::ptrdiff_t operator-(const index_iterator &R) const {
return Index - R.Index;
}

iterator begin() const { return {}; }
iterator end() const {
// Note: This dereference operator returns a value instead of a reference
// and does not strictly conform to the C++17's definition of forward
// iterator. However, it satisfies all the forward_iterator requirements
// that the `zip_common` depends on and fully conforms to the C++20
// definition of forward iterator.
std::size_t operator*() const { return Index; }

friend bool operator==(const index_iterator &Lhs, const index_iterator &Rhs) {
return Lhs.Index == Rhs.Index;
}

friend bool operator<(const index_iterator &Lhs, const index_iterator &Rhs) {
return Lhs.Index < Rhs.Index;
}

private:
std::size_t Index;
};

/// Infinite stream of increasing 0-based `size_t` indices.
struct index_stream {
index_iterator begin() const { return {0}; }
index_iterator end() const {
// We approximate 'infinity' with the max size_t value, which should be good
// enough to index over any container.
iterator It;
It.Index = std::numeric_limits<std::size_t>::max();
return It;
return index_iterator{std::numeric_limits<std::size_t>::max()};
}
};

} // end namespace detail

/// Increasing range of `size_t` indices.
class index_range {
std::size_t Begin;
std::size_t End;

public:
index_range(std::size_t Begin, std::size_t End) : Begin(Begin), End(End) {}
detail::index_iterator begin() const { return {Begin}; }
detail::index_iterator end() const { return {End}; }
};

/// Given two or more input ranges, returns a new range whose values are are
/// tuples (A, B, C, ...), such that A is the 0-based index of the item in the
/// sequence, and B, C, ..., are the values from the original input ranges. All
Expand Down