Skip to content

Commit 8d4e349

Browse files
committed
[Modules] No transitive source location change
1 parent c6a65e4 commit 8d4e349

15 files changed

+264
-233
lines changed

clang/include/clang/Basic/SourceLocation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class SourceLocation {
9090
friend class ASTWriter;
9191
friend class SourceManager;
9292
friend struct llvm::FoldingSetTrait<SourceLocation, void>;
93+
friend class SourceLocationEncoding;
9394

9495
public:
9596
using UIntTy = uint32_t;

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "clang/Basic/IdentifierTable.h"
2323
#include "clang/Basic/OperatorKinds.h"
2424
#include "clang/Basic/SourceLocation.h"
25+
#include "clang/Serialization/SourceLocationEncoding.h"
2526
#include "llvm/ADT/DenseMapInfo.h"
2627
#include "llvm/Bitstream/BitCodes.h"
2728
#include <cassert>
@@ -175,45 +176,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
175176

176177
/// Source range/offset of a preprocessed entity.
177178
struct PPEntityOffset {
179+
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
180+
178181
/// Raw source location of beginning of range.
179-
SourceLocation::UIntTy Begin;
182+
RawLocEncoding Begin;
180183

181184
/// Raw source location of end of range.
182-
SourceLocation::UIntTy End;
185+
RawLocEncoding End;
183186

184187
/// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
185188
uint32_t BitOffset;
186189

187-
PPEntityOffset(SourceRange R, uint32_t BitOffset)
188-
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
189-
BitOffset(BitOffset) {}
190-
191-
SourceLocation getBegin() const {
192-
return SourceLocation::getFromRawEncoding(Begin);
193-
}
190+
PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
191+
: Begin(Begin), End(End), BitOffset(BitOffset) {}
194192

195-
SourceLocation getEnd() const {
196-
return SourceLocation::getFromRawEncoding(End);
197-
}
193+
RawLocEncoding getBegin() const { return Begin; }
194+
RawLocEncoding getEnd() const { return End; }
198195
};
199196

200197
/// Source range of a skipped preprocessor region
201198
struct PPSkippedRange {
199+
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
200+
202201
/// Raw source location of beginning of range.
203-
SourceLocation::UIntTy Begin;
202+
RawLocEncoding Begin;
204203
/// Raw source location of end of range.
205-
SourceLocation::UIntTy End;
204+
RawLocEncoding End;
206205

207-
PPSkippedRange(SourceRange R)
208-
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) {
209-
}
206+
PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
207+
: Begin(Begin), End(End) {}
210208

211-
SourceLocation getBegin() const {
212-
return SourceLocation::getFromRawEncoding(Begin);
213-
}
214-
SourceLocation getEnd() const {
215-
return SourceLocation::getFromRawEncoding(End);
216-
}
209+
RawLocEncoding getBegin() const { return Begin; }
210+
RawLocEncoding getEnd() const { return End; }
217211
};
218212

219213
/// Offset in the AST file. Use a 64-bit integer split into low/high
@@ -239,26 +233,26 @@ struct UnderalignedInt64 {
239233

240234
/// Source location and bit offset of a declaration.
241235
struct DeclOffset {
236+
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
237+
242238
/// Raw source location.
243-
SourceLocation::UIntTy Loc = 0;
239+
RawLocEncoding RawLoc = 0;
244240

245241
/// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
246242
/// structure alignment 32-bit and avoid padding gap because undefined
247243
/// value in the padding affects AST hash.
248244
UnderalignedInt64 BitOffset;
249245

250246
DeclOffset() = default;
251-
DeclOffset(SourceLocation Loc, uint64_t BitOffset,
252-
uint64_t DeclTypesBlockStartOffset) {
253-
setLocation(Loc);
247+
DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset,
248+
uint64_t DeclTypesBlockStartOffset)
249+
: RawLoc(RawLoc) {
254250
setBitOffset(BitOffset, DeclTypesBlockStartOffset);
255251
}
256252

257-
void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
253+
void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; }
258254

259-
SourceLocation getLocation() const {
260-
return SourceLocation::getFromRawEncoding(Loc);
261-
}
255+
RawLocEncoding getRawLoc() const { return RawLoc; }
262256

263257
void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) {
264258
BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);

clang/include/clang/Serialization/ASTReader.h

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,7 @@ class ASTReader
696696
/// Mapping from global submodule IDs to the module file in which the
697697
/// submodule resides along with the offset that should be added to the
698698
/// global submodule ID to produce a local ID.
699-
GlobalSubmoduleMapType GlobalSubmoduleMap;
699+
mutable GlobalSubmoduleMapType GlobalSubmoduleMap;
700700

701701
/// A set of hidden declarations.
702702
using HiddenNames = SmallVector<Decl *, 2>;
@@ -942,6 +942,12 @@ class ASTReader
942942
/// Sema tracks these to emit deferred diags.
943943
llvm::SmallSetVector<serialization::DeclID, 4> DeclsToCheckForDeferredDiags;
944944

945+
/// The module files imported by different module files. Indirectly imported
946+
/// module files are included too. The information comes from
947+
/// ReadModuleOffsetMap(ModuleFile&).
948+
mutable llvm::DenseMap<ModuleFile *, llvm::SmallVector<ModuleFile *>>
949+
ImportedModuleFiles;
950+
945951
private:
946952
struct ImportedSubmodule {
947953
serialization::SubmoduleID ID;
@@ -1761,6 +1767,7 @@ class ASTReader
17611767

17621768
/// Retrieve the module manager.
17631769
ModuleManager &getModuleManager() { return ModuleMgr; }
1770+
const ModuleManager &getModuleManager() const { return ModuleMgr; }
17641771

17651772
/// Retrieve the preprocessor.
17661773
Preprocessor &getPreprocessor() const { return PP; }
@@ -2170,8 +2177,8 @@ class ASTReader
21702177

21712178
/// Retrieve the global submodule ID given a module and its local ID
21722179
/// number.
2173-
serialization::SubmoduleID
2174-
getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID);
2180+
serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M,
2181+
unsigned LocalID) const;
21752182

21762183
/// Retrieve the submodule that corresponds to a global submodule ID.
21772184
///
@@ -2184,7 +2191,7 @@ class ASTReader
21842191

21852192
/// Retrieve the module file with a given local ID within the specified
21862193
/// ModuleFile.
2187-
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID);
2194+
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const;
21882195

21892196
/// Get an ID for the given module file.
21902197
unsigned getModuleFileID(ModuleFile *M);
@@ -2220,40 +2227,47 @@ class ASTReader
22202227
return Sema::AlignPackInfo::getFromRawEncoding(Raw);
22212228
}
22222229

2230+
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
2231+
22232232
/// Read a source location from raw form and return it in its
22242233
/// originating module file's source location space.
2225-
SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
2226-
LocSeq *Seq = nullptr) const {
2234+
std::pair<SourceLocation, unsigned>
2235+
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
2236+
LocSeq *Seq = nullptr) const {
22272237
return SourceLocationEncoding::decode(Raw, Seq);
22282238
}
22292239

22302240
/// Read a source location from raw form.
2231-
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
2232-
SourceLocation::UIntTy Raw,
2233-
LocSeq *Seq = nullptr) const {
2234-
SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
2235-
return TranslateSourceLocation(ModuleFile, Loc);
2241+
SourceLocation ReadRawSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
2242+
LocSeq *Seq = nullptr) const {
2243+
if (!MF.ModuleOffsetMap.empty())
2244+
ReadModuleOffsetMap(MF);
2245+
2246+
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
2247+
ModuleFile *ModuleFileHomingLoc =
2248+
ModuleFileIndex ? ImportedModuleFiles[&MF][ModuleFileIndex - 1] : &MF;
2249+
return TranslateSourceLocation(*ModuleFileHomingLoc, Loc);
22362250
}
22372251

22382252
/// Translate a source location from another module file's source
22392253
/// location space into ours.
22402254
SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile,
22412255
SourceLocation Loc) const {
2242-
if (!ModuleFile.ModuleOffsetMap.empty())
2243-
ReadModuleOffsetMap(ModuleFile);
2244-
assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
2245-
ModuleFile.SLocRemap.end() &&
2246-
"Cannot find offset to remap.");
2247-
SourceLocation::IntTy Remap =
2248-
ModuleFile.SLocRemap.find(Loc.getOffset())->second;
2249-
return Loc.getLocWithOffset(Remap);
2256+
if (Loc.isInvalid())
2257+
return Loc;
2258+
2259+
// A loaded source location has already been translated.
2260+
if (SourceMgr.isLoadedSourceLocation(Loc))
2261+
return Loc;
2262+
2263+
return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
22502264
}
22512265

22522266
/// Read a source location.
22532267
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
22542268
const RecordDataImpl &Record, unsigned &Idx,
22552269
LocSeq *Seq = nullptr) {
2256-
return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
2270+
return ReadRawSourceLocation(ModuleFile, Record[Idx++], Seq);
22572271
}
22582272

22592273
/// Read a FileID.

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,10 @@ class ASTWriter : public ASTDeserializationListener,
648648
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
649649
LocSeq *Seq = nullptr);
650650

651+
/// Return the raw encoding for a source location.
652+
SourceLocationEncoding::RawLocEncoding
653+
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
654+
651655
/// Emit a source range.
652656
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
653657
LocSeq *Seq = nullptr);

clang/include/clang/Serialization/ModuleFile.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,6 @@ class ModuleFile {
295295
/// AST file.
296296
const uint32_t *SLocEntryOffsets = nullptr;
297297

298-
/// Remapping table for source locations in this module.
299-
ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
300-
SLocRemap;
301-
302298
// === Identifiers ===
303299

304300
/// The number of identifiers in this AST file.

clang/include/clang/Serialization/SourceLocationEncoding.h

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,26 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// Source locations are stored pervasively in the AST, making up a third of
10-
// the size of typical serialized files. Storing them efficiently is important.
9+
// We wish to encode a SourceLocation from another module file in a way that
10+
// does not depend on that module file, so that source location changes in the
11+
// other module file do not affect the contents of the current module file. Then
12+
// users don't need to recompile the whole project due to a new line in a module
13+
// unit in the root of the dependency graph.
1114
//
12-
// We use integers optimized by VBR-encoding, because:
13-
// - when abbreviations cannot be used, VBR6 encoding is our only choice
14-
// - in the worst case a SourceLocation can be ~any 32-bit number, but in
15-
// practice they are highly predictable
15+
// To achieve this, we need to encode the index of the module file into the
16+
// encoding of the source location. The encoding of the source location may be:
1617
//
17-
// We encode the integer so that likely values encode as small numbers that
18-
// turn into few VBR chunks:
19-
// - the invalid sentinel location is a very common value: it encodes as 0
20-
// - the "macro or not" bit is stored at the bottom of the integer
21-
// (rather than at the top, as in memory), so macro locations can have
22-
// small representations.
23-
// - related locations (e.g. of a left and right paren pair) are usually
24-
// similar, so when encoding a sequence of locations we store only
25-
// differences between successive elements.
18+
// |-----------------------|-----------------------|
19+
// | A | B | C |
20+
//
21+
// * A: 32 bits. The index of the module file in the module manager + 1. The +1
22+
// here
23+
// is necessary since we want 0 to stand for the current module file.
24+
// * B: 31 bits. The offset of the source location relative to the module file
25+
// containing it.
26+
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
27+
// space
28+
// in case the index of the module file is 0.
2629
//
2730
//===----------------------------------------------------------------------===//
2831

@@ -52,11 +55,20 @@ class SourceLocationEncoding {
5255
friend SourceLocationSequence;
5356

5457
public:
55-
static uint64_t encode(SourceLocation Loc,
56-
SourceLocationSequence * = nullptr);
57-
static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
58+
using RawLocEncoding = uint64_t;
59+
60+
static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
61+
unsigned BaseModuleFileIndex,
62+
SourceLocationSequence * = nullptr);
63+
static std::pair<SourceLocation, unsigned>
64+
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
5865
};
5966

67+
/// TODO: Remove SourceLocationSequence since it is not used now.
68+
/// Since we will put the index for ModuleFile in the high bits in the encodings
69+
/// for source locations, it is meaningless to reduce the size of source
70+
/// locations.
71+
///
6072
/// Serialized encoding of a sequence of SourceLocations.
6173
///
6274
/// Optimized to produce small values when locations with the sequence are
@@ -149,14 +161,30 @@ class SourceLocationSequence::State {
149161
operator SourceLocationSequence *() { return &Seq; }
150162
};
151163

152-
inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
153-
SourceLocationSequence *Seq) {
154-
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
164+
inline SourceLocationEncoding::RawLocEncoding
165+
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
166+
unsigned BaseModuleFileIndex,
167+
SourceLocationSequence *Seq) {
168+
if (Loc.isInvalid())
169+
return 0;
170+
171+
assert(Loc.getOffset() >= BaseOffset);
172+
Loc = Loc.getLocWithOffset(-BaseOffset);
173+
RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
174+
assert(Encoded < ((RawLocEncoding)1 << 32));
175+
176+
assert(BaseModuleFileIndex < ((RawLocEncoding)1 << 32));
177+
Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
178+
return Encoded;
155179
}
156-
inline SourceLocation
157-
SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
158-
return Seq ? Seq->decode(Encoded)
159-
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
180+
inline std::pair<SourceLocation, unsigned>
181+
SourceLocationEncoding::decode(RawLocEncoding Encoded,
182+
SourceLocationSequence *Seq) {
183+
unsigned ModuleFileIndex = Encoded >> 32;
184+
Encoded &= ((RawLocEncoding)1 << 33) - 1;
185+
SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
186+
187+
return {Loc, ModuleFileIndex};
160188
}
161189

162190
} // namespace clang

clang/lib/Frontend/ASTUnit.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2373,8 +2373,6 @@ bool ASTUnit::serialize(raw_ostream &OS) {
23732373
return serializeUnit(Writer, Buffer, getSema(), OS);
23742374
}
23752375

2376-
using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;
2377-
23782376
void ASTUnit::TranslateStoredDiagnostics(
23792377
FileManager &FileMgr,
23802378
SourceManager &SrcMgr,

0 commit comments

Comments
 (0)