Skip to content

Commit d333a0d

Browse files
committed
Revert "[Modules] No transitive source location change (#86912)"
This reverts commit 6c31104. The revert was requested in the post-commit comments on #86912.
1 parent c12bc57 commit d333a0d

15 files changed

+162
-287
lines changed

clang/include/clang/Basic/SourceLocation.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ class SourceLocation {
9090
friend class ASTWriter;
9191
friend class SourceManager;
9292
friend struct llvm::FoldingSetTrait<SourceLocation, void>;
93-
friend class SourceLocationEncoding;
9493

9594
public:
9695
using UIntTy = uint32_t;

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
#include "clang/Basic/IdentifierTable.h"
2424
#include "clang/Basic/OperatorKinds.h"
2525
#include "clang/Basic/SourceLocation.h"
26-
#include "clang/Serialization/SourceLocationEncoding.h"
2726
#include "llvm/ADT/DenseMapInfo.h"
2827
#include "llvm/Bitstream/BitCodes.h"
2928
#include <cassert>
@@ -168,38 +167,45 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
168167

169168
/// Source range/offset of a preprocessed entity.
170169
struct PPEntityOffset {
171-
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
172-
173170
/// Raw source location of beginning of range.
174-
RawLocEncoding Begin;
171+
SourceLocation::UIntTy Begin;
175172

176173
/// Raw source location of end of range.
177-
RawLocEncoding End;
174+
SourceLocation::UIntTy End;
178175

179176
/// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
180177
uint32_t BitOffset;
181178

182-
PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
183-
: Begin(Begin), End(End), BitOffset(BitOffset) {}
179+
PPEntityOffset(SourceRange R, uint32_t BitOffset)
180+
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
181+
BitOffset(BitOffset) {}
182+
183+
SourceLocation getBegin() const {
184+
return SourceLocation::getFromRawEncoding(Begin);
185+
}
184186

185-
RawLocEncoding getBegin() const { return Begin; }
186-
RawLocEncoding getEnd() const { return End; }
187+
SourceLocation getEnd() const {
188+
return SourceLocation::getFromRawEncoding(End);
189+
}
187190
};
188191

189192
/// Source range of a skipped preprocessor region
190193
struct PPSkippedRange {
191-
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
192-
193194
/// Raw source location of beginning of range.
194-
RawLocEncoding Begin;
195+
SourceLocation::UIntTy Begin;
195196
/// Raw source location of end of range.
196-
RawLocEncoding End;
197+
SourceLocation::UIntTy End;
197198

198-
PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
199-
: Begin(Begin), End(End) {}
199+
PPSkippedRange(SourceRange R)
200+
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) {
201+
}
200202

201-
RawLocEncoding getBegin() const { return Begin; }
202-
RawLocEncoding getEnd() const { return End; }
203+
SourceLocation getBegin() const {
204+
return SourceLocation::getFromRawEncoding(Begin);
205+
}
206+
SourceLocation getEnd() const {
207+
return SourceLocation::getFromRawEncoding(End);
208+
}
203209
};
204210

205211
/// Offset in the AST file. Use a 64-bit integer split into low/high
@@ -225,26 +231,26 @@ struct UnderalignedInt64 {
225231

226232
/// Source location and bit offset of a declaration.
227233
struct DeclOffset {
228-
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
229-
230234
/// Raw source location.
231-
RawLocEncoding RawLoc = 0;
235+
SourceLocation::UIntTy Loc = 0;
232236

233237
/// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
234238
/// structure alignment 32-bit and avoid padding gap because undefined
235239
/// value in the padding affects AST hash.
236240
UnderalignedInt64 BitOffset;
237241

238242
DeclOffset() = default;
239-
DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset,
240-
uint64_t DeclTypesBlockStartOffset)
241-
: RawLoc(RawLoc) {
243+
DeclOffset(SourceLocation Loc, uint64_t BitOffset,
244+
uint64_t DeclTypesBlockStartOffset) {
245+
setLocation(Loc);
242246
setBitOffset(BitOffset, DeclTypesBlockStartOffset);
243247
}
244248

245-
void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; }
249+
void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
246250

247-
RawLocEncoding getRawLoc() const { return RawLoc; }
251+
SourceLocation getLocation() const {
252+
return SourceLocation::getFromRawEncoding(Loc);
253+
}
248254

249255
void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) {
250256
BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);

clang/include/clang/Serialization/ASTReader.h

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,7 +1771,6 @@ class ASTReader
17711771

17721772
/// Retrieve the module manager.
17731773
ModuleManager &getModuleManager() { return ModuleMgr; }
1774-
const ModuleManager &getModuleManager() const { return ModuleMgr; }
17751774

17761775
/// Retrieve the preprocessor.
17771776
Preprocessor &getPreprocessor() const { return PP; }
@@ -2178,8 +2177,8 @@ class ASTReader
21782177

21792178
/// Retrieve the global submodule ID given a module and its local ID
21802179
/// number.
2181-
serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M,
2182-
unsigned LocalID) const;
2180+
serialization::SubmoduleID
2181+
getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID);
21832182

21842183
/// Retrieve the submodule that corresponds to a global submodule ID.
21852184
///
@@ -2192,7 +2191,7 @@ class ASTReader
21922191

21932192
/// Retrieve the module file with a given local ID within the specified
21942193
/// ModuleFile.
2195-
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const;
2194+
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID);
21962195

21972196
/// Get an ID for the given module file.
21982197
unsigned getModuleFileID(ModuleFile *M);
@@ -2228,46 +2227,33 @@ class ASTReader
22282227
return Sema::AlignPackInfo::getFromRawEncoding(Raw);
22292228
}
22302229

2231-
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
2232-
22332230
/// Read a source location from raw form and return it in its
22342231
/// originating module file's source location space.
2235-
std::pair<SourceLocation, unsigned>
2236-
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
2237-
LocSeq *Seq = nullptr) const {
2232+
SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
2233+
LocSeq *Seq = nullptr) const {
22382234
return SourceLocationEncoding::decode(Raw, Seq);
22392235
}
22402236

22412237
/// Read a source location from raw form.
2242-
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
2238+
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
2239+
SourceLocation::UIntTy Raw,
22432240
LocSeq *Seq = nullptr) const {
2244-
if (!MF.ModuleOffsetMap.empty())
2245-
ReadModuleOffsetMap(MF);
2246-
2247-
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
2248-
ModuleFile *OwningModuleFile =
2249-
ModuleFileIndex == 0 ? &MF : MF.DependentModules[ModuleFileIndex - 1];
2250-
2251-
assert(!SourceMgr.isLoadedSourceLocation(Loc) &&
2252-
"Run out source location space");
2253-
2254-
return TranslateSourceLocation(*OwningModuleFile, Loc);
2241+
SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
2242+
return TranslateSourceLocation(ModuleFile, Loc);
22552243
}
22562244

22572245
/// Translate a source location from another module file's source
22582246
/// location space into ours.
22592247
SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile,
22602248
SourceLocation Loc) const {
2261-
if (Loc.isInvalid())
2262-
return Loc;
2263-
2264-
// FIXME: TranslateSourceLocation is not re-enterable. It is problematic
2265-
// to call TranslateSourceLocation on a translated source location.
2266-
// We either need a method to know whether or not a source location is
2267-
// translated or refactor the code to make it clear that
2268-
// TranslateSourceLocation won't be called with translated source location.
2269-
2270-
return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
2249+
if (!ModuleFile.ModuleOffsetMap.empty())
2250+
ReadModuleOffsetMap(ModuleFile);
2251+
assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
2252+
ModuleFile.SLocRemap.end() &&
2253+
"Cannot find offset to remap.");
2254+
SourceLocation::IntTy Remap =
2255+
ModuleFile.SLocRemap.find(Loc.getOffset())->second;
2256+
return Loc.getLocWithOffset(Remap);
22712257
}
22722258

22732259
/// Read a source location.

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -667,10 +667,6 @@ class ASTWriter : public ASTDeserializationListener,
667667
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
668668
LocSeq *Seq = nullptr);
669669

670-
/// Return the raw encodings for source locations.
671-
SourceLocationEncoding::RawLocEncoding
672-
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
673-
674670
/// Emit a source range.
675671
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
676672
LocSeq *Seq = nullptr);

clang/include/clang/Serialization/ModuleFile.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ class ModuleFile {
295295
/// AST file.
296296
const uint32_t *SLocEntryOffsets = nullptr;
297297

298+
/// Remapping table for source locations in this module.
299+
ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
300+
SLocRemap;
301+
298302
// === Identifiers ===
299303

300304
/// The number of identifiers in this AST file.
@@ -508,17 +512,9 @@ class ModuleFile {
508512
/// List of modules which depend on this module
509513
llvm::SetVector<ModuleFile *> ImportedBy;
510514

511-
/// List of modules which this module directly imported
515+
/// List of modules which this module depends on
512516
llvm::SetVector<ModuleFile *> Imports;
513517

514-
/// List of modules which this modules dependent on. Different
515-
/// from `Imports`, this includes indirectly imported modules too.
516-
/// The order of DependentModules is significant. It should keep
517-
/// the same order with that module file manager when we write
518-
/// the current module file. The value of the member will be initialized
519-
/// in `ASTReader::ReadModuleOffsetMap`.
520-
llvm::SmallVector<ModuleFile *, 16> DependentModules;
521-
522518
/// Determine whether this module was directly imported at
523519
/// any point during translation.
524520
bool isDirectlyImported() const { return DirectlyImported; }

clang/include/clang/Serialization/SourceLocationEncoding.h

Lines changed: 26 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,28 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// We wish to encode the SourceLocation from other module file not dependent
10-
// on the other module file. So that the source location changes from other
11-
// module file may not affect the contents of the current module file. Then the
12-
// users don't need to recompile the whole project due to a new line in a module
13-
// unit in the root of the dependency graph.
9+
// Source locations are stored pervasively in the AST, making up a third of
10+
// the size of typical serialized files. Storing them efficiently is important.
1411
//
15-
// To achieve this, we need to encode the index of the module file into the
16-
// encoding of the source location. The encoding of the source location may be:
12+
// We use integers optimized by VBR-encoding, because:
13+
// - when abbreviations cannot be used, VBR6 encoding is our only choice
14+
// - in the worst case a SourceLocation can be ~any 32-bit number, but in
15+
// practice they are highly predictable
1716
//
18-
// |-----------------------|-----------------------|
19-
// | A | B | C |
20-
//
21-
// * A: 32 bit. The index of the module file in the module manager + 1. The +1
22-
// here is necessary since we wish 0 stands for the current module file.
23-
// * B: 31 bit. The offset of the source location to the module file containing
24-
// it.
25-
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
26-
// space in case the index of the module file is 0.
27-
//
28-
// Specially, if the index of the module file is 0, we allow to encode a
29-
// sequence of locations we store only differences between successive elements.
17+
// We encode the integer so that likely values encode as small numbers that
18+
// turn into few VBR chunks:
19+
// - the invalid sentinel location is a very common value: it encodes as 0
20+
// - the "macro or not" bit is stored at the bottom of the integer
21+
// (rather than at the top, as in memory), so macro locations can have
22+
// small representations.
23+
// - related locations (e.g. of a left and right paren pair) are usually
24+
// similar, so when encoding a sequence of locations we store only
25+
// differences between successive elements.
3026
//
3127
//===----------------------------------------------------------------------===//
3228

33-
#include "clang/Basic/SourceLocation.h"
34-
#include "llvm/Support/MathExtras.h"
3529
#include <climits>
30+
#include "clang/Basic/SourceLocation.h"
3631

3732
#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
3833
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
@@ -57,13 +52,9 @@ class SourceLocationEncoding {
5752
friend SourceLocationSequence;
5853

5954
public:
60-
using RawLocEncoding = uint64_t;
61-
62-
static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
63-
unsigned BaseModuleFileIndex,
64-
SourceLocationSequence * = nullptr);
65-
static std::pair<SourceLocation, unsigned>
66-
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
55+
static uint64_t encode(SourceLocation Loc,
56+
SourceLocationSequence * = nullptr);
57+
static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
6758
};
6859

6960
/// Serialized encoding of a sequence of SourceLocations.
@@ -158,44 +149,14 @@ class SourceLocationSequence::State {
158149
operator SourceLocationSequence *() { return &Seq; }
159150
};
160151

161-
inline SourceLocationEncoding::RawLocEncoding
162-
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
163-
unsigned BaseModuleFileIndex,
164-
SourceLocationSequence *Seq) {
165-
// If the source location is a local source location, we can try to optimize
166-
// the similar sequences to only record the differences.
167-
if (!BaseOffset)
168-
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
169-
170-
if (Loc.isInvalid())
171-
return 0;
172-
173-
// Otherwise, the higher bits are used to store the module file index,
174-
// so it is meaningless to optimize the source locations into small
175-
// integers. Let's try to always use the raw encodings.
176-
assert(Loc.getOffset() >= BaseOffset);
177-
Loc = Loc.getLocWithOffset(-BaseOffset);
178-
RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
179-
180-
// 16 bits should be sufficient to store the module file index.
181-
assert(BaseModuleFileIndex < (1 << 16));
182-
Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
183-
return Encoded;
152+
inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
153+
SourceLocationSequence *Seq) {
154+
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
184155
}
185-
inline std::pair<SourceLocation, unsigned>
186-
SourceLocationEncoding::decode(RawLocEncoding Encoded,
187-
SourceLocationSequence *Seq) {
188-
unsigned ModuleFileIndex = Encoded >> 32;
189-
190-
if (!ModuleFileIndex)
191-
return {Seq ? Seq->decode(Encoded)
192-
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
193-
ModuleFileIndex};
194-
195-
Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
196-
SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
197-
198-
return {Loc, ModuleFileIndex};
156+
inline SourceLocation
157+
SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
158+
return Seq ? Seq->decode(Encoded)
159+
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
199160
}
200161

201162
} // namespace clang

clang/lib/Frontend/ASTUnit.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2374,6 +2374,8 @@ bool ASTUnit::serialize(raw_ostream &OS) {
23742374
return serializeUnit(Writer, Buffer, getSema(), OS);
23752375
}
23762376

2377+
using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;
2378+
23772379
void ASTUnit::TranslateStoredDiagnostics(
23782380
FileManager &FileMgr,
23792381
SourceManager &SrcMgr,

0 commit comments

Comments
 (0)