Skip to content

Commit 6cc1898

Browse files
committed
Auto merge of #115594 - nnethercote:span-tweaks, r=cjgillot
Span tweaks Some minor improvements to code clarity. r? `@cjgillot`
2 parents 38bbc2c + 5790372 commit 6cc1898

File tree

2 files changed

+146
-121
lines changed

2 files changed

+146
-121
lines changed

compiler/rustc_span/src/lib.rs

+1-11
Original file line numberDiff line numberDiff line change
@@ -510,10 +510,6 @@ impl SpanData {
510510
pub fn is_dummy(self) -> bool {
511511
self.lo.0 == 0 && self.hi.0 == 0
512512
}
513-
#[inline]
514-
pub fn is_visible(self, sm: &SourceMap) -> bool {
515-
!self.is_dummy() && sm.is_span_accessible(self.span())
516-
}
517513
/// Returns `true` if `self` fully encloses `other`.
518514
pub fn contains(self, other: Self) -> bool {
519515
self.lo <= other.lo && other.hi <= self.hi
@@ -573,15 +569,9 @@ impl Span {
573569
self.data().with_parent(ctxt)
574570
}
575571

576-
/// Returns `true` if this is a dummy span with any hygienic context.
577-
#[inline]
578-
pub fn is_dummy(self) -> bool {
579-
self.data_untracked().is_dummy()
580-
}
581-
582572
#[inline]
583573
pub fn is_visible(self, sm: &SourceMap) -> bool {
584-
self.data_untracked().is_visible(sm)
574+
!self.is_dummy() && sm.is_span_accessible(self)
585575
}
586576

587577
/// Returns `true` if this span comes from any kind of macro, desugaring or inlining.

compiler/rustc_span/src/span_encoding.rs

+145-110
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
// Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value).
2-
// One format is used for keeping span data inline,
3-
// another contains index into an out-of-line span interner.
4-
// The encoding format for inline spans were obtained by optimizing over crates in rustc/libstd.
5-
// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28
6-
71
use crate::def_id::{DefIndex, LocalDefId};
82
use crate::hygiene::SyntaxContext;
93
use crate::SPAN_TRACK;
@@ -13,59 +7,69 @@ use rustc_data_structures::fx::FxIndexSet;
137

148
/// A compressed span.
159
///
16-
/// Whereas [`SpanData`] is 16 bytes, which is a bit too big to stick everywhere, `Span`
17-
/// is a form that only takes up 8 bytes, with less space for the length, parent and
18-
/// context. The vast majority (99.9%+) of `SpanData` instances will fit within
19-
/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
10+
/// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
11+
/// takes up 8 bytes, with less space for the length, parent and context. The
12+
/// vast majority (99.9%+) of `SpanData` instances can be made to fit within
13+
/// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are
2014
/// stored in a separate interner table, and the `Span` will index into that
2115
/// table. Interning is rare enough that the cost is low, but common enough
2216
/// that the code is exercised regularly.
2317
///
2418
/// An earlier version of this code used only 4 bytes for `Span`, but that was
2519
/// slower because only 80--90% of spans could be stored inline (even less in
26-
/// very large crates) and so the interner was used a lot more.
20+
/// very large crates) and so the interner was used a lot more. That version of
21+
/// the code also predated the storage of parents.
22+
///
23+
/// There are four different span forms.
2724
///
28-
/// Inline (compressed) format with no parent:
29-
/// - `span.base_or_index == span_data.lo`
30-
/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
31-
/// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`)
25+
/// Inline-context format (requires non-huge length, non-huge context, and no parent):
26+
/// - `span.lo_or_index == span_data.lo`
27+
/// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
28+
/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
3229
///
33-
/// Interned format with inline `SyntaxContext`:
34-
/// - `span.base_or_index == index` (indexes into the interner table)
35-
/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
36-
/// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`)
30+
/// Inline-parent format (requires non-huge length, root context, and non-huge parent):
31+
/// - `span.lo_or_index == span_data.lo`
32+
/// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo`
33+
/// (must be `<= MAX_LEN`)
34+
/// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set
35+
/// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`)
3736
///
38-
/// Inline (compressed) format with root context:
39-
/// - `span.base_or_index == span_data.lo`
40-
/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
41-
/// - `span.len_or_tag` has top bit (`PARENT_MASK`) set
42-
/// - `span.ctxt == span_data.parent` (must be `<= MAX_CTXT`)
37+
/// Partially-interned format (requires non-huge context):
38+
/// - `span.lo_or_index == index` (indexes into the interner table)
39+
/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
40+
/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
4341
///
44-
/// Interned format:
45-
/// - `span.base_or_index == index` (indexes into the interner table)
46-
/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
47-
/// - `span.ctxt_or_tag == CTXT_TAG`
42+
/// Fully-interned format (all cases not covered above):
43+
/// - `span.lo_or_index == index` (indexes into the interner table)
44+
/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
45+
/// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER`
4846
///
49-
/// The inline form uses 0 for the tag value (rather than 1) so that we don't
50-
/// need to mask out the tag bit when getting the length, and so that the
51-
/// dummy span can be all zeroes.
47+
/// The partially-interned form requires looking in the interning table for
48+
/// lo and length, but the context is stored inline as well as interned.
49+
/// This is useful because context lookups are often done in isolation, and
50+
/// inline lookups are quicker.
5251
///
5352
/// Notes about the choice of field sizes:
54-
/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
55-
/// values never cause interning. The number of bits needed for `base`
53+
/// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo`
54+
/// values never cause interning. The number of bits needed for `lo`
5655
/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
57-
/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
58-
/// in `SpanData`, which means that large `len` values will cause interning.
59-
/// The number of bits needed for `len` does not depend on the crate size.
60-
/// The most common numbers of bits for `len` are from 0 to 7, with a peak usually
61-
/// at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
62-
/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
63-
/// dozens of times in a typical crate.
64-
/// - `ctxt_or_tag` is 16 bits in `Span` and 32 bits in `SpanData`, which means that
65-
/// large `ctxt` values will cause interning. The number of bits needed for
66-
/// `ctxt` values depend partly on the crate size and partly on the form of
67-
/// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt_or_tag`,
68-
/// but larger crates might need more than 16 bits.
56+
/// Having no compression on this field means there is no performance cliff
57+
/// if a crate exceeds a particular size.
58+
/// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32
59+
/// bits in `SpanData`, which means that large `len` values will cause
60+
/// interning. The number of bits needed for `len` does not depend on the
61+
/// crate size. The most common numbers of bits for `len` are from 0 to 7,
62+
/// with a peak usually at 3 or 4, and then it drops off quickly from 8
63+
/// onwards. 15 bits is enough for 99.99%+ of cases, but larger values
64+
/// (sometimes 20+ bits) might occur dozens of times in a typical crate.
65+
/// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32 bit fields in
66+
/// `SpanData`, which means intering will happen if `ctxt` is large, if
67+
/// `parent` is large, or if both values are non-zero. The number of bits
68+
/// needed for `ctxt` values depends partly on the crate size and partly on
69+
/// the form of the code. No crates in `rustc-perf` need more than 15 bits
70+
/// for `ctxt_or_parent_or_marker`, but larger crates might need more than 16
71+
/// bits. The number of bits needed for `parent` hasn't been measured,
72+
/// because `parent` isn't currently used by default.
6973
///
7074
/// In order to reliably use parented spans in incremental compilation,
7175
/// we must track the dependency on the parent definition's span. This is performed
@@ -74,19 +78,22 @@ use rustc_data_structures::fx::FxIndexSet;
7478
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
7579
#[rustc_pass_by_value]
7680
pub struct Span {
77-
base_or_index: u32,
78-
len_or_tag: u16,
79-
ctxt_or_tag: u16,
81+
lo_or_index: u32,
82+
len_with_tag_or_marker: u16,
83+
ctxt_or_parent_or_marker: u16,
8084
}
8185

82-
const LEN_TAG: u16 = 0b1111_1111_1111_1111;
83-
const PARENT_MASK: u16 = 0b1000_0000_0000_0000;
84-
const MAX_LEN: u32 = 0b0111_1111_1111_1111;
85-
const CTXT_TAG: u32 = 0b1111_1111_1111_1111;
86-
const MAX_CTXT: u32 = CTXT_TAG - 1;
86+
// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
87+
// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` were 1 higher, this wouldn't be true.)
88+
const MAX_LEN: u32 = 0b0111_1111_1111_1110;
89+
const MAX_CTXT: u32 = 0b0111_1111_1111_1110;
90+
const PARENT_TAG: u16 = 0b1000_0000_0000_0000;
91+
const BASE_LEN_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
92+
const CTXT_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
8793

88-
/// Dummy span, both position and length are zero, syntax context is zero as well.
89-
pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_tag: 0 };
94+
/// The dummy span has zero position, length, and context, and no parent.
95+
pub const DUMMY_SP: Span =
96+
Span { lo_or_index: 0, len_with_tag_or_marker: 0, ctxt_or_parent_or_marker: 0 };
9097

9198
impl Span {
9299
#[inline]
@@ -100,39 +107,43 @@ impl Span {
100107
std::mem::swap(&mut lo, &mut hi);
101108
}
102109

103-
let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
104-
105-
if len <= MAX_LEN && ctxt2 <= MAX_CTXT {
106-
let len_or_tag = len as u16;
107-
debug_assert_eq!(len_or_tag & PARENT_MASK, 0);
110+
let (lo2, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
108111

109-
if let Some(parent) = parent {
110-
// Inline format with parent.
111-
let len_or_tag = len_or_tag | PARENT_MASK;
112-
let parent2 = parent.local_def_index.as_u32();
113-
if ctxt2 == SyntaxContext::root().as_u32()
114-
&& parent2 <= MAX_CTXT
115-
&& len_or_tag < LEN_TAG
116-
{
117-
debug_assert_ne!(len_or_tag, LEN_TAG);
118-
return Span { base_or_index: base, len_or_tag, ctxt_or_tag: parent2 as u16 };
119-
}
120-
} else {
121-
// Inline format with ctxt.
122-
debug_assert_ne!(len_or_tag, LEN_TAG);
112+
if len <= MAX_LEN {
113+
if ctxt2 <= MAX_CTXT && parent.is_none() {
114+
// Inline-context format.
123115
return Span {
124-
base_or_index: base,
125-
len_or_tag: len as u16,
126-
ctxt_or_tag: ctxt2 as u16,
116+
lo_or_index: lo2,
117+
len_with_tag_or_marker: len as u16,
118+
ctxt_or_parent_or_marker: ctxt2 as u16,
119+
};
120+
} else if ctxt2 == SyntaxContext::root().as_u32()
121+
&& let Some(parent) = parent
122+
&& let parent2 = parent.local_def_index.as_u32()
123+
&& parent2 <= MAX_CTXT
124+
{
125+
// Inline-parent format.
126+
return Span {
127+
lo_or_index: lo2,
128+
len_with_tag_or_marker: PARENT_TAG | len as u16,
129+
ctxt_or_parent_or_marker: parent2 as u16
127130
};
128131
}
129132
}
130133

131-
// Interned format.
134+
// Partially-interned or fully-interned format.
132135
let index =
133136
with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }));
134-
let ctxt_or_tag = if ctxt2 <= MAX_CTXT { ctxt2 } else { CTXT_TAG } as u16;
135-
Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_tag }
137+
let ctxt_or_parent_or_marker = if ctxt2 <= MAX_CTXT {
138+
ctxt2 as u16 // partially-interned
139+
} else {
140+
CTXT_INTERNED_MARKER // fully-interned
141+
};
142+
Span {
143+
lo_or_index: index,
144+
len_with_tag_or_marker: BASE_LEN_INTERNED_MARKER,
145+
ctxt_or_parent_or_marker,
146+
}
136147
}
137148

138149
#[inline]
@@ -148,56 +159,80 @@ impl Span {
148159
/// This function must not be used outside the incremental engine.
149160
#[inline]
150161
pub fn data_untracked(self) -> SpanData {
151-
if self.len_or_tag != LEN_TAG {
152-
// Inline format.
153-
if self.len_or_tag & PARENT_MASK == 0 {
154-
debug_assert!(self.len_or_tag as u32 <= MAX_LEN);
162+
if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
163+
if self.len_with_tag_or_marker & PARENT_TAG == 0 {
164+
// Inline-context format.
165+
let len = self.len_with_tag_or_marker as u32;
166+
debug_assert!(len <= MAX_LEN);
155167
SpanData {
156-
lo: BytePos(self.base_or_index),
157-
hi: BytePos(self.base_or_index + self.len_or_tag as u32),
158-
ctxt: SyntaxContext::from_u32(self.ctxt_or_tag as u32),
168+
lo: BytePos(self.lo_or_index),
169+
hi: BytePos(self.lo_or_index + len),
170+
ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32),
159171
parent: None,
160172
}
161173
} else {
162-
let len = self.len_or_tag & !PARENT_MASK;
163-
debug_assert!(len as u32 <= MAX_LEN);
164-
let parent =
165-
LocalDefId { local_def_index: DefIndex::from_u32(self.ctxt_or_tag as u32) };
174+
// Inline-parent format.
175+
let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
176+
debug_assert!(len <= MAX_LEN);
177+
let parent = LocalDefId {
178+
local_def_index: DefIndex::from_u32(self.ctxt_or_parent_or_marker as u32),
179+
};
166180
SpanData {
167-
lo: BytePos(self.base_or_index),
168-
hi: BytePos(self.base_or_index + len as u32),
181+
lo: BytePos(self.lo_or_index),
182+
hi: BytePos(self.lo_or_index + len),
169183
ctxt: SyntaxContext::root(),
170184
parent: Some(parent),
171185
}
172186
}
173187
} else {
174-
// Interned format.
175-
let index = self.base_or_index;
188+
// Fully-interned or partially-interned format. In either case,
189+
// the interned value contains all the data, so we don't need to
190+
// distinguish them.
191+
let index = self.lo_or_index;
176192
with_span_interner(|interner| interner.spans[index as usize])
177193
}
178194
}
179195

196+
/// Returns `true` if this is a dummy span with any hygienic context.
197+
#[inline]
198+
pub fn is_dummy(self) -> bool {
199+
if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
200+
// Inline-context or inline-parent format.
201+
let lo = self.lo_or_index;
202+
let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
203+
debug_assert!(len <= MAX_LEN);
204+
lo == 0 && len == 0
205+
} else {
206+
// Fully-interned or partially-interned format.
207+
let index = self.lo_or_index;
208+
let data = with_span_interner(|interner| interner.spans[index as usize]);
209+
data.lo == BytePos(0) && data.hi == BytePos(0)
210+
}
211+
}
212+
180213
/// This function is used as a fast path when decoding the full `SpanData` is not necessary.
214+
/// It's a cut-down version of `data_untracked`.
181215
#[inline]
182216
pub fn ctxt(self) -> SyntaxContext {
183-
let ctxt_or_tag = self.ctxt_or_tag as u32;
184-
// Check for interned format.
185-
if self.len_or_tag == LEN_TAG {
186-
if ctxt_or_tag == CTXT_TAG {
187-
// Fully interned format.
188-
let index = self.base_or_index;
189-
with_span_interner(|interner| interner.spans[index as usize].ctxt)
217+
if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
218+
if self.len_with_tag_or_marker & PARENT_TAG == 0 {
219+
// Inline-context format.
220+
SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
190221
} else {
191-
// Interned format with inline ctxt.
192-
SyntaxContext::from_u32(ctxt_or_tag)
222+
// Inline-parent format. We know that the SyntaxContext is root.
223+
SyntaxContext::root()
193224
}
194-
} else if self.len_or_tag & PARENT_MASK == 0 {
195-
// Inline format with inline ctxt.
196-
SyntaxContext::from_u32(ctxt_or_tag)
197225
} else {
198-
// Inline format with inline parent.
199-
// We know that the SyntaxContext is root.
200-
SyntaxContext::root()
226+
if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
227+
// Partially-interned format. This path avoids looking up the
228+
// interned value, and is the whole point of the
229+
// partially-interned format.
230+
SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
231+
} else {
232+
// Fully-interned format.
233+
let index = self.lo_or_index;
234+
with_span_interner(|interner| interner.spans[index as usize].ctxt)
235+
}
201236
}
202237
}
203238
}

0 commit comments

Comments
 (0)