Skip to content

Commit 2d83bc5

Browse files
committed
Auto merge of rust-lang#15149 - Veykril:line-index-perf, r=Veykril
internal: Speedup line index calculation via SSE2
2 parents 5703346 + 97748b0 commit 2d83bc5

File tree

5 files changed

+304
-115
lines changed

5 files changed

+304
-115
lines changed

Cargo.lock

+4-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/line-index/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "line-index"
3-
version = "0.1.0-pre.1"
3+
version = "0.1.0"
44
description = "Maps flat `TextSize` offsets to/from `(line, column)` representation."
55
license = "MIT OR Apache-2.0"
66
repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index"

lib/line-index/src/lib.rs

+180-38
Original file line numberDiff line numberDiff line change
@@ -94,44 +94,7 @@ pub struct LineIndex {
9494
impl LineIndex {
9595
/// Returns a `LineIndex` for the `text`.
9696
pub fn new(text: &str) -> LineIndex {
97-
let mut newlines = Vec::<TextSize>::with_capacity(16);
98-
let mut line_wide_chars = IntMap::<u32, Box<[WideChar]>>::default();
99-
100-
let mut wide_chars = Vec::<WideChar>::new();
101-
let mut cur_row = TextSize::from(0);
102-
let mut cur_col = TextSize::from(0);
103-
let mut line = 0u32;
104-
105-
for c in text.chars() {
106-
let c_len = TextSize::of(c);
107-
cur_row += c_len;
108-
if c == '\n' {
109-
newlines.push(cur_row);
110-
111-
// Save any wide characters seen in the previous line
112-
if !wide_chars.is_empty() {
113-
let cs = std::mem::take(&mut wide_chars).into_boxed_slice();
114-
line_wide_chars.insert(line, cs);
115-
}
116-
117-
// Prepare for processing the next line
118-
cur_col = TextSize::from(0);
119-
line += 1;
120-
continue;
121-
}
122-
123-
if !c.is_ascii() {
124-
wide_chars.push(WideChar { start: cur_col, end: cur_col + c_len });
125-
}
126-
127-
cur_col += c_len;
128-
}
129-
130-
// Save any wide characters seen in the last line
131-
if !wide_chars.is_empty() {
132-
line_wide_chars.insert(line, wide_chars.into_boxed_slice());
133-
}
134-
97+
let (newlines, line_wide_chars) = analyze_source_file(text);
13598
LineIndex {
13699
newlines: newlines.into_boxed_slice(),
137100
line_wide_chars,
@@ -235,3 +198,182 @@ impl LineIndex {
235198
self.len
236199
}
237200
}
201+
202+
/// This is adapted from the rustc_span crate, https://github.com/rust-lang/rust/blob/master/compiler/rustc_span/src/analyze_source_file.rs
203+
fn analyze_source_file(src: &str) -> (Vec<TextSize>, IntMap<u32, Box<[WideChar]>>) {
204+
assert!(src.len() < !0u32 as usize);
205+
let mut lines = vec![];
206+
let mut line_wide_chars = IntMap::<u32, Vec<WideChar>>::default();
207+
208+
// Calls the right implementation, depending on hardware support available.
209+
analyze_source_file_dispatch(src, &mut lines, &mut line_wide_chars);
210+
211+
(lines, line_wide_chars.into_iter().map(|(k, v)| (k, v.into_boxed_slice())).collect())
212+
}
213+
214+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
215+
fn analyze_source_file_dispatch(
216+
src: &str,
217+
lines: &mut Vec<TextSize>,
218+
multi_byte_chars: &mut IntMap<u32, Vec<WideChar>>,
219+
) {
220+
if is_x86_feature_detected!("sse2") {
221+
// SAFETY: SSE2 support was checked
222+
unsafe {
223+
analyze_source_file_sse2(src, lines, multi_byte_chars);
224+
}
225+
} else {
226+
analyze_source_file_generic(src, src.len(), TextSize::from(0), lines, multi_byte_chars);
227+
}
228+
}
229+
230+
/// Checks 16 byte chunks of text at a time. If the chunk contains
231+
/// any non-ASCII byte (i.e. part of a multi-byte UTF-8 character), the
232+
/// function falls back to the generic implementation. Otherwise it uses
233+
/// SSE2 intrinsics to quickly find all newlines.
234+
#[target_feature(enable = "sse2")]
235+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
236+
unsafe fn analyze_source_file_sse2(
237+
src: &str,
238+
lines: &mut Vec<TextSize>,
239+
multi_byte_chars: &mut IntMap<u32, Vec<WideChar>>,
240+
) {
241+
#[cfg(target_arch = "x86")]
242+
use std::arch::x86::*;
243+
#[cfg(target_arch = "x86_64")]
244+
use std::arch::x86_64::*;
245+
246+
const CHUNK_SIZE: usize = 16;
247+
248+
let src_bytes = src.as_bytes();
249+
250+
let chunk_count = src.len() / CHUNK_SIZE;
251+
252+
// This variable keeps track of where we should start decoding a
253+
// chunk. If a multi-byte character spans across chunk boundaries,
254+
// we need to skip that part in the next chunk because we already
255+
// handled it.
256+
let mut intra_chunk_offset = 0;
257+
258+
for chunk_index in 0..chunk_count {
259+
let ptr = src_bytes.as_ptr() as *const __m128i;
260+
// We don't know if the pointer is aligned to 16 bytes, so we
261+
// use `loadu`, which supports unaligned loading.
262+
let chunk = _mm_loadu_si128(ptr.add(chunk_index));
263+
264+
// For each byte in the chunk, see if its value (as an `i8`) is < 0, which
265+
// indicates that it's part of a multi-byte UTF-8 char.
266+
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
267+
// Create a bit mask from the comparison results.
268+
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
269+
270+
// If the bit mask is all zero, we only have ASCII chars here:
271+
if multibyte_mask == 0 {
272+
assert!(intra_chunk_offset == 0);
273+
274+
// Check for newlines in the chunk
275+
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
276+
let newlines_mask = _mm_movemask_epi8(newlines_test);
277+
278+
if newlines_mask != 0 {
279+
// The chunk contains at least one newline, record them all
280+
let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
281+
let output_offset = TextSize::from((chunk_index * CHUNK_SIZE + 1) as u32);
282+
283+
loop {
284+
let index = newlines_mask.trailing_zeros();
285+
286+
if index >= CHUNK_SIZE as u32 {
287+
// We have arrived at the end of the chunk.
288+
break;
289+
}
290+
291+
lines.push(TextSize::from(index) + output_offset);
292+
293+
// Clear the bit, so we can find the next one.
294+
newlines_mask &= (!1) << index;
295+
}
296+
}
297+
continue;
298+
}
299+
300+
// The slow path.
301+
// There are multi-byte chars in here, fall back to generic decoding.
302+
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
303+
intra_chunk_offset = analyze_source_file_generic(
304+
&src[scan_start..],
305+
CHUNK_SIZE - intra_chunk_offset,
306+
TextSize::from(scan_start as u32),
307+
lines,
308+
multi_byte_chars,
309+
);
310+
}
311+
312+
// There might still be a tail left to analyze
313+
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
314+
if tail_start < src.len() {
315+
analyze_source_file_generic(
316+
&src[tail_start..],
317+
src.len() - tail_start,
318+
TextSize::from(tail_start as u32),
319+
lines,
320+
multi_byte_chars,
321+
);
322+
}
323+
}
324+
325+
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
326+
// The target (or compiler version) does not support SSE2 ...
327+
fn analyze_source_file_dispatch(
328+
src: &str,
329+
lines: &mut Vec<TextSize>,
330+
multi_byte_chars: &mut IntMap<u32, Vec<WideChar>>,
331+
) {
332+
analyze_source_file_generic(src, src.len(), TextSize::from(0), lines, multi_byte_chars);
333+
}
334+
335+
// `scan_len` determines the number of bytes in `src` to scan. Note that the
336+
// function can read past `scan_len` if a multi-byte character starts within the
337+
// range but extends past it. The overflow is returned by the function.
338+
fn analyze_source_file_generic(
339+
src: &str,
340+
scan_len: usize,
341+
output_offset: TextSize,
342+
lines: &mut Vec<TextSize>,
343+
multi_byte_chars: &mut IntMap<u32, Vec<WideChar>>,
344+
) -> usize {
345+
assert!(src.len() >= scan_len);
346+
let mut i = 0;
347+
let src_bytes = src.as_bytes();
348+
349+
while i < scan_len {
350+
let byte = unsafe {
351+
// We verified that i < scan_len <= src.len()
352+
*src_bytes.get_unchecked(i)
353+
};
354+
355+
// How much to advance in order to get to the next UTF-8 char in the
356+
// string.
357+
let mut char_len = 1;
358+
359+
if byte == b'\n' {
360+
lines.push(TextSize::from(i as u32 + 1) + output_offset);
361+
} else if byte >= 127 {
362+
// The slow path: Just decode to `char`.
363+
let c = src[i..].chars().next().unwrap();
364+
char_len = c.len_utf8();
365+
366+
let pos = TextSize::from(i as u32) + output_offset;
367+
368+
if char_len > 1 {
369+
assert!((2..=4).contains(&char_len));
370+
let mbc = WideChar { start: pos, end: pos + TextSize::from(char_len as u32) };
371+
multi_byte_chars.entry(lines.len() as u32).or_default().push(mbc);
372+
}
373+
}
374+
375+
i += char_len;
376+
}
377+
378+
i - scan_len
379+
}

lib/line-index/src/tests.rs

+119-10
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,120 @@
1-
use super::LineIndex;
2-
3-
#[test]
4-
fn test_empty_index() {
5-
let col_index = LineIndex::new(
6-
"
7-
const C: char = 'x';
8-
",
9-
);
10-
assert_eq!(col_index.line_wide_chars.len(), 0);
1+
use crate::{LineIndex, TextSize, WideChar};
2+
3+
macro_rules! test {
4+
(
5+
case: $test_name:ident,
6+
text: $text:expr,
7+
lines: $lines:expr,
8+
multi_byte_chars: $multi_byte_chars:expr,
9+
) => {
10+
#[test]
11+
fn $test_name() {
12+
let line_index = LineIndex::new($text);
13+
14+
let expected_lines: Vec<TextSize> =
15+
$lines.into_iter().map(<TextSize as From<u32>>::from).collect();
16+
17+
assert_eq!(&*line_index.newlines, &*expected_lines);
18+
19+
let expected_mbcs: Vec<_> = $multi_byte_chars
20+
.into_iter()
21+
.map(|(line, (pos, end)): (u32, (u32, u32))| {
22+
(line, WideChar { start: TextSize::from(pos), end: TextSize::from(end) })
23+
})
24+
.collect();
25+
26+
assert_eq!(
27+
line_index
28+
.line_wide_chars
29+
.iter()
30+
.flat_map(|(line, val)| std::iter::repeat(*line).zip(val.iter().copied()))
31+
.collect::<Vec<_>>(),
32+
expected_mbcs
33+
);
34+
}
35+
};
1136
}
37+
38+
test!(
39+
case: empty_text,
40+
text: "",
41+
lines: vec![],
42+
multi_byte_chars: vec![],
43+
);
44+
45+
test!(
46+
case: newlines_short,
47+
text: "a\nc",
48+
lines: vec![2],
49+
multi_byte_chars: vec![],
50+
);
51+
52+
test!(
53+
case: newlines_long,
54+
text: "012345678\nabcdef012345678\na",
55+
lines: vec![10, 26],
56+
multi_byte_chars: vec![],
57+
);
58+
59+
test!(
60+
case: newline_and_multi_byte_char_in_same_chunk,
61+
text: "01234β789\nbcdef0123456789abcdef",
62+
lines: vec![11],
63+
multi_byte_chars: vec![(0, (5, 7))],
64+
);
65+
66+
test!(
67+
case: newline_and_control_char_in_same_chunk,
68+
text: "01234\u{07}6789\nbcdef0123456789abcdef",
69+
lines: vec![11],
70+
multi_byte_chars: vec![],
71+
);
72+
73+
test!(
74+
case: multi_byte_char_short,
75+
text: "aβc",
76+
lines: vec![],
77+
multi_byte_chars: vec![(0, (1, 3))],
78+
);
79+
80+
test!(
81+
case: multi_byte_char_long,
82+
text: "0123456789abcΔf012345β",
83+
lines: vec![],
84+
multi_byte_chars: vec![(0, (13, 15)), (0, (22, 24))],
85+
);
86+
87+
test!(
88+
case: multi_byte_char_across_chunk_boundary,
89+
text: "0123456789abcdeΔ123456789abcdef01234",
90+
lines: vec![],
91+
multi_byte_chars: vec![(0, (15, 17))],
92+
);
93+
94+
test!(
95+
case: multi_byte_char_across_chunk_boundary_tail,
96+
text: "0123456789abcdeΔ....",
97+
lines: vec![],
98+
multi_byte_chars: vec![(0, (15, 17))],
99+
);
100+
101+
test!(
102+
case: multi_byte_with_new_lines,
103+
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
104+
lines: vec![7, 27],
105+
multi_byte_chars: vec![(1, (13, 15)), (2, (29, 31))],
106+
);
107+
108+
test!(
109+
case: trailing_newline,
110+
text: "0123456789\n",
111+
lines: vec![11],
112+
multi_byte_chars: vec![],
113+
);
114+
115+
test!(
116+
case: trailing_newline_chunk_boundary,
117+
text: "0123456789abcde\n",
118+
lines: vec![16],
119+
multi_byte_chars: vec![],
120+
);

0 commit comments

Comments
 (0)