Skip to content

Commit 21755b5

Browse files
committed
rustc_lexer: Optimize shebang detection slightly
1 parent 96dd469 commit 21755b5

File tree

3 files changed

+32
-19
lines changed

3 files changed

+32
-19
lines changed

src/librustc_lexer/src/lib.rs

+18-19
Original file line number | Diff line number | Diff line change
@@ -238,26 +238,25 @@ pub enum Base {
238238
/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
239239
/// but shebang isn't a part of rust syntax.
240240
pub fn strip_shebang(input: &str) -> Option<usize> {
241-
let first_line = input.lines().next()?;
242-
// A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line.
243-
let could_be_shebang =
244-
first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c));
245-
if !could_be_shebang {
246-
return None;
247-
}
248-
let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok|
249-
!matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace)
250-
);
251-
let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket];
252-
let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied());
253-
if starts_with_attribute {
254-
// If the file starts with #![ then it's definitely not a shebang -- it couldn't be
255-
// a rust program since a Rust program can't start with `[`
256-
None
257-
} else {
258-
// It's a #!... and there isn't a `[` in sight, must be a shebang
259-
Some(first_line.len())
241+
// Shebang must start with `#!` literally, without any preceding whitespace.
242+
if input.starts_with("#!") {
243+
let input_tail = &input[2..];
244+
// Shebang must have something non-whitespace after `#!` on the first line.
245+
let first_line_tail = input_tail.lines().next()?;
246+
if first_line_tail.contains(|c| !is_whitespace(c)) {
247+
// Ok, this looks like a shebang, but if the next non-whitespace token is `[`, possibly
248+
// preceded by a doc comment (skipped here due to the `TokenKind::(Line,Block)Comment` ambiguity
249+
// at lexer level), then it may be valid Rust code, so consider it Rust code.
250+
let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).filter(|tok|
251+
!matches!(tok, TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment { .. })
252+
).next();
253+
if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
254+
// No other choice than to consider this a shebang.
255+
return Some(2 + first_line_tail.len());
256+
}
257+
}
260258
}
259+
None
261260
}
262261

263262
/// Parses the first token from the provided input string.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
#!///bin/bash
2+
[allow(unused_variables)]
3+
//~^^ ERROR expected `[`, found doc comment
4+
5+
// Doc comment is misinterpreted as whitespace (regular comment) during shebang detection.
6+
// Even if it weren't, it would still result in an error, just a different one.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
error: expected `[`, found doc comment `///bin/bash`
2+
--> $DIR/shebang-doc-comment.rs:1:3
3+
|
4+
LL | #!///bin/bash
5+
| ^^^^^^^^^^^ expected `[`
6+
7+
error: aborting due to previous error
8+

0 commit comments

Comments
 (0)