Skip to content

Commit c0ae197

Browse files
committed
Auto merge of rust-lang#131656 - richard-uk1:move_empty_exponent_to_rustc_session, r=<try>
move some invalid exponent detection into rustc_session. This PR moves part of the exponent checks from `rustc_lexer`/`rustc_parser` into `rustc_session`. This change does not affect which programs are accepted by the compiler, or the diagnostics that are reported, with one main exception. That exception is that floats or ints with suffixes beginning with `e` are rejected *after* the token stream is passed to proc macros, rather than being rejected by the parser as was previously the case. This gives proc macro authors more consistent access to numeric literals: currently a proc macro can interpret `1m` or `30s` but not `7eggs` or `3em`. After this change all are handled the same. The lexer will still reject input if it contains `e` followed by a number, `+`/`-`, or `_` if they are not followed by a valid integer literal (number + `_`), but this doesn't affect macro authors who just want to access alpha suffixes. This PR is a continuation of rust-lang#79912. It is also solving exactly the same problem as rust-lang#111628. Exponents that contain arbitrarily long underscore suffixes are handled without read-ahead by tracking the exponent start in case the exponent is invalid, so the suffix start is correct. This is very much an edge case (the user would have to write something like `1e_______________23`) but nevertheless it is handled correctly. Also adds tests for various edge cases and improves diagnostics marginally. r: `@petrochenkov`, since they reviewed rust-lang#79912.
2 parents 60493b8 + e276417 commit c0ae197

9 files changed

+208
-77
lines changed

compiler/rustc_lexer/src/lib.rs

+71-29
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ pub enum DocStyle {
194194
pub enum LiteralKind {
195195
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
196196
Int { base: Base, empty_int: bool },
197-
/// `12.34f32`, `1e3`, but not `1f32`.
197+
/// `12.34f32`, `1e3` and `1e+`, but not `1f32` or `1em`.
198198
Float { base: Base, empty_exponent: bool },
199199
/// `'a'`, `'\\'`, `'''`, `';`
200200
Char { terminated: bool },
@@ -409,8 +409,8 @@ impl Cursor<'_> {
409409

410410
// Numeric literal.
411411
c @ '0'..='9' => {
412-
let literal_kind = self.number(c);
413-
let suffix_start = self.pos_within_token();
412+
let (literal_kind, suffix_start) = self.number(c);
413+
let suffix_start = suffix_start.unwrap_or(self.pos_within_token());
414414
self.eat_literal_suffix();
415415
TokenKind::Literal { kind: literal_kind, suffix_start }
416416
}
@@ -606,7 +606,9 @@ impl Cursor<'_> {
606606
}
607607
}
608608

609-
fn number(&mut self, first_digit: char) -> LiteralKind {
609+
/// Parses a number and in `.1` returns the offset of the literal suffix if
610+
/// different from the current position on return.
611+
fn number(&mut self, first_digit: char) -> (LiteralKind, Option<u32>) {
610612
debug_assert!('0' <= self.prev() && self.prev() <= '9');
611613
let mut base = Base::Decimal;
612614
if first_digit == '0' {
@@ -616,21 +618,21 @@ impl Cursor<'_> {
616618
base = Base::Binary;
617619
self.bump();
618620
if !self.eat_decimal_digits() {
619-
return Int { base, empty_int: true };
621+
return (Int { base, empty_int: true }, None);
620622
}
621623
}
622624
'o' => {
623625
base = Base::Octal;
624626
self.bump();
625627
if !self.eat_decimal_digits() {
626-
return Int { base, empty_int: true };
628+
return (Int { base, empty_int: true }, None);
627629
}
628630
}
629631
'x' => {
630632
base = Base::Hexadecimal;
631633
self.bump();
632634
if !self.eat_hexadecimal_digits() {
633-
return Int { base, empty_int: true };
635+
return (Int { base, empty_int: true }, None);
634636
}
635637
}
636638
// Not a base prefix; consume additional digits.
@@ -642,40 +644,79 @@ impl Cursor<'_> {
642644
'.' | 'e' | 'E' => {}
643645

644646
// Just a 0.
645-
_ => return Int { base, empty_int: false },
647+
_ => return (Int { base, empty_int: false }, None),
646648
}
647649
} else {
648650
// No base prefix, parse number in the usual way.
649651
self.eat_decimal_digits();
650652
};
651653

652-
match self.first() {
654+
match (self.first(), self.second()) {
653655
// Don't be greedy if this is actually an
654656
// integer literal followed by field/method access or a range pattern
655657
// (`0..2` and `12.foo()`)
656-
'.' if self.second() != '.' && !is_id_start(self.second()) => {
657-
// might have stuff after the ., and if it does, it needs to start
658-
// with a number
658+
('.', second) if second != '.' && !is_id_start(second) => {
659659
self.bump();
660+
self.eat_decimal_digits();
661+
660662
let mut empty_exponent = false;
661-
if self.first().is_ascii_digit() {
662-
self.eat_decimal_digits();
663-
match self.first() {
664-
'e' | 'E' => {
665-
self.bump();
666-
empty_exponent = !self.eat_float_exponent();
667-
}
668-
_ => (),
663+
let suffix_start = match (self.first(), self.second()) {
664+
('e' | 'E', '_') => self.eat_underscore_exponent(),
665+
('e' | 'E', '0'..='9' | '+' | '-') => {
666+
// Definitely an exponent (which still can be empty).
667+
self.bump();
668+
empty_exponent = !self.eat_float_exponent();
669+
None
669670
}
671+
_ => None,
672+
};
673+
(Float { base, empty_exponent }, suffix_start)
674+
}
675+
('e' | 'E', '_') => {
676+
match self.eat_underscore_exponent() {
677+
Some(suffix_start) => {
678+
// The suffix begins at `e`, meaning the number is an integer.
679+
(Int { base, empty_int: false }, Some(suffix_start))
680+
}
681+
None => (Float { base, empty_exponent: false }, None),
670682
}
671-
Float { base, empty_exponent }
672683
}
673-
'e' | 'E' => {
684+
('e' | 'E', '0'..='9' | '+' | '-') => {
685+
// Definitely an exponent (which still can be empty).
674686
self.bump();
675687
let empty_exponent = !self.eat_float_exponent();
676-
Float { base, empty_exponent }
688+
(Float { base, empty_exponent }, None)
677689
}
678-
_ => Int { base, empty_int: false },
690+
_ => (Int { base, empty_int: false }, None),
691+
}
692+
}
693+
694+
/// Try to find and eat an exponent
695+
///
696+
/// Assumes the first character is `e`/`E` and second is `_`, and consumes
697+
/// `e`/`E` followed by all consecutive `_`s.
698+
///
699+
/// Returns `Some` if no exponent was found. In this case, the suffix is partially
700+
/// consumed, and began at the return value.
701+
fn eat_underscore_exponent(&mut self) -> Option<u32> {
702+
debug_assert!(matches!(self.first(), 'e' | 'E'));
703+
debug_assert!(matches!(self.second(), '_'));
704+
let suffix_start = self.pos_within_token();
705+
706+
// check if series of `_` is ended by a digit. If yes
707+
// include it in the number as exponent. If no include
708+
// it in suffix.
709+
self.bump();
710+
while matches!(self.first(), '_') {
711+
self.bump();
712+
}
713+
// If we find a digit, then the exponential was valid
714+
// so the suffix will start at the cursor as usual.
715+
if self.first().is_ascii_digit() {
716+
self.eat_decimal_digits();
717+
None
718+
} else {
719+
Some(suffix_start)
679720
}
680721
}
681722

@@ -924,6 +965,7 @@ impl Cursor<'_> {
924965
}
925966
}
926967

968+
/// Returns `true` if a digit was consumed (rather than just '_'s).
927969
fn eat_decimal_digits(&mut self) -> bool {
928970
let mut has_digits = false;
929971
loop {
@@ -961,20 +1003,20 @@ impl Cursor<'_> {
9611003
/// Eats the float exponent. Returns true if at least one digit was met,
9621004
/// and returns false otherwise.
9631005
fn eat_float_exponent(&mut self) -> bool {
964-
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
1006+
debug_assert!(matches!(self.prev(), 'e' | 'E'));
9651007
if self.first() == '-' || self.first() == '+' {
9661008
self.bump();
9671009
}
9681010
self.eat_decimal_digits()
9691011
}
9701012

971-
// Eats the suffix of the literal, e.g. "u8".
1013+
/// Eats the suffix of the literal, e.g. "u8".
9721014
fn eat_literal_suffix(&mut self) {
973-
self.eat_identifier();
1015+
self.eat_identifier()
9741016
}
9751017

976-
// Eats the identifier. Note: succeeds on `_`, which isn't a valid
977-
// identifier.
1018+
/// Eats the identifier. Note: succeeds on `_`, which isn't a valid
1019+
/// identifier.
9781020
fn eat_identifier(&mut self) {
9791021
if !is_id_start(self.first()) {
9801022
return;

compiler/rustc_session/messages.ftl

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at
1414
1515
session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled
1616
17+
session_empty_float_exponent = expected at least one digit in exponent
18+
1719
session_expr_parentheses_needed = parentheses are required to parse this as an expression
1820
1921
session_failed_to_create_profiler = failed to create profiler: {$err}

compiler/rustc_session/src/errors.rs

+15
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,10 @@ pub fn report_lit_error(
377377
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
378378
}
379379

380+
fn looks_like_empty_exponent(s: &str) -> bool {
381+
s.len() == 1 && matches!(s.chars().next(), Some('e' | 'E'))
382+
}
383+
380384
// Try to lowercase the prefix if the prefix and suffix are valid.
381385
fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option<String> {
382386
let mut chars = suffix.chars();
@@ -409,6 +413,8 @@ pub fn report_lit_error(
409413
if looks_like_width_suffix(&['i', 'u'], suf) {
410414
// If it looks like a width, try to be helpful.
411415
dcx.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() })
416+
} else if looks_like_empty_exponent(suf) {
417+
dcx.emit_err(EmptyFloatExponent { span })
412418
} else if let Some(fixed) = fix_base_capitalisation(lit.symbol.as_str(), suf) {
413419
dcx.emit_err(InvalidNumLiteralBasePrefix { span, fixed })
414420
} else {
@@ -420,6 +426,8 @@ pub fn report_lit_error(
420426
if looks_like_width_suffix(&['f'], suf) {
421427
// If it looks like a width, try to be helpful.
422428
dcx.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() })
429+
} else if looks_like_empty_exponent(suf) {
430+
dcx.emit_err(EmptyFloatExponent { span })
423431
} else {
424432
dcx.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() })
425433
}
@@ -489,3 +497,10 @@ pub(crate) struct SoftFloatIgnored;
489497
#[note]
490498
#[note(session_soft_float_deprecated_issue)]
491499
pub(crate) struct SoftFloatDeprecated;
500+
501+
#[derive(Diagnostic)]
502+
#[diag(session_empty_float_exponent)]
503+
pub(crate) struct EmptyFloatExponent {
504+
#[primary_span]
505+
pub span: Span,
506+
}
+36-36
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,3 @@
1-
error: expected at least one digit in exponent
2-
--> $DIR/issue-104390.rs:1:27
3-
|
4-
LL | fn f1() -> impl Sized { & 2E }
5-
| ^^
6-
7-
error: expected at least one digit in exponent
8-
--> $DIR/issue-104390.rs:2:28
9-
|
10-
LL | fn f2() -> impl Sized { && 2E }
11-
| ^^
12-
13-
error: expected at least one digit in exponent
14-
--> $DIR/issue-104390.rs:3:29
15-
|
16-
LL | fn f3() -> impl Sized { &'a 2E }
17-
| ^^
18-
19-
error: expected at least one digit in exponent
20-
--> $DIR/issue-104390.rs:5:34
21-
|
22-
LL | fn f4() -> impl Sized { &'static 2E }
23-
| ^^
24-
25-
error: expected at least one digit in exponent
26-
--> $DIR/issue-104390.rs:7:28
27-
|
28-
LL | fn f5() -> impl Sized { *& 2E }
29-
| ^^
30-
31-
error: expected at least one digit in exponent
32-
--> $DIR/issue-104390.rs:8:29
33-
|
34-
LL | fn f6() -> impl Sized { &'_ 2E }
35-
| ^^
36-
371
error: borrow expressions cannot be annotated with lifetimes
382
--> $DIR/issue-104390.rs:3:25
393
|
@@ -76,5 +40,41 @@ LL - fn f6() -> impl Sized { &'_ 2E }
7640
LL + fn f6() -> impl Sized { &2E }
7741
|
7842

43+
error: expected at least one digit in exponent
44+
--> $DIR/issue-104390.rs:1:27
45+
|
46+
LL | fn f1() -> impl Sized { & 2E }
47+
| ^^
48+
49+
error: expected at least one digit in exponent
50+
--> $DIR/issue-104390.rs:2:28
51+
|
52+
LL | fn f2() -> impl Sized { && 2E }
53+
| ^^
54+
55+
error: expected at least one digit in exponent
56+
--> $DIR/issue-104390.rs:3:29
57+
|
58+
LL | fn f3() -> impl Sized { &'a 2E }
59+
| ^^
60+
61+
error: expected at least one digit in exponent
62+
--> $DIR/issue-104390.rs:5:34
63+
|
64+
LL | fn f4() -> impl Sized { &'static 2E }
65+
| ^^
66+
67+
error: expected at least one digit in exponent
68+
--> $DIR/issue-104390.rs:7:28
69+
|
70+
LL | fn f5() -> impl Sized { *& 2E }
71+
| ^^
72+
73+
error: expected at least one digit in exponent
74+
--> $DIR/issue-104390.rs:8:29
75+
|
76+
LL | fn f6() -> impl Sized { &'_ 2E }
77+
| ^^
78+
7979
error: aborting due to 9 previous errors
8080

tests/ui/consts/issue-91434.stderr

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
error: expected at least one digit in exponent
2-
--> $DIR/issue-91434.rs:2:11
3-
|
4-
LL | [9; [[9E; h]]];
5-
| ^^
6-
71
error[E0425]: cannot find value `h` in this scope
82
--> $DIR/issue-91434.rs:2:15
93
|
104
LL | [9; [[9E; h]]];
115
| ^ not found in this scope
126

7+
error: expected at least one digit in exponent
8+
--> $DIR/issue-91434.rs:2:11
9+
|
10+
LL | [9; [[9E; h]]];
11+
| ^^
12+
1313
error: aborting due to 2 previous errors
1414

1515
For more information about this error, try `rustc --explain E0425`.
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
error: expected at least one digit in exponent
2-
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
3-
|
4-
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
5-
| ^^^^^^
6-
71
error: unknown start of token: \u{2212}
82
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:53
93
|
@@ -16,5 +10,11 @@ LL - const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹
1610
LL + const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e-11; // m³⋅kg⁻¹⋅s⁻²
1711
|
1812

13+
error: expected at least one digit in exponent
14+
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
15+
|
16+
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
17+
| ^^^^^^
18+
1919
error: aborting due to 2 previous errors
2020

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
const _A: f64 = 1em;
2+
//~^ ERROR invalid suffix `em` for number literal
3+
const _B: f64 = 1e0m;
4+
//~^ ERROR invalid suffix `m` for float literal
5+
const _C: f64 = 1e_______________0m;
6+
//~^ ERROR invalid suffix `m` for float literal
7+
const _D: f64 = 1e_______________m;
8+
//~^ ERROR invalid suffix `e_______________m` for number literal
9+
10+
// All the above patterns should not generate an error when used in a macro
11+
macro_rules! do_nothing {
12+
($($toks:tt)*) => {};
13+
}
14+
do_nothing!(1em 1e0m 1e_______________0m 1e_______________m);
15+
16+
fn main() {}

0 commit comments

Comments
 (0)