Skip to content

Commit b4266b0

Browse files
authored
Rollup merge of #135557 - estebank:wtf8, r=fee1-dead
Point at invalid utf-8 span on user's source code ``` error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8 --> $DIR/not-utf8-2.rs:6:5 | LL | include!("not-utf8-bin-file.rs"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | note: byte `193` is not valid utf-8 --> $DIR/not-utf8-bin-file.rs:2:14 | LL | let _ = "�|�␂!5�cc␕␂��"; | ^ = note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info) ``` When we attempt to load a Rust source code file, if there is an OS file failure we try reading the file as bytes. If that succeeds we try to turn it into UTF-8. If *that* fails, we provide additional context about *where* the file has the first invalid UTF-8 character. Fix #76869.
2 parents f875983 + 57dd42d commit b4266b0

14 files changed

+88
-20
lines changed

compiler/rustc_builtin_macros/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#![feature(proc_macro_internals)]
1717
#![feature(proc_macro_quote)]
1818
#![feature(rustdoc_internals)]
19+
#![feature(string_from_utf8_lossy_owned)]
1920
#![feature(try_blocks)]
2021
#![warn(unreachable_pub)]
2122
// tidy-alphabetical-end

compiler/rustc_builtin_macros/src/source_util.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use rustc_expand::base::{
1313
use rustc_expand::module::DirOwnership;
1414
use rustc_lint_defs::BuiltinLintDiag;
1515
use rustc_parse::parser::{ForceCollect, Parser};
16-
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal};
16+
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
1717
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
1818
use rustc_span::source_map::SourceMap;
1919
use rustc_span::{Pos, Span, Symbol};
@@ -209,9 +209,10 @@ pub(crate) fn expand_include_str(
209209
let interned_src = Symbol::intern(src);
210210
MacEager::expr(cx.expr_str(cx.with_def_site_ctxt(bsp), interned_src))
211211
}
212-
Err(_) => {
213-
let guar = cx.dcx().span_err(sp, format!("`{path}` wasn't a utf-8 file"));
214-
DummyResult::any(sp, guar)
212+
Err(utf8err) => {
213+
let mut err = cx.dcx().struct_span_err(sp, format!("`{path}` wasn't a utf-8 file"));
214+
utf8_error(cx.source_map(), path.as_str(), None, &mut err, utf8err, &bytes[..]);
215+
DummyResult::any(sp, err.emit())
215216
}
216217
},
217218
Err(dummy) => dummy,
@@ -273,7 +274,7 @@ fn load_binary_file(
273274
.and_then(|path| path.into_os_string().into_string().ok());
274275

275276
if let Some(new_path) = new_path {
276-
err.span_suggestion(
277+
err.span_suggestion_verbose(
277278
path_span,
278279
"there is a file with the same name in a different directory",
279280
format!("\"{}\"", new_path.replace('\\', "/").escape_debug()),

compiler/rustc_parse/src/lib.rs

+63-4
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,21 @@
1111
#![feature(if_let_guard)]
1212
#![feature(iter_intersperse)]
1313
#![feature(let_chains)]
14+
#![feature(string_from_utf8_lossy_owned)]
1415
#![warn(unreachable_pub)]
1516
// tidy-alphabetical-end
1617

17-
use std::path::Path;
18+
use std::path::{Path, PathBuf};
19+
use std::str::Utf8Error;
1820

1921
use rustc_ast as ast;
2022
use rustc_ast::tokenstream::TokenStream;
2123
use rustc_ast::{AttrItem, Attribute, MetaItemInner, token};
2224
use rustc_ast_pretty::pprust;
2325
use rustc_data_structures::sync::Lrc;
24-
use rustc_errors::{Diag, FatalError, PResult};
26+
use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
2527
use rustc_session::parse::ParseSess;
28+
use rustc_span::source_map::SourceMap;
2629
use rustc_span::{FileName, SourceFile, Span};
2730
pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;
2831

@@ -73,9 +76,22 @@ pub fn new_parser_from_file<'a>(
7376
path: &Path,
7477
sp: Option<Span>,
7578
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
76-
let source_file = psess.source_map().load_file(path).unwrap_or_else(|e| {
77-
let msg = format!("couldn't read {}: {}", path.display(), e);
79+
let sm = psess.source_map();
80+
let source_file = sm.load_file(path).unwrap_or_else(|e| {
81+
let msg = format!("couldn't read `{}`: {}", path.display(), e);
7882
let mut err = psess.dcx().struct_fatal(msg);
83+
if let Ok(contents) = std::fs::read(path)
84+
&& let Err(utf8err) = String::from_utf8(contents.clone())
85+
{
86+
utf8_error(
87+
sm,
88+
&path.display().to_string(),
89+
sp,
90+
&mut err,
91+
utf8err.utf8_error(),
92+
&contents,
93+
);
94+
}
7995
if let Some(sp) = sp {
8096
err.span(sp);
8197
}
@@ -84,6 +100,49 @@ pub fn new_parser_from_file<'a>(
84100
new_parser_from_source_file(psess, source_file)
85101
}
86102

103+
pub fn utf8_error<E: EmissionGuarantee>(
104+
sm: &SourceMap,
105+
path: &str,
106+
sp: Option<Span>,
107+
err: &mut Diag<'_, E>,
108+
utf8err: Utf8Error,
109+
contents: &[u8],
110+
) {
111+
// The file exists, but it wasn't valid UTF-8.
112+
let start = utf8err.valid_up_to();
113+
let note = format!("invalid utf-8 at byte `{start}`");
114+
let msg = if let Some(len) = utf8err.error_len() {
115+
format!(
116+
"byte{s} `{bytes}` {are} not valid utf-8",
117+
bytes = if len == 1 {
118+
format!("{:?}", contents[start])
119+
} else {
120+
format!("{:?}", &contents[start..start + len])
121+
},
122+
s = pluralize!(len),
123+
are = if len == 1 { "is" } else { "are" },
124+
)
125+
} else {
126+
note.clone()
127+
};
128+
let contents = String::from_utf8_lossy(contents).to_string();
129+
let source = sm.new_source_file(PathBuf::from(path).into(), contents);
130+
let span = Span::with_root_ctxt(
131+
source.normalized_byte_pos(start as u32),
132+
source.normalized_byte_pos(start as u32),
133+
);
134+
if span.is_dummy() {
135+
err.note(note);
136+
} else {
137+
if sp.is_some() {
138+
err.span_note(span, msg);
139+
} else {
140+
err.span(span);
141+
err.span_label(span, msg);
142+
}
143+
}
144+
}
145+
87146
/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
88147
/// the initial token stream.
89148
fn new_parser_from_source_file(

src/tools/compiletest/src/errors.rs

+2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ pub fn load_errors(testfile: &Path, revision: Option<&str>) -> Vec<Error> {
101101

102102
rdr.lines()
103103
.enumerate()
104+
// We want to ignore utf-8 failures in tests during collection of annotations.
105+
.filter(|(_, line)| line.is_ok())
104106
.filter_map(|(line_num, line)| {
105107
parse_expected(last_nonfollow_error, line_num + 1, &line.unwrap(), revision).map(
106108
|(which, error)| {

src/tools/tidy/src/tests_revision_unpaired_stdout_stderr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ pub fn check(tests_path: impl AsRef<Path>, bad: &mut bool) {
5858

5959
let mut expected_revisions = BTreeSet::new();
6060

61-
let contents = std::fs::read_to_string(test).unwrap();
61+
let Ok(contents) = std::fs::read_to_string(test) else { continue };
6262

6363
// Collect directives.
6464
iter_header(&contents, &mut |HeaderLine { revision, directive, .. }| {

tests/ui/macros/not-utf8.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33
//@ reference: input.encoding.invalid
44

55
fn foo() {
6-
include!("not-utf8.bin")
6+
include!("not-utf8.bin");
77
}

tests/ui/macros/not-utf8.stderr

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1-
error: couldn't read $DIR/not-utf8.bin: stream did not contain valid UTF-8
1+
error: couldn't read `$DIR/not-utf8.bin`: stream did not contain valid UTF-8
22
--> $DIR/not-utf8.rs:6:5
33
|
4-
LL | include!("not-utf8.bin")
4+
LL | include!("not-utf8.bin");
55
| ^^^^^^^^^^^^^^^^^^^^^^^^
66
|
7+
note: byte `193` is not valid utf-8
8+
--> $DIR/not-utf8.bin:1:1
9+
|
10+
LL | �|�␂!5�cc␕␂�Ӻi��WWj�ȥ�'�}�␒�J�ȉ��W�␞O�@����␜w�V���LO����␔[ ␃_�'���SQ�~ذ��ų&��- ��lN~��!@␌ _#���kQ��h�␝�:�...
11+
| ^
712
= note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)
813

914
error: aborting due to 1 previous error

tests/ui/modules/path-no-file-name.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//@ normalize-stderr: "\.:.*\(" -> ".: $$ACCESS_DENIED_MSG ("
1+
//@ normalize-stderr: "\.`:.*\(" -> ".`: $$ACCESS_DENIED_MSG ("
22
//@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"
33

44
#[path = "."]

tests/ui/modules/path-no-file-name.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/.: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
1+
error: couldn't read `$DIR/.`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
22
--> $DIR/path-no-file-name.rs:5:1
33
|
44
LL | mod m;

tests/ui/parser/issues/issue-5806.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//@ normalize-stderr: "parser:.*\(" -> "parser: $$ACCESS_DENIED_MSG ("
1+
//@ normalize-stderr: "parser`:.*\(" -> "parser`: $$ACCESS_DENIED_MSG ("
22
//@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"
33

44
#[path = "../parser"]

tests/ui/parser/issues/issue-5806.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/../parser: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
1+
error: couldn't read `$DIR/../parser`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
22
--> $DIR/issue-5806.rs:5:1
33
|
44
LL | mod foo;

tests/ui/parser/mod_file_with_path_attr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//@ normalize-stderr: "not_a_real_file.rs:.*\(" -> "not_a_real_file.rs: $$FILE_NOT_FOUND_MSG ("
1+
//@ normalize-stderr: "not_a_real_file.rs`:.*\(" -> "not_a_real_file.rs`: $$FILE_NOT_FOUND_MSG ("
22

33
#[path = "not_a_real_file.rs"]
44
mod m; //~ ERROR not_a_real_file.rs

tests/ui/parser/mod_file_with_path_attr.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/not_a_real_file.rs: $FILE_NOT_FOUND_MSG (os error 2)
1+
error: couldn't read `$DIR/not_a_real_file.rs`: $FILE_NOT_FOUND_MSG (os error 2)
22
--> $DIR/mod_file_with_path_attr.rs:4:1
33
|
44
LL | mod m;

tests/ui/unpretty/staged-api-invalid-path-108697.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/lol: No such file or directory (os error 2)
1+
error: couldn't read `$DIR/lol`: No such file or directory (os error 2)
22
--> $DIR/staged-api-invalid-path-108697.rs:8:1
33
|
44
LL | mod foo;

0 commit comments

Comments
 (0)