diff --git a/src/comp/driver/session.rs b/src/comp/driver/session.rs index 8da3e40baa6c0..d8a0771c1d08b 100644 --- a/src/comp/driver/session.rs +++ b/src/comp/driver/session.rs @@ -101,7 +101,7 @@ obj session(@config targ_cfg, fn unimpl(str msg) -> ! { self.bug("unimplemented " + msg); } fn get_codemap() -> codemap::codemap { ret parse_sess.cm; } fn lookup_pos(uint pos) -> codemap::loc { - ret codemap::lookup_pos(parse_sess.cm, pos); + ret codemap::lookup_char_pos(parse_sess.cm, pos); } fn get_parse_sess() -> parse_sess { ret parse_sess; } fn next_node_id() -> ast::node_id { diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 6df2f2422ad95..3455c6501515d 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -9,50 +9,69 @@ import std::option::none; type filename = str; +type file_pos = rec(uint ch, uint byte); + /* A codemap is a thing that maps uints to file/line/column positions * in a crate. This to make it possible to represent the positions * with single-word things, rather than passing records all over the * compiler. */ -type filemap = @rec(filename name, uint start_pos, mutable vec[uint] lines); +type filemap = @rec(filename name, file_pos start_pos, + mutable vec[file_pos] lines); type codemap = @rec(mutable vec[filemap] files); type loc = rec(filename filename, uint line, uint col); fn new_codemap() -> codemap { - let vec[filemap] files = []; - ret @rec(mutable files=files); + ret @rec(mutable files=[]); +} + +fn new_filemap(filename filename, uint start_pos_ch, uint start_pos_byte) + -> filemap { + ret @rec(name=filename, start_pos=rec(ch=start_pos_ch, + byte=start_pos_byte), + mutable lines=[rec(ch=start_pos_ch, byte=start_pos_byte)]); } -fn new_filemap(filename filename, uint start_pos) -> filemap { - ret @rec(name=filename, start_pos=start_pos, mutable lines=[0u]); +fn next_line(filemap file, uint chpos, uint byte_pos) { + vec::push(file.lines, rec(ch=chpos, byte=byte_pos)); } -fn next_line(filemap file, uint pos) { vec::push[uint](file.lines, pos); } +type lookup_fn = fn (file_pos pos) -> uint; -fn lookup_pos(codemap map, uint pos) -> loc { +fn lookup_pos(codemap map, uint pos, lookup_fn lookup) -> loc { auto a = 0u; - auto b = vec::len[filemap](map.files); + auto b = vec::len(map.files); while (b - a > 1u) { auto m = (a + b) / 2u; - if (map.files.(m).start_pos > pos) { b = m; } else { a = m; } + if (lookup(map.files.(m).start_pos) > pos) { b = m; } else { a = m; } } auto f = map.files.(a); a = 0u; - b = vec::len[uint](f.lines); + b = vec::len(f.lines); while (b - a > 1u) { auto m = (a + b) / 2u; - if (f.lines.(m) > pos) { b = m; } else { a = m; } + if (lookup(f.lines.(m)) > pos) { b = m; } else { a = m; } } - ret rec(filename=f.name, line=a + 1u, col=pos - f.lines.(a)); + ret rec(filename=f.name, line=a + 1u, col=pos - lookup(f.lines.(a))); +} + +fn lookup_char_pos(codemap map, uint pos) -> loc { + fn lookup(file_pos pos) -> uint { ret pos.ch; } + ret lookup_pos(map, pos, lookup); +} + +fn lookup_byte_pos(codemap map, uint pos) -> loc { + fn lookup(file_pos pos) -> uint { ret pos.byte; } + ret lookup_pos(map, pos, lookup); } type span = rec(uint lo, uint hi); fn span_to_str(&span sp, &codemap cm) -> str { - auto lo = lookup_pos(cm, sp.lo); - auto hi = lookup_pos(cm, sp.hi); + auto lo = lookup_char_pos(cm, sp.lo); + auto hi = lookup_char_pos(cm, sp.hi); ret #fmt("%s:%u:%u:%u:%u", lo.filename, lo.line, lo.col, hi.line, hi.col); } @@ -63,10 +82,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color, alt (sp) { case (some(?ssp)) { ss = span_to_str(ssp, cm); - - // FIXME: we're not able to look up lines read from .rc files yet. - // maybe_lines = some(span_to_lines(ssp, cm)); - + maybe_lines = some(span_to_lines(ssp, cm)); } case (none) { } } @@ -85,7 +101,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color, // get access to the necessary lines. auto rdr = io::file_reader(lines.name); auto file = str::unsafe_from_bytes(rdr.read_whole_stream()); - auto fm = codemap::get_filemap(cm, lines.name); + auto fm = get_filemap(cm, lines.name); // arbitrarily only print up to six lines of the error auto max_lines = 6u; @@ -98,7 +114,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color, // Print the offending lines for (uint line in display_lines) { io::stdout().write_str(#fmt("%s:%u ", fm.name, line + 1u)); - auto s = codemap::get_line(fm, line as int, file); + auto s = get_line(fm, line as int, file); if (!str::ends_with(s, "\n")) { s += "\n"; } @@ -116,7 +132,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color, // If there's one line at fault we can easily point to the problem if (vec::len(lines.lines) == 1u) { - auto lo = codemap::lookup_pos(cm, option::get(sp).lo); + auto lo = lookup_char_pos(cm, option::get(sp).lo); auto digits = 0u; auto num = lines.lines.(0) / 10u; @@ -129,7 +145,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color, while (left > 0u) { str::push_char(s, ' '); left -= 1u; } s += "^"; - auto hi = codemap::lookup_pos(cm, option::get(sp).hi); + auto hi = lookup_char_pos(cm, option::get(sp).hi); if (hi.col != lo.col) { // the ^ already takes up one space auto width = hi.col - lo.col - 1u; @@ -158,8 +174,8 @@ fn emit_note(&option::t[span] sp, &str msg, &codemap cm) { type file_lines = rec(str name, vec[uint] lines); fn span_to_lines(span sp, codemap::codemap cm) -> @file_lines { - auto lo = codemap::lookup_pos(cm, sp.lo); - auto hi = codemap::lookup_pos(cm, sp.hi); + auto lo = lookup_char_pos(cm, sp.lo); + auto hi = lookup_char_pos(cm, sp.hi); auto lines = []; for each (uint i in uint::range(lo.line - 1u, hi.line as uint)) { lines += [i]; @@ -168,13 +184,22 @@ fn span_to_lines(span sp, codemap::codemap cm) -> @file_lines { } fn get_line(filemap fm, int line, &str file) -> str { + let uint begin = fm.lines.(line).byte - fm.start_pos.byte; let uint end; - if ((line as uint) + 1u >= vec::len(fm.lines)) { - end = str::byte_len(file); + if (line as uint < vec::len(fm.lines) - 1u) { + end = fm.lines.(line + 1).byte - fm.start_pos.byte; } else { - end = fm.lines.(line + 1); + // If we're not done parsing the file, we're at the limit of what's + // parsed. If we just slice the rest of the string, we'll print out + // the remainder of the file, which is undesirable. + end = str::byte_len(file); + auto rest = str::slice(file, begin, end); + auto newline = str::index(rest, '\n' as u8); + if (newline != -1) { + end = begin + (newline as uint); + } } - ret str::slice(file, fm.lines.(line), end); + ret str::slice(file, begin, end); } fn get_filemap(codemap cm, str filename) -> filemap { diff --git a/src/comp/syntax/parse/eval.rs b/src/comp/syntax/parse/eval.rs index 226d84233e0b2..b953e0e11a912 100644 --- a/src/comp/syntax/parse/eval.rs +++ b/src/comp/syntax/parse/eval.rs @@ -22,6 +22,7 @@ type ctx = mutable vec[str] deps, parser::parse_sess sess, mutable uint chpos, + mutable uint byte_pos, ast::crate_cfg cfg); fn eval_crate_directives(ctx cx, &(@ast::crate_directive)[] cdirs, @@ -57,7 +58,8 @@ fn eval_crate_directive(ctx cx, @ast::crate_directive cdir, str prefix, }; if (cx.mode == mode_depend) { cx.deps += [full_path]; ret; } auto p0 = - new_parser_from_file(cx.sess, cx.cfg, full_path, cx.chpos); + new_parser_from_file(cx.sess, cx.cfg, full_path, cx.chpos, + cx.byte_pos); auto inner_attrs = parse_inner_attrs_and_next(p0); auto mod_attrs = attrs + inner_attrs._0; auto first_item_outer_attrs = inner_attrs._1; @@ -66,8 +68,9 @@ fn eval_crate_directive(ctx cx, @ast::crate_directive cdir, str prefix, auto i = syntax::parse::parser::mk_item (p0, cdir.span.lo, cdir.span.hi, id, ast::item_mod(m0), mod_attrs); - // Thread defids and chpos through the parsers + // Thread defids, chpos and byte_pos through the parsers cx.chpos = p0.get_chpos(); + cx.byte_pos = p0.get_byte_pos(); items += ~[i]; } case (ast::cdir_dir_mod(?id, ?dir_opt, ?cdirs, ?attrs)) { diff --git a/src/comp/syntax/parse/lexer.rs b/src/comp/syntax/parse/lexer.rs index a3fd6670db3c9..1c9bf72df8339 100644 --- a/src/comp/syntax/parse/lexer.rs +++ b/src/comp/syntax/parse/lexer.rs @@ -24,6 +24,7 @@ type reader = fn get_mark_str() -> str ; fn get_interner() -> @interner::interner[str] ; fn get_chpos() -> uint ; + fn get_byte_pos() -> uint ; fn get_col() -> uint ; fn get_filemap() -> codemap::filemap ; fn err(str) ; @@ -53,6 +54,7 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap, } fn get_mark_chpos() -> uint { ret mark_chpos; } fn get_chpos() -> uint { ret chpos; } + fn get_byte_pos() -> uint { ret pos; } fn curr() -> char { ret ch; } fn next() -> char { if (pos < len) { @@ -70,7 +72,10 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap, if (pos < len) { col += 1u; chpos += 1u; - if (ch == '\n') { codemap::next_line(fm, chpos); col = 0u; } + if (ch == '\n') { + codemap::next_line(fm, chpos, pos + fm.start_pos.byte); + col = 0u; + } auto next = str::char_range_at(src, pos); pos = next._1; ch = next._0; @@ -86,7 +91,8 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap, let vec[str] strs = []; auto rd = reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u, - filemap.start_pos, filemap.start_pos, strs, filemap, itr); + filemap.start_pos.ch, filemap.start_pos.ch, strs, filemap, + itr); rd.init(); ret rd; } @@ -737,7 +743,7 @@ fn gather_comments_and_literals(&codemap::codemap cm, str path) auto srdr = io::file_reader(path); auto src = str::unsafe_from_bytes(srdr.read_whole_stream()); auto itr = @interner::mk[str](str::hash, str::eq); - auto rdr = new_reader(cm, src, codemap::new_filemap(path, 0u), itr); + auto rdr = new_reader(cm, src, codemap::new_filemap(path, 0u, 0u), itr); let cmnt[] comments = ~[]; let lit[] literals = ~[]; let bool first_read = true; diff --git a/src/comp/syntax/parse/parser.rs b/src/comp/syntax/parse/parser.rs index fb9eca8640df9..ae97c1ac0ea78 100644 --- a/src/comp/syntax/parse/parser.rs +++ b/src/comp/syntax/parse/parser.rs @@ -51,17 +51,18 @@ type parser = fn get_filemap() -> codemap::filemap ; fn get_bad_expr_words() -> hashmap[str, ()] ; fn get_chpos() -> uint ; + fn get_byte_pos() -> uint ; fn get_id() -> ast::node_id ; fn get_sess() -> parse_sess; }; fn new_parser_from_file(parse_sess sess, ast::crate_cfg cfg, - str path, uint pos) -> parser { + str path, uint chpos, uint byte_pos) -> parser { auto ftype = SOURCE_FILE; if (str::ends_with(path, ".rc")) { ftype = CRATE_FILE; } auto srdr = io::file_reader(path); auto src = str::unsafe_from_bytes(srdr.read_whole_stream()); - auto filemap = codemap::new_filemap(path, pos); + auto filemap = codemap::new_filemap(path, chpos, byte_pos); vec::push(sess.cm.files, filemap); auto itr = @interner::mk(str::hash, str::eq); auto rdr = lexer::new_reader(sess.cm, src, filemap, itr); @@ -115,6 +116,7 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr, fn get_filemap() -> codemap::filemap { ret rdr.get_filemap(); } fn get_bad_expr_words() -> hashmap[str, ()] { ret bad_words; } fn get_chpos() -> uint { ret rdr.get_chpos(); } + fn get_byte_pos() -> uint { ret rdr.get_byte_pos(); } fn get_id() -> ast::node_id { ret next_node_id(sess); } fn get_sess() -> parse_sess { ret sess; } } @@ -2417,7 +2419,7 @@ fn parse_native_view(&parser p) -> (@ast::view_item)[] { fn parse_crate_from_source_file(&str input, &ast::crate_cfg cfg, &parse_sess sess) -> @ast::crate { - auto p = new_parser_from_file(sess, cfg, input, 0u); + auto p = new_parser_from_file(sess, cfg, input, 0u, 0u); ret parse_crate_mod(p, cfg, sess); } @@ -2425,7 +2427,7 @@ fn parse_crate_from_source_str(&str name, &str source, &ast::crate_cfg cfg, &codemap::codemap cm) -> @ast::crate { auto sess = @rec(cm=cm, mutable next_id=0); auto ftype = SOURCE_FILE; - auto filemap = codemap::new_filemap(name, 0u); + auto filemap = codemap::new_filemap(name, 0u, 0u); sess.cm.files += [filemap]; auto itr = @interner::mk(str::hash, str::eq); auto rdr = lexer::new_reader(sess.cm, source, filemap, itr); @@ -2543,7 +2545,7 @@ fn parse_crate_directives(&parser p, token::token term, fn parse_crate_from_crate_file(&str input, &ast::crate_cfg cfg, &parse_sess sess) -> @ast::crate { - auto p = new_parser_from_file(sess, cfg, input, 0u); + auto p = new_parser_from_file(sess, cfg, input, 0u, 0u); auto lo = p.get_lo_pos(); auto prefix = std::fs::dirname(p.get_filemap().name); auto leading_attrs = parse_inner_attrs_and_next(p); @@ -2556,6 +2558,7 @@ fn parse_crate_from_crate_file(&str input, &ast::crate_cfg cfg, mutable deps=deps, sess=sess, mutable chpos=p.get_chpos(), + mutable byte_pos=p.get_byte_pos(), cfg = p.get_cfg()); auto m = eval::eval_crate_directives_to_mod(cx, cdirs, prefix); diff --git a/src/comp/syntax/print/pprust.rs b/src/comp/syntax/print/pprust.rs index df573020618f3..dedc1cb238f82 100644 --- a/src/comp/syntax/print/pprust.rs +++ b/src/comp/syntax/print/pprust.rs @@ -1346,8 +1346,8 @@ fn maybe_print_trailing_comment(&ps s, codemap::span span, alt (next_comment(s)) { case (some(?cmnt)) { if (cmnt.style != lexer::trailing) { ret; } - auto span_line = codemap::lookup_pos(cm, span.hi); - auto comment_line = codemap::lookup_pos(cm, cmnt.pos); + auto span_line = codemap::lookup_char_pos(cm, span.hi); + auto comment_line = codemap::lookup_char_pos(cm, cmnt.pos); auto next = cmnt.pos + 1u; alt (next_pos) { case (none) { } case (some(?p)) { next = p; } } if (span.hi < cmnt.pos && cmnt.pos < next &&