//! We are permissive with whitespace, allowing `#type:ignore[code]` and
//! `# type: ignore [ code ]`, but do not allow a space before the colon.
2828
29- use std:: iter:: Peekable ;
30- use std:: str:: CharIndices ;
31-
3229use clap:: ValueEnum ;
3330use dupe:: Dupe ;
3431use enum_iterator:: Sequence ;
@@ -42,6 +39,10 @@ use starlark_map::smallset;
/// Finds the byte offset of the first '#' character that starts a comment, tracking
/// whether we're inside a multi-line triple-quoted string.
///
/// All interesting characters (`#`, `'`, `"`, `\`) are ASCII, so we operate
/// on bytes directly — UTF-8 guarantees these never appear inside multi-byte
/// sequences.
///
/// `in_triple_quote` should be `Some('"')` or `Some('\'')` if the line begins
/// inside an open triple-quoted string from a previous line, or `None` otherwise.
///
/// Returns a pair `(comment_start, triple_quote)`:
/// - If a comment is found, `comment_start` is the byte offset of its `#` and
///   `triple_quote` is `None`.
/// - Otherwise `comment_start` is `None` and `triple_quote` is the quote
///   character of the triple-quoted string still open at the end of the line,
///   or `None` if no such string is open.
pub fn find_comment_start(
    line: &str,
    in_triple_quote: Option<char>,
) -> (Option<usize>, Option<char>) {
    let mut bytes = line.bytes().enumerate().peekable();
    // The cast is lossless for the documented delimiters (`"` and `'`), which
    // are both ASCII.
    let mut triple_quote: Option<u8> = in_triple_quote.map(|c| c as u8);
    let mut single_quote: Option<u8> = None;

    while let Some((idx, b)) = bytes.next() {
        if let Some(q) = triple_quote {
            // Inside triple-quoted string.
            if b == b'\\' {
                // Skip the escaped byte. If the escaped character is multi-byte,
                // its remaining bytes are non-ASCII and match nothing below.
                bytes.next();
            } else if b == q
                // These checks consume zero, one, or two additional bytes:
                // - zero or one: a lone quote or a pair of quotes, not interesting
                // - two: this is the end of the triple-quoted string
                && bytes.next_if(|&(_, next)| next == q).is_some()
                && bytes.next_if(|&(_, next)| next == q).is_some()
            {
                triple_quote = None;
            }
            continue;
        }

        if let Some(q) = single_quote {
            // Inside regular string.
            if b == b'\\' {
                bytes.next(); // Skip escaped character.
            } else if b == q {
                single_quote = None;
            }
            continue;
        }

        // Normal code.
        match b {
            b'"' | b'\'' => {
                if bytes.next_if(|&(_, next)| next == b).is_some() {
                    if bytes.next_if(|&(_, next)| next == b).is_some() {
                        triple_quote = Some(b);
                    }
                    // else: empty string ("" or ''), both quotes already consumed.
                } else {
                    single_quote = Some(b);
                }
            }
            b'#' => return (Some(idx), None),
            _ => {}
        }
    }
    (None, triple_quote.map(|b| b as char))
}
114100
115101/// Finds the byte offset of the first '#' character that starts a comment.
0 commit comments