Skip to content

Commit 71a9b2a

Browse files
fix(html): correctly handle BOM in HTML-ish languages
1 parent 69cecec commit 71a9b2a

File tree

3 files changed

+64
-10
lines changed

3 files changed

+64
-10
lines changed

crates/biome_html_parser/src/lexer/mod.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,7 @@ impl<'src> HtmlLexer<'src> {
8282
_ if self.current_kind != T![<] && is_attribute_name_byte(current) => {
8383
self.consume_identifier(current, false)
8484
}
85-
_ => {
86-
if self.position == 0
87-
&& let Some((bom, bom_size)) = self.consume_potential_bom(UNICODE_BOM)
88-
{
89-
self.unicode_bom_length = bom_size;
90-
return bom;
91-
}
92-
self.consume_unexpected_character()
93-
}
85+
_ => self.consume_unexpected_character(),
9486
}
9587
}
9688

@@ -134,7 +126,15 @@ impl<'src> HtmlLexer<'src> {
134126
self.consume_byte(HTML_LITERAL)
135127
}
136128
}
137-
_ => self.consume_html_text(current),
129+
_ => {
130+
if self.position == 0
131+
&& let Some((bom, bom_size)) = self.consume_potential_bom(UNICODE_BOM)
132+
{
133+
self.unicode_bom_length = bom_size;
134+
return bom;
135+
}
136+
self.consume_html_text(current)
137+
}
138138
}
139139
}
140140

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<!doctype>
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
---
2+
source: crates/biome_html_parser/tests/spec_test.rs
3+
assertion_line: 138
4+
expression: snapshot
5+
---
6+
## Input
7+
8+
```html
9+
<!doctype>
10+
11+
```
12+
13+
14+
## AST
15+
16+
```
17+
HtmlRoot {
18+
bom_token: UNICODE_BOM@0..3 "\u{feff}" [] [],
19+
frontmatter: missing (optional),
20+
directive: HtmlDirective {
21+
l_angle_token: L_ANGLE@3..4 "<" [] [],
22+
excl_token: BANG@4..5 "!" [] [],
23+
doctype_token: DOCTYPE_KW@5..12 "doctype" [] [],
24+
html_token: missing (optional),
25+
quirk_token: missing (optional),
26+
public_id_token: missing (optional),
27+
system_id_token: missing (optional),
28+
r_angle_token: R_ANGLE@12..13 ">" [] [],
29+
},
30+
html: HtmlElementList [],
31+
eof_token: EOF@13..14 "" [Newline("\n")] [],
32+
}
33+
```
34+
35+
## CST
36+
37+
```
38+
39+
0: UNICODE_BOM@0..3 "\u{feff}" [] []
40+
1: (empty)
41+
42+
0: L_ANGLE@3..4 "<" [] []
43+
1: BANG@4..5 "!" [] []
44+
2: DOCTYPE_KW@5..12 "doctype" [] []
45+
3: (empty)
46+
4: (empty)
47+
5: (empty)
48+
6: (empty)
49+
7: R_ANGLE@12..13 ">" [] []
50+
51+
4: EOF@13..14 "" [Newline("\n")] []
52+
53+
```

0 commit comments

Comments
 (0)