Skip to content

Commit 180e924

Browse files
committed
Rework BeforeAttributeValue tokenizer state
1 parent 56e2ef2 commit 180e924

File tree

1 file changed

+15
-12
lines changed

1 file changed

+15
-12
lines changed

src/tokenizer/mod.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ macro_rules! shorthand (
529529
( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c); );
530530
( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.push($c); );
531531
( $me:ident : discard_tag ) => ( $me.discard_tag(); );
532+
( $me:ident : discard_char ) => ( $me.discard_char(); );
532533
( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.push($c); );
533534
( $me:ident : emit_temp ) => ( $me.emit_temp_buf(); );
534535
( $me:ident : clear_temp ) => ( $me.clear_temp_buf(); );
@@ -613,6 +614,10 @@ macro_rules! get_char ( ($me:expr) => (
613614
unwrap_or_return!($me.get_char(), false)
614615
));
615616

617+
// Look at the next input character without consuming it: calls `$me.peek()`
// and passes the result to `unwrap_or_return!` with `false` as the second
// argument. The caller matches the result against `char` patterns.
// NOTE(review): `unwrap_or_return!` is defined outside this view; presumably
// it yields the character, or makes the enclosing function return `false`
// when no character is available -- confirm against its definition.
macro_rules! peek ( ($me:expr) => (
618+
unwrap_or_return!($me.peek(), false)
619+
));
620+
616621
// Forwards to `$me.pop_except_from($set)` and passes the result to
// `unwrap_or_return!` with `false` as the second argument.
// NOTE(review): both `pop_except_from` and `unwrap_or_return!` are defined
// outside this view; presumably the enclosing function returns `false` when
// the call produces nothing -- confirm against their definitions.
macro_rules! pop_except_from ( ($me:expr, $set:expr) => (
617622
unwrap_or_return!($me.pop_except_from($set), false)
618623
));
@@ -912,18 +917,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
912917
}},
913918

914919
//§ before-attribute-value-state
915-
states::BeforeAttributeValue => loop { match get_char!(self) {
916-
'\t' | '\n' | '\x0C' | ' ' => (),
917-
'"' => go!(self: to AttributeValue DoubleQuoted),
918-
'&' => go!(self: reconsume AttributeValue Unquoted),
919-
'\'' => go!(self: to AttributeValue SingleQuoted),
920-
'\0' => go!(self: error; push_value '\u{fffd}'; to AttributeValue Unquoted),
921-
'>' => go!(self: error; emit_tag Data),
922-
c => {
923-
go_match!(self: c,
924-
'<' , '=' , '`' => error);
925-
go!(self: push_value c; to AttributeValue Unquoted);
926-
}
920+
// Use peek so we can handle the first attr character along with the rest,
921+
// hopefully in the same zero-copy buffer.
922+
states::BeforeAttributeValue => loop { match peek!(self) {
923+
'\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char),
924+
'"' => go!(self: discard_char; to AttributeValue DoubleQuoted),
925+
'&' => go!(self: to AttributeValue Unquoted),
926+
'\'' => go!(self: discard_char; to AttributeValue SingleQuoted),
927+
'\0' => go!(self: discard_char; error; push_value '\u{fffd}'; to AttributeValue Unquoted),
928+
'>' => go!(self: discard_char; error; emit_tag Data),
929+
_ => go!(self: to AttributeValue Unquoted),
927930
}},
928931

929932
//§ attribute-value-(double-quoted)-state

0 commit comments

Comments
 (0)