From c0d538e191a407417536edbb0df423cbed12722b Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Wed, 9 Sep 2015 18:32:16 +0200
Subject: [PATCH] Add "token serialization types".

Servo will use this for custom property values which are conceptually
sequences of tokens, but are represented in memory as strings.
When concatenating such strings, an empty comment `/**/` sometimes
needs to be inserted so that two tokens are not reparsed as one.
---
 Cargo.toml        |   2 +-
 src/lib.rs        |   2 +-
 src/serializer.rs | 124 ++++++++++++++++++++++++++++++++++++++++++++++
 src/tests.rs      |  32 ++++++++++--
 4 files changed, 153 insertions(+), 7 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 24989321..ea0f06c4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "cssparser"
-version = "0.3.8"
+version = "0.3.9"
 authors = [ "Simon Sapin " ]
 description = "Rust implementation of CSS Syntax Level 3"
diff --git a/src/lib.rs b/src/lib.rs
index cb0459e4..c7deb4d4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -84,7 +84,7 @@ pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser};
 pub use from_bytes::decode_stylesheet_bytes;
 pub use color::{RGBA, Color, parse_color_keyword};
 pub use nth::parse_nth;
-pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string};
+pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
 pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};
diff --git a/src/serializer.rs b/src/serializer.rs
index 8d5f582b..f94903e6 100644
--- a/src/serializer.rs
+++ b/src/serializer.rs
@@ -290,3 +290,127 @@ impl_tocss_for_number!(i32);
 impl_tocss_for_number!(u32);
 impl_tocss_for_number!(i64);
 impl_tocss_for_number!(u64);
+
+
+/// A category of token. See the `needs_separator_when_before` method.
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
+pub struct TokenSerializationType(TokenSerializationTypeVariants);
+
+impl TokenSerializationType {
+    /// Return a value that represents the absence of a token, e.g. before the start of the input.
+    pub fn nothing() -> TokenSerializationType {
+        TokenSerializationType(TokenSerializationTypeVariants::Nothing)
+    }
+
+    /// If this value is `TokenSerializationType::nothing()`, set it to the given value instead.
+    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
+        if self.0 == TokenSerializationTypeVariants::Nothing {
+            self.0 = new_value.0
+        }
+    }
+
+    /// Return true if, when a token of category `self` is serialized just before
+    /// a token of category `other` with no whitespace in between,
+    /// an empty comment `/**/` needs to be inserted between them
+    /// so that they are not re-parsed as a single token.
+    ///
+    /// See https://drafts.csswg.org/css-syntax/#serialization
+    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
+        use self::TokenSerializationTypeVariants::*;
+        match self.0 {
+            Ident => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
+                UnicodeRange | CDC | OpenParen),
+            AtKeywordOrHash | Dimension => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
+                UnicodeRange | CDC),
+            DelimHash | DelimMinus | Number => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
+                UnicodeRange),
+            DelimAt => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | UnicodeRange),
+            UnicodeRange => matches!(other.0,
+                Ident | Function | Number | Percentage | Dimension | DelimQuestion),
+            DelimDotOrPlus => matches!(other.0, Number | Percentage | Dimension),
+            DelimAssorted | DelimAsterisk => matches!(other.0, DelimEquals),
+            DelimBar => matches!(other.0, DelimEquals | DelimBar | DashMatch),
+            DelimSlash => matches!(other.0, DelimAsterisk | SubstringMatch),
+            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen |
+            DashMatch | SubstringMatch | DelimQuestion | DelimEquals | Other => false,
+        }
+    }
+}
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
+enum TokenSerializationTypeVariants {
+    Nothing,
+    WhiteSpace,
+    AtKeywordOrHash,
+    Number,
+    Dimension,
+    Percentage,
+    UnicodeRange,
+    UrlOrBadUrl,
+    Function,
+    Ident,
+    CDC,
+    DashMatch,
+    SubstringMatch,
+    OpenParen,       // '('
+    DelimHash,       // '#'
+    DelimAt,         // '@'
+    DelimDotOrPlus,  // '.', '+'
+    DelimMinus,      // '-'
+    DelimQuestion,   // '?'
+    DelimAssorted,   // '$', '^', '~'
+    DelimEquals,     // '='
+    DelimBar,        // '|'
+    DelimSlash,      // '/'
+    DelimAsterisk,   // '*'
+    Other,           // anything else
+}
+
+impl<'a> Token<'a> {
+    /// Categorize a token into a type that determines when `/**/` needs to be inserted
+    /// between two tokens when serialized next to each other without whitespace in between.
+    ///
+    /// See the `TokenSerializationType::needs_separator_when_before` method.
+    pub fn serialization_type(&self) -> TokenSerializationType {
+        use self::TokenSerializationTypeVariants::*;
+        TokenSerializationType(match *self {
+            Token::Ident(_) => Ident,
+            Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
+            Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
+            Token::Delim('#') => DelimHash,
+            Token::Delim('@') => DelimAt,
+            Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
+            Token::Delim('-') => DelimMinus,
+            Token::Delim('?') => DelimQuestion,
+            Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
+            Token::Delim('=') => DelimEquals,
+            Token::Delim('|') => DelimBar,
+            Token::Delim('/') => DelimSlash,
+            Token::Delim('*') => DelimAsterisk,
+            Token::Number(_) => Number,
+            Token::Percentage(_) => Percentage,
+            Token::Dimension(..) => Dimension,
+            Token::UnicodeRange(..) => UnicodeRange,
+            Token::WhiteSpace(_) => WhiteSpace,
+            Token::Comment(_) => DelimSlash,
+            Token::DashMatch => DashMatch,
+            Token::SubstringMatch => SubstringMatch,
+            Token::Column => DelimBar,
+            Token::CDC => CDC,
+            Token::Function(_) => Function,
+            Token::ParenthesisBlock => OpenParen,
+            Token::SquareBracketBlock | Token::CurlyBracketBlock |
+            Token::CloseParenthesis | Token::CloseSquareBracket | Token::CloseCurlyBracket |
+            Token::QuotedString(_) | Token::BadString |
+            Token::Delim(_) | Token::Colon | Token::Semicolon | Token::Comma | Token::CDO |
+            Token::IncludeMatch | Token::PrefixMatch | Token::SuffixMatch
+            => Other,
+        })
+    }
+}
diff --git a/src/tests.rs b/src/tests.rs
index 0068e989..3e2f70e1 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -18,6 +18,7 @@ use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocat
             AtRuleType, AtRuleParser, QualifiedRuleParser,
             parse_one_declaration, parse_one_rule, parse_important,
             decode_stylesheet_bytes,
+            TokenSerializationType,
             Color, RGBA, parse_nth, ToCss};
@@ -323,10 +324,31 @@ fn nth() {
 
 
 #[test]
-fn serializer() {
+fn serializer_not_preserving_comments() {
+    serializer(false)
+}
+
+#[test]
+fn serializer_preserving_comments() {
+    serializer(true)
+}
+
+fn serializer(preserve_comments: bool) {
     run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| {
-        fn write_to(input: &mut Parser, string: &mut String) {
-            while let Ok(token) = input.next_including_whitespace_and_comments() {
+        fn write_to(mut previous_token: TokenSerializationType,
+                    input: &mut Parser,
+                    string: &mut String,
+                    preserve_comments: bool) {
+            while let Ok(token) = if preserve_comments {
+                input.next_including_whitespace_and_comments()
+            } else {
+                input.next_including_whitespace()
+            } {
+                let token_type = token.serialization_type();
+                if !preserve_comments && previous_token.needs_separator_when_before(token_type) {
+                    string.push_str("/**/")
+                }
+                previous_token = token_type;
                 token.to_css(string).unwrap();
                 let closing_token = match token {
                     Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis),
@@ -336,7 +358,7 @@ fn serializer() {
                 };
                 if let Some(closing_token) = closing_token {
                     input.parse_nested_block(|input| {
-                        write_to(input, string);
+                        write_to(previous_token, input, string, preserve_comments);
                         Ok(())
                     }).unwrap();
                     closing_token.to_css(string).unwrap();
@@ -344,7 +366,7 @@ fn serializer() {
             }
         }
        let mut serialized = String::new();
-        write_to(input, &mut serialized);
+        write_to(TokenSerializationType::nothing(), input, &mut serialized, preserve_comments);
        let parser = &mut Parser::new(&serialized);
        Json::Array(component_values_to_json(parser))
    });
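
Note (not part of the patch itself): the sketch below shows how a caller might use the new
API, following the same pattern as the updated test in src/tests.rs. It assumes cssparser
0.3.9 with this change applied; the function name `serialize_without_comments` is made up
for illustration.

// A minimal usage sketch (not part of the patch). Nested blocks are ignored
// here to keep the example short.
extern crate cssparser;

use cssparser::{Parser, ToCss, TokenSerializationType};

/// Re-serialize a flat list of component values, dropping comments but inserting
/// an empty comment `/**/` wherever two adjacent tokens would otherwise re-parse
/// as a single token.
fn serialize_without_comments(input: &str) -> String {
    let mut output = String::new();
    let mut previous = TokenSerializationType::nothing();
    let mut parser = Parser::new(input);
    // `next_including_whitespace` skips comments but keeps whitespace tokens.
    while let Ok(token) = parser.next_including_whitespace() {
        let current = token.serialization_type();
        if previous.needs_separator_when_before(current) {
            // e.g. `12` followed by `px` would otherwise re-parse as `12px`.
            output.push_str("/**/");
        }
        previous = current;
        token.to_css(&mut output).unwrap();
    }
    output
}

fn main() {
    // The comment originally kept the number and the unit apart; dropping it
    // requires `/**/` to preserve the token boundary.
    assert_eq!(serialize_without_comments("12/* comment */px"), "12/**/px");
}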