Skip to content

Add "token serialization types". #89

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "cssparser"
version = "0.3.8"
version = "0.3.9"
authors = [ "Simon Sapin <[email protected]>" ]

description = "Rust implementation of CSS Syntax Level 3"
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser};
pub use from_bytes::decode_stylesheet_bytes;
pub use color::{RGBA, Color, parse_color_keyword};
pub use nth::parse_nth;
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string};
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};


Expand Down
124 changes: 124 additions & 0 deletions src/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,127 @@ impl_tocss_for_number!(i32);
impl_tocss_for_number!(u32);
impl_tocss_for_number!(i64);
impl_tocss_for_number!(u64);


/// A category of token. See the `needs_separator_when_before` method.
///
/// The wrapped enum is private: values are obtained from
/// `TokenSerializationType::nothing()` or `Token::serialization_type()`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
pub struct TokenSerializationType(TokenSerializationTypeVariants);

impl TokenSerializationType {
    /// Return a value that represents the absence of a token, e.g. before the start of the input.
    pub fn nothing() -> TokenSerializationType {
        TokenSerializationType(TokenSerializationTypeVariants::Nothing)
    }

    /// If this value is `TokenSerializationType::nothing()`, set it to the given value instead.
    ///
    /// Any other value is left untouched.
    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
        if self.0 == TokenSerializationTypeVariants::Nothing {
            self.0 = new_value.0
        }
    }

    /// Return true if, when a token of category `self` is serialized just before
    /// a token of category `other` with no whitespace in between,
    /// an empty comment `/**/` needs to be inserted between them
    /// so that they are not re-parsed as a single token.
    ///
    /// For example a number directly followed by a `%` delimiter would be
    /// re-parsed as one percentage token, so that pair needs a separator.
    ///
    /// See <https://drafts.csswg.org/css-syntax/#serialization>
    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
        use self::TokenSerializationTypeVariants::*;
        match self.0 {
            Ident => matches!(
                other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
                UnicodeRange | CDC | OpenParen
            ),
            AtKeywordOrHash | Dimension => matches!(
                other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
                UnicodeRange | CDC
            ),
            DelimHash | DelimMinus => matches!(
                other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
                UnicodeRange
            ),
            // Same as the arm above, plus `%`: "1" followed by "%" would otherwise
            // be read back as a single percentage token.
            Number => matches!(
                other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
                UnicodeRange | DelimPercent
            ),
            DelimAt => matches!(
                other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | UnicodeRange
            ),
            UnicodeRange => matches!(
                other.0,
                Ident | Function | Number | Percentage | Dimension | DelimQuestion
            ),
            DelimDotOrPlus => matches!(other.0, Number | Percentage | Dimension),
            DelimAssorted | DelimAsterisk => matches!(other.0, DelimEquals),
            DelimBar => matches!(other.0, DelimEquals | DelimBar | DashMatch),
            DelimSlash => matches!(other.0, DelimAsterisk | SubstringMatch),
            // These categories never need a separator after them.
            // Listed explicitly (no `_` arm) so that adding a variant is a compile error here.
            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen |
            DashMatch | SubstringMatch | DelimQuestion | DelimEquals | DelimPercent |
            Other => false,
        }
    }
}

/// The private categories behind `TokenSerializationType`.
///
/// Tokens that behave the same way in `needs_separator_when_before`
/// share a single variant.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
enum TokenSerializationTypeVariants {
    Nothing,
    WhiteSpace,
    AtKeywordOrHash,
    Number,
    Dimension,
    Percentage,
    UnicodeRange,
    UrlOrBadUrl,
    Function,
    Ident,
    CDC,
    DashMatch,
    SubstringMatch,
    OpenParen,       // '('
    DelimHash,       // '#'
    DelimAt,         // '@'
    DelimDotOrPlus,  // '.', '+'
    DelimMinus,      // '-'
    DelimQuestion,   // '?'
    DelimAssorted,   // '$', '^', '~'
    DelimEquals,     // '='
    DelimBar,        // '|'
    DelimSlash,      // '/'
    DelimAsterisk,   // '*'
    DelimPercent,    // '%'
    Other,           // anything else
}

impl<'a> Token<'a> {
    /// Categorize a token into a type that determines when `/**/` needs to be inserted
    /// between two tokens when serialized next to each other without whitespace in between.
    ///
    /// See the `TokenSerializationType::needs_separator_when_before` method.
    pub fn serialization_type(&self) -> TokenSerializationType {
        use self::TokenSerializationTypeVariants::*;
        TokenSerializationType(match *self {
            Token::Ident(_) => Ident,
            Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
            Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
            Token::Delim('#') => DelimHash,
            Token::Delim('@') => DelimAt,
            Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
            Token::Delim('-') => DelimMinus,
            Token::Delim('?') => DelimQuestion,
            Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
            Token::Delim('%') => DelimPercent,
            Token::Delim('=') => DelimEquals,
            Token::Delim('|') => DelimBar,
            Token::Delim('/') => DelimSlash,
            Token::Delim('*') => DelimAsterisk,
            Token::Number(_) => Number,
            Token::Percentage(_) => Percentage,
            Token::Dimension(..) => Dimension,
            Token::UnicodeRange(..) => UnicodeRange,
            Token::WhiteSpace(_) => WhiteSpace,
            // NOTE(review): presumably grouped with '/' because a serialized comment
            // ends with '/' -- confirm against the tokenizer.
            Token::Comment(_) => DelimSlash,
            Token::DashMatch => DashMatch,
            Token::SubstringMatch => SubstringMatch,
            // NOTE(review): presumably `||` is grouped with '|' because it ends with '|'
            // -- confirm against the tokenizer.
            Token::Column => DelimBar,
            Token::CDC => CDC,
            Token::Function(_) => Function,
            Token::ParenthesisBlock => OpenParen,
            // Every remaining delimiter falls through to `Token::Delim(_)` below.
            Token::SquareBracketBlock | Token::CurlyBracketBlock |
            Token::CloseParenthesis | Token::CloseSquareBracket | Token::CloseCurlyBracket |
            Token::QuotedString(_) | Token::BadString |
            Token::Delim(_) | Token::Colon | Token::Semicolon | Token::Comma | Token::CDO |
            Token::IncludeMatch | Token::PrefixMatch | Token::SuffixMatch
            => Other,
        })
    }
}
32 changes: 27 additions & 5 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocat
AtRuleType, AtRuleParser, QualifiedRuleParser,
parse_one_declaration, parse_one_rule, parse_important,
decode_stylesheet_bytes,
TokenSerializationType,
Color, RGBA, parse_nth, ToCss};


Expand Down Expand Up @@ -323,10 +324,31 @@ fn nth() {


#[test]
fn serializer() {
fn serializer_not_preserving_comments() {
serializer(false)
}

#[test]
fn serializer_preserving_comments() {
serializer(true)
}

fn serializer(preserve_comments: bool) {
run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| {
fn write_to(input: &mut Parser, string: &mut String) {
while let Ok(token) = input.next_including_whitespace_and_comments() {
fn write_to(mut previous_token: TokenSerializationType,
input: &mut Parser,
string: &mut String,
preserve_comments: bool) {
while let Ok(token) = if preserve_comments {
input.next_including_whitespace_and_comments()
} else {
input.next_including_whitespace()
} {
let token_type = token.serialization_type();
if !preserve_comments && previous_token.needs_separator_when_before(token_type) {
string.push_str("/**/")
}
previous_token = token_type;
token.to_css(string).unwrap();
let closing_token = match token {
Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis),
Expand All @@ -336,15 +358,15 @@ fn serializer() {
};
if let Some(closing_token) = closing_token {
input.parse_nested_block(|input| {
write_to(input, string);
write_to(previous_token, input, string, preserve_comments);
Ok(())
}).unwrap();
closing_token.to_css(string).unwrap();
}
}
}
let mut serialized = String::new();
write_to(input, &mut serialized);
write_to(TokenSerializationType::nothing(), input, &mut serialized, preserve_comments);
let parser = &mut Parser::new(&serialized);
Json::Array(component_values_to_json(parser))
});
Expand Down