Skip to content

Commit b9458fe

Browse files
committed
Add "token serialization types".
Servo will use this for custom property values which are conceptually sequences of tokens, but are represented in memory as strings. When concatenating such strings, an empty comment `/**/` sometimes needs to be inserted so that two tokens are not reparsed as one.
1 parent 22cb1cb commit b9458fe

File tree

4 files changed

+164
-3
lines changed

4 files changed

+164
-3
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "cssparser"
4-
version = "0.3.8"
4+
version = "0.3.9"
55
authors = [ "Simon Sapin <[email protected]>" ]
66

77
description = "Rust implementation of CSS Syntax Level 3"

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser};
8484
pub use from_bytes::decode_stylesheet_bytes;
8585
pub use color::{RGBA, Color, parse_color_keyword};
8686
pub use nth::parse_nth;
87-
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string};
87+
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
8888
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};
8989

9090

src/serializer.rs

+124
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,127 @@ impl_tocss_for_number!(i32);
290290
impl_tocss_for_number!(u32);
291291
impl_tocss_for_number!(i64);
292292
impl_tocss_for_number!(u64);
293+
294+
295+
/// A category of token. See the `needs_separator_when_before` method.
///
/// This is an opaque wrapper: the concrete categories live in the private
/// `TokenSerializationTypeVariants` enum, so callers can only obtain values
/// through `nothing()` or `Token::serialization_type()`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
pub struct TokenSerializationType(TokenSerializationTypeVariants);

impl TokenSerializationType {
    /// Return a value that represents the absence of a token, e.g. before the start of the input.
    pub fn nothing() -> TokenSerializationType {
        TokenSerializationType(TokenSerializationTypeVariants::Nothing)
    }

    /// If this value is `TokenSerializationType::nothing()`, set it to the given value instead.
    ///
    /// Any other current value is left untouched.
    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
        if self.0 == TokenSerializationTypeVariants::Nothing {
            self.0 = new_value.0
        }
    }

    /// Return true if, when a token of category `self` is serialized just before
    /// a token of category `other` with no whitespace in between,
    /// an empty comment `/**/` needs to be inserted between them
    /// so that they are not re-parsed as a single token.
    ///
    /// See https://drafts.csswg.org/css-syntax/#serialization
    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
        use self::TokenSerializationTypeVariants::*;
        // This match deliberately has no `_` arm: every variant of
        // `TokenSerializationTypeVariants` must be listed so that adding a new
        // category is a compile error until its rule is decided here.
        match self.0 {
            Ident => matches!(other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
                UnicodeRange | CDC | OpenParen),
            AtKeywordOrHash | Dimension => matches!(other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
                UnicodeRange | CDC),
            // `Number` belongs here: e.g. a number directly followed by an
            // identifier would otherwise be re-parsed as one dimension token.
            DelimHash | DelimMinus | Number => matches!(other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
                UnicodeRange),
            DelimAt => matches!(other.0,
                Ident | Function | UrlOrBadUrl | DelimMinus | UnicodeRange),
            UnicodeRange => matches!(other.0,
                Ident | Function | Number | Percentage | Dimension | DelimQuestion),
            DelimDotOrPlus => matches!(other.0, Number | Percentage | Dimension),
            DelimAssorted | DelimAsterisk => matches!(other.0, DelimEquals),
            DelimBar => matches!(other.0, DelimEquals | DelimBar | DashMatch),
            DelimSlash => matches!(other.0, DelimAsterisk | SubstringMatch),
            // Categories after which no following token ever needs a separator.
            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen |
            DashMatch | SubstringMatch | DelimQuestion | DelimEquals | Other => false,
        }
    }
}

/// The concrete token categories behind `TokenSerializationType`.
///
/// Kept private so that the set of categories can change without affecting
/// the public API.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
enum TokenSerializationTypeVariants {
    Nothing,
    WhiteSpace,
    AtKeywordOrHash,
    Number,
    Dimension,
    Percentage,
    UnicodeRange,
    UrlOrBadUrl,
    Function,
    Ident,
    CDC,
    DashMatch,
    SubstringMatch,
    OpenParen,  // '('
    DelimHash,  // '#'
    DelimAt,  // '@'
    DelimDotOrPlus,  // '.', '+'
    DelimMinus,  // '-'
    DelimQuestion,  // '?'
    DelimAssorted,  // '$', '^', '~'
    DelimEquals,  // '='
    DelimBar,  // '|'
    DelimSlash,  // '/'
    DelimAsterisk,  // '*'
    Other,  // anything else
}
374+
375+
impl<'a> Token<'a> {
376+
/// Categorize a token into a type that determines when `/**/` needs to be inserted
377+
/// between two tokens when serialized next to each other without whitespace in between.
378+
///
379+
/// See the `TokenSerializationType::needs_separator_when_before` method.
380+
pub fn serialization_type(&self) -> TokenSerializationType {
381+
use self::TokenSerializationTypeVariants::*;
382+
TokenSerializationType(match *self {
383+
Token::Ident(_) => Ident,
384+
Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
385+
Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
386+
Token::Delim('#') => DelimHash,
387+
Token::Delim('@') => DelimAt,
388+
Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
389+
Token::Delim('-') => DelimMinus,
390+
Token::Delim('?') => DelimQuestion,
391+
Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
392+
Token::Delim('=') => DelimEquals,
393+
Token::Delim('|') => DelimBar,
394+
Token::Delim('/') => DelimSlash,
395+
Token::Delim('*') => DelimAsterisk,
396+
Token::Number(_) => Number,
397+
Token::Percentage(_) => Percentage,
398+
Token::Dimension(..) => Dimension,
399+
Token::UnicodeRange(..) => UnicodeRange,
400+
Token::WhiteSpace(_) => WhiteSpace,
401+
Token::Comment(_) => DelimSlash,
402+
Token::DashMatch => DashMatch,
403+
Token::SubstringMatch => SubstringMatch,
404+
Token::Column => DelimBar,
405+
Token::CDC => CDC,
406+
Token::Function(_) => Function,
407+
Token::ParenthesisBlock => OpenParen,
408+
Token::SquareBracketBlock | Token::CurlyBracketBlock |
409+
Token::CloseParenthesis | Token::CloseSquareBracket | Token::CloseCurlyBracket |
410+
Token::QuotedString(_) | Token::BadString |
411+
Token::Delim(_) | Token::Colon | Token::Semicolon | Token::Comma | Token::CDO |
412+
Token::IncludeMatch | Token::PrefixMatch | Token::SuffixMatch
413+
=> Other,
414+
})
415+
}
416+
}

src/tests.rs

+38-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocat
1818
AtRuleType, AtRuleParser, QualifiedRuleParser,
1919
parse_one_declaration, parse_one_rule, parse_important,
2020
decode_stylesheet_bytes,
21+
TokenSerializationType,
2122
Color, RGBA, parse_nth, ToCss};
2223

2324

@@ -323,7 +324,43 @@ fn nth() {
323324

324325

325326
#[test]
326-
fn serializer() {
327+
fn serializer_not_preserving_comments() {
328+
run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| {
329+
fn write_to(mut previous_token: TokenSerializationType,
330+
input: &mut Parser,
331+
string: &mut String) {
332+
while let Ok(token) = input.next_including_whitespace() {
333+
let token_type = token.serialization_type();
334+
if previous_token.needs_separator_when_before(token_type) {
335+
string.push_str("/**/")
336+
}
337+
previous_token = token_type;
338+
token.to_css(string).unwrap();
339+
let closing_token = match token {
340+
Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis),
341+
Token::SquareBracketBlock => Some(Token::CloseSquareBracket),
342+
Token::CurlyBracketBlock => Some(Token::CloseCurlyBracket),
343+
_ => None
344+
};
345+
if let Some(closing_token) = closing_token {
346+
input.parse_nested_block(|input| {
347+
write_to(previous_token, input, string);
348+
Ok(())
349+
}).unwrap();
350+
closing_token.to_css(string).unwrap();
351+
}
352+
}
353+
}
354+
let mut serialized = String::new();
355+
write_to(TokenSerializationType::nothing(), input, &mut serialized);
356+
let parser = &mut Parser::new(&serialized);
357+
Json::Array(component_values_to_json(parser))
358+
});
359+
}
360+
361+
362+
#[test]
363+
fn serializer_preserving_comments() {
327364
run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| {
328365
fn write_to(input: &mut Parser, string: &mut String) {
329366
while let Ok(token) = input.next_including_whitespace_and_comments() {

0 commit comments

Comments
 (0)