Skip to content

Commit 920c23a

Browse files
author
bors-servo
committed
Auto merge of #89 - servo:serialization-separators, r=mbrubeck
Add "token serialization types". Servo will use this for custom property values which are conceptually sequences of tokens, but are represented in memory as strings. When concatenating such strings, an empty comment `/**/` sometimes needs to be inserted so that two tokens are not reparsed as one. r? @pcwalton <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/rust-cssparser/89) <!-- Reviewable:end -->
2 parents 22cb1cb + c0d538e commit 920c23a

File tree

4 files changed

+153
-7
lines changed

4 files changed

+153
-7
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "cssparser"
4-
version = "0.3.8"
4+
version = "0.3.9"
55
authors = [ "Simon Sapin <[email protected]>" ]
66

77
description = "Rust implementation of CSS Syntax Level 3"

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser};
8484
pub use from_bytes::decode_stylesheet_bytes;
8585
pub use color::{RGBA, Color, parse_color_keyword};
8686
pub use nth::parse_nth;
87-
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string};
87+
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
8888
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};
8989

9090

src/serializer.rs

+124
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,127 @@ impl_tocss_for_number!(i32);
290290
impl_tocss_for_number!(u32);
291291
impl_tocss_for_number!(i64);
292292
impl_tocss_for_number!(u64);
293+
294+
295+
/// A category of token. See the `needs_separator_when_before` method.
///
/// Two categories are compared to decide whether an empty comment `/**/` has to be
/// written between two tokens that are serialized next to each other.
///
/// This is a public wrapper around the private `TokenSerializationTypeVariants` enum,
/// so the individual categories are not nameable by callers. Values are produced by
/// `TokenSerializationType::nothing()` and `Token::serialization_type()`.
296+
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
297+
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
298+
pub struct TokenSerializationType(TokenSerializationTypeVariants);
299+
300+
impl TokenSerializationType {
301+
/// Return a value that represents the absence of a token, e.g. before the start of the input.
///
/// A `nothing()` value never requires a separator, whichever side of the comparison
/// in `needs_separator_when_before` it is on.
302+
pub fn nothing() -> TokenSerializationType {
303+
TokenSerializationType(TokenSerializationTypeVariants::Nothing)
304+
}
305+
306+
/// If this value is `TokenSerializationType::nothing()`, set it to the given value instead.
///
/// Any other value is left unchanged, so only the first non-"nothing" assignment sticks.
307+
pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
308+
if self.0 == TokenSerializationTypeVariants::Nothing {
309+
self.0 = new_value.0
310+
}
311+
}
312+
313+
/// Return true if, when a token of category `self` is serialized just before
314+
/// a token of category `other` with no whitespace in between,
315+
/// an empty comment `/**/` needs to be inserted between them
316+
/// so that they are not re-parsed as a single token.
317+
///
318+
/// See https://drafts.csswg.org/css-syntax/#serialization
///
/// `self` is the category of the token written first and `other` the category of the
/// token written right after it. The relation is not symmetric: for example an
/// identifier before `(` needs a separator, while `(` before an identifier does not.
///
/// The result is always `false` when either side is the "nothing", whitespace or
/// "other" category, because none of them appears in any of the lists below.
319+
pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
320+
// Bring the variant names into scope so the table below can use them unqualified.
use self::TokenSerializationTypeVariants::*;
321+
// Each arm lists, for one or more preceding categories, every following category
// that requires a separator.
match self.0 {
322+
Ident => matches!(other.0,
323+
Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
324+
UnicodeRange | CDC | OpenParen),
325+
// Same list as for `Ident`, minus `OpenParen`.
AtKeywordOrHash | Dimension => matches!(other.0,
326+
Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
327+
UnicodeRange | CDC),
328+
// Same list again, minus `CDC`.
DelimHash | DelimMinus | Number => matches!(other.0,
329+
Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
330+
UnicodeRange),
331+
DelimAt => matches!(other.0,
332+
Ident | Function | UrlOrBadUrl | DelimMinus | UnicodeRange),
333+
UnicodeRange => matches!(other.0,
334+
Ident | Function | Number | Percentage | Dimension | DelimQuestion),
335+
DelimDotOrPlus => matches!(other.0, Number | Percentage | Dimension),
336+
DelimAssorted | DelimAsterisk => matches!(other.0, DelimEquals),
337+
DelimBar => matches!(other.0, DelimEquals | DelimBar | DashMatch),
338+
DelimSlash => matches!(other.0, DelimAsterisk | SubstringMatch),
339+
// None of these categories requires a separator after it, whatever follows.
// The arm is spelled out variant by variant (no `_` catch-all), so adding a new
// variant to the enum forces this table to be revisited.
Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen |
340+
DashMatch | SubstringMatch | DelimQuestion | DelimEquals | Other => false,
341+
}
342+
}
343+
}
344+
345+
/// The private set of categories wrapped by `TokenSerializationType`.
///
/// The trailing comment on each variant names the token(s) that
/// `Token::serialization_type` maps to it.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
346+
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
347+
enum TokenSerializationTypeVariants {
348+
Nothing, // absence of a token; see `TokenSerializationType::nothing()`
349+
WhiteSpace, // `Token::WhiteSpace`
350+
AtKeywordOrHash, // `Token::AtKeyword`, `Token::Hash`, `Token::IDHash`
351+
Number, // `Token::Number`
352+
Dimension, // `Token::Dimension`
353+
Percentage, // `Token::Percentage`
354+
UnicodeRange, // `Token::UnicodeRange`
355+
UrlOrBadUrl, // `Token::Url`, `Token::BadUrl`
356+
Function, // `Token::Function`
357+
Ident, // `Token::Ident`
358+
CDC, // `Token::CDC`
359+
DashMatch, // `Token::DashMatch`
360+
SubstringMatch, // `Token::SubstringMatch`
361+
OpenParen, // '(' (`Token::ParenthesisBlock`)
362+
DelimHash, // '#'
363+
DelimAt, // '@'
364+
DelimDotOrPlus, // '.', '+'
365+
DelimMinus, // '-'
366+
DelimQuestion, // '?'
367+
DelimAssorted, // '$', '^', '~'
368+
DelimEquals, // '='
369+
DelimBar, // '|'; also used for `Token::Column`
370+
DelimSlash, // '/'; also used for `Token::Comment`
371+
DelimAsterisk, // '*'
372+
Other, // anything else
373+
}
374+
375+
impl<'a> Token<'a> {
376+
/// Categorize a token into a type that determines when `/**/` needs to be inserted
377+
/// between two tokens when serialized next to each other without whitespace in between.
378+
///
379+
/// See the `TokenSerializationType::needs_separator_when_before` method.
///
/// Several token kinds share one category when they behave the same in that
/// method's table; everything that never takes part in it is mapped to `Other`.
380+
pub fn serialization_type(&self) -> TokenSerializationType {
381+
// Bring the category names into scope; `Token::…` paths stay qualified so the two
// enums' overlapping variant names (`Ident`, `Function`, …) are not confused.
use self::TokenSerializationTypeVariants::*;
382+
TokenSerializationType(match *self {
383+
Token::Ident(_) => Ident,
384+
Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
385+
Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
386+
// The specific delimiter arms must stay above the `Token::Delim(_)` catch-all
// further down, otherwise they would never be reached.
Token::Delim('#') => DelimHash,
387+
Token::Delim('@') => DelimAt,
388+
Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
389+
Token::Delim('-') => DelimMinus,
390+
Token::Delim('?') => DelimQuestion,
391+
Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
392+
Token::Delim('=') => DelimEquals,
393+
Token::Delim('|') => DelimBar,
394+
Token::Delim('/') => DelimSlash,
395+
Token::Delim('*') => DelimAsterisk,
396+
Token::Number(_) => Number,
397+
Token::Percentage(_) => Percentage,
398+
Token::Dimension(..) => Dimension,
399+
Token::UnicodeRange(..) => UnicodeRange,
400+
Token::WhiteSpace(_) => WhiteSpace,
401+
// NOTE(review): a comment is categorized like a '/' delimiter, presumably because
// its serialization ends with '/' — confirm against `ToCss for Token`.
Token::Comment(_) => DelimSlash,
402+
Token::DashMatch => DashMatch,
403+
Token::SubstringMatch => SubstringMatch,
404+
// NOTE(review): the column token is categorized like a '|' delimiter, presumably
// because it serializes as `||` and so ends with '|' — confirm.
Token::Column => DelimBar,
405+
Token::CDC => CDC,
406+
Token::Function(_) => Function,
407+
Token::ParenthesisBlock => OpenParen,
408+
// Everything below never needs a separator on either side.
// `Token::Delim(_)` here covers every delimiter not matched by an arm above.
Token::SquareBracketBlock | Token::CurlyBracketBlock |
409+
Token::CloseParenthesis | Token::CloseSquareBracket | Token::CloseCurlyBracket |
410+
Token::QuotedString(_) | Token::BadString |
411+
Token::Delim(_) | Token::Colon | Token::Semicolon | Token::Comma | Token::CDO |
412+
Token::IncludeMatch | Token::PrefixMatch | Token::SuffixMatch
413+
=> Other,
414+
})
415+
}
416+
}

src/tests.rs

+27-5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocat
1818
AtRuleType, AtRuleParser, QualifiedRuleParser,
1919
parse_one_declaration, parse_one_rule, parse_important,
2020
decode_stylesheet_bytes,
21+
TokenSerializationType,
2122
Color, RGBA, parse_nth, ToCss};
2223

2324

@@ -323,10 +324,31 @@ fn nth() {
323324

324325

325326
#[test]
326-
fn serializer() {
327+
// Runs the shared `serializer` helper with `preserve_comments = false`: comment tokens are
// skipped while reading, and `/**/` is written wherever `needs_separator_when_before` asks for it.
fn serializer_not_preserving_comments() {
328+
serializer(false)
329+
}
330+
331+
// Runs the shared `serializer` helper with `preserve_comments = true`: comment tokens are
// read and written back as-is, and no extra `/**/` separators are inserted.
#[test]
332+
fn serializer_preserving_comments() {
333+
serializer(true)
334+
}
335+
336+
fn serializer(preserve_comments: bool) {
327337
run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| {
328-
fn write_to(input: &mut Parser, string: &mut String) {
329-
while let Ok(token) = input.next_including_whitespace_and_comments() {
338+
fn write_to(mut previous_token: TokenSerializationType,
339+
input: &mut Parser,
340+
string: &mut String,
341+
preserve_comments: bool) {
342+
while let Ok(token) = if preserve_comments {
343+
input.next_including_whitespace_and_comments()
344+
} else {
345+
input.next_including_whitespace()
346+
} {
347+
let token_type = token.serialization_type();
348+
if !preserve_comments && previous_token.needs_separator_when_before(token_type) {
349+
string.push_str("/**/")
350+
}
351+
previous_token = token_type;
330352
token.to_css(string).unwrap();
331353
let closing_token = match token {
332354
Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis),
@@ -336,15 +358,15 @@ fn serializer() {
336358
};
337359
if let Some(closing_token) = closing_token {
338360
input.parse_nested_block(|input| {
339-
write_to(input, string);
361+
write_to(previous_token, input, string, preserve_comments);
340362
Ok(())
341363
}).unwrap();
342364
closing_token.to_css(string).unwrap();
343365
}
344366
}
345367
}
346368
let mut serialized = String::new();
347-
write_to(input, &mut serialized);
369+
write_to(TokenSerializationType::nothing(), input, &mut serialized, preserve_comments);
348370
let parser = &mut Parser::new(&serialized);
349371
Json::Array(component_values_to_json(parser))
350372
});

0 commit comments

Comments
 (0)