Skip to content

Commit ec47650

Browse files
author
bors-servo
committed
Auto merge of #90 - servo:url-functions, r=mbrubeck
Treat url(<string>) as a normal function, per spec change. Only unquoted URLs are special tokens now. Use `Parser::expect_url`. This is a [breaking-change]. The version number was incremented accordingly. This change will help with servo/servo#7767. This triggers rust-lang/rust#28934 and fails to build in the current Rust nightly, but works fine in the Rust version that Servo currently uses. Hopefully that rustc bug will be fixed before we need to upgrade Rust in Servo. r? @mbrubeck <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/rust-cssparser/90) <!-- Reviewable:end -->
2 parents 920c23a + 3e2e0b5 commit ec47650

File tree

5 files changed

+98
-64
lines changed

5 files changed

+98
-64
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "cssparser"
4-
version = "0.3.9"
4+
version = "0.4.0"
55
authors = [ "Simon Sapin <[email protected]>" ]
66

77
description = "Rust implementation of CSS Syntax Level 3"

src/parser.rs

+13-7
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ impl<'i, 't> Parser<'i, 't> {
331331
/// This can help tell e.g. `color: green;` from `color: green 4px;`
332332
#[inline]
333333
pub fn parse_entirely<F, T>(&mut self, parse: F) -> Result<T, ()>
334-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
334+
where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ()> {
335335
let result = parse(self);
336336
try!(self.expect_exhausted());
337337
result
@@ -374,7 +374,7 @@ impl<'i, 't> Parser<'i, 't> {
374374
/// The result is overridden to `Err(())` if the closure leaves some input before that point.
375375
#[inline]
376376
pub fn parse_nested_block<F, T>(&mut self, parse: F) -> Result <T, ()>
377-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
377+
where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
378378
let block_type = self.at_start_of.take().expect("\
379379
A nested parser can only be created when a Function, \
380380
ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \
@@ -412,7 +412,7 @@ impl<'i, 't> Parser<'i, 't> {
412412
#[inline]
413413
pub fn parse_until_before<F, T>(&mut self, delimiters: Delimiters, parse: F)
414414
-> Result <T, ()>
415-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
415+
where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
416416
let delimiters = self.stop_before | delimiters;
417417
let result;
418418
// Introduce a new scope to limit duration of nested_parser’s borrow
@@ -451,7 +451,7 @@ impl<'i, 't> Parser<'i, 't> {
451451
#[inline]
452452
pub fn parse_until_after<F, T>(&mut self, delimiters: Delimiters, parse: F)
453453
-> Result <T, ()>
454-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
454+
where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
455455
let result = self.parse_until_before(delimiters, parse);
456456
let next_byte = self.tokenizer.next_byte();
457457
if next_byte.is_some() && !self.stop_before.contains(Delimiters::from_byte(next_byte)) {
@@ -481,7 +481,7 @@ impl<'i, 't> Parser<'i, 't> {
481481

482482
/// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value.
483483
#[inline]
484-
pub fn expect_ident_matching<'a>(&mut self, expected_value: &str) -> Result<(), ()> {
484+
pub fn expect_ident_matching(&mut self, expected_value: &str) -> Result<(), ()> {
485485
match try!(self.next()) {
486486
Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
487487
_ => Err(())
@@ -511,7 +511,10 @@ impl<'i, 't> Parser<'i, 't> {
511511
#[inline]
512512
pub fn expect_url(&mut self) -> Result<Cow<'i, str>, ()> {
513513
match try!(self.next()) {
514-
Token::Url(value) => Ok(value),
514+
Token::UnquotedUrl(value) => Ok(value),
515+
Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
516+
self.parse_nested_block(|input| input.expect_string())
517+
},
515518
_ => Err(())
516519
}
517520
}
@@ -520,8 +523,11 @@ impl<'i, 't> Parser<'i, 't> {
520523
#[inline]
521524
pub fn expect_url_or_string(&mut self) -> Result<Cow<'i, str>, ()> {
522525
match try!(self.next()) {
523-
Token::Url(value) => Ok(value),
526+
Token::UnquotedUrl(value) => Ok(value),
524527
Token::QuotedString(value) => Ok(value),
528+
Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
529+
self.parse_nested_block(|input| input.expect_string())
530+
},
525531
_ => Err(())
526532
}
527533
}

src/serializer.rs

+23-3
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ impl<'a> ToCss for Token<'a> {
8181
try!(serialize_identifier(&**value, dest));
8282
}
8383
Token::QuotedString(ref value) => try!(serialize_string(&**value, dest)),
84-
Token::Url(ref value) => {
84+
Token::UnquotedUrl(ref value) => {
8585
try!(dest.write_str("url("));
86-
try!(serialize_string(&**value, dest));
86+
try!(serialize_unquoted_url(&**value, dest));
8787
try!(dest.write_str(")"));
8888
},
8989
Token::Delim(value) => try!(write!(dest, "{}", value)),
@@ -213,6 +213,26 @@ fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Writ
213213
}
214214

215215

216+
fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Write {
217+
let mut chunk_start = 0;
218+
for (i, b) in value.bytes().enumerate() {
219+
let hex = match b {
220+
b'\0' ... b' ' | b'\x7F' => true,
221+
b'(' | b')' | b'"' | b'\'' | b'\\' => false,
222+
_ => continue
223+
};
224+
try!(dest.write_str(&value[chunk_start..i]));
225+
if hex {
226+
try!(write!(dest, "\\{:X} ", b));
227+
} else {
228+
try!(write!(dest, "\\{}", b as char));
229+
}
230+
chunk_start = i + 1;
231+
}
232+
dest.write_str(&value[chunk_start..])
233+
}
234+
235+
216236
/// Write a double-quoted CSS string token, escaping content as necessary.
217237
pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result where W: fmt::Write {
218238
try!(dest.write_str("\""));
@@ -382,7 +402,7 @@ impl<'a> Token<'a> {
382402
TokenSerializationType(match *self {
383403
Token::Ident(_) => Ident,
384404
Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
385-
Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
405+
Token::UnquotedUrl(_) | Token::BadUrl => UrlOrBadUrl,
386406
Token::Delim('#') => DelimHash,
387407
Token::Delim('@') => DelimAt,
388408
Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,

src/tests.rs

+40-31
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
* License, v. 2.0. If a copy of the MPL was not distributed with this
33
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
44

5-
use std::borrow::Cow::Borrowed;
5+
use std::borrow::Cow::{self, Borrowed};
66
use std::fs::File;
77
use std::io::{self, Write};
88
use std::path::Path;
99
use std::process::Command;
10-
use std::mem;
1110
use rustc_serialize::json::{self, Json, ToJson};
1211
use tempdir::TempDir;
1312

@@ -74,14 +73,8 @@ fn almost_equals(a: &Json, b: &Json) -> bool {
7473
fn normalize(json: &mut Json) {
7574
match *json {
7675
Json::Array(ref mut list) => {
77-
match find_url(list) {
78-
Some(Ok(url)) => *list = vec!["url".to_json(), Json::String(url)],
79-
Some(Err(())) => *list = vec!["error".to_json(), "bad-url".to_json()],
80-
None => {
81-
for item in list.iter_mut() {
82-
normalize(item)
83-
}
84-
}
76+
for item in list.iter_mut() {
77+
normalize(item)
8578
}
8679
}
8780
Json::String(ref mut s) => {
@@ -93,26 +86,6 @@ fn normalize(json: &mut Json) {
9386
}
9487
}
9588

96-
fn find_url(list: &mut [Json]) -> Option<Result<String, ()>> {
97-
if list.len() < 2 ||
98-
list[0].as_string() != Some("function") ||
99-
list[1].as_string() != Some("url") {
100-
return None
101-
}
102-
103-
let mut args = list[2..].iter_mut().filter(|a| a.as_string() != Some(" "));
104-
if let (Some(&mut Json::Array(ref mut arg)), None) = (args.next(), args.next()) {
105-
if arg.len() == 2 && arg[0].as_string() == Some("string") {
106-
if let &mut Json::String(ref mut value) = &mut arg[1] {
107-
return Some(Ok(mem::replace(value, String::new())))
108-
}
109-
}
110-
}
111-
112-
Some(Err(()))
113-
}
114-
115-
11689
fn assert_json_eq(results: json::Json, mut expected: json::Json, message: String) {
11790
normalize(&mut expected);
11891
if !almost_equals(&results, &expected) {
@@ -281,6 +254,42 @@ fn outer_block_end_consumed() {
281254
assert_eq!(input.next(), Err(()));
282255
}
283256

257+
#[test]
258+
fn unquoted_url_escaping() {
259+
let token = Token::UnquotedUrl("\
260+
\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\
261+
\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \
262+
!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\
263+
^_`abcdefghijklmnopqrstuvwxyz{|}~\x7fé\
264+
".into());
265+
let serialized = token.to_css_string();
266+
assert_eq!(serialized, "\
267+
url(\
268+
\\1 \\2 \\3 \\4 \\5 \\6 \\7 \\8 \\9 \\A \\B \\C \\D \\E \\F \\10 \
269+
\\11 \\12 \\13 \\14 \\15 \\16 \\17 \\18 \\19 \\1A \\1B \\1C \\1D \\1E \\1F \\20 \
270+
!\\\"#$%&\\'\\(\\)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]\
271+
^_`abcdefghijklmnopqrstuvwxyz{|}~\\7F é\
272+
)\
273+
");
274+
assert_eq!(Parser::new(&serialized).next(), Ok(token))
275+
}
276+
277+
#[test]
278+
fn test_expect_url() {
279+
fn parse(s: &str) -> Result<Cow<str>, ()> {
280+
Parser::new(s).expect_url()
281+
}
282+
assert_eq!(parse("url()").unwrap(), "");
283+
assert_eq!(parse("url( ").unwrap(), "");
284+
assert_eq!(parse("url( abc").unwrap(), "abc");
285+
assert_eq!(parse("url( abc \t)").unwrap(), "abc");
286+
assert_eq!(parse("url( 'abc' \t)").unwrap(), "abc");
287+
assert_eq!(parse("url(abc more stuff)"), Err(()));
288+
// The grammar at https://drafts.csswg.org/css-values/#urls plans for `<url-modifier>*`
289+
// at the position of "more stuff", but no such modifier is defined yet.
290+
assert_eq!(parse("url('abc' more stuff)"), Err(()));
291+
}
292+
284293

285294
fn run_color_tests<F: Fn(Result<Color, ()>) -> Json>(json_data: &str, to_json: F) {
286295
run_json_tests(json_data, |input| {
@@ -606,7 +615,7 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json {
606615
Token::Hash(value) => JArray!["hash", value, "unrestricted"],
607616
Token::IDHash(value) => JArray!["hash", value, "id"],
608617
Token::QuotedString(value) => JArray!["string", value],
609-
Token::Url(value) => JArray!["url", value],
618+
Token::UnquotedUrl(value) => JArray!["url", value],
610619
Token::Delim('\\') => "\\".to_json(),
611620
Token::Delim(value) => value.to_string().to_json(),
612621

src/tokenizer.rs

+21-22
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ pub enum Token<'a> {
4848
/// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or `url( <string-token> )` function
4949
///
5050
/// The value does not include the `url(` `)` markers or the quotes.
51-
Url(Cow<'a, str>),
51+
UnquotedUrl(Cow<'a, str>),
5252

5353
/// A `<delim-token>`
5454
Delim(char),
@@ -628,7 +628,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
628628
if !tokenizer.is_eof() && tokenizer.next_char() == '(' {
629629
tokenizer.advance(1);
630630
if value.eq_ignore_ascii_case("url") {
631-
consume_url(tokenizer)
631+
consume_unquoted_url(tokenizer).unwrap_or(Function(value))
632632
} else {
633633
if tokenizer.var_functions == VarFunctions::LookingForThem &&
634634
value.eq_ignore_ascii_case("var") {
@@ -791,31 +791,30 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
791791
}
792792

793793

794-
fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
795-
while !tokenizer.is_eof() {
796-
match tokenizer.next_char() {
797-
' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1),
798-
'"' => return consume_quoted_url(tokenizer, false),
799-
'\'' => return consume_quoted_url(tokenizer, true),
800-
')' => { tokenizer.advance(1); break },
801-
_ => return consume_unquoted_url(tokenizer),
802-
}
803-
}
804-
return Url(Borrowed(""));
805-
806-
fn consume_quoted_url<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
807-
match consume_quoted_string(tokenizer, single_quote) {
808-
Ok(value) => consume_url_end(tokenizer, value),
809-
Err(()) => consume_bad_url(tokenizer),
794+
fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
795+
for (offset, c) in tokenizer.input[tokenizer.position..].char_indices() {
796+
match c {
797+
' ' | '\t' | '\n' | '\r' | '\x0C' => {},
798+
'"' | '\'' => return Err(()), // Do not advance
799+
')' => {
800+
tokenizer.advance(offset + 1);
801+
return Ok(UnquotedUrl(Borrowed("")));
802+
}
803+
_ => {
804+
tokenizer.advance(offset);
805+
return Ok(consume_unquoted_url(tokenizer))
806+
}
810807
}
811808
}
809+
tokenizer.position = tokenizer.input.len();
810+
return Ok(UnquotedUrl(Borrowed("")));
812811

813812
fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
814813
let start_pos = tokenizer.position();
815814
let mut string;
816815
loop {
817816
if tokenizer.is_eof() {
818-
return Url(Borrowed(tokenizer.slice_from(start_pos)))
817+
return UnquotedUrl(Borrowed(tokenizer.slice_from(start_pos)))
819818
}
820819
match tokenizer.next_char() {
821820
' ' | '\t' | '\n' | '\r' | '\x0C' => {
@@ -826,7 +825,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
826825
')' => {
827826
let value = tokenizer.slice_from(start_pos);
828827
tokenizer.advance(1);
829-
return Url(Borrowed(value))
828+
return UnquotedUrl(Borrowed(value))
830829
}
831830
'\x01'...'\x08' | '\x0B' | '\x0E'...'\x1F' | '\x7F' // non-printable
832831
| '"' | '\'' | '(' => {
@@ -861,7 +860,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
861860
};
862861
string.push(next_char)
863862
}
864-
Url(Owned(string))
863+
UnquotedUrl(Owned(string))
865864
}
866865

867866
fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: Cow<'a, str>) -> Token<'a> {
@@ -872,7 +871,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
872871
_ => return consume_bad_url(tokenizer)
873872
}
874873
}
875-
Url(string)
874+
UnquotedUrl(string)
876875
}
877876

878877
fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {

0 commit comments

Comments
 (0)