diff --git a/Cargo.toml b/Cargo.toml index e213cb4a..2d7a5eff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "html5ever" -version = "0.2.11" +version = "0.3.0" authors = [ "The html5ever Project Developers" ] license = "MIT / Apache-2.0" repository = "https://github.com/servo/html5ever" @@ -26,9 +26,10 @@ log = "0" phf = "0.7" string_cache = "0.2.0" mac = "0" -tendril = "0.1.6" +tendril = "0.2" heapsize = { version = "0.1.1", optional = true } heapsize_plugin = { version = "0.1.0", optional = true } +hyper = {version = "0.7", optional = true} [dev-dependencies] rustc-serialize = "0.3.15" diff --git a/build.rs b/build.rs index 008de90b..b0146389 100644 --- a/build.rs +++ b/build.rs @@ -29,7 +29,9 @@ fn main() { named_entities_to_phf( &Path::new(&manifest_dir).join("data/entities.json"), - &Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs")) + &Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs")); + + println!("cargo:rerun-if-changed={}", rules_rs.display()); } #[cfg(feature = "codegen")] diff --git a/capi/Cargo.toml b/capi/Cargo.toml index 6acabe59..a1d46804 100644 --- a/capi/Cargo.toml +++ b/capi/Cargo.toml @@ -11,7 +11,6 @@ crate-type = ["staticlib"] [dependencies] libc = "0.2" string_cache = "0.2" -tendril = "0.1.6" [dependencies.html5ever] path = "../" diff --git a/capi/src/lib.rs b/capi/src/lib.rs index 8ce1dc49..e30f149b 100644 --- a/capi/src/lib.rs +++ b/capi/src/lib.rs @@ -9,7 +9,6 @@ extern crate libc; #[macro_use] extern crate string_cache; -extern crate tendril; extern crate html5ever; use libc::c_int; diff --git a/capi/src/tokenizer.rs b/capi/src/tokenizer.rs index 6ee8df0b..bf8d27b2 100644 --- a/capi/src/tokenizer.rs +++ b/capi/src/tokenizer.rs @@ -11,6 +11,7 @@ use c_bool; +use html5ever::tendril::{StrTendril, SliceExt}; use html5ever::tokenizer::{TokenSink, Token, Doctype, Tag, ParseError, DoctypeToken}; use html5ever::tokenizer::{CommentToken, CharacterTokens, 
NullCharacterToken}; use html5ever::tokenizer::{TagToken, StartTag, EndTag, EOFToken, Tokenizer}; @@ -20,7 +21,6 @@ use std::default::Default; use libc::{c_void, c_int, size_t}; use string_cache::Atom; -use tendril::{StrTendril, SliceExt}; #[repr(C)] #[derive(Copy, Clone)] diff --git a/examples/html2html.rs b/examples/html2html.rs index 26cbeefe..d5c73e0a 100644 --- a/examples/html2html.rs +++ b/examples/html2html.rs @@ -21,24 +21,26 @@ extern crate html5ever; use std::io::{self, Write}; use std::default::Default; -use tendril::{ByteTendril, ReadExt}; +use tendril::TendrilSink; use html5ever::driver::ParseOpts; use html5ever::tree_builder::TreeBuilderOpts; -use html5ever::{parse, one_input, serialize}; +use html5ever::{parse_document, serialize}; use html5ever::rcdom::RcDom; fn main() { - let mut input = ByteTendril::new(); - io::stdin().read_to_tendril(&mut input).unwrap(); - let input = input.try_reinterpret().unwrap(); - let dom: RcDom = parse(one_input(input), ParseOpts { + let opts = ParseOpts { tree_builder: TreeBuilderOpts { drop_doctype: true, ..Default::default() }, ..Default::default() - }); + }; + let stdin = io::stdin(); + let dom = parse_document(RcDom::default(), opts) + .from_utf8() + .read_from(&mut stdin.lock()) + .unwrap(); // The validator.nu HTML2HTML always prints a doctype at the very beginning. 
io::stdout().write_all(b"\n") diff --git a/examples/noop-tokenize.rs b/examples/noop-tokenize.rs index 4a0599d2..1310d54c 100644 --- a/examples/noop-tokenize.rs +++ b/examples/noop-tokenize.rs @@ -17,8 +17,7 @@ use std::default::Default; use tendril::{ByteTendril, ReadExt}; -use html5ever::tokenizer::{TokenSink, Token}; -use html5ever::driver::{tokenize_to, one_input}; +use html5ever::tokenizer::{TokenSink, Token, Tokenizer}; struct Sink(Vec); @@ -35,5 +34,7 @@ fn main() { io::stdin().read_to_tendril(&mut input).unwrap(); let input = input.try_reinterpret().unwrap(); - tokenize_to(Sink(Vec::new()), one_input(input), Default::default()); + let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default()); + tok.feed(input); + tok.end(); } diff --git a/examples/noop-tree-builder.rs b/examples/noop-tree-builder.rs index 63fd40fd..ba7c9169 100644 --- a/examples/noop-tree-builder.rs +++ b/examples/noop-tree-builder.rs @@ -18,9 +18,9 @@ use std::collections::HashMap; use std::borrow::Cow; use string_cache::QualName; -use tendril::{StrTendril, ByteTendril, ReadExt}; +use tendril::{StrTendril, TendrilSink}; -use html5ever::{parse_to, one_input}; +use html5ever::parse_document; use html5ever::tokenizer::Attribute; use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText}; @@ -39,6 +39,8 @@ impl Sink { impl TreeSink for Sink { type Handle = usize; + type Output = Self; + fn finish(self) -> Self { self } fn get_document(&mut self) -> usize { 0 @@ -96,9 +98,9 @@ fn main() { next_id: 1, names: HashMap::new(), }; - - let mut input = ByteTendril::new(); - io::stdin().read_to_tendril(&mut input).unwrap(); - let input = input.try_reinterpret().unwrap(); - parse_to(sink, one_input(input), Default::default()); + let stdin = io::stdin(); + parse_document(sink, Default::default()) + .from_utf8() + .read_from(&mut stdin.lock()) + .unwrap(); } diff --git a/examples/print-rcdom.rs b/examples/print-rcdom.rs index edbce485..016a9595 100644 --- a/examples/print-rcdom.rs +++ 
b/examples/print-rcdom.rs @@ -18,8 +18,8 @@ use std::iter::repeat; use std::default::Default; use std::string::String; -use tendril::{ByteTendril, ReadExt}; -use html5ever::{parse, one_input}; +use tendril::TendrilSink; +use html5ever::parse_document; use html5ever::rcdom::{Document, Doctype, Text, Comment, Element, RcDom, Handle}; // This is not proper HTML serialization, of course. @@ -63,10 +63,11 @@ pub fn escape_default(s: &str) -> String { } fn main() { - let mut input = ByteTendril::new(); - io::stdin().read_to_tendril(&mut input).unwrap(); - let input = input.try_reinterpret().unwrap(); - let dom: RcDom = parse(one_input(input), Default::default()); + let stdin = io::stdin(); + let dom = parse_document(RcDom::default(), Default::default()) + .from_utf8() + .read_from(&mut stdin.lock()) + .unwrap(); walk(0, dom.document); if !dom.errors.is_empty() { diff --git a/examples/print-tree-actions.rs b/examples/print-tree-actions.rs index df7f90f9..67020039 100644 --- a/examples/print-tree-actions.rs +++ b/examples/print-tree-actions.rs @@ -18,11 +18,11 @@ use std::collections::HashMap; use std::borrow::Cow; use string_cache::QualName; -use tendril::{ByteTendril, StrTendril, ReadExt}; +use tendril::{StrTendril, TendrilSink}; -use html5ever::{parse_to, one_input}; use html5ever::tokenizer::Attribute; use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText}; +use html5ever::parse_document; struct Sink { next_id: usize, @@ -39,6 +39,8 @@ impl Sink { impl TreeSink for Sink { type Handle = usize; + type Output = Self; + fn finish(self) -> Self { self } fn parse_error(&mut self, msg: Cow<'static, str>) { println!("Parse error: {}", msg); @@ -143,9 +145,9 @@ fn main() { next_id: 1, names: HashMap::new(), }; - - let mut input = ByteTendril::new(); - io::stdin().read_to_tendril(&mut input).unwrap(); - let input = input.try_reinterpret().unwrap(); - parse_to(sink, one_input(input), Default::default()); + let stdin = io::stdin(); + 
parse_document(sink, Default::default()) + .from_utf8() + .read_from(&mut stdin.lock()) + .unwrap(); } diff --git a/examples/tokenize.rs b/examples/tokenize.rs index 42f7c399..08dd1ae6 100644 --- a/examples/tokenize.rs +++ b/examples/tokenize.rs @@ -15,9 +15,8 @@ use std::default::Default; use tendril::{ByteTendril, ReadExt}; -use html5ever::tokenizer::{TokenSink, Token, TokenizerOpts, ParseError}; +use html5ever::tokenizer::{TokenSink, Tokenizer, Token, TokenizerOpts, ParseError}; use html5ever::tokenizer::{CharacterTokens, NullCharacterToken, TagToken, StartTag, EndTag}; -use html5ever::driver::{tokenize_to, one_input}; #[derive(Copy, Clone)] struct TokenPrinter { @@ -84,9 +83,12 @@ fn main() { let mut input = ByteTendril::new(); io::stdin().read_to_tendril(&mut input).unwrap(); let input = input.try_reinterpret().unwrap(); - tokenize_to(sink, one_input(input), TokenizerOpts { + + let mut tok = Tokenizer::new(sink, TokenizerOpts { profile: true, .. Default::default() }); + tok.feed(input); + tok.end(); sink.is_char(false); } diff --git a/scripts/travis-build.sh b/scripts/travis-build.sh index f3169ff0..e13a56c7 100755 --- a/scripts/travis-build.sh +++ b/scripts/travis-build.sh @@ -10,6 +10,7 @@ set -ex +cargo build --features hyper # Test without unstable first, to make sure src/tree_builder/rules.expanded.rs is up-to-date. cargo test --no-run cargo test | ./scripts/shrink-test-output.py diff --git a/src/driver.rs b/src/driver.rs index efc6863a..50a36ff6 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -9,39 +9,18 @@ //! High-level interface to the parser. 
-use tokenizer::{Attribute, TokenSink, Tokenizer, TokenizerOpts}; +use tokenizer::{Attribute, Tokenizer, TokenizerOpts}; use tree_builder::{TreeBuilderOpts, TreeBuilder, TreeSink}; -use std::option; -use std::default::Default; +use std::borrow::Cow; +use std::mem; +use encoding::{self, EncodingRef}; +#[cfg(feature = "hyper")] use hyper::client::IntoUrl; use string_cache::QualName; -use tendril::StrTendril; - -/// Convenience function to turn a single value into an iterator. -pub fn one_input(x: T) -> option::IntoIter { - Some(x).into_iter() -} - -/// Tokenize and send results to a `TokenSink`. -/// -/// ## Example -/// -/// ```ignore -/// let sink = MySink; -/// tokenize_to(sink, one_input(my_str), Default::default()); -/// ``` -pub fn tokenize_to(sink: Sink, input: It, opts: TokenizerOpts) -> Sink - where Sink: TokenSink, - It: Iterator, -{ - let mut tok = Tokenizer::new(sink, opts); - for s in input { - tok.feed(s); - } - tok.end(); - tok.unwrap() -} +use tendril; +use tendril::{StrTendril, ByteTendril}; +use tendril::stream::{TendrilSink, Utf8LossyDecoder, LossyDecoder}; /// All-encompassing options struct for the parser. #[derive(Clone, Default)] @@ -53,96 +32,207 @@ pub struct ParseOpts { pub tree_builder: TreeBuilderOpts, } -/// Parse and send results to a `TreeSink`. +/// Parse an HTML document /// -/// ## Example +/// The returned value implements `tendril::TendrilSink` +/// so that Unicode input may be provided incrementally, +/// or all at once with the `one` method. /// -/// ```ignore -/// let sink = MySink; -/// parse_to(sink, one_input(my_str), Default::default()); -/// ``` -pub fn parse_to(sink: Sink, input: It, opts: ParseOpts) -> Sink - where Sink: TreeSink, - It: Iterator, -{ +/// If your input is bytes, use `Parser::from_utf8` or `Parser::from_bytes`. 
+pub fn parse_document(sink: Sink, opts: ParseOpts) -> Parser where Sink: TreeSink { let tb = TreeBuilder::new(sink, opts.tree_builder); - let mut tok = Tokenizer::new(tb, opts.tokenizer); - for s in input { - tok.feed(s); - } - tok.end(); - tok.unwrap().unwrap() + let tok = Tokenizer::new(tb, opts.tokenizer); + Parser { tokenizer: tok } } -/// Parse an HTML fragment and send results to a `TreeSink`. +/// Parse an HTML fragment /// -/// ## Example +/// The returned value implements `tendril::TendrilSink` +/// so that Unicode input may be provided incrementally, +/// or all at once with the `one` method. /// -/// ```ignore -/// let sink = MySink; -/// parse_fragment_to(sink, one_input(my_str), context_name, context_attrs, Default::default()); -/// ``` -pub fn parse_fragment_to(mut sink: Sink, - input: It, - context_name: QualName, - context_attrs: Vec, - opts: ParseOpts) -> Sink - where Sink: TreeSink, - It: Iterator -{ +/// If your input is bytes, use `Parser::from_utf8` or `Parser::from_bytes`. +pub fn parse_fragment(mut sink: Sink, opts: ParseOpts, + context_name: QualName, context_attrs: Vec) + -> Parser + where Sink: TreeSink { let context_elem = sink.create_element(context_name, context_attrs); let tb = TreeBuilder::new_for_fragment(sink, context_elem, None, opts.tree_builder); let tok_opts = TokenizerOpts { initial_state: Some(tb.tokenizer_state_for_context_elem()), .. opts.tokenizer }; - let mut tok = Tokenizer::new(tb, tok_opts); - for s in input { - tok.feed(s); + let tok = Tokenizer::new(tb, tok_opts); + Parser { tokenizer: tok } +} + +/// An HTML parser, +/// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods. +pub struct Parser where Sink: TreeSink { + tokenizer: Tokenizer>, +} + +impl TendrilSink for Parser { + fn process(&mut self, t: StrTendril) { + self.tokenizer.feed(t) + } + + // FIXME: Is it too noisy to report every character decoding error?
+ fn error(&mut self, desc: Cow<'static, str>) { + self.tokenizer.sink_mut().sink_mut().parse_error(desc) + } + + type Output = Sink::Output; + + fn finish(mut self) -> Self::Output { + self.tokenizer.end(); + self.tokenizer.unwrap().unwrap().finish() } - tok.end(); - tok.unwrap().unwrap() } -/// Results which can be extracted from a `TreeSink`. -/// -/// Implement this for your parse tree data type so that it -/// can be returned by `parse()`. -pub trait ParseResult { - type Sink: TreeSink + Default; - fn get_result(sink: Self::Sink) -> Self; +impl Parser { + /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes. + /// + /// Use this when your input is bytes that are known to be in the UTF-8 encoding. + /// Decoding is lossy, like `String::from_utf8_lossy`. + pub fn from_utf8(self) -> Utf8LossyDecoder { + Utf8LossyDecoder::new(self) + } + + /// Wrap this parser into a `TendrilSink` that accepts bytes + /// and tries to detect the correct character encoding. + /// + /// Currently this looks for a Byte Order Mark, + /// then uses `BytesOpts::transport_layer_encoding`, + /// then falls back to UTF-8. + /// + /// FIXME(https://github.com/servo/html5ever/issues/18): this should look for `` elements + /// and other data per + /// https://html.spec.whatwg.org/multipage/syntax.html#determining-the-character-encoding + pub fn from_bytes(self, opts: BytesOpts) -> BytesParser { + BytesParser { + state: BytesParserState::Initial { parser: self }, + opts: opts, + } + } + + /// Fetch an HTTP or HTTPS URL with Hyper and parse. 
+ #[cfg(feature = "hyper")] + pub fn from_http(self, url: U) -> Result { + use hyper::Client; + use hyper::header::ContentType; + use hyper::mime::Attr::Charset; + use encoding::label::encoding_from_whatwg_label; + + let mut response = try!(Client::new().get(url).send()); + let opts = BytesOpts { + transport_layer_encoding: response.headers.get::() + .and_then(|content_type| content_type.get_param(Charset)) + .and_then(|charset| encoding_from_whatwg_label(charset)) + }; + Ok(try!(self.from_bytes(opts).read_from(&mut response))) + } } -/// Parse into a type which implements `ParseResult`. -/// -/// ## Example -/// -/// ```ignore -/// let dom: RcDom = parse(one_input(my_str), Default::default()); -/// ``` -pub fn parse(input: It, opts: ParseOpts) -> Output - where Output: ParseResult, - It: Iterator, -{ - let sink = parse_to(Default::default(), input, opts); - ParseResult::get_result(sink +/// Options for choosing a character encoding +#[derive(Clone, Default)] +pub struct BytesOpts { + /// The character encoding specified by the transport layer, if any. + /// In HTTP for example, this is the `charset` parameter of the `Content-Type` response header. + pub transport_layer_encoding: Option, } -/// Parse an HTML fragment into a type which implements `ParseResult`. -/// -/// ## Example +/// An HTML parser, +/// ready to receive bytes input through the `tendril::TendrilSink` trait’s methods. /// -/// ```ignore -/// let dom: RcDom = parse_fragment( -/// one_input(my_str), context_name, context_attrs, Default::default()); -/// ``` -pub fn parse_fragment(input: It, - context_name: QualName, - context_attrs: Vec, - opts: ParseOpts) -> Output - where Output: ParseResult, - It: Iterator, -{ - let sink = parse_fragment_to(Default::default(), input, context_name, context_attrs, opts); - ParseResult::get_result(sink +/// See `Parser::from_bytes`.
+pub struct BytesParser where Sink: TreeSink { + state: BytesParserState, + opts: BytesOpts, +} + +enum BytesParserState where Sink: TreeSink { + Initial { + parser: Parser, + }, + Buffering { + parser: Parser, + buffer: ByteTendril + }, + Parsing { + decoder: LossyDecoder>, + }, + Transient +} + +impl TendrilSink for BytesParser { + fn process(&mut self, t: ByteTendril) { + if let &mut BytesParserState::Parsing { ref mut decoder } = &mut self.state { + return decoder.process(t) + } + let (parser, buffer) = match mem::replace(&mut self.state, BytesParserState::Transient) { + BytesParserState::Initial{ parser } => (parser, t), + BytesParserState::Buffering { parser, mut buffer } => { + buffer.push_tendril(&t); + (parser, buffer) + } + BytesParserState::Parsing { .. } | BytesParserState::Transient => unreachable!(), + }; + if buffer.len32() >= PRESCAN_BYTES { + let encoding = detect_encoding(&buffer, &self.opts); + let decoder = LossyDecoder::new(encoding, parser); + self.state = BytesParserState::Parsing { decoder: decoder } + } else { + self.state = BytesParserState::Buffering { + parser: parser, + buffer: buffer, + } + } + } + + fn error(&mut self, desc: Cow<'static, str>) { + match self.state { + BytesParserState::Initial { ref mut parser } => parser.error(desc), + BytesParserState::Buffering { ref mut parser, .. 
} => parser.error(desc), + BytesParserState::Parsing { ref mut decoder } => decoder.error(desc), + BytesParserState::Transient => unreachable!(), + } + } + + type Output = Sink::Output; + + fn finish(self) -> Self::Output { + match self.state { + BytesParserState::Initial { parser } => parser.finish(), + BytesParserState::Buffering { parser, buffer } => { + let encoding = detect_encoding(&buffer, &self.opts); + let decoder = LossyDecoder::new(encoding, parser); + decoder.finish() + }, + BytesParserState::Parsing { decoder } => decoder.finish(), + BytesParserState::Transient => unreachable!(), + } + } +} + +/// How many bytes does detect_encoding() need +// NOTE: 3 would be enough for a BOM, but 1024 is specified for elements. +const PRESCAN_BYTES: u32 = 1024; + +/// https://html.spec.whatwg.org/multipage/syntax.html#determining-the-character-encoding +fn detect_encoding(bytes: &ByteTendril, opts: &BytesOpts) -> EncodingRef { + if bytes.starts_with(b"\xEF\xBB\xBF") { + return encoding::all::UTF_8 + } + if bytes.starts_with(b"\xFE\xFF") { + return encoding::all::UTF_16BE + } + if bytes.starts_with(b"\xFF\xFE") { + return encoding::all::UTF_16LE + } + if let Some(encoding) = opts.transport_layer_encoding { + return encoding + } + // FIXME: etc. 
+ return encoding::all::UTF_8 } diff --git a/src/lib.rs b/src/lib.rs index 1a946cf6..7ab3cf9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,14 +18,14 @@ #[cfg(feature = "heap_size")] extern crate heapsize; +#[cfg(feature = "hyper")] extern crate hyper; + #[macro_use] extern crate log; #[macro_use] extern crate string_cache; -extern crate tendril; - #[macro_use] extern crate mac; @@ -34,7 +34,7 @@ extern crate phf; extern crate time; pub use tokenizer::Attribute; -pub use driver::{one_input, ParseOpts, parse_to, parse_fragment_to, parse, parse_fragment}; +pub use driver::{ParseOpts, parse_document, parse_fragment, Parser}; pub use serialize::serialize; @@ -52,3 +52,12 @@ pub mod tree_builder; pub mod serialize; pub mod driver; pub mod rcdom; + +/// Re-export the tendril crate. +pub mod tendril { + extern crate tendril; + pub use self::tendril::*; +} + +/// Re-export the encoding crate. +pub use tendril::encoding; diff --git a/src/rcdom.rs b/src/rcdom.rs index d173ef05..85cb499a 100644 --- a/src/rcdom.rs +++ b/src/rcdom.rs @@ -31,7 +31,6 @@ use tree_builder; use serialize::{Serializable, Serializer}; use serialize::TraversalScope; use serialize::TraversalScope::{IncludeNode, ChildrenOnly}; -use driver::ParseResult; pub use self::ElementEnum::{AnnotationXml, Normal, Script, Template}; pub use self::NodeEnum::{Document, Doctype, Text, Comment, Element}; @@ -165,6 +164,9 @@ pub struct RcDom { } impl TreeSink for RcDom { + type Output = Self; + fn finish(self) -> Self { self } + type Handle = Handle; fn parse_error(&mut self, msg: Cow<'static, str>) { @@ -336,14 +338,6 @@ impl Default for RcDom { } } -impl ParseResult for RcDom { - type Sink = RcDom; - - fn get_result(sink: RcDom) -> RcDom { - sink - } -} - impl Serializable for Handle { fn serialize<'wr, Wr: Write>(&self, serializer: &mut Serializer<'wr, Wr>, traversal_scope: TraversalScope) -> io::Result<()> { diff --git a/src/tree_builder/interface.rs b/src/tree_builder/interface.rs index e0808ee4..2d746c6e 100644 
--- a/src/tree_builder/interface.rs +++ b/src/tree_builder/interface.rs @@ -49,6 +49,19 @@ pub enum NextParserState { /// Types which can process tree modifications from the tree builder. pub trait TreeSink { + /// The overall result of parsing. + /// + /// This should default to Self, but default associated types are not stable yet. + /// (https://github.com/rust-lang/rust/issues/29661) + type Output; + + /// Consume this sink and return the overall result of parsing. + /// + /// This should default to `fn finish(self) -> Self::Output { self }`, + /// but default associated types are not stable yet. + /// (https://github.com/rust-lang/rust/issues/29661) + fn finish(self) -> Self::Output; + /// `Handle` is a reference to a DOM node. The tree builder requires /// that a `Handle` implements `Clone` to get another reference to /// the same node. diff --git a/tests/serializer.rs b/tests/serializer.rs index 57bd8824..2406da5e 100644 --- a/tests/serializer.rs +++ b/tests/serializer.rs @@ -13,17 +13,16 @@ extern crate html5ever; use std::default::Default; -use tendril::{StrTendril, SliceExt}; +use tendril::{StrTendril, SliceExt, TendrilSink}; use html5ever::driver::ParseOpts; -use html5ever::{parse_fragment, parse, one_input, serialize}; +use html5ever::{parse_fragment, parse_document, serialize}; use html5ever::rcdom::RcDom; fn parse_and_serialize(input: StrTendril) -> StrTendril { - let dom: RcDom = parse_fragment(one_input(input), - qualname!(html, "body"), - vec![], - ParseOpts::default()); + let dom = parse_fragment( + RcDom::default(), ParseOpts::default(), qualname!(html, "body"), vec![] + ).one(input); let inner = &dom.document.borrow().children[0]; let mut result = vec![]; @@ -101,7 +100,8 @@ test!(attr_ns_4, r#""#); #[test] fn doctype() { - let dom: RcDom = parse(one_input("".to_tendril()), ParseOpts::default()); + let dom = parse_document( + RcDom::default(), ParseOpts::default()).one(""); 
dom.document.borrow_mut().children.truncate(1); // Remove let mut result = vec![]; serialize(&mut result, &dom.document, Default::default()).unwrap(); diff --git a/tests/tree_builder.rs b/tests/tree_builder.rs index f021ef8b..290ebeaa 100644 --- a/tests/tree_builder.rs +++ b/tests/tree_builder.rs @@ -29,12 +29,12 @@ use std::collections::{HashSet, HashMap}; #[cfg(feature = "unstable")] use test::{TestDesc, TestDescAndFn, DynTestName, DynTestFn}; #[cfg(feature = "unstable")] use test::ShouldPanic::No; -use html5ever::{ParseOpts, parse, parse_fragment, one_input}; +use html5ever::{ParseOpts, parse_document, parse_fragment}; use html5ever::rcdom::{Comment, Document, Doctype, Element, Handle, RcDom}; use html5ever::rcdom::{Template, Text}; use string_cache::{Atom, QualName}; -use tendril::StrTendril; +use tendril::{StrTendril, TendrilSink}; fn parse_tests>(mut lines: It) -> Vec> { let mut tests = vec!(); @@ -215,16 +215,14 @@ fn make_test_desc_with_scripting_flag( let mut result = String::new(); match context { None => { - let dom: RcDom = parse(one_input(data.clone()), opts); + let dom = parse_document(RcDom::default(), opts).one(data.clone()); for child in dom.document.borrow().children.iter() { serialize(&mut result, 1, child.clone()); } }, Some(ref context) => { - let dom: RcDom = parse_fragment(one_input(data.clone()), - context.clone(), - vec![], - opts); + let dom = parse_fragment(RcDom::default(), opts, context.clone(), vec![]) + .one(data.clone()); // fragment case: serialize children of the html element // rather than children of the document let doc = dom.document.borrow();