Skip to content

Rewrite the high-level API (driver module) to use TendrilSink #188

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 26, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "html5ever"
version = "0.2.11"
version = "0.3.0"
authors = [ "The html5ever Project Developers" ]
license = "MIT / Apache-2.0"
repository = "https://github.com/servo/html5ever"
Expand All @@ -26,9 +26,10 @@ log = "0"
phf = "0.7"
string_cache = "0.2.0"
mac = "0"
tendril = "0.1.6"
tendril = "0.2"
heapsize = { version = "0.1.1", optional = true }
heapsize_plugin = { version = "0.1.0", optional = true }
hyper = {version = "0.7", optional = true}

[dev-dependencies]
rustc-serialize = "0.3.15"
Expand Down
4 changes: 3 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ fn main() {

named_entities_to_phf(
&Path::new(&manifest_dir).join("data/entities.json"),
&Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs"))
&Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs"));

println!("cargo:rerun-if-changed={}", rules_rs.display());
}

#[cfg(feature = "codegen")]
Expand Down
1 change: 0 additions & 1 deletion capi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ crate-type = ["staticlib"]
[dependencies]
libc = "0.2"
string_cache = "0.2"
tendril = "0.1.6"

[dependencies.html5ever]
path = "../"
Expand Down
1 change: 0 additions & 1 deletion capi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

extern crate libc;
#[macro_use] extern crate string_cache;
extern crate tendril;
extern crate html5ever;

use libc::c_int;
Expand Down
2 changes: 1 addition & 1 deletion capi/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

use c_bool;

use html5ever::tendril::{StrTendril, SliceExt};
use html5ever::tokenizer::{TokenSink, Token, Doctype, Tag, ParseError, DoctypeToken};
use html5ever::tokenizer::{CommentToken, CharacterTokens, NullCharacterToken};
use html5ever::tokenizer::{TagToken, StartTag, EndTag, EOFToken, Tokenizer};
Expand All @@ -20,7 +21,6 @@ use std::default::Default;

use libc::{c_void, c_int, size_t};
use string_cache::Atom;
use tendril::{StrTendril, SliceExt};

#[repr(C)]
#[derive(Copy, Clone)]
Expand Down
16 changes: 9 additions & 7 deletions examples/html2html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,26 @@ extern crate html5ever;
use std::io::{self, Write};
use std::default::Default;

use tendril::{ByteTendril, ReadExt};
use tendril::TendrilSink;

use html5ever::driver::ParseOpts;
use html5ever::tree_builder::TreeBuilderOpts;
use html5ever::{parse, one_input, serialize};
use html5ever::{parse_document, serialize};
use html5ever::rcdom::RcDom;

fn main() {
let mut input = ByteTendril::new();
io::stdin().read_to_tendril(&mut input).unwrap();
let input = input.try_reinterpret().unwrap();
let dom: RcDom = parse(one_input(input), ParseOpts {
let opts = ParseOpts {
tree_builder: TreeBuilderOpts {
drop_doctype: true,
..Default::default()
},
..Default::default()
});
};
let stdin = io::stdin();
let dom = parse_document(RcDom::default(), opts)
.from_utf8()
.read_from(&mut stdin.lock())
.unwrap();

// The validator.nu HTML2HTML always prints a doctype at the very beginning.
io::stdout().write_all(b"<!DOCTYPE html>\n")
Expand Down
7 changes: 4 additions & 3 deletions examples/noop-tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ use std::default::Default;

use tendril::{ByteTendril, ReadExt};

use html5ever::tokenizer::{TokenSink, Token};
use html5ever::driver::{tokenize_to, one_input};
use html5ever::tokenizer::{TokenSink, Token, Tokenizer};

struct Sink(Vec<Token>);

Expand All @@ -35,5 +34,7 @@ fn main() {
io::stdin().read_to_tendril(&mut input).unwrap();
let input = input.try_reinterpret().unwrap();

tokenize_to(Sink(Vec::new()), one_input(input), Default::default());
let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default());
tok.feed(input);
tok.end();
}
16 changes: 9 additions & 7 deletions examples/noop-tree-builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ use std::collections::HashMap;
use std::borrow::Cow;
use string_cache::QualName;

use tendril::{StrTendril, ByteTendril, ReadExt};
use tendril::{StrTendril, TendrilSink};

use html5ever::{parse_to, one_input};
use html5ever::parse_document;
use html5ever::tokenizer::Attribute;
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText};

Expand All @@ -39,6 +39,8 @@ impl Sink {

impl TreeSink for Sink {
type Handle = usize;
type Output = Self;
fn finish(self) -> Self { self }

fn get_document(&mut self) -> usize {
0
Expand Down Expand Up @@ -96,9 +98,9 @@ fn main() {
next_id: 1,
names: HashMap::new(),
};

let mut input = ByteTendril::new();
io::stdin().read_to_tendril(&mut input).unwrap();
let input = input.try_reinterpret().unwrap();
parse_to(sink, one_input(input), Default::default());
let stdin = io::stdin();
parse_document(sink, Default::default())
.from_utf8()
.read_from(&mut stdin.lock())
.unwrap();
}
13 changes: 7 additions & 6 deletions examples/print-rcdom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ use std::iter::repeat;
use std::default::Default;
use std::string::String;

use tendril::{ByteTendril, ReadExt};
use html5ever::{parse, one_input};
use tendril::TendrilSink;
use html5ever::parse_document;
use html5ever::rcdom::{Document, Doctype, Text, Comment, Element, RcDom, Handle};

// This is not proper HTML serialization, of course.
Expand Down Expand Up @@ -63,10 +63,11 @@ pub fn escape_default(s: &str) -> String {
}

fn main() {
let mut input = ByteTendril::new();
io::stdin().read_to_tendril(&mut input).unwrap();
let input = input.try_reinterpret().unwrap();
let dom: RcDom = parse(one_input(input), Default::default());
let stdin = io::stdin();
let dom = parse_document(RcDom::default(), Default::default())
.from_utf8()
.read_from(&mut stdin.lock())
.unwrap();
walk(0, dom.document);

if !dom.errors.is_empty() {
Expand Down
16 changes: 9 additions & 7 deletions examples/print-tree-actions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ use std::collections::HashMap;
use std::borrow::Cow;
use string_cache::QualName;

use tendril::{ByteTendril, StrTendril, ReadExt};
use tendril::{StrTendril, TendrilSink};

use html5ever::{parse_to, one_input};
use html5ever::tokenizer::Attribute;
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText};
use html5ever::parse_document;

struct Sink {
next_id: usize,
Expand All @@ -39,6 +39,8 @@ impl Sink {

impl TreeSink for Sink {
type Handle = usize;
type Output = Self;
fn finish(self) -> Self { self }

fn parse_error(&mut self, msg: Cow<'static, str>) {
println!("Parse error: {}", msg);
Expand Down Expand Up @@ -143,9 +145,9 @@ fn main() {
next_id: 1,
names: HashMap::new(),
};

let mut input = ByteTendril::new();
io::stdin().read_to_tendril(&mut input).unwrap();
let input = input.try_reinterpret().unwrap();
parse_to(sink, one_input(input), Default::default());
let stdin = io::stdin();
parse_document(sink, Default::default())
.from_utf8()
.read_from(&mut stdin.lock())
.unwrap();
}
8 changes: 5 additions & 3 deletions examples/tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ use std::default::Default;

use tendril::{ByteTendril, ReadExt};

use html5ever::tokenizer::{TokenSink, Token, TokenizerOpts, ParseError};
use html5ever::tokenizer::{TokenSink, Tokenizer, Token, TokenizerOpts, ParseError};
use html5ever::tokenizer::{CharacterTokens, NullCharacterToken, TagToken, StartTag, EndTag};
use html5ever::driver::{tokenize_to, one_input};

#[derive(Copy, Clone)]
struct TokenPrinter {
Expand Down Expand Up @@ -84,9 +83,12 @@ fn main() {
let mut input = ByteTendril::new();
io::stdin().read_to_tendril(&mut input).unwrap();
let input = input.try_reinterpret().unwrap();
tokenize_to(sink, one_input(input), TokenizerOpts {

let mut tok = Tokenizer::new(sink, TokenizerOpts {
profile: true,
.. Default::default()
});
tok.feed(input);
tok.end();
sink.is_char(false);
}
1 change: 1 addition & 0 deletions scripts/travis-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

set -ex

cargo build --features hyper
# Test without unstable first, to make sure src/tree_builder/rules.expanded.rs is up-to-date.
cargo test --no-run
cargo test | ./scripts/shrink-test-output.py
Expand Down
Loading