diff --git a/examples/document.rs b/examples/document.rs index 00621eb7..a8a0eeca 100644 --- a/examples/document.rs +++ b/examples/document.rs @@ -2,7 +2,7 @@ extern crate scraper; use std::io::{self, Read, Write}; -use scraper::{Selector, Html}; +use scraper::{Html, Selector}; fn main() { let mut input = String::new(); diff --git a/examples/fragment.rs b/examples/fragment.rs index eb944bd7..7ab0a2bf 100644 --- a/examples/fragment.rs +++ b/examples/fragment.rs @@ -2,7 +2,7 @@ extern crate scraper; use std::io::{self, Read, Write}; -use scraper::{Selector, Html}; +use scraper::{Html, Selector}; fn main() { let mut input = String::new(); diff --git a/src/element_ref/element.rs b/src/element_ref/element.rs index 28c5cb11..ad4fb1e1 100644 --- a/src/element_ref/element.rs +++ b/src/element_ref/element.rs @@ -1,7 +1,7 @@ -use selectors::{Element, OpaqueElement}; -use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; use html5ever::{LocalName, Namespace}; +use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; use selectors::matching; +use selectors::{Element, OpaqueElement}; use super::ElementRef; use selector::{NonTSPseudoClass, PseudoElement, Simple}; @@ -59,7 +59,8 @@ impl<'a> Element for ElementRef<'a> { ) -> bool { self.value().attrs.iter().any(|(key, value)| { !matches!(*ns, NamespaceConstraint::Specific(url) if *url != key.ns) - && *local_name == key.local && operation.eval_str(value) + && *local_name == key.local + && operation.eval_str(value) }) } @@ -100,7 +101,8 @@ impl<'a> Element for ElementRef<'a> { } fn is_empty(&self) -> bool { - !self.children() + !self + .children() .any(|child| child.value().is_element() || child.value().is_text()) } @@ -114,8 +116,8 @@ impl<'a> Element for ElementRef<'a> { mod tests { use html::Html; use selector::Selector; - use selectors::Element; use selectors::attr::CaseSensitivity; + use selectors::Element; #[test] fn test_has_id() { diff --git a/src/element_ref/mod.rs 
b/src/element_ref/mod.rs index 5f644e3d..a5ab7746 100644 --- a/src/element_ref/mod.rs +++ b/src/element_ref/mod.rs @@ -2,12 +2,13 @@ use std::ops::Deref; +use ego_tree::iter::{Edge, Traverse}; use ego_tree::NodeRef; -use ego_tree::iter::{Traverse, Edge}; use html5ever::serialize::{serialize, SerializeOpts, TraversalScope}; -use {Node, Selector}; use node::Element; +use selectors::parser::SelectorParseErrorKind; +use {Node, Selector}; /// Wrapper around a reference to an element node. /// @@ -48,6 +49,18 @@ impl<'a> ElementRef<'a> { } } + /// Returns an iterator over descendent elements matching a selector. + pub fn try_select<'s>( + &self, + selector: &'s str, + ) -> Result<SelectOwned<'a>, cssparser::ParseError<'s, SelectorParseErrorKind<'s>>> { + let mut inner = self.traverse(); + inner.next(); // Skip Edge::Open(self). + let selector = Selector::parse(selector)?; + + Ok(SelectOwned { inner, selector }) + } + fn serialize(&self, traversal_scope: TraversalScope) -> String { let opts = SerializeOpts { scripting_enabled: false, // It's not clear what this does. @@ -71,13 +84,17 @@ impl<'a> ElementRef<'a> { /// Returns an iterator over descendent text nodes. pub fn text(&self) -> Text<'a> { - Text { inner: self.traverse() } + Text { + inner: self.traverse(), + } } } impl<'a> Deref for ElementRef<'a> { type Target = NodeRef<'a, Node>; - fn deref(&self) -> &NodeRef<'a, Node> { &self.node } + fn deref(&self) -> &NodeRef<'a, Node> { + &self.node + } } /// Iterator over descendent elements matching a selector. @@ -104,6 +121,30 @@ impl<'a, 'b> Iterator for Select<'a, 'b> { } } +/// Iterator over descendent elements matching a selector. 
+#[derive(Debug, Clone)] +pub struct SelectOwned<'a> { + inner: Traverse<'a, Node>, + selector: Selector, +} + +impl<'a> Iterator for SelectOwned<'a> { + type Item = ElementRef<'a>; + + fn next(&mut self) -> Option<ElementRef<'a>> { + for edge in &mut self.inner { + if let Edge::Open(node) = edge { + if let Some(element) = ElementRef::wrap(node) { + if self.selector.matches(&element) { + return Some(element); + } + } + } + } + None + } +} + /// Iterator over descendent text nodes. #[derive(Debug, Clone)] pub struct Text<'a> { diff --git a/src/element_ref/serializable.rs b/src/element_ref/serializable.rs index 79971e32..568e788c 100644 --- a/src/element_ref/serializable.rs +++ b/src/element_ref/serializable.rs @@ -5,7 +5,6 @@ use html5ever::serialize::{Serialize, Serializer, TraversalScope}; use {ElementRef, Node}; - impl<'a> Serialize for ElementRef<'a> { fn serialize( &self, @@ -22,20 +21,20 @@ impl<'a> Serialize for ElementRef<'a> { match *node.value() { Node::Doctype(ref doctype) => { try!(serializer.write_doctype(doctype.name())); - }, + } Node::Comment(ref comment) => { try!(serializer.write_comment(comment)); - }, + } Node::Text(ref text) => { try!(serializer.write_text(text)); - }, + } Node::Element(ref elem) => { let attrs = elem.attrs.iter().map(|(k, v)| (k, &v[..])); try!(serializer.start_elem(elem.name.clone(), attrs)); - }, + } _ => (), } - }, + } Edge::Close(node) => { if node == **self && traversal_scope == TraversalScope::ChildrenOnly(None) { @@ -45,7 +44,7 @@ impl<'a> Serialize for ElementRef<'a> { if let Some(elem) = node.value().as_element() { try!(serializer.end_elem(elem.name.clone())); } - }, + } } } diff --git a/src/html/mod.rs b/src/html/mod.rs index b174f37c..6db44511 100644 --- a/src/html/mod.rs +++ b/src/html/mod.rs @@ -2,15 +2,16 @@ use std::borrow::Cow; -use ego_tree::Tree; use ego_tree::iter::Nodes; +use ego_tree::Tree; use html5ever::driver; use html5ever::tree_builder::QuirksMode; use html5ever::QualName; use tendril::TendrilSink; -use {Node, 
ElementRef}; use selector::Selector; +use selectors::parser::SelectorParseErrorKind; +use {ElementRef, Node}; /// An HTML tree. /// @@ -91,6 +92,19 @@ impl Html { } } + /// Returns an iterator over elements matching a selector. + pub fn try_select<'a, 's>( + &'a self, + selector: &'s str, + ) -> Result<SelectOwned<'a>, cssparser::ParseError<'s, SelectorParseErrorKind<'s>>> { + let selector = Selector::parse(selector)?; + + Ok(SelectOwned { + inner: self.tree.nodes(), + selector, + }) + } + /// Returns the root `<html>` element. pub fn root_element(&self) -> ElementRef { let root_node = self @@ -125,6 +139,28 @@ impl<'a, 'b> Iterator for Select<'a, 'b> { } } +/// Iterator over elements matching an owned selector. +#[derive(Debug)] +pub struct SelectOwned<'a> { + inner: Nodes<'a, Node>, + selector: Selector, +} + +impl<'a> Iterator for SelectOwned<'a> { + type Item = ElementRef<'a>; + + fn next(&mut self) -> Option<ElementRef<'a>> { + for node in self.inner.by_ref() { + if let Some(element) = ElementRef::wrap(node) { + if element.parent().is_some() && self.selector.matches(&element) { + return Some(element); + } + } + } + None + } +} + mod tree_sink; #[cfg(test)] @@ -136,7 +172,10 @@ mod tests { fn root_element_fragment() { let html = Html::parse_fragment(r#"1"#); let root_ref = html.root_element(); - let href = root_ref.select(&Selector::parse("a").unwrap()).next().unwrap(); + let href = root_ref + .select(&Selector::parse("a").unwrap()) + .next() + .unwrap(); assert_eq!(href.inner_html(), "1"); assert_eq!(href.value().attr("href").unwrap(), "http://github.com"); } @@ -145,7 +184,10 @@ mod tests { fn root_element_document_doctype() { let html = Html::parse_document("\nabc"); let root_ref = html.root_element(); - let title = root_ref.select(&Selector::parse("title").unwrap()).next().unwrap(); + let title = root_ref + .select(&Selector::parse("title").unwrap()) + .next() + .unwrap(); assert_eq!(title.inner_html(), "abc"); } @@ -153,7 +195,10 @@ mod tests { fn root_element_document_comment() { let html = 
Html::parse_document("abc"); let root_ref = html.root_element(); - let title = root_ref.select(&Selector::parse("title").unwrap()).next().unwrap(); + let title = root_ref + .select(&Selector::parse("title").unwrap()) + .next() + .unwrap(); assert_eq!(title.inner_html(), "abc"); } } diff --git a/src/html/tree_sink.rs b/src/html/tree_sink.rs index 2737c4b3..3cfbe1b0 100644 --- a/src/html/tree_sink.rs +++ b/src/html/tree_sink.rs @@ -1,19 +1,21 @@ use std::borrow::Cow; +use super::Html; use ego_tree::NodeId; -use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, ElementFlags}; -use html5ever::Attribute; -use html5ever::{QualName, ExpandedName}; use html5ever::tendril::StrTendril; -use super::Html; -use node::{Node, Doctype, Comment, Text, Element, ProcessingInstruction}; +use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::Attribute; +use html5ever::{ExpandedName, QualName}; +use node::{Comment, Doctype, Element, Node, ProcessingInstruction, Text}; /// Note: does not support the `