From 84a58d5b9ecb7c956c27128d0324c022de88eb1e Mon Sep 17 00:00:00 2001 From: bcorrigan Date: Mon, 11 Dec 2017 17:25:45 +0000 Subject: [PATCH 1/2] Test documents need to have unix line endings on windows --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..5d5c9ec0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Unit tests expect these files to have unix line endings, even on windows +tests/documents/* text eol=lf \ No newline at end of file From 21607138ece27c50cf5dd395469feb0bcf194e04 Mon Sep 17 00:00:00 2001 From: Barry Corrigan Date: Mon, 11 Dec 2017 23:10:40 +0000 Subject: [PATCH 2/2] If the XML has a BOM (byte order mark) at the start, just ignore it instead of throwing an error. --- src/reader/parser/outside_tag.rs | 12 ++++++++++-- tests/documents/sample_4.xml | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/reader/parser/outside_tag.rs b/src/reader/parser/outside_tag.rs index 8ddc515b..98219258 100644 --- a/src/reader/parser/outside_tag.rs +++ b/src/reader/parser/outside_tag.rs @@ -2,6 +2,7 @@ use common::is_whitespace_char; use reader::events::XmlEvent; use reader::lexer::Token; +use std::str; use super::{ Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate, @@ -16,8 +17,15 @@ impl PullParser { Token::Whitespace(_) if self.depth() == 0 => None, // skip whitespace outside of the root element - _ if t.contains_char_data() && self.depth() == 0 => - Some(self_error!(self; "Unexpected characters outside the root element: {}", t)), + _ if t.contains_char_data() && self.depth() == 0 => { + if let Token::Character(c) = t { //If the character is the UTF-8 BOM mark, just ignore it + let bom = &[0xefu8, 0xbbu8, 0xbfu8]; + if c.to_string()==str::from_utf8(bom).unwrap() { + return None; + } + } + Some(self_error!(self; "Unexpected characters outside the root element: {}", t)) + }, Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None, diff --git a/tests/documents/sample_4.xml b/tests/documents/sample_4.xml index fb915ffc..27ea961c 100644 --- a/tests/documents/sample_4.xml +++ b/tests/documents/sample_4.xml @@ -1,4 +1,4 @@ - +