From 667c89646a898cc9067afa77b0d503019eb3ba41 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:16:02 +0100 Subject: [PATCH 01/35] Add a Config struct following the builder pattern to idna --- idna/src/uts46.rs | 129 ++++++++++++++++++++++++++++++---------------- 1 file changed, 86 insertions(+), 43 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index ac348d1fa..65cf5685b 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -353,6 +353,90 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { validated } +#[derive(Clone, Copy)] +pub struct Config { + flags: Flags, +} + +impl From for Config { + #[inline] + fn from(flags: Flags) -> Self { + Self { flags } + } +} + +impl Config { + #[inline] + pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { + self.flags.use_std3_ascii_rules = value; + self + } + + #[inline] + pub fn transitional_processing(mut self, value: bool) -> Self { + self.flags.transitional_processing = value; + self + } + + #[inline] + pub fn verify_dns_length(mut self, value: bool) -> Self { + self.flags.verify_dns_length = value; + self + } + + /// http://www.unicode.org/reports/tr46/#ToASCII + pub fn to_ascii(self, domain: &str) -> Result { + let mut errors = Vec::new(); + let mut result = String::new(); + let mut first = true; + for label in processing(domain, self.flags, &mut errors).split('.') { + if !first { + result.push('.'); + } + first = false; + if label.is_ascii() { + result.push_str(label); + } else { + match punycode::encode_str(label) { + Some(x) => { + result.push_str(PUNYCODE_PREFIX); + result.push_str(&x); + }, + None => errors.push(Error::PunycodeError) + } + } + } + + if self.flags.verify_dns_length { + let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; + if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { + errors.push(Error::TooShortForDns) + } + if domain.len() > 253 || domain.split('.').any(|label| 
label.len() > 63) { + errors.push(Error::TooLongForDns) + } + } + if errors.is_empty() { + Ok(result) + } else { + Err(Errors(errors)) + } + } + + /// http://www.unicode.org/reports/tr46/#ToUnicode + pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { + let mut errors = Vec::new(); + let domain = processing(domain, self.flags, &mut errors); + let errors = if errors.is_empty() { + Ok(()) + } else { + Err(Errors(errors)) + }; + (domain, errors) + } + +} + #[derive(Copy, Clone)] pub struct Flags { pub use_std3_ascii_rules: bool, @@ -380,41 +464,7 @@ pub struct Errors(Vec); /// http://www.unicode.org/reports/tr46/#ToASCII pub fn to_ascii(domain: &str, flags: Flags) -> Result { - let mut errors = Vec::new(); - let mut result = String::new(); - let mut first = true; - for label in processing(domain, flags, &mut errors).split('.') { - if !first { - result.push('.'); - } - first = false; - if label.is_ascii() { - result.push_str(label); - } else { - match punycode::encode_str(label) { - Some(x) => { - result.push_str(PUNYCODE_PREFIX); - result.push_str(&x); - }, - None => errors.push(Error::PunycodeError) - } - } - } - - if flags.verify_dns_length { - let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; - if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { - errors.push(Error::TooShortForDns) - } - if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { - errors.push(Error::TooLongForDns) - } - } - if errors.is_empty() { - Ok(result) - } else { - Err(Errors(errors)) - } + Config::from(flags).to_ascii(domain) } /// http://www.unicode.org/reports/tr46/#ToUnicode @@ -422,12 +472,5 @@ pub fn to_ascii(domain: &str, flags: Flags) -> Result { /// Only `use_std3_ascii_rules` is used in `flags`. 
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) { flags.transitional_processing = false; - let mut errors = Vec::new(); - let domain = processing(domain, flags, &mut errors); - let errors = if errors.is_empty() { - Ok(()) - } else { - Err(Errors(errors)) - }; - (domain, errors) + Config::from(flags).to_unicode(domain) } From 946c29897f085d61727e4f691b60713f22742d31 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:25:23 +0100 Subject: [PATCH 02/35] Pass a Config value to validate in uts46 --- idna/src/uts46.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index 65cf5685b..b4f78b0a5 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -233,16 +233,16 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { } /// http://www.unicode.org/reports/tr46/#Validity_Criteria -fn validate_full(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate_full(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { // V1: Must be in NFC form. 
if label.nfc().ne(label.chars()) { errors.push(Error::ValidityCriteria); } else { - validate(label, is_bidi_domain, flags, errors); + validate(label, is_bidi_domain, config, errors); } } -fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { let first_char = label.chars().next(); if first_char == None { // Empty string, pass @@ -273,8 +273,8 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec false, - Mapping::Deviation(_) => flags.transitional_processing, - Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules, + Mapping::Deviation(_) => config.flags.transitional_processing, + Mapping::DisallowedStd3Valid => config.flags.use_std3_ascii_rules, _ => true, }) { errors.push(Error::ValidityCriteria); @@ -294,10 +294,10 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) -> String { +fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { let mut mapped = String::with_capacity(domain.len()); for c in domain.chars() { - map_char(c, flags, &mut mapped, errors) + map_char(c, config.flags, &mut mapped, errors) } let mut normalized = String::with_capacity(mapped.len()); normalized.extend(mapped.nfc()); @@ -338,15 +338,15 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { if label.starts_with(PUNYCODE_PREFIX) { match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) { Some(decoded_label) => { - let flags = Flags { transitional_processing: false, ..flags }; - validate_full(&decoded_label, is_bidi_domain, flags, errors); + let config = config.transitional_processing(false); + validate_full(&decoded_label, is_bidi_domain, config, errors); validated.push_str(&decoded_label) } None => errors.push(Error::PunycodeError) } } else { // `normalized` is already `NFC` so we can skip that check - validate(label, is_bidi_domain, flags, errors); + validate(label, 
is_bidi_domain, config, errors); validated.push_str(label) } } @@ -389,7 +389,7 @@ impl Config { let mut errors = Vec::new(); let mut result = String::new(); let mut first = true; - for label in processing(domain, self.flags, &mut errors).split('.') { + for label in processing(domain, self, &mut errors).split('.') { if !first { result.push('.'); } @@ -426,7 +426,7 @@ impl Config { /// http://www.unicode.org/reports/tr46/#ToUnicode pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { let mut errors = Vec::new(); - let domain = processing(domain, self.flags, &mut errors); + let domain = processing(domain, self, &mut errors); let errors = if errors.is_empty() { Ok(()) } else { From 6e18952bc7bf3197bb7027729c9ae2e135bd21b4 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:28:37 +0100 Subject: [PATCH 03/35] Introduce Config::check_hyphens in uts46 Hyphens are checked by default. --- idna/src/uts46.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index b4f78b0a5..2c981e099 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -253,11 +253,9 @@ fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec< // NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the // third and fourth positions. But nobody follows this criteria. See the spec issue below: // https://github.com/whatwg/url/issues/53 - // - // TODO: Add *CheckHyphens* flag. 
// V3: neither begin nor end with a U+002D HYPHEN-MINUS - else if label.starts_with("-") || label.ends_with("-") { + else if config.check_hyphens && (label.starts_with("-") || label.ends_with("-")) { errors.push(Error::ValidityCriteria); } @@ -356,12 +354,13 @@ fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { #[derive(Clone, Copy)] pub struct Config { flags: Flags, + check_hyphens: bool, } impl From for Config { #[inline] fn from(flags: Flags) -> Self { - Self { flags } + Self { flags, check_hyphens: true } } } @@ -384,6 +383,12 @@ impl Config { self } + #[inline] + pub fn check_hyphens(mut self, value: bool) -> Self { + self.check_hyphens = value; + self + } + /// http://www.unicode.org/reports/tr46/#ToASCII pub fn to_ascii(self, domain: &str) -> Result { let mut errors = Vec::new(); From 8975bb98a8a5532563a2b696c62004d31c2f43e2 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:29:40 +0100 Subject: [PATCH 04/35] Bump idna to 0.1.6 --- idna/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 1050ad35b..3be5b4858 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "idna" -version = "0.1.5" +version = "0.1.6" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." repository = "https://github.com/servo/rust-url/" From fb3b9573372c10b4ff5dcec701d91e5d013e43c0 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:38:03 +0100 Subject: [PATCH 05/35] Don't check hyphens in domain_to_ascii and domain_to_unicode (fixes #483) --- idna/src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 92b5df230..7f6fd1037 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -47,11 +47,12 @@ pub mod uts46; /// /// This process may fail. 
pub fn domain_to_ascii(domain: &str) -> Result { - uts46::to_ascii(domain, uts46::Flags { + let flags = uts46::Flags { use_std3_ascii_rules: false, transitional_processing: false, verify_dns_length: false, - }) + }; + uts46::Config::from(flags).check_hyphens(false).to_ascii(domain) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -63,11 +64,12 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - uts46::to_unicode(domain, uts46::Flags { + let flags = uts46::Flags { use_std3_ascii_rules: false, // Unused: transitional_processing: false, verify_dns_length: false, - }) + }; + uts46::Config::from(flags).check_hyphens(false).to_unicode(domain) } From ce34673c732aef87c7cf758613de2332f646cdcf Mon Sep 17 00:00:00 2001 From: est31 Date: Sat, 13 Jul 2019 11:09:09 +0200 Subject: [PATCH 06/35] url crate: remove rustc-serialize optional dependency --- Cargo.toml | 1 - src/lib.rs | 18 ------------------ 2 files changed, 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 40e6b64f5..4a3ddf452 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,7 +47,6 @@ heapsize = {version = ">=0.4.1, <0.5", optional = true} idna = { version = "0.1.0", path = "./idna" } matches = "0.1" percent-encoding = { version = "1.0.0", path = "./percent_encoding" } -rustc-serialize = {version = "0.3", optional = true} serde = {version = ">=0.6.1, <0.9", optional = true} [[bench]] diff --git a/src/lib.rs b/src/lib.rs index 203b973bb..cc33b40cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,7 +107,6 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); #![doc(html_root_url = "https://docs.rs/url/1.7.0")] -#[cfg(feature="rustc-serialize")] extern crate rustc_serialize; #[macro_use] extern crate matches; 
#[cfg(feature="serde")] extern crate serde; #[cfg(feature="heapsize")] #[macro_use] extern crate heapsize; @@ -2226,23 +2225,6 @@ impl RangeArg for RangeTo { } } -#[cfg(feature="rustc-serialize")] -impl rustc_serialize::Encodable for Url { - fn encode(&self, encoder: &mut S) -> Result<(), S::Error> { - encoder.emit_str(self.as_str()) - } -} - - -#[cfg(feature="rustc-serialize")] -impl rustc_serialize::Decodable for Url { - fn decode(decoder: &mut D) -> Result { - Url::parse(&*decoder.read_str()?).map_err(|error| { - decoder.error(&format!("URL parsing error: {}", error)) - }) - } -} - /// Serializes this URL into a `serde` stream. /// /// This implementation is only available if the `serde` Cargo feature is enabled. From 424f3e45ac41da22744035e5562cfdae68a3df91 Mon Sep 17 00:00:00 2001 From: est31 Date: Sat, 13 Jul 2019 11:22:01 +0200 Subject: [PATCH 07/35] Remove redundant use --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 203b973bb..f09dbe2b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2346,7 +2346,6 @@ fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split) -> Result { use std::ffi::OsStr; use std::os::unix::prelude::OsStrExt; - use std::path::PathBuf; if host.is_some() { return Err(()); From 5ae0998d226f49e068586127711978a33056481a Mon Sep 17 00:00:00 2001 From: est31 Date: Sat, 13 Jul 2019 11:23:39 +0200 Subject: [PATCH 08/35] Remove AsciiExt It has been deprecated since 1.26.0 in favor of the inherent methods, which have been stable since 1.23.0.
--- idna/src/punycode.rs | 2 -- idna/src/uts46.rs | 2 -- percent_encoding/lib.rs | 1 - src/form_urlencoded.rs | 1 - src/parser.rs | 3 --- tests/unit.rs | 1 - 6 files changed, 10 deletions(-) diff --git a/idna/src/punycode.rs b/idna/src/punycode.rs index acdde5897..c0a45d89a 100644 --- a/idna/src/punycode.rs +++ b/idna/src/punycode.rs @@ -15,8 +15,6 @@ use std::u32; use std::char; -#[allow(unused_imports, deprecated)] -use std::ascii::AsciiExt; // Bootstring parameters for Punycode static BASE: u32 = 36; diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index ac348d1fa..b425c3438 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -11,8 +11,6 @@ use self::Mapping::*; use punycode; -#[allow(unused_imports, deprecated)] -use std::ascii::AsciiExt; use std::cmp::Ordering::{Equal, Less, Greater}; use unicode_bidi::{BidiClass, bidi_class}; use unicode_normalization::UnicodeNormalization; diff --git a/percent_encoding/lib.rs b/percent_encoding/lib.rs index cb094116d..5b461448f 100644 --- a/percent_encoding/lib.rs +++ b/percent_encoding/lib.rs @@ -32,7 +32,6 @@ //! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F"); //! ``` -use std::ascii::AsciiExt; use std::borrow::Cow; use std::fmt; use std::slice; diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs index f378c9a6c..3669c53c1 100644 --- a/src/form_urlencoded.rs +++ b/src/form_urlencoded.rs @@ -55,7 +55,6 @@ pub fn parse_with_encoding<'a>(input: &'a [u8], encoding_override: Option<::encoding::EncodingRef>, use_charset: bool) -> Result, ()> { - use std::ascii::AsciiExt; let mut encoding = EncodingOverride::from_opt_encoding(encoding_override); if !(encoding.is_utf8() || input.is_ascii()) { diff --git a/src/parser.rs b/src/parser.rs index 4f9cc524b..0c8cf6e71 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6,9 +6,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#[allow(unused_imports, deprecated)] -use std::ascii::AsciiExt; - use std::error::Error; use std::fmt::{self, Formatter, Write}; use std::str; diff --git a/tests/unit.rs b/tests/unit.rs index 62401c943..81153ed08 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -11,7 +11,6 @@ #[macro_use] extern crate url; -use std::ascii::AsciiExt; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::net::{Ipv4Addr, Ipv6Addr}; From 03c43b29bd6f878cf1ce847101ba37ca6200590c Mon Sep 17 00:00:00 2001 From: est31 Date: Sat, 13 Jul 2019 11:28:27 +0200 Subject: [PATCH 09/35] Replace deprecated ATOMIC_USIZE_INIT by AtomicUsize::new This requires at least rustc 1.24.0 as only then the function became stably const-fn. --- src/origin.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/origin.rs b/src/origin.rs index ee0b83e50..358fe7684 100644 --- a/src/origin.rs +++ b/src/origin.rs @@ -10,7 +10,7 @@ use host::Host; use idna::domain_to_unicode; use parser::default_port; -use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use Url; pub fn url_origin(url: &Url) -> Origin { @@ -76,7 +76,7 @@ impl HeapSizeOf for Origin { impl Origin { /// Creates a new opaque origin that is only equal to itself. pub fn new_opaque() -> Origin { - static COUNTER: AtomicUsize = ATOMIC_USIZE_INIT; + static COUNTER: AtomicUsize = AtomicUsize::new(0); Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst))) } From bc09a23bd457a4ec3d528bc9747b795d95cf6aa7 Mon Sep 17 00:00:00 2001 From: est31 Date: Sat, 13 Jul 2019 11:30:27 +0200 Subject: [PATCH 10/35] Increase MSRV to 1.24.0 Rust 1.24.0 was released over a year ago and is required by the prior commits.
--- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index e58559ec2..df2dd1102 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: rust jobs: include: - - rust: 1.17.0 + - rust: 1.24.0 install: # --precise requires Cargo.lock to already exist - cargo update @@ -11,8 +11,7 @@ jobs: - cargo update -p unicode-normalization --precise 0.1.5 - # data-url uses pub(crate) which is unstable in 1.17 - script: cargo test --all-features -p url -p idna -p percent-encoding -p url_serde + script: cargo test --all-features -p url -p idna -p percent-encoding -p url_serde -p data-url - rust: stable script: cargo test --all-features --all From 85bfa5312c24d2d5d4e18041f48125aa92991c46 Mon Sep 17 00:00:00 2001 From: est31 Date: Sat, 13 Jul 2019 11:44:32 +0200 Subject: [PATCH 11/35] Remove the deprecated log_syntax_violation function for 2.0 --- src/lib.rs | 14 -------------- src/parser.rs | 4 ---- tests/unit.rs | 15 --------------- 3 files changed, 33 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 203b973bb..96b8ce7d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -211,20 +211,6 @@ impl<'a> ParseOptions<'a> { self } - /// Call the provided function or closure on non-fatal parse errors, passing - /// a static string description. This method is deprecated in favor of - /// `syntax_violation_callback` and is implemented as an adaptor for the - /// latter, passing the `SyntaxViolation` description. Only the last value - /// passed to either method will be used by a parser. - #[deprecated] - pub fn log_syntax_violation(mut self, new: Option<&'a Fn(&'static str)>) -> Self { - self.violation_fn = match new { - Some(f) => ViolationFn::OldFn(f), - None => ViolationFn::NoOp - }; - self - } - /// Call the provided function or closure for a non-fatal `SyntaxViolation` /// when it occurs during parsing. 
Note that since the provided function is /// `Fn`, the caller might need to utilize _interior mutability_, such as with diff --git a/src/parser.rs b/src/parser.rs index 4f9cc524b..39f3711c8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -275,7 +275,6 @@ impl<'i> Iterator for Input<'i> { #[derive(Copy, Clone)] pub enum ViolationFn<'a> { NewFn(&'a (Fn(SyntaxViolation) + 'a)), - OldFn(&'a (Fn(&'static str) + 'a)), NoOp } @@ -284,7 +283,6 @@ impl<'a> ViolationFn<'a> { pub fn call(self, v: SyntaxViolation) { match self { ViolationFn::NewFn(f) => f(v), - ViolationFn::OldFn(f) => f(v.description()), ViolationFn::NoOp => {} } } @@ -296,7 +294,6 @@ impl<'a> ViolationFn<'a> { { match self { ViolationFn::NewFn(f) => if test() { f(v) }, - ViolationFn::OldFn(f) => if test() { f(v.description()) }, ViolationFn::NoOp => {} // avoid test } } @@ -314,7 +311,6 @@ impl<'a> fmt::Debug for ViolationFn<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match *self { ViolationFn::NewFn(_) => write!(f, "NewFn(Fn(SyntaxViolation))"), - ViolationFn::OldFn(_) => write!(f, "OldFn(Fn(&'static str))"), ViolationFn::NoOp => write!(f, "NoOp") } } diff --git a/tests/unit.rs b/tests/unit.rs index 62401c943..605a07a85 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -490,21 +490,6 @@ fn test_windows_unc_path() { assert!(url.is_err()); } -// Test the now deprecated log_syntax_violation method for backward -// compatibility -#[test] -#[allow(deprecated)] -fn test_old_log_violation_option() { - let violation = Cell::new(None); - let url = Url::options() - .log_syntax_violation(Some(&|s| violation.set(Some(s.to_owned())))) - .parse("http:////mozilla.org:42").unwrap(); - assert_eq!(url.port(), Some(42)); - - let violation = violation.take(); - assert_eq!(violation, Some("expected //".to_string())); -} - #[test] fn test_syntax_violation_callback() { use url::SyntaxViolation::*; From d6c3bcd787505b0a2e6a8f550c783740e8a646d3 Mon Sep 17 00:00:00 2001 From: est31 Date: Sun, 14 Jul 2019 01:39:40 +0200 
Subject: [PATCH 12/35] Update to serde 1.0 --- Cargo.toml | 4 ++-- src/host.rs | 14 +++++++------- src/lib.rs | 20 ++++++++++++-------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4a3ddf452..533f82c18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ test = false [dev-dependencies] rustc-test = "0.3" rustc-serialize = "0.3" -serde_json = ">=0.6.1, <0.9" +serde_json = "1.0" bencher = "0.1" @@ -47,7 +47,7 @@ heapsize = {version = ">=0.4.1, <0.5", optional = true} idna = { version = "0.1.0", path = "./idna" } matches = "0.1" percent-encoding = { version = "1.0.0", path = "./percent_encoding" } -serde = {version = ">=0.6.1, <0.9", optional = true} +serde = {version = "1.0", optional = true} [[bench]] name = "parse_url" diff --git a/src/host.rs b/src/host.rs index 38e88a3bb..c425abe2c 100644 --- a/src/host.rs +++ b/src/host.rs @@ -29,7 +29,7 @@ known_heap_size!(0, HostInternal); #[cfg(feature="serde")] impl ::serde::Serialize for HostInternal { - fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: ::serde::Serializer { + fn serialize(&self, serializer: S) -> Result where S: ::serde::Serializer { // This doesn’t use `derive` because that involves // large dependencies (that take a long time to build), and // either Macros 1.1 which are not stable yet or a cumbersome build script. @@ -47,8 +47,8 @@ impl ::serde::Serialize for HostInternal { } #[cfg(feature="serde")] -impl ::serde::Deserialize for HostInternal { - fn deserialize(deserializer: &mut D) -> Result where D: ::serde::Deserializer { +impl<'de> ::serde::Deserialize<'de> for HostInternal { + fn deserialize(deserializer: D) -> Result where D: ::serde::Deserializer<'de> { use std::net::IpAddr; Ok(match ::serde::Deserialize::deserialize(deserializer)? 
{ None => HostInternal::None, @@ -92,8 +92,8 @@ pub enum Host { } #[cfg(feature="serde")] -impl ::serde::Serialize for Host { - fn serialize(&self, serializer: &mut R) -> Result<(), R::Error> where R: ::serde::Serializer { +impl ::serde::Serialize for Host { + fn serialize(&self, serializer: R) -> Result where R: ::serde::Serializer { use std::net::IpAddr; match *self { Host::Domain(ref s) => Ok(s), @@ -104,8 +104,8 @@ impl ::serde::Serialize for Host { } #[cfg(feature="serde")] -impl ::serde::Deserialize for Host { - fn deserialize(deserializer: &mut D) -> Result where D: ::serde::Deserializer { +impl<'de, S: ::serde::Deserialize<'de>> ::serde::Deserialize<'de> for Host { + fn deserialize(deserializer: D) -> Result where D: ::serde::Deserializer<'de> { use std::net::IpAddr; Ok(match ::serde::Deserialize::deserialize(deserializer)? { Ok(s) => Host::Domain(s), diff --git a/src/lib.rs b/src/lib.rs index cc33b40cd..4c7e85d41 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2028,7 +2028,7 @@ impl Url { /// This method is only available if the `serde` Cargo feature is enabled. #[cfg(feature = "serde")] #[deny(unused)] - pub fn serialize_internal(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer { + pub fn serialize_internal(&self, serializer: S) -> Result where S: serde::Serializer { use serde::Serialize; // Destructuring first lets us ensure that adding or removing fields forces this method // to be updated @@ -2050,8 +2050,8 @@ impl Url { /// This method is only available if the `serde` Cargo feature is enabled. 
#[cfg(feature = "serde")] #[deny(unused)] - pub fn deserialize_internal(deserializer: &mut D) -> Result where D: serde::Deserializer { - use serde::{Deserialize, Error}; + pub fn deserialize_internal<'de, D>(deserializer: D) -> Result where D: serde::Deserializer<'de> { + use serde::de::{Deserialize, Error, Unexpected}; let (serialization, scheme_end, username_end, host_start, host_end, host, port, path_start, query_start, fragment_start) = Deserialize::deserialize(deserializer)?; @@ -2068,7 +2068,10 @@ impl Url { fragment_start: fragment_start }; if cfg!(debug_assertions) { - url.check_invariants().map_err(|ref reason| Error::invalid_value(&reason))? + url.check_invariants().map_err(|reason| { + let reason: &str = &reason; + Error::invalid_value(Unexpected::Other("value"), &reason) + })? } Ok(url) } @@ -2230,7 +2233,7 @@ impl RangeArg for RangeTo { /// This implementation is only available if the `serde` Cargo feature is enabled. #[cfg(feature="serde")] impl serde::Serialize for Url { - fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer { + fn serialize(&self, serializer: S) -> Result where S: serde::Serializer { serializer.serialize_str(self.as_str()) } } @@ -2239,11 +2242,12 @@ impl serde::Serialize for Url { /// /// This implementation is only available if the `serde` Cargo feature is enabled. 
#[cfg(feature="serde")] -impl serde::Deserialize for Url { - fn deserialize(deserializer: &mut D) -> Result where D: serde::Deserializer { +impl<'de> serde::Deserialize<'de> for Url { + fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de> { + use serde::de::{Unexpected, Error}; let string_representation: String = serde::Deserialize::deserialize(deserializer)?; Url::parse(&string_representation).map_err(|err| { - serde::Error::invalid_value(err.description()) + Error::invalid_value(Unexpected::Str(&string_representation), &err.description()) }) } } From a1ed777e42e3dcc57449ab95d6384cfcc29628cc Mon Sep 17 00:00:00 2001 From: est31 Date: Mon, 15 Jul 2019 01:27:47 +0200 Subject: [PATCH 13/35] Make the tests use serde_json instead of rustc_serialize --- Cargo.toml | 1 - tests/data.rs | 40 +++++++++++++++++++--------------------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 533f82c18..b28fab832 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,6 @@ test = false [dev-dependencies] rustc-test = "0.3" -rustc-serialize = "0.3" serde_json = "1.0" bencher = "0.1" diff --git a/tests/data.rs b/tests/data.rs index e9203b1b0..29d2e0deb 100644 --- a/tests/data.rs +++ b/tests/data.rs @@ -8,17 +8,17 @@ //! 
Data-driven tests -extern crate rustc_serialize; +extern crate serde_json; extern crate rustc_test as test; extern crate url; -use rustc_serialize::json::{self, Json}; +use serde_json::Value; use url::{Url, quirks}; +use std::str::FromStr; fn check_invariants(url: &Url) { url.check_invariants().unwrap(); #[cfg(feature="serde")] { - extern crate serde_json; let bytes = serde_json::to_vec(url).unwrap(); let new_url: Url = serde_json::from_slice(&bytes).unwrap(); assert_eq!(url, &new_url); @@ -84,33 +84,28 @@ struct ExpectedAttributes { } trait JsonExt { - fn take(&mut self, key: &str) -> Option; - fn object(self) -> json::Object; + fn take_key(&mut self, key: &str) -> Option; fn string(self) -> String; fn take_string(&mut self, key: &str) -> String; } -impl JsonExt for Json { - fn take(&mut self, key: &str) -> Option { +impl JsonExt for Value { + fn take_key(&mut self, key: &str) -> Option { self.as_object_mut().unwrap().remove(key) } - fn object(self) -> json::Object { - if let Json::Object(o) = self { o } else { panic!("Not a Json::Object") } - } - fn string(self) -> String { - if let Json::String(s) = self { s } else { panic!("Not a Json::String") } + if let Value::String(s) = self { s } else { panic!("Not a Value::String") } } fn take_string(&mut self, key: &str) -> String { - self.take(key).unwrap().string() + self.take_key(key).unwrap().string() } } fn collect_parsing(add_test: &mut F) { // Copied form https://github.com/w3c/web-platform-tests/blob/master/url/ - let mut json = Json::from_str(include_str!("urltestdata.json")) + let mut json = Value::from_str(include_str!("urltestdata.json")) .expect("JSON parse error in urltestdata.json"); for entry in json.as_array_mut().unwrap() { if entry.is_string() { @@ -118,12 +113,13 @@ fn collect_parsing(add_test: &mut F) { } let base = entry.take_string("base"); let input = entry.take_string("input"); - let expected = if entry.find("failure").is_some() { + let expected = if entry.take_key("failure").is_some() { Err(()) 
} else { Ok(ExpectedAttributes { href: entry.take_string("href"), - origin: entry.take("origin").map(Json::string), + origin: entry.take_key("origin") + .map(|s| s.string()), protocol: entry.take_string("protocol"), username: entry.take_string("username"), password: entry.take_string("password"), @@ -141,18 +137,20 @@ fn collect_parsing(add_test: &mut F) { } fn collect_setters(add_test: &mut F) where F: FnMut(String, test::TestFn) { - let mut json = Json::from_str(include_str!("setters_tests.json")) + let mut json = Value::from_str(include_str!("setters_tests.json")) .expect("JSON parse error in setters_tests.json"); macro_rules! setter { ($attr: expr, $setter: ident) => {{ - let mut tests = json.take($attr).unwrap(); + let mut tests = json.take_key($attr).unwrap(); for mut test in tests.as_array_mut().unwrap().drain(..) { - let comment = test.take("comment").map(Json::string).unwrap_or(String::new()); + let comment = test.take_key("comment") + .map(|s| s.string()) + .unwrap_or(String::new()); let href = test.take_string("href"); let new_value = test.take_string("new_value"); let name = format!("{:?}.{} = {:?} {}", href, $attr, new_value, comment); - let mut expected = test.take("expected").unwrap(); + let mut expected = test.take_key("expected").unwrap(); add_test(name, test::TestFn::dyn_test_fn(move || { let mut url = Url::parse(&href).unwrap(); check_invariants(&url); @@ -167,7 +165,7 @@ fn collect_setters(add_test: &mut F) where F: FnMut(String, test::TestFn) { macro_rules! 
assert_attributes { ($url: expr, $expected: expr, $($attr: ident)+) => { $( - if let Some(value) = $expected.take(stringify!($attr)) { + if let Some(value) = $expected.take_key(stringify!($attr)) { assert_eq!(quirks::$attr(&$url), value.string()) } )+ From a9526cda7592386fb12e2f79184446d825a4857a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 18:01:17 +0200 Subject: [PATCH 14/35] Simplify .travis.yml --- .travis.yml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index df2dd1102..3335746de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,27 +1,12 @@ language: rust +script: cargo test --all-features --all jobs: include: - rust: 1.24.0 - install: - # --precise requires Cargo.lock to already exist - - cargo update - # getopts is only used in tests. Its versions 0.2.16+ don’t build on 1.17.0 - - cargo update -p getopts --precise 0.2.15 - - - cargo update -p unicode-normalization --precise 0.1.5 - - script: cargo test --all-features -p url -p idna -p percent-encoding -p url_serde -p data-url - - rust: stable - script: cargo test --all-features --all - - rust: beta - script: cargo test --all-features --all - - rust: nightly - script: cargo test --all-features --all - - rust: nightly env: TARGET=WASM32 # For job list UI install: rustup target add wasm32-unknown-unknown From 629ec2a29400e4c38f1a23668924daed1f24a5ca Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 18:08:43 +0200 Subject: [PATCH 15/35] Fix autotests warning --- idna/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 1050ad35b..0a8723799 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -5,6 +5,7 @@ authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." 
repository = "https://github.com/servo/rust-url/" license = "MIT/Apache-2.0" +autotests = false [lib] doctest = false From 77a10333ecf5a21529b1f64c00edc9a79fb6867c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 18:10:41 +0200 Subject: [PATCH 16/35] cargo fix --- idna/src/punycode.rs | 10 +++++----- src/form_urlencoded.rs | 8 ++++---- src/host.rs | 2 +- src/lib.rs | 8 ++++---- src/parser.rs | 34 +++++++++++++++++----------------- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/idna/src/punycode.rs b/idna/src/punycode.rs index c0a45d89a..bc8758f86 100644 --- a/idna/src/punycode.rs +++ b/idna/src/punycode.rs @@ -80,9 +80,9 @@ pub fn decode(input: &str) -> Option> { // which gets added to i. loop { let digit = match byte { - byte @ b'0' ... b'9' => byte - b'0' + 26, - byte @ b'A' ... b'Z' => byte - b'A', - byte @ b'a' ... b'z' => byte - b'a', + byte @ b'0' ..= b'9' => byte - b'0' + 26, + byte @ b'A' ..= b'Z' => byte - b'A', + byte @ b'a' ..= b'z' => byte - b'a', _ => return None } as u32; if digit > (u32::MAX - i) / weight { @@ -204,8 +204,8 @@ pub fn encode(input: &[char]) -> Option { #[inline] fn value_to_digit(value: u32) -> char { match value { - 0 ... 25 => (value as u8 + 'a' as u8) as char, // a..z - 26 ... 35 => (value as u8 - 26 + '0' as u8) as char, // 0..9 + 0 ..= 25 => (value as u8 + 'a' as u8) as char, // a..z + 26 ..= 35 => (value as u8 - 26 + '0' as u8) as char, // 0..9 _ => panic!() } } diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs index 3669c53c1..27e650e2f 100644 --- a/src/form_urlencoded.rs +++ b/src/form_urlencoded.rs @@ -175,7 +175,7 @@ pub struct ByteSerialize<'a> { } fn byte_serialized_unchanged(byte: u8) -> bool { - matches!(byte, b'*' | b'-' | b'.' | b'0' ... b'9' | b'A' ... b'Z' | b'_' | b'a' ... b'z') + matches!(byte, b'*' | b'-' | b'.' 
| b'0' ..= b'9' | b'A' ..= b'Z' | b'_' | b'a' ..= b'z') } impl<'a> Iterator for ByteSerialize<'a> { @@ -216,7 +216,7 @@ pub struct Serializer { target: Option, start_position: usize, encoding: EncodingOverride, - custom_encoding: Option Cow<[u8]>>>>, + custom_encoding: Option Cow<[u8]>>>>, } struct SilentDebug(T); @@ -391,7 +391,7 @@ fn string(target: &mut Option) -> &mut String { } fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>, + custom_encoding: &mut Option Cow<[u8]>>>>, name: &str, value: &str) { append_separator_if_needed(string, start_position); append_encoded(name, string, encoding, custom_encoding); @@ -400,7 +400,7 @@ fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOve } fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>) { + custom_encoding: &mut Option Cow<[u8]>>>>) { let bytes = if let Some(SilentDebug(ref mut custom)) = *custom_encoding { custom(s) } else { diff --git a/src/host.rs b/src/host.rs index c425abe2c..8c8a1d0a4 100644 --- a/src/host.rs +++ b/src/host.rs @@ -500,7 +500,7 @@ fn parse_ipv6addr(input: &str) -> ParseResult { let mut ipv4_piece = None; while i < len { let digit = match input[i] { - c @ b'0' ... b'9' => c - b'0', + c @ b'0' ..= b'9' => c - b'0', _ => break }; match ipv4_piece { diff --git a/src/lib.rs b/src/lib.rs index a6db7d89b..da85f3546 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,7 +216,7 @@ impl<'a> ParseOptions<'a> { /// latter, passing the `SyntaxViolation` description. Only the last value /// passed to either method will be used by a parser. 
#[deprecated] - pub fn log_syntax_violation(mut self, new: Option<&'a Fn(&'static str)>) -> Self { + pub fn log_syntax_violation(mut self, new: Option<&'a dyn Fn(&'static str)>) -> Self { self.violation_fn = match new { Some(f) => ViolationFn::OldFn(f), None => ViolationFn::NoOp @@ -246,7 +246,7 @@ impl<'a> ParseOptions<'a> { /// # } /// # run().unwrap(); /// ``` - pub fn syntax_violation_callback(mut self, new: Option<&'a Fn(SyntaxViolation)>) -> Self { + pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { self.violation_fn = match new { Some(f) => ViolationFn::NewFn(f), None => ViolationFn::NoOp @@ -484,9 +484,9 @@ impl Url { } assert!(self.scheme_end >= 1); - assert!(matches!(self.byte_at(0), b'a'...b'z' | b'A'...b'Z')); + assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z')); assert!(self.slice(1..self.scheme_end).chars() - .all(|c| matches!(c, 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.'))); + .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.'))); assert_eq!(self.byte_at(self.scheme_end), b':'); if self.slice(self.scheme_end + 1 ..).starts_with("//") { diff --git a/src/parser.rs b/src/parser.rs index 0c8cf6e71..dd5943b2a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -271,8 +271,8 @@ impl<'i> Iterator for Input<'i> { /// Wrapper for syntax violation callback functions. #[derive(Copy, Clone)] pub enum ViolationFn<'a> { - NewFn(&'a (Fn(SyntaxViolation) + 'a)), - OldFn(&'a (Fn(&'static str) + 'a)), + NewFn(&'a (dyn Fn(SyntaxViolation) + 'a)), + OldFn(&'a (dyn Fn(&'static str) + 'a)), NoOp } @@ -376,7 +376,7 @@ impl<'a> Parser<'a> { debug_assert!(self.serialization.is_empty()); while let Some(c) = input.next() { match c { - 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.' => { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' 
=> { self.serialization.push(c.to_ascii_lowercase()) } ':' => return Ok(input), @@ -1218,7 +1218,7 @@ impl<'a> Parser<'a> { #[inline] fn is_ascii_hex_digit(c: char) -> bool { - matches!(c, 'a'...'f' | 'A'...'F' | '0'...'9') + matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9') } // Non URL code points: @@ -1231,20 +1231,20 @@ fn is_ascii_hex_digit(c: char) -> bool { #[inline] fn is_url_code_point(c: char) -> bool { matches!(c, - 'a'...'z' | - 'A'...'Z' | - '0'...'9' | + 'a'..='z' | + 'A'..='Z' | + '0'..='9' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' | - '\u{A0}'...'\u{D7FF}' | '\u{E000}'...'\u{FDCF}' | '\u{FDF0}'...'\u{FFFD}' | - '\u{10000}'...'\u{1FFFD}' | '\u{20000}'...'\u{2FFFD}' | - '\u{30000}'...'\u{3FFFD}' | '\u{40000}'...'\u{4FFFD}' | - '\u{50000}'...'\u{5FFFD}' | '\u{60000}'...'\u{6FFFD}' | - '\u{70000}'...'\u{7FFFD}' | '\u{80000}'...'\u{8FFFD}' | - '\u{90000}'...'\u{9FFFD}' | '\u{A0000}'...'\u{AFFFD}' | - '\u{B0000}'...'\u{BFFFD}' | '\u{C0000}'...'\u{CFFFD}' | - '\u{D0000}'...'\u{DFFFD}' | '\u{E1000}'...'\u{EFFFD}' | - '\u{F0000}'...'\u{FFFFD}' | '\u{100000}'...'\u{10FFFD}') + '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | + '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' | + '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' | + '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' | + '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' | + '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' | + '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' | + '\u{D0000}'..='\u{DFFFD}' | '\u{E1000}'..='\u{EFFFD}' | + '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') } /// https://url.spec.whatwg.org/#c0-controls-and-space @@ -1256,7 +1256,7 @@ fn c0_control_or_space(ch: char) -> bool { /// https://url.spec.whatwg.org/#ascii-alpha #[inline] pub fn ascii_alpha(ch: char) -> bool { - matches!(ch, 'a'...'z' | 'A'...'Z') + matches!(ch, 'a'..='z' | 
'A'..='Z') } #[inline] From 48f3bed3e773b8d9da10f1bb99094575afda4c43 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 18:21:12 +0200 Subject: [PATCH 17/35] Use Option instead of a custom ViolationFn enum --- .travis.yml | 2 +- src/lib.rs | 13 +++---- src/parser.rs | 102 +++++++++++++++++--------------------------------- 3 files changed, 41 insertions(+), 76 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3335746de..40e34584c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ script: cargo test --all-features --all jobs: include: - - rust: 1.24.0 + - rust: 1.27.0 - rust: stable - rust: beta - rust: nightly diff --git a/src/lib.rs b/src/lib.rs index e7183710a..80cad8445 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,7 +118,7 @@ pub extern crate percent_encoding; use encoding::EncodingOverride; #[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; use host::HostInternal; -use parser::{Parser, Context, SchemeType, to_u32, ViolationFn}; +use parser::{Parser, Context, SchemeType, to_u32}; use percent_encoding::{PATH_SEGMENT_ENCODE_SET, USERINFO_ENCODE_SET, percent_encode, percent_decode, utf8_percent_encode}; use std::borrow::Borrow; @@ -187,7 +187,7 @@ impl HeapSizeOf for Url { pub struct ParseOptions<'a> { base_url: Option<&'a Url>, encoding_override: encoding::EncodingOverride, - violation_fn: ViolationFn<'a>, + violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, } impl<'a> ParseOptions<'a> { @@ -233,10 +233,7 @@ impl<'a> ParseOptions<'a> { /// # run().unwrap(); /// ``` pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { - self.violation_fn = match new { - Some(f) => ViolationFn::NewFn(f), - None => ViolationFn::NoOp - }; + self.violation_fn = new; self } @@ -259,7 +256,7 @@ impl<'a> Debug for ParseOptions<'a> { violation_fn: {:?} }}", self.base_url, self.encoding_override, - self.violation_fn) + self.violation_fn.map(|_| "…")) } } @@ -389,7 +386,7 @@ impl Url { ParseOptions { base_url: 
None, encoding_override: EncodingOverride::utf8(), - violation_fn: ViolationFn::NoOp, + violation_fn: None, } } diff --git a/src/parser.rs b/src/parser.rs index 79b75bffa..3ebf1b965 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -165,17 +165,17 @@ pub struct Input<'i> { impl<'i> Input<'i> { pub fn new(input: &'i str) -> Self { - Input::with_log(input, ViolationFn::NoOp) + Input::with_log(input, None) } - pub fn with_log(original_input: &'i str, vfn: ViolationFn) -> Self { + pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self { let input = original_input.trim_matches(c0_control_or_space); - if vfn.is_set() { + if let Some(vfn) = vfn { if input.len() < original_input.len() { - vfn.call(SyntaxViolation::C0SpaceIgnored) + vfn(SyntaxViolation::C0SpaceIgnored) } if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { - vfn.call(SyntaxViolation::TabOrNewlineIgnored) + vfn(SyntaxViolation::TabOrNewlineIgnored) } } Input { chars: input.chars() } @@ -268,56 +268,11 @@ impl<'i> Iterator for Input<'i> { } } -/// Wrapper for syntax violation callback functions. -#[derive(Copy, Clone)] -pub enum ViolationFn<'a> { - NewFn(&'a (dyn Fn(SyntaxViolation) + 'a)), - NoOp -} - -impl<'a> ViolationFn<'a> { - /// Call with a violation. - pub fn call(self, v: SyntaxViolation) { - match self { - ViolationFn::NewFn(f) => f(v), - ViolationFn::NoOp => {} - } - } - - /// Call with a violation, if provided test returns true. Avoids - /// the test entirely if `NoOp`. 
- pub fn call_if(self, v: SyntaxViolation, test: F) - where F: Fn() -> bool - { - match self { - ViolationFn::NewFn(f) => if test() { f(v) }, - ViolationFn::NoOp => {} // avoid test - } - } - - /// True if not `NoOp` - pub fn is_set(self) -> bool { - match self { - ViolationFn::NoOp => false, - _ => true - } - } -} - -impl<'a> fmt::Debug for ViolationFn<'a> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match *self { - ViolationFn::NewFn(_) => write!(f, "NewFn(Fn(SyntaxViolation))"), - ViolationFn::NoOp => write!(f, "NoOp") - } - } -} - pub struct Parser<'a> { pub serialization: String, pub base_url: Option<&'a Url>, pub query_encoding_override: EncodingOverride, - pub violation_fn: ViolationFn<'a>, + pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, pub context: Context, } @@ -329,12 +284,26 @@ pub enum Context { } impl<'a> Parser<'a> { + fn log_violation(&self, v: SyntaxViolation) { + if let Some(f) = self.violation_fn { + f(v) + } + } + + fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) { + if let Some(f) = self.violation_fn { + if test() { + f(v) + } + } + } + pub fn for_setter(serialization: String) -> Parser<'a> { Parser { serialization: serialization, base_url: None, query_encoding_override: EncodingOverride::utf8(), - violation_fn: ViolationFn::NoOp, + violation_fn: None, context: Context::Setter, } } @@ -398,7 +367,7 @@ impl<'a> Parser<'a> { self.serialization.push(':'); match scheme_type { SchemeType::File => { - self.violation_fn.call_if(ExpectedFileDoubleSlash, || !input.starts_with("//")); + self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//")); let base_file_url = self.base_url.and_then(|base| { if base.scheme() == "file" { Some(base) } else { None } }); @@ -418,7 +387,7 @@ impl<'a> Parser<'a> { } } // special authority slashes state - self.violation_fn.call_if(ExpectedDoubleSlash, || { + self.log_violation_if(ExpectedDoubleSlash, || { input.clone().take_while(|&c| matches!(c, '/' | '\\')) 
.collect::() != "//" }); @@ -552,10 +521,10 @@ impl<'a> Parser<'a> { } } Some('/') | Some('\\') => { - self.violation_fn.call_if(Backslash, || first_char == Some('\\')); + self.log_violation_if(Backslash, || first_char == Some('\\')); // file slash state let (next_char, input_after_next_char) = input_after_first_char.split_first(); - self.violation_fn.call_if(Backslash, || next_char == Some('\\')); + self.log_violation_if(Backslash, || next_char == Some('\\')); if matches!(next_char, Some('/') | Some('\\')) { // file host state self.serialization.push_str("file://"); @@ -707,7 +676,7 @@ impl<'a> Parser<'a> { Some('/') | Some('\\') => { let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); if slashes_count >= 2 { - self.violation_fn.call_if(SyntaxViolation::ExpectedDoubleSlash, || { + self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || { input.clone().take_while(|&c| matches!(c, '/' | '\\')) .collect::() != "//" }); @@ -771,9 +740,9 @@ impl<'a> Parser<'a> { match c { '@' => { if last_at.is_some() { - self.violation_fn.call(SyntaxViolation::UnencodedAtSign) + self.log_violation(SyntaxViolation::UnencodedAtSign) } else { - self.violation_fn.call(SyntaxViolation::EmbeddedCredentials) + self.log_violation(SyntaxViolation::EmbeddedCredentials) } last_at = Some((char_count, remaining.clone())) }, @@ -971,7 +940,7 @@ impl<'a> Parser<'a> { match input.split_first() { (Some('/'), remaining) => input = remaining, (Some('\\'), remaining) => if scheme_type.is_special() { - self.violation_fn.call(SyntaxViolation::Backslash); + self.log_violation(SyntaxViolation::Backslash); input = remaining }, _ => {} @@ -999,7 +968,7 @@ impl<'a> Parser<'a> { }, '\\' if self.context != Context::PathSegmentSetter && scheme_type.is_special() => { - self.violation_fn.call(SyntaxViolation::Backslash); + self.log_violation(SyntaxViolation::Backslash); ends_with_slash = true; break }, @@ -1045,7 +1014,7 @@ impl<'a> Parser<'a> { self.serialization.push(':'); } 
if *has_host { - self.violation_fn.call(SyntaxViolation::FileWithHostAndWindowsDrive); + self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive); *has_host = false; // FIXME account for this in callers } } @@ -1187,7 +1156,7 @@ impl<'a> Parser<'a> { pub fn parse_fragment(&mut self, mut input: Input) { while let Some((c, utf8_c)) = input.next_utf8() { if c == '\0' { - self.violation_fn.call(SyntaxViolation::NullInFragment) + self.log_violation(SyntaxViolation::NullInFragment) } else { self.check_url_code_point(c, &input); self.serialization.extend(utf8_percent_encode(utf8_c, @@ -1197,16 +1166,15 @@ impl<'a> Parser<'a> { } fn check_url_code_point(&self, c: char, input: &Input) { - let vfn = self.violation_fn; - if vfn.is_set() { + if let Some(vfn) = self.violation_fn { if c == '%' { let mut input = input.clone(); if !matches!((input.next(), input.next()), (Some(a), Some(b)) if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) { - vfn.call(SyntaxViolation::PercentDecode) + vfn(SyntaxViolation::PercentDecode) } } else if !is_url_code_point(c) { - vfn.call(SyntaxViolation::NonUrlCodePoint) + vfn(SyntaxViolation::NonUrlCodePoint) } } } From 9b8c149ce1d4757b5e63fa4dbbc08ac31ea7ce7f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 14 Jul 2019 14:37:29 +0200 Subject: [PATCH 18/35] Tell rustfmt to leave some data tables alone --- data-url/src/forgiving_base64.rs | 1 + data-url/src/mime.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/data-url/src/forgiving_base64.rs b/data-url/src/forgiving_base64.rs index a4dbae75d..c38388d78 100644 --- a/data-url/src/forgiving_base64.rs +++ b/data-url/src/forgiving_base64.rs @@ -148,6 +148,7 @@ impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { /// Array values are their positions in the base64 alphabet, /// or -1 for symbols not in the alphabet. /// The position contributes 6 bits to the decoded bytes. 
+#[rustfmt::skip] const BASE64_DECODE_TABLE: [i8; 256] = [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, diff --git a/data-url/src/mime.rs b/data-url/src/mime.rs index 101b240a1..22c5a5e3f 100644 --- a/data-url/src/mime.rs +++ b/data-url/src/mime.rs @@ -160,6 +160,7 @@ macro_rules! byte_map { } // Copied from https://github.com/hyperium/mime/blob/v0.3.5/src/parse.rs#L293 +#[rustfmt::skip] static IS_HTTP_TOKEN: [bool; 256] = byte_map![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, From c276a8237a74cb89b5e9d273b7a526237941cef2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 14 Jul 2019 14:37:45 +0200 Subject: [PATCH 19/35] rustfmt --- data-url/src/forgiving_base64.rs | 34 ++- data-url/src/lib.rs | 76 +++--- data-url/src/mime.rs | 20 +- data-url/tests/wpt.rs | 35 ++- idna/src/lib.rs | 31 ++- idna/src/punycode.rs | 83 +++--- idna/src/uts46.rs | 151 ++++++----- idna/tests/punycode.rs | 60 +++-- idna/tests/unit.rs | 19 +- idna/tests/uts46.rs | 127 +++++---- percent_encoding/lib.rs | 84 +++--- src/encoding.rs | 43 +-- src/form_urlencoded.rs | 126 ++++++--- src/host.rs | 194 +++++++++----- src/lib.rs | 443 ++++++++++++++++++++----------- src/origin.rs | 28 +- src/parser.rs | 432 +++++++++++++++++++----------- src/path_segments.rs | 30 ++- src/quirks.rs | 29 +- src/slicing.rs | 69 ++--- tests/data.rs | 52 ++-- tests/unit.rs | 182 +++++++++---- url_serde/src/lib.rs | 149 +++++++---- 23 files changed, 1566 insertions(+), 931 deletions(-) diff --git a/data-url/src/forgiving_base64.rs b/data-url/src/forgiving_base64.rs index c38388d78..95dd92232 100644 --- a/data-url/src/forgiving_base64.rs +++ b/data-url/src/forgiving_base64.rs @@ -29,7 +29,7 @@ impl From> for InvalidBase64 { fn from(e: DecodeError) -> Self { match e { DecodeError::InvalidBase64(e) => e, - DecodeError::WriteError(e) => match e {} + DecodeError::WriteError(e) => match e {}, } 
} } @@ -46,14 +46,20 @@ pub fn decode_to_vec(input: &[u8]) -> Result, InvalidBase64> { } /// -pub struct Decoder where F: FnMut(&[u8]) -> Result<(), E> { +pub struct Decoder +where + F: FnMut(&[u8]) -> Result<(), E>, +{ write_bytes: F, bit_buffer: u32, buffer_bit_length: u8, padding_symbols: u8, } -impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { +impl Decoder +where + F: FnMut(&[u8]) -> Result<(), E>, +{ pub fn new(write_bytes: F) -> Self { Self { write_bytes, @@ -72,12 +78,12 @@ impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { // Remove ASCII whitespace if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0C') { - continue + continue; } if byte == b'=' { self.padding_symbols = self.padding_symbols.saturating_add(1); - continue + continue; } Err(InvalidBase64Details::UnexpectedSymbol(byte))? @@ -115,32 +121,22 @@ impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { (12, 2) | (12, 0) => { // A multiple of four of alphabet symbols, followed by two more symbols, // optionally followed by two padding characters (which make a total multiple of four). - let byte_buffer = [ - (self.bit_buffer >> 4) as u8, - ]; + let byte_buffer = [(self.bit_buffer >> 4) as u8]; (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; } (18, 1) | (18, 0) => { // A multiple of four of alphabet symbols, followed by three more symbols, // optionally followed by one padding character (which make a total multiple of four). - let byte_buffer = [ - (self.bit_buffer >> 10) as u8, - (self.bit_buffer >> 2) as u8, - ]; + let byte_buffer = [(self.bit_buffer >> 10) as u8, (self.bit_buffer >> 2) as u8]; (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; } - (6, _) => { - Err(InvalidBase64Details::LoneAlphabetSymbol)? - } - _ => { - Err(InvalidBase64Details::Padding)? 
- } + (6, _) => Err(InvalidBase64Details::LoneAlphabetSymbol)?, + _ => Err(InvalidBase64Details::Padding)?, } Ok(()) } } - /// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet" /// at /// diff --git a/data-url/src/lib.rs b/data-url/src/lib.rs index cd5788f78..34bbd1927 100644 --- a/data-url/src/lib.rs +++ b/data-url/src/lib.rs @@ -15,14 +15,15 @@ //! assert!(fragment.is_none()); //! ``` -#[macro_use] extern crate matches; +#[macro_use] +extern crate matches; macro_rules! require { ($condition: expr) => { if !$condition { - return None + return None; } - } + }; } pub mod forgiving_base64; @@ -53,7 +54,11 @@ impl<'a> DataUrl<'a> { let (mime_type, base64) = parse_header(from_colon_to_comma); - Ok(DataUrl { mime_type, base64, encoded_body_plus_fragment }) + Ok(DataUrl { + mime_type, + base64, + encoded_body_plus_fragment, + }) } pub fn mime_type(&self) -> &mime::Mime { @@ -62,9 +67,12 @@ impl<'a> DataUrl<'a> { /// Streaming-decode the data URL’s body to `write_body_bytes`, /// and return the URL’s fragment identifier if it has one. - pub fn decode(&self, write_body_bytes: F) - -> Result>, forgiving_base64::DecodeError> - where F: FnMut(&[u8]) -> Result<(), E> + pub fn decode( + &self, + write_body_bytes: F, + ) -> Result>, forgiving_base64::DecodeError> + where + F: FnMut(&[u8]) -> Result<(), E>, { if self.base64 { decode_with_base64(self.encoded_body_plus_fragment, write_body_bytes) @@ -75,9 +83,9 @@ impl<'a> DataUrl<'a> { } /// Return the decoded body, and the URL’s fragment identifier if it has one. 
- pub fn decode_to_vec(&self) - -> Result<(Vec, Option>), forgiving_base64::InvalidBase64> - { + pub fn decode_to_vec( + &self, + ) -> Result<(Vec, Option>), forgiving_base64::InvalidBase64> { let mut body = Vec::new(); let fragment = self.decode(|bytes| Ok(body.extend_from_slice(bytes)))?; Ok((body, fragment)) @@ -100,7 +108,7 @@ impl<'a> FragmentIdentifier<'a> { percent_encode(byte, &mut string) } // Printable ASCII - _ => string.push(byte as char) + _ => string.push(byte as char), } } string @@ -125,7 +133,9 @@ fn pretend_parse_data_url(input: &str) -> Option<&str> { let mut bytes = left_trimmed.bytes(); { // Ignore ASCII tabs or newlines like the URL parser would - let mut iter = bytes.by_ref().filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); + let mut iter = bytes + .by_ref() + .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); require!(iter.next()?.to_ascii_lowercase() == b'd'); require!(iter.next()?.to_ascii_lowercase() == b'a'); require!(iter.next()?.to_ascii_lowercase() == b't'); @@ -142,10 +152,10 @@ fn pretend_parse_data_url(input: &str) -> Option<&str> { fn find_comma_before_fragment(after_colon: &str) -> Option<(&str, &str)> { for (i, byte) in after_colon.bytes().enumerate() { if byte == b',' { - return Some((&after_colon[..i], &after_colon[i + 1..])) + return Some((&after_colon[..i], &after_colon[i + 1..])); } if byte == b'#' { - break + break; } } None @@ -187,18 +197,16 @@ fn parse_header(from_colon_to_comma: &str) -> (mime::Mime, bool) { } // Printable ASCII - _ => string.push(byte as char) + _ => string.push(byte as char), } } // FIXME: does Mime::from_str match the MIME Sniffing Standard’s parsing algorithm? 
// - let mime_type = string.parse().unwrap_or_else(|_| { - mime::Mime { - type_: String::from("text"), - subtype: String::from("plain"), - parameters: vec![(String::from("charset"), String::from("US-ASCII"))], - } + let mime_type = string.parse().unwrap_or_else(|_| mime::Mime { + type_: String::from("text"), + subtype: String::from("plain"), + parameters: vec![(String::from("charset"), String::from("US-ASCII"))], }); (mime_type, base64) @@ -209,7 +217,9 @@ fn remove_base64_suffix(s: &str) -> Option<&str> { let mut bytes = s.bytes(); { // Ignore ASCII tabs or newlines like the URL parser would - let iter = bytes.by_ref().filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); + let iter = bytes + .by_ref() + .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); // Search from the end let mut iter = iter.rev(); @@ -240,9 +250,12 @@ fn percent_encode(byte: u8, string: &mut String) { /// Anything that would have been UTF-8 percent-encoded by the URL parser /// would be percent-decoded here. /// We skip that round-trip and pass it through unchanged. -fn decode_without_base64(encoded_body_plus_fragment: &str, mut write_bytes: F) - -> Result, E> - where F: FnMut(&[u8]) -> Result<(), E> +fn decode_without_base64( + encoded_body_plus_fragment: &str, + mut write_bytes: F, +) -> Result, E> +where + F: FnMut(&[u8]) -> Result<(), E>, { let bytes = encoded_body_plus_fragment.as_bytes(); let mut slice_start = 0; @@ -275,11 +288,11 @@ fn decode_without_base64(encoded_body_plus_fragment: &str, mut write_bytes b'#' => { let fragment_start = i + 1; let fragment = &encoded_body_plus_fragment[fragment_start..]; - return Ok(Some(FragmentIdentifier(fragment))) + return Ok(Some(FragmentIdentifier(fragment))); } // Ignore over '\t' | '\n' | '\r' - _ => slice_start = i + 1 + _ => slice_start = i + 1, } } } @@ -290,9 +303,12 @@ fn decode_without_base64(encoded_body_plus_fragment: &str, mut write_bytes /// `decode_without_base64()` composed with /// composed with /// . 
-fn decode_with_base64(encoded_body_plus_fragment: &str, write_bytes: F) - -> Result, forgiving_base64::DecodeError> - where F: FnMut(&[u8]) -> Result<(), E> +fn decode_with_base64( + encoded_body_plus_fragment: &str, + write_bytes: F, +) -> Result, forgiving_base64::DecodeError> +where + F: FnMut(&[u8]) -> Result<(), E>, { let mut decoder = forgiving_base64::Decoder::new(write_bytes); let fragment = decode_without_base64(encoded_body_plus_fragment, |bytes| decoder.feed(bytes))?; diff --git a/data-url/src/mime.rs b/data-url/src/mime.rs index 22c5a5e3f..fbbe1150a 100644 --- a/data-url/src/mime.rs +++ b/data-url/src/mime.rs @@ -7,14 +7,16 @@ pub struct Mime { pub type_: String, pub subtype: String, /// (name, value) - pub parameters: Vec<(String, String)> + pub parameters: Vec<(String, String)>, } impl Mime { pub fn get_parameter

(&self, name: &P) -> Option<&str> - where P: ?Sized + PartialEq + where + P: ?Sized + PartialEq, { - self.parameters.iter() + self.parameters + .iter() .find(|&&(ref n, _)| name == &**n) .map(|&(_, ref v)| &**v) } @@ -67,11 +69,11 @@ fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { let piece = piece.trim_left_matches(ascii_whitespace); let (name, value) = split2(piece, '='); if name.is_empty() || !only_http_token_code_points(name) || contains(¶meters, name) { - continue + continue; } if let Some(value) = value { let value = if value.starts_with('"') { - let max_len = value.len().saturating_sub(2); // without start or end quotes + let max_len = value.len().saturating_sub(2); // without start or end quotes let mut unescaped_value = String::with_capacity(max_len); let mut chars = value[1..].chars(); 'until_closing_quote: loop { @@ -79,7 +81,7 @@ fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { match c { '"' => break 'until_closing_quote, '\\' => unescaped_value.push(chars.next().unwrap_or('\\')), - _ => unescaped_value.push(c) + _ => unescaped_value.push(c), } } if let Some(piece) = semicolon_separated.next() { @@ -88,17 +90,17 @@ fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { unescaped_value.push(';'); chars = piece.chars() } else { - break + break; } } if !valid_value(&unescaped_value) { - continue + continue; } unescaped_value } else { let value = value.trim_right_matches(ascii_whitespace); if !valid_value(value) { - continue + continue; } value.to_owned() }; diff --git a/data-url/tests/wpt.rs b/data-url/tests/wpt.rs index 7481529e8..3ab410051 100644 --- a/data-url/tests/wpt.rs +++ b/data-url/tests/wpt.rs @@ -1,6 +1,7 @@ extern crate data_url; extern crate rustc_test; -#[macro_use] extern crate serde; +#[macro_use] +extern crate serde; extern crate serde_json; fn run_data_url(input: String, expected_mime: Option, expected_body: Option>) { @@ -22,11 +23,10 @@ fn run_data_url(input: String, 
expected_mime: Option, expected_body: Opt } fn collect_data_url(add_test: &mut F) - where F: FnMut(String, bool, rustc_test::TestFn) +where + F: FnMut(String, bool, rustc_test::TestFn), { - let known_failures = [ - "data://test:test/,X", - ]; + let known_failures = ["data://test:test/,X"]; #[derive(Deserialize)] #[serde(untagged)] @@ -47,7 +47,7 @@ fn collect_data_url(add_test: &mut F) should_panic, rustc_test::TestFn::dyn_test_fn(move || { run_data_url(input, expected_mime, expected_body) - }) + }), ); } } @@ -62,9 +62,9 @@ fn run_base64(input: String, expected: Option>) { } } - fn collect_base64(add_test: &mut F) - where F: FnMut(String, bool, rustc_test::TestFn) +where + F: FnMut(String, bool, rustc_test::TestFn), { let known_failures = []; @@ -75,9 +75,7 @@ fn collect_base64(add_test: &mut F) add_test( format!("base64 {:?}", input), should_panic, - rustc_test::TestFn::dyn_test_fn(move || { - run_base64(input, expected) - }) + rustc_test::TestFn::dyn_test_fn(move || run_base64(input, expected)), ); } } @@ -92,9 +90,9 @@ fn run_mime(input: String, expected: Option) { } } - fn collect_mime(add_test: &mut F) - where F: FnMut(String, bool, rustc_test::TestFn) +where + F: FnMut(String, bool, rustc_test::TestFn), { let known_failures = []; @@ -102,7 +100,10 @@ fn collect_mime(add_test: &mut F) #[serde(untagged)] enum Entry { Comment(String), - TestCase { input: String, output: Option } + TestCase { + input: String, + output: Option, + }, } let v: Vec = serde_json::from_str(include_str!("mime-types.json")).unwrap(); @@ -115,7 +116,7 @@ fn collect_mime(add_test: &mut F) Entry::TestCase { input, output } => (input, output), Entry::Comment(s) => { last_comment = Some(s); - continue + continue; } }; @@ -127,9 +128,7 @@ fn collect_mime(add_test: &mut F) format!("MIME type {:?}", input) }, should_panic, - rustc_test::TestFn::dyn_test_fn(move || { - run_mime(input, expected) - }) + rustc_test::TestFn::dyn_test_fn(move || run_mime(input, expected)), ); } } diff --git 
a/idna/src/lib.rs b/idna/src/lib.rs index 92b5df230..d8b5f7155 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -32,7 +32,8 @@ //! > that minimizes the impact of this transition for client software, //! > allowing client software to access domains that are valid under either system. -#[macro_use] extern crate matches; +#[macro_use] +extern crate matches; extern crate unicode_bidi; extern crate unicode_normalization; @@ -47,11 +48,14 @@ pub mod uts46; /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result { - uts46::to_ascii(domain, uts46::Flags { - use_std3_ascii_rules: false, - transitional_processing: false, - verify_dns_length: false, - }) + uts46::to_ascii( + domain, + uts46::Flags { + use_std3_ascii_rules: false, + transitional_processing: false, + verify_dns_length: false, + }, + ) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -63,11 +67,14 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - uts46::to_unicode(domain, uts46::Flags { - use_std3_ascii_rules: false, + uts46::to_unicode( + domain, + uts46::Flags { + use_std3_ascii_rules: false, - // Unused: - transitional_processing: false, - verify_dns_length: false, - }) + // Unused: + transitional_processing: false, + verify_dns_length: false, + }, + ) } diff --git a/idna/src/punycode.rs b/idna/src/punycode.rs index bc8758f86..829684b14 100644 --- a/idna/src/punycode.rs +++ b/idna/src/punycode.rs @@ -13,8 +13,8 @@ //! `encode_str` and `decode_to_string` provide convenience wrappers //! that convert from and to Rust’s UTF-8 based `str` and `String` types. 
-use std::u32; use std::char; +use std::u32; // Bootstring parameters for Punycode static BASE: u32 = 36; @@ -26,7 +26,6 @@ static INITIAL_BIAS: u32 = 72; static INITIAL_N: u32 = 0x80; static DELIMITER: char = '-'; - #[inline] fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 { delta /= if first_time { DAMP } else { 2 }; @@ -39,7 +38,6 @@ fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 { k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW)) } - /// Convert Punycode to an Unicode `String`. /// /// This is a convenience wrapper around `decode`. @@ -48,7 +46,6 @@ pub fn decode_to_string(input: &str) -> Option { decode(input).map(|chars| chars.into_iter().collect()) } - /// Convert Punycode to Unicode. /// /// Return None on malformed input or overflow. @@ -61,8 +58,12 @@ pub fn decode(input: &str) -> Option> { None => (Vec::new(), input), Some(position) => ( input[..position].chars().collect(), - if position > 0 { &input[position + 1..] } else { input } - ) + if position > 0 { + &input[position + 1..] + } else { + input + }, + ), }; let mut code_point = INITIAL_N; let mut bias = INITIAL_BIAS; @@ -80,35 +81,39 @@ pub fn decode(input: &str) -> Option> { // which gets added to i. 
loop { let digit = match byte { - byte @ b'0' ..= b'9' => byte - b'0' + 26, - byte @ b'A' ..= b'Z' => byte - b'A', - byte @ b'a' ..= b'z' => byte - b'a', - _ => return None + byte @ b'0'..=b'9' => byte - b'0' + 26, + byte @ b'A'..=b'Z' => byte - b'A', + byte @ b'a'..=b'z' => byte - b'a', + _ => return None, } as u32; if digit > (u32::MAX - i) / weight { - return None // Overflow + return None; // Overflow } i += digit * weight; - let t = if k <= bias { T_MIN } - else if k >= bias + T_MAX { T_MAX } - else { k - bias }; + let t = if k <= bias { + T_MIN + } else if k >= bias + T_MAX { + T_MAX + } else { + k - bias + }; if digit < t { - break + break; } if weight > u32::MAX / (BASE - t) { - return None // Overflow + return None; // Overflow } weight *= BASE - t; k += BASE; byte = match iter.next() { - None => return None, // End of input before the end of this delta + None => return None, // End of input before the end of this delta Some(byte) => byte, }; } let length = output.len() as u32; bias = adapt(i - previous_i, length + 1, previous_i == 0); if i / (length + 1) > u32::MAX - code_point { - return None // Overflow + return None; // Overflow } // i was supposed to wrap around from length+1 to 0, // incrementing code_point each time. @@ -116,7 +121,7 @@ pub fn decode(input: &str) -> Option> { i %= length + 1; let c = match char::from_u32(code_point) { Some(c) => c, - None => return None + None => return None, }; output.insert(i as usize, c); i += 1; @@ -124,7 +129,6 @@ pub fn decode(input: &str) -> Option> { Some(output) } - /// Convert an Unicode `str` to Punycode. /// /// This is a convenience wrapper around `encode`. @@ -133,16 +137,16 @@ pub fn encode_str(input: &str) -> Option { encode(&input.chars().collect::>()) } - /// Convert Unicode to Punycode. /// /// Return None on overflow, which can only happen on inputs that would take more than /// 63 encoded bytes, the DNS limit on domain name labels. 
pub fn encode(input: &[char]) -> Option { // Handle "basic" (ASCII) code points. They are encoded as-is. - let output_bytes = input.iter().filter_map(|&c| - if c.is_ascii() { Some(c as u8) } else { None } - ).collect(); + let output_bytes = input + .iter() + .filter_map(|&c| if c.is_ascii() { Some(c as u8) } else { None }) + .collect(); let mut output = unsafe { String::from_utf8_unchecked(output_bytes) }; let basic_length = output.len() as u32; if basic_length > 0 { @@ -156,10 +160,14 @@ pub fn encode(input: &[char]) -> Option { while processed < input_length { // All code points < code_point have been handled already. // Find the next larger one. - let min_code_point = input.iter().map(|&c| c as u32) - .filter(|&c| c >= code_point).min().unwrap(); + let min_code_point = input + .iter() + .map(|&c| c as u32) + .filter(|&c| c >= code_point) + .min() + .unwrap(); if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) { - return None // Overflow + return None; // Overflow } // Increase delta to advance the decoder’s state to delta += (min_code_point - code_point) * (processed + 1); @@ -169,7 +177,7 @@ pub fn encode(input: &[char]) -> Option { if c < code_point { delta += 1; if delta == 0 { - return None // Overflow + return None; // Overflow } } if c == code_point { @@ -177,11 +185,15 @@ pub fn encode(input: &[char]) -> Option { let mut q = delta; let mut k = BASE; loop { - let t = if k <= bias { T_MIN } - else if k >= bias + T_MAX { T_MAX } - else { k - bias }; + let t = if k <= bias { + T_MIN + } else if k >= bias + T_MAX { + T_MAX + } else { + k - bias + }; if q < t { - break + break; } let value = t + ((q - t) % (BASE - t)); output.push(value_to_digit(value)); @@ -200,12 +212,11 @@ pub fn encode(input: &[char]) -> Option { Some(output) } - #[inline] fn value_to_digit(value: u32) -> char { match value { - 0 ..= 25 => (value as u8 + 'a' as u8) as char, // a..z - 26 ..= 35 => (value as u8 - 26 + '0' as u8) as char, // 0..9 - _ => panic!() + 0..=25 => 
(value as u8 + 'a' as u8) as char, // a..z + 26..=35 => (value as u8 - 26 + '0' as u8) as char, // 0..9 + _ => panic!(), } } diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index b425c3438..04f53ed5b 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -11,17 +11,15 @@ use self::Mapping::*; use punycode; -use std::cmp::Ordering::{Equal, Less, Greater}; -use unicode_bidi::{BidiClass, bidi_class}; -use unicode_normalization::UnicodeNormalization; +use std::cmp::Ordering::{Equal, Greater, Less}; +use unicode_bidi::{bidi_class, BidiClass}; use unicode_normalization::char::is_combining_mark; +use unicode_normalization::UnicodeNormalization; include!("uts46_mapping_table.rs"); - pub static PUNYCODE_PREFIX: &'static str = "xn--"; - #[derive(Debug)] struct StringTableSlice { // Store these as separate fields so the structure will have an @@ -66,25 +64,27 @@ fn find_char(codepoint: char) -> &'static Mapping { Equal } }); - r.ok().map(|i| { - const SINGLE_MARKER: u16 = 1 << 15; + r.ok() + .map(|i| { + const SINGLE_MARKER: u16 = 1 << 15; - let x = INDEX_TABLE[i]; - let single = (x & SINGLE_MARKER) != 0; - let offset = !SINGLE_MARKER & x; + let x = INDEX_TABLE[i]; + let single = (x & SINGLE_MARKER) != 0; + let offset = !SINGLE_MARKER & x; - if single { - &MAPPING_TABLE[offset as usize] - } else { - &MAPPING_TABLE[(offset + (codepoint as u16 - TABLE[i].from as u16)) as usize] - } - }).unwrap() + if single { + &MAPPING_TABLE[offset as usize] + } else { + &MAPPING_TABLE[(offset + (codepoint as u16 - TABLE[i].from as u16)) as usize] + } + }) + .unwrap() } fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec) { match *find_char(codepoint) { Mapping::Valid => output.push(codepoint), - Mapping::Ignored => {}, + Mapping::Ignored => {} Mapping::Mapped(ref slice) => output.push_str(decode_slice(slice)), Mapping::Deviation(ref slice) => { if flags.transitional_processing { @@ -133,16 +133,23 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool 
{ loop { match chars.next() { Some(c) => { - if !matches!(bidi_class(c), - BidiClass::L | BidiClass::EN | - BidiClass::ES | BidiClass::CS | - BidiClass::ET | BidiClass::ON | - BidiClass::BN | BidiClass::NSM - ) { + if !matches!( + bidi_class(c), + BidiClass::L + | BidiClass::EN + | BidiClass::ES + | BidiClass::CS + | BidiClass::ET + | BidiClass::ON + | BidiClass::BN + | BidiClass::NSM + ) { return false; } - }, - None => { break; }, + } + None => { + break; + } } } @@ -156,16 +163,18 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { last_non_nsm = rev_chars.next(); continue; } - _ => { break; }, + _ => { + break; + } } } match last_non_nsm { - Some(c) if bidi_class(c) == BidiClass::L - || bidi_class(c) == BidiClass::EN => {}, - Some(_) => { return false; }, + Some(c) if bidi_class(c) == BidiClass::L || bidi_class(c) == BidiClass::EN => {} + Some(_) => { + return false; + } _ => {} } - } // RTL label @@ -186,33 +195,51 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { found_an = true; } - if !matches!(char_class, BidiClass::R | BidiClass::AL | - BidiClass::AN | BidiClass::EN | - BidiClass::ES | BidiClass::CS | - BidiClass::ET | BidiClass::ON | - BidiClass::BN | BidiClass::NSM) { + if !matches!( + char_class, + BidiClass::R + | BidiClass::AL + | BidiClass::AN + | BidiClass::EN + | BidiClass::ES + | BidiClass::CS + | BidiClass::ET + | BidiClass::ON + | BidiClass::BN + | BidiClass::NSM + ) { return false; } - }, - None => { break; }, + } + None => { + break; + } } } // Rule 3 let mut rev_chars = label.chars().rev(); let mut last = rev_chars.next(); - loop { // must end in L or EN followed by 0 or more NSM + loop { + // must end in L or EN followed by 0 or more NSM match last { Some(c) if bidi_class(c) == BidiClass::NSM => { last = rev_chars.next(); continue; } - _ => { break; }, + _ => { + break; + } } } match last { - Some(c) if matches!(bidi_class(c), BidiClass::R | BidiClass::AL | - BidiClass::EN | BidiClass::AN) => {}, - _ => { return 
false; } + Some(c) + if matches!( + bidi_class(c), + BidiClass::R | BidiClass::AL | BidiClass::EN | BidiClass::AN + ) => {} + _ => { + return false; + } } // Rule 4 @@ -245,7 +272,6 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec false, @@ -277,7 +301,6 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) -> String { // Find out if it's a Bidi Domain Name // // First, check for literal bidi chars - let mut is_bidi_domain = domain.chars().any(|c| - matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN) - ); + let mut is_bidi_domain = domain + .chars() + .any(|c| matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN)); if !is_bidi_domain { // Then check for punycode-encoded bidi chars for label in normalized.split('.') { if label.starts_with(PUNYCODE_PREFIX) { match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) { Some(decoded_label) => { - if decoded_label.chars().any(|c| + if decoded_label.chars().any(|c| { matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN) - ) { + }) { is_bidi_domain = true; } } @@ -336,11 +358,14 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { if label.starts_with(PUNYCODE_PREFIX) { match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) { Some(decoded_label) => { - let flags = Flags { transitional_processing: false, ..flags }; + let flags = Flags { + transitional_processing: false, + ..flags + }; validate_full(&decoded_label, is_bidi_domain, flags, errors); validated.push_str(&decoded_label) } - None => errors.push(Error::PunycodeError) + None => errors.push(Error::PunycodeError), } } else { // `normalized` is already `NFC` so we can skip that check @@ -353,9 +378,9 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { #[derive(Copy, Clone)] pub struct Flags { - pub use_std3_ascii_rules: bool, - pub transitional_processing: bool, - pub verify_dns_length: bool, + pub 
use_std3_ascii_rules: bool, + pub transitional_processing: bool, + pub verify_dns_length: bool, } #[derive(PartialEq, Eq, Clone, Copy, Debug)] @@ -393,14 +418,18 @@ pub fn to_ascii(domain: &str, flags: Flags) -> Result { Some(x) => { result.push_str(PUNYCODE_PREFIX); result.push_str(&x); - }, - None => errors.push(Error::PunycodeError) + } + None => errors.push(Error::PunycodeError), } } } if flags.verify_dns_length { - let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; + let domain = if result.ends_with(".") { + &result[..result.len() - 1] + } else { + &*result + }; if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { errors.push(Error::TooShortForDns) } diff --git a/idna/tests/punycode.rs b/idna/tests/punycode.rs index 67988e80c..fe5e94edc 100644 --- a/idna/tests/punycode.rs +++ b/idna/tests/punycode.rs @@ -15,19 +15,25 @@ fn one_test(decoded: &str, encoded: &str) { None => panic!("Decoding {} failed.", encoded), Some(result) => { let result = result.into_iter().collect::(); - assert!(result == decoded, - format!("Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", - encoded, result, decoded)) + assert!( + result == decoded, + format!( + "Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", + encoded, result, decoded + ) + ) } } match encode_str(decoded) { None => panic!("Encoding {} failed.", decoded), - Some(result) => { - assert!(result == encoded, - format!("Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", - decoded, result, encoded)) - } + Some(result) => assert!( + result == encoded, + format!( + "Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", + decoded, result, encoded + ) + ), } } @@ -41,25 +47,29 @@ fn get_string<'a>(map: &'a Object, key: &str) -> &'a str { pub fn collect_tests(add_test: &mut F) { match Json::from_str(include_str!("punycode_tests.json")) { - Ok(Json::Array(tests)) => for (i, test) in tests.into_iter().enumerate() { - match test { - Json::Object(o) => { - let test_name 
= { - let desc = get_string(&o, "description"); + Ok(Json::Array(tests)) => { + for (i, test) in tests.into_iter().enumerate() { + match test { + Json::Object(o) => { + let test_name = { + let desc = get_string(&o, "description"); if desc.is_empty() { - format!("Punycode {}", i + 1) - } else { - format!("Punycode {}: {}", i + 1, desc) - } - }; - add_test(test_name, TestFn::dyn_test_fn(move || one_test( - get_string(&o, "decoded"), - get_string(&o, "encoded"), - ))) + format!("Punycode {}", i + 1) + } else { + format!("Punycode {}: {}", i + 1, desc) + } + }; + add_test( + test_name, + TestFn::dyn_test_fn(move || { + one_test(get_string(&o, "decoded"), get_string(&o, "encoded")) + }), + ) + } + _ => panic!(), } - _ => panic!(), } - }, - other => panic!("{:?}", other) + } + other => panic!("{:?}", other), } } diff --git a/idna/tests/unit.rs b/idna/tests/unit.rs index a7d158d5c..83f6b6576 100644 --- a/idna/tests/unit.rs +++ b/idna/tests/unit.rs @@ -4,13 +4,15 @@ extern crate unicode_normalization; use idna::uts46; use unicode_normalization::char::is_combining_mark; - fn _to_ascii(domain: &str) -> Result { - uts46::to_ascii(domain, uts46::Flags { - transitional_processing: false, - use_std3_ascii_rules: true, - verify_dns_length: true, - }) + uts46::to_ascii( + domain, + uts46::Flags { + transitional_processing: false, + use_std3_ascii_rules: true, + verify_dns_length: true, + }, + ) } #[test] @@ -29,7 +31,10 @@ fn test_v8_bidi_rules() { assert_eq!(_to_ascii("אבּג").unwrap(), "xn--kdb3bdf"); assert_eq!(_to_ascii("ابج").unwrap(), "xn--mgbcm"); assert_eq!(_to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm"); - assert_eq!(_to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm"); + assert_eq!( + _to_ascii("אבּג.ابج").unwrap(), + "xn--kdb3bdf.xn--mgbcm" + ); // Bidi domain names cannot start with digits assert!(_to_ascii("0a.\u{05D0}").is_err()); diff --git a/idna/tests/uts46.rs b/idna/tests/uts46.rs index 59ec1cd76..976c9e921 100644 --- a/idna/tests/uts46.rs +++ 
b/idna/tests/uts46.rs @@ -6,20 +6,20 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::char; use idna::uts46; +use std::char; use test::TestFn; pub fn collect_tests(add_test: &mut F) { // http://www.unicode.org/Public/idna/latest/IdnaTest.txt for (i, line) in include_str!("IdnaTest.txt").lines().enumerate() { if line == "" || line.starts_with("#") { - continue + continue; } // Remove comments let mut line = match line.find("#") { Some(index) => &line[0..index], - None => line + None => line, }; let mut expected_failure = false; @@ -35,61 +35,87 @@ pub fn collect_tests(add_test: &mut F) { let source = unescape(original); let to_unicode = pieces.remove(0); let to_ascii = pieces.remove(0); - let nv8 = if pieces.len() > 0 { pieces.remove(0) } else { "" }; + let nv8 = if pieces.len() > 0 { + pieces.remove(0) + } else { + "" + }; if expected_failure { continue; } let test_name = format!("UTS #46 line {}", i + 1); - add_test(test_name, TestFn::dyn_test_fn(move || { - let result = uts46::to_ascii(&source, uts46::Flags { - use_std3_ascii_rules: true, - transitional_processing: test_type == "T", - verify_dns_length: true, - }); + add_test( + test_name, + TestFn::dyn_test_fn(move || { + let result = uts46::to_ascii( + &source, + uts46::Flags { + use_std3_ascii_rules: true, + transitional_processing: test_type == "T", + verify_dns_length: true, + }, + ); - if to_ascii.starts_with("[") { - if to_ascii.starts_with("[C") { - // http://unicode.org/reports/tr46/#Deviations - // applications that perform IDNA2008 lookup are not required to check - // for these contexts - return; - } - if to_ascii == "[V2]" { - // Everybody ignores V2 - // https://github.com/servo/rust-url/pull/240 - // https://github.com/whatwg/url/issues/53#issuecomment-181528158 - // http://www.unicode.org/review/pri317/ + if to_ascii.starts_with("[") { + if to_ascii.starts_with("[C") { + // http://unicode.org/reports/tr46/#Deviations + // 
applications that perform IDNA2008 lookup are not required to check + // for these contexts + return; + } + if to_ascii == "[V2]" { + // Everybody ignores V2 + // https://github.com/servo/rust-url/pull/240 + // https://github.com/whatwg/url/issues/53#issuecomment-181528158 + // http://www.unicode.org/review/pri317/ + return; + } + let res = result.ok(); + assert!( + res == None, + "Expected error. result: {} | original: {} | source: {}", + res.unwrap(), + original, + source + ); return; } - let res = result.ok(); - assert!(res == None, "Expected error. result: {} | original: {} | source: {}", - res.unwrap(), original, source); - return; - } - let to_ascii = if to_ascii.len() > 0 { - to_ascii.to_string() - } else { - if to_unicode.len() > 0 { - to_unicode.to_string() + let to_ascii = if to_ascii.len() > 0 { + to_ascii.to_string() } else { - source.clone() - } - }; + if to_unicode.len() > 0 { + to_unicode.to_string() + } else { + source.clone() + } + }; - if nv8 == "NV8" { - // This result isn't valid under IDNA2008. Skip it - return; - } + if nv8 == "NV8" { + // This result isn't valid under IDNA2008. 
Skip it + return; + } - assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}", - source, original, result.err()); - let output = result.ok().unwrap(); - assert!(output == to_ascii, "result: {} | expected: {} | original: {} | source: {}", - output, to_ascii, original, source); - })) + assert!( + result.is_ok(), + "Couldn't parse {} | original: {} | error: {:?}", + source, + original, + result.err() + ); + let output = result.ok().unwrap(); + assert!( + output == to_ascii, + "result: {} | expected: {} | original: {} | source: {}", + output, + to_ascii, + original, + source + ); + }), + ) } } @@ -99,7 +125,7 @@ fn unescape(input: &str) -> String { loop { match chars.next() { None => return output, - Some(c) => + Some(c) => { if c == '\\' { match chars.next().unwrap() { '\\' => output.push('\\'), @@ -108,10 +134,12 @@ fn unescape(input: &str) -> String { let c2 = chars.next().unwrap().to_digit(16).unwrap(); let c3 = chars.next().unwrap().to_digit(16).unwrap(); let c4 = chars.next().unwrap().to_digit(16).unwrap(); - match char::from_u32(((c1 * 16 + c2) * 16 + c3) * 16 + c4) - { + match char::from_u32(((c1 * 16 + c2) * 16 + c3) * 16 + c4) { Some(c) => output.push(c), - None => { output.push_str(&format!("\\u{:X}{:X}{:X}{:X}",c1,c2,c3,c4)); } + None => { + output + .push_str(&format!("\\u{:X}{:X}{:X}{:X}", c1, c2, c3, c4)); + } }; } _ => panic!("Invalid test data input"), @@ -119,6 +147,7 @@ fn unescape(input: &str) -> String { } else { output.push(c); } + } } } } diff --git a/percent_encoding/lib.rs b/percent_encoding/lib.rs index 5b461448f..a5c2987a2 100644 --- a/percent_encoding/lib.rs +++ b/percent_encoding/lib.rs @@ -175,23 +175,23 @@ define_encode_set! 
{ pub fn percent_encode_byte(byte: u8) -> &'static str { let index = usize::from(byte) * 3; &"\ - %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\ - %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\ - %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\ - %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\ - %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\ - %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\ - %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\ - %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\ - %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\ - %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\ - %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\ - %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\ - %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\ - %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\ - %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\ - %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF\ - "[index..index + 3] + %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\ + %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\ + %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\ + %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\ + %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\ + %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\ + %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\ + %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\ + %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\ + %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\ + %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\ + %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\ + %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\ + %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\ + %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\ + %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF\ + "[index..index + 3] } /// Percent-encode the given bytes with the given encode set. 
@@ -259,7 +259,7 @@ impl<'a, E: EncodeSet> Iterator for PercentEncode<'a, E> { // 1 for first_byte + i for previous iterations of this loop let (unchanged_slice, remaining) = self.bytes.split_at(1 + i); self.bytes = remaining; - return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }); } else { assert!(byte.is_ascii()); } @@ -295,17 +295,15 @@ impl<'a, E: EncodeSet> From> for Cow<'a, str> { fn from(mut iter: PercentEncode<'a, E>) -> Self { match iter.next() { None => "".into(), - Some(first) => { - match iter.next() { - None => first.into(), - Some(second) => { - let mut string = first.to_owned(); - string.push_str(second); - string.extend(iter); - string.into() - } + Some(first) => match iter.next() { + None => first.into(), + Some(second) => { + let mut string = first.to_owned(); + string.push_str(second); + string.extend(iter); + string.into() } - } + }, } } } @@ -327,7 +325,7 @@ impl<'a, E: EncodeSet> From> for Cow<'a, str> { #[inline] pub fn percent_decode(input: &[u8]) -> PercentDecode { PercentDecode { - bytes: input.iter() + bytes: input.iter(), } } @@ -387,10 +385,8 @@ impl<'a> PercentDecode<'a> { let unchanged_bytes_len = initial_bytes.len() - bytes_iter.len() - 3; let mut decoded = initial_bytes[..unchanged_bytes_len].to_owned(); decoded.push(decoded_byte); - decoded.extend(PercentDecode { - bytes: bytes_iter - }); - return Some(decoded) + decoded.extend(PercentDecode { bytes: bytes_iter }); + return Some(decoded); } } // Nothing to decode @@ -402,18 +398,14 @@ impl<'a> PercentDecode<'a> { /// This is return `Err` when the percent-decoded bytes are not well-formed in UTF-8. 
pub fn decode_utf8(self) -> Result, str::Utf8Error> { match self.clone().into() { - Cow::Borrowed(bytes) => { - match str::from_utf8(bytes) { - Ok(s) => Ok(s.into()), - Err(e) => Err(e), - } - } - Cow::Owned(bytes) => { - match String::from_utf8(bytes) { - Ok(s) => Ok(s.into()), - Err(e) => Err(e.utf8_error()), - } - } + Cow::Borrowed(bytes) => match str::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e), + }, + Cow::Owned(bytes) => match String::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e.utf8_error()), + }, } } @@ -442,5 +434,3 @@ fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { } } } - - diff --git a/src/encoding.rs b/src/encoding.rs index 920b30e11..e6c10f0a2 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -6,24 +6,28 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. - //! Abstraction that conditionally compiles either to rust-encoding, //! or to only support UTF-8. -#[cfg(feature = "query_encoding")] extern crate encoding; +#[cfg(feature = "query_encoding")] +extern crate encoding; use std::borrow::Cow; -#[cfg(feature = "query_encoding")] use std::fmt::{self, Debug, Formatter}; +#[cfg(feature = "query_encoding")] +use std::fmt::{self, Debug, Formatter}; -#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap}; -#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label; -#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef; +#[cfg(feature = "query_encoding")] +use self::encoding::label::encoding_from_whatwg_label; +#[cfg(feature = "query_encoding")] +pub use self::encoding::types::EncodingRef; +#[cfg(feature = "query_encoding")] +use self::encoding::types::{DecoderTrap, EncoderTrap}; #[cfg(feature = "query_encoding")] #[derive(Copy, Clone)] pub struct EncodingOverride { /// `None` means UTF-8. 
- encoding: Option + encoding: Option, } #[cfg(feature = "query_encoding")] @@ -34,7 +38,11 @@ impl EncodingOverride { pub fn from_encoding(encoding: EncodingRef) -> Self { EncodingOverride { - encoding: if encoding.name() == "utf-8" { None } else { Some(encoding) } + encoding: if encoding.name() == "utf-8" { + None + } else { + Some(encoding) + }, } } @@ -47,16 +55,16 @@ impl EncodingOverride { // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD // https://encoding.spec.whatwg.org/#names-and-labels ::std::str::from_utf8(label) - .ok() - .and_then(encoding_from_whatwg_label) - .map(Self::from_encoding) + .ok() + .and_then(encoding_from_whatwg_label) + .map(Self::from_encoding) } /// https://encoding.spec.whatwg.org/#get-an-output-encoding pub fn to_output_encoding(self) -> Self { if let Some(encoding) = self.encoding { if matches!(encoding.name(), "utf-16le" | "utf-16be") { - return Self::utf8() + return Self::utf8(); } } self @@ -76,7 +84,10 @@ impl EncodingOverride { pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { match self.encoding { // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace` - Some(encoding) => encoding.decode(&input, DecoderTrap::Replace).unwrap().into(), + Some(encoding) => encoding + .decode(&input, DecoderTrap::Replace) + .unwrap() + .into(), None => decode_utf8_lossy(input), } } @@ -85,7 +96,7 @@ impl EncodingOverride { match self.encoding { // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape` Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()), - None => encode_utf8(input) + None => encode_utf8(input), } } } @@ -96,7 +107,7 @@ impl Debug for EncodingOverride { write!(f, "EncodingOverride {{ encoding: ")?; match self.encoding { Some(e) => write!(f, "{} }}", e.name()), - None => write!(f, "None }}") + None => write!(f, "None }}"), } } } @@ -141,6 +152,6 @@ pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { pub fn 
encode_utf8(input: Cow) -> Cow<[u8]> { match input { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), - Cow::Owned(s) => Cow::Owned(s.into_bytes()) + Cow::Owned(s) => Cow::Owned(s.into_bytes()), } } diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs index 27e650e2f..75cb6b7e7 100644 --- a/src/form_urlencoded.rs +++ b/src/form_urlencoded.rs @@ -14,12 +14,11 @@ //! and a sequence of (name, value) pairs. use encoding::EncodingOverride; -use percent_encoding::{percent_encode_byte, percent_decode}; +use percent_encoding::{percent_decode, percent_encode_byte}; use std::borrow::{Borrow, Cow}; use std::fmt; use std::str; - /// Convert a byte string in the `application/x-www-form-urlencoded` syntax /// into a iterator of (name, value) pairs. /// @@ -35,7 +34,6 @@ pub fn parse(input: &[u8]) -> Parse { } } - /// Convert a byte string in the `application/x-www-form-urlencoded` syntax /// into a iterator of (name, value) pairs. /// @@ -51,14 +49,14 @@ pub fn parse(input: &[u8]) -> Parse { /// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding). /// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`. 
#[cfg(feature = "query_encoding")] -pub fn parse_with_encoding<'a>(input: &'a [u8], - encoding_override: Option<::encoding::EncodingRef>, - use_charset: bool) - -> Result, ()> { - +pub fn parse_with_encoding<'a>( + input: &'a [u8], + encoding_override: Option<::encoding::EncodingRef>, + use_charset: bool, +) -> Result, ()> { let mut encoding = EncodingOverride::from_opt_encoding(encoding_override); if !(encoding.is_utf8() || input.is_ascii()) { - return Err(()) + return Err(()); } if use_charset { for sequence in input.split(|&b| b == b'&') { @@ -69,7 +67,7 @@ pub fn parse_with_encoding<'a>(input: &'a [u8], // https://encoding.spec.whatwg.org/#names-and-labels if let Some(e) = EncodingOverride::lookup(value) { encoding = e; - break + break; } } } @@ -93,21 +91,18 @@ impl<'a> Iterator for Parse<'a> { fn next(&mut self) -> Option { loop { if self.input.is_empty() { - return None + return None; } let mut split2 = self.input.splitn(2, |&b| b == b'&'); let sequence = split2.next().unwrap(); self.input = split2.next().unwrap_or(&[][..]); if sequence.is_empty() { - continue + continue; } let mut split2 = sequence.splitn(2, |&b| b == b'='); let name = split2.next().unwrap(); let value = split2.next().unwrap_or(&[][..]); - return Some(( - decode(name, self.encoding), - decode(value, self.encoding), - )) + return Some((decode(name, self.encoding), decode(value, self.encoding))); } } } @@ -147,14 +142,16 @@ impl<'a> Parse<'a> { /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow`. #[derive(Debug)] pub struct ParseIntoOwned<'a> { - inner: Parse<'a> + inner: Parse<'a>, } impl<'a> Iterator for ParseIntoOwned<'a> { type Item = (String, String); fn next(&mut self) -> Option { - self.inner.next().map(|(k, v)| (k.into_owned(), v.into_owned())) + self.inner + .next() + .map(|(k, v)| (k.into_owned(), v.into_owned())) } } @@ -163,9 +160,7 @@ impl<'a> Iterator for ParseIntoOwned<'a> { /// /// Return an iterator of `&str` slices. 
pub fn byte_serialize(input: &[u8]) -> ByteSerialize { - ByteSerialize { - bytes: input, - } + ByteSerialize { bytes: input } } /// Return value of `byte_serialize()`. @@ -185,7 +180,11 @@ impl<'a> Iterator for ByteSerialize<'a> { if let Some((&first, tail)) = self.bytes.split_first() { if !byte_serialized_unchanged(first) { self.bytes = tail; - return Some(if first == b' ' { "+" } else { percent_encode_byte(first) }) + return Some(if first == b' ' { + "+" + } else { + percent_encode_byte(first) + }); } let position = tail.iter().position(|&b| !byte_serialized_unchanged(b)); let (unchanged_slice, remaining) = match position { @@ -234,14 +233,22 @@ pub trait Target { } impl Target for String { - fn as_mut_string(&mut self) -> &mut String { self } - fn finish(self) -> Self { self } + fn as_mut_string(&mut self) -> &mut String { + self + } + fn finish(self) -> Self { + self + } type Finished = Self; } impl<'a> Target for &'a mut String { - fn as_mut_string(&mut self) -> &mut String { &mut **self } - fn finish(self) -> Self { self } + fn as_mut_string(&mut self) -> &mut String { + &mut **self + } + fn finish(self) -> Self { + self + } type Finished = Self; } @@ -284,7 +291,7 @@ impl Serializer { /// If that suffix is non-empty, /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. pub fn for_suffix(mut target: T, start_position: usize) -> Self { - &target.as_mut_string()[start_position..]; // Panic if out of bounds + &target.as_mut_string()[start_position..]; // Panic if out of bounds Serializer { target: Some(target), start_position: start_position, @@ -310,7 +317,8 @@ impl Serializer { /// Set the character encoding to be used for names and values before percent-encoding. 
pub fn custom_encoding_override(&mut self, encode: F) -> &mut Self - where F: FnMut(&str) -> Cow<[u8]> + 'static + where + F: FnMut(&str) -> Cow<[u8]> + 'static, { self.custom_encoding = Some(SilentDebug(Box::new(encode))); self @@ -320,8 +328,14 @@ impl Serializer { /// /// Panics if called after `.finish()`. pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self { - append_pair(string(&mut self.target), self.start_position, self.encoding, - &mut self.custom_encoding, name, value); + append_pair( + string(&mut self.target), + self.start_position, + self.encoding, + &mut self.custom_encoding, + name, + value, + ); self } @@ -333,13 +347,24 @@ impl Serializer { /// /// Panics if called after `.finish()`. pub fn extend_pairs(&mut self, iter: I) -> &mut Self - where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef, V: AsRef { + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef, + V: AsRef, + { { let string = string(&mut self.target); for pair in iter { let &(ref k, ref v) = pair.borrow(); - append_pair(string, self.start_position, self.encoding, - &mut self.custom_encoding, k.as_ref(), v.as_ref()); + append_pair( + string, + self.start_position, + self.encoding, + &mut self.custom_encoding, + k.as_ref(), + v.as_ref(), + ); } } self @@ -352,8 +377,10 @@ impl Serializer { /// Panics if called after `.finish()`. #[cfg(feature = "query_encoding")] pub fn append_charset(&mut self) -> &mut Self { - assert!(self.custom_encoding.is_none(), - "Cannot use both custom_encoding_override() and append_charset()"); + assert!( + self.custom_encoding.is_none(), + "Cannot use both custom_encoding_override() and append_charset()" + ); { let string = string(&mut self.target); append_separator_if_needed(string, self.start_position); @@ -376,7 +403,10 @@ impl Serializer { /// /// Panics if called more than once. 
pub fn finish(&mut self) -> T::Finished { - self.target.take().expect("url::form_urlencoded::Serializer double finish").finish() + self.target + .take() + .expect("url::form_urlencoded::Serializer double finish") + .finish() } } @@ -387,20 +417,32 @@ fn append_separator_if_needed(string: &mut String, start_position: usize) { } fn string(target: &mut Option) -> &mut String { - target.as_mut().expect("url::form_urlencoded::Serializer finished").as_mut_string() + target + .as_mut() + .expect("url::form_urlencoded::Serializer finished") + .as_mut_string() } -fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>, - name: &str, value: &str) { +fn append_pair( + string: &mut String, + start_position: usize, + encoding: EncodingOverride, + custom_encoding: &mut Option Cow<[u8]>>>>, + name: &str, + value: &str, +) { append_separator_if_needed(string, start_position); append_encoded(name, string, encoding, custom_encoding); string.push('='); append_encoded(value, string, encoding, custom_encoding); } -fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>) { +fn append_encoded( + s: &str, + string: &mut String, + encoding: EncodingOverride, + custom_encoding: &mut Option Cow<[u8]>>>>, +) { let bytes = if let Some(SilentDebug(ref mut custom)) = *custom_encoding { custom(s) } else { diff --git a/src/host.rs b/src/host.rs index 8c8a1d0a4..72b9d0a77 100644 --- a/src/host.rs +++ b/src/host.rs @@ -6,15 +6,16 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +#[cfg(feature = "heapsize")] +use heapsize::HeapSizeOf; +use idna; +use parser::{ParseError, ParseResult}; +use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET}; use std::cmp; use std::fmt::{self, Formatter}; use std::io; use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs}; use std::vec; -use parser::{ParseResult, ParseError}; -use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET}; -use idna; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum HostInternal { @@ -27,9 +28,12 @@ pub enum HostInternal { #[cfg(feature = "heapsize")] known_heap_size!(0, HostInternal); -#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl ::serde::Serialize for HostInternal { - fn serialize(&self, serializer: S) -> Result where S: ::serde::Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: ::serde::Serializer, + { // This doesn’t use `derive` because that involves // large dependencies (that take a long time to build), and // either Macros 1.1 which are not stable yet or a cumbersome build script. @@ -42,13 +46,17 @@ impl ::serde::Serialize for HostInternal { HostInternal::Domain => Some(None), HostInternal::Ipv4(addr) => Some(Some(IpAddr::V4(addr))), HostInternal::Ipv6(addr) => Some(Some(IpAddr::V6(addr))), - }.serialize(serializer) + } + .serialize(serializer) } } -#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl<'de> ::serde::Deserialize<'de> for HostInternal { - fn deserialize(deserializer: D) -> Result where D: ::serde::Deserializer<'de> { + fn deserialize(deserializer: D) -> Result + where + D: ::serde::Deserializer<'de>, + { use std::net::IpAddr; Ok(match ::serde::Deserialize::deserialize(deserializer)? { None => HostInternal::None, @@ -71,7 +79,7 @@ impl From> for HostInternal { /// The host name of an URL. 
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub enum Host { +pub enum Host { /// A DNS domain name, as '.' dot-separated labels. /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of /// a special URL, or percent encoded for non-special URLs. Hosts for @@ -91,21 +99,28 @@ pub enum Host { Ipv6(Ipv6Addr), } -#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl ::serde::Serialize for Host { - fn serialize(&self, serializer: R) -> Result where R: ::serde::Serializer { + fn serialize(&self, serializer: R) -> Result + where + R: ::serde::Serializer, + { use std::net::IpAddr; match *self { Host::Domain(ref s) => Ok(s), Host::Ipv4(addr) => Err(IpAddr::V4(addr)), Host::Ipv6(addr) => Err(IpAddr::V6(addr)), - }.serialize(serializer) + } + .serialize(serializer) } } -#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl<'de, S: ::serde::Deserialize<'de>> ::serde::Deserialize<'de> for Host { - fn deserialize(deserializer: D) -> Result where D: ::serde::Deserializer<'de> { + fn deserialize(deserializer: D) -> Result + where + D: ::serde::Deserializer<'de>, + { use std::net::IpAddr; Ok(match ::serde::Deserialize::deserialize(deserializer)? { Ok(s) => Host::Domain(s), @@ -143,16 +158,34 @@ impl Host { pub fn parse(input: &str) -> Result { if input.starts_with('[') { if !input.ends_with(']') { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } - return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6) + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } let domain = percent_decode(input.as_bytes()).decode_utf8_lossy(); let domain = idna::domain_to_ascii(&domain)?; - if domain.find(|c| matches!(c, - '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' 
| '@' | '[' | '\\' | ']' - )).is_some() { - return Err(ParseError::InvalidDomainCharacter) + if domain + .find(|c| { + matches!( + c, + '\0' | '\t' + | '\n' + | '\r' + | ' ' + | '#' + | '%' + | '/' + | ':' + | '?' + | '@' + | '[' + | '\\' + | ']' + ) + }) + .is_some() + { + return Err(ParseError::InvalidDomainCharacter); } if let Some(address) = parse_ipv4addr(&domain)? { Ok(Host::Ipv4(address)) @@ -165,14 +198,31 @@ impl Host { pub fn parse_opaque(input: &str) -> Result { if input.starts_with('[') { if !input.ends_with(']') { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } - return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6) + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } - if input.find(|c| matches!(c, - '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '/' | ':' | '?' | '@' | '[' | '\\' | ']' - )).is_some() { - return Err(ParseError::InvalidDomainCharacter) + if input + .find(|c| { + matches!( + c, + '\0' | '\t' + | '\n' + | '\r' + | ' ' + | '#' + | '/' + | ':' + | '?' + | '@' + | '[' + | '\\' + | ']' + ) + }) + .is_some() + { + return Err(ParseError::InvalidDomainCharacter); } let s = utf8_percent_encode(input, SIMPLE_ENCODE_SET).to_string(); Ok(Host::Domain(s)) @@ -196,7 +246,7 @@ impl> fmt::Display for Host { /// This mostly exists because coherence rules don’t allow us to implement /// `ToSocketAddrs for (Host, u16)`. 
#[derive(Clone, Debug)] -pub struct HostAndPort { +pub struct HostAndPort { pub host: Host, pub port: u16, } @@ -206,7 +256,7 @@ impl<'a> HostAndPort<&'a str> { pub fn to_owned(&self) -> HostAndPort { HostAndPort { host: self.host.to_owned(), - port: self.port + port: self.port, } } } @@ -219,7 +269,6 @@ impl> fmt::Display for HostAndPort { } } - impl> ToSocketAddrs for HostAndPort { type Iter = SocketAddrs; @@ -228,13 +277,15 @@ impl> ToSocketAddrs for HostAndPort { match self.host { Host::Domain(ref domain) => Ok(SocketAddrs { // FIXME: use std::net::lookup_host when it’s stable. - state: SocketAddrsState::Domain((domain.as_ref(), port).to_socket_addrs()?) + state: SocketAddrsState::Domain((domain.as_ref(), port).to_socket_addrs()?), }), Host::Ipv4(address) => Ok(SocketAddrs { - state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port))) + state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port))), }), Host::Ipv6(address) => Ok(SocketAddrs { - state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new(address, port, 0, 0))) + state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new( + address, port, 0, 0, + ))), }), } } @@ -243,7 +294,7 @@ impl> ToSocketAddrs for HostAndPort { /// Socket addresses for an URL. #[derive(Debug)] pub struct SocketAddrs { - state: SocketAddrsState + state: SocketAddrsState, } #[derive(Debug)] @@ -262,7 +313,7 @@ impl Iterator for SocketAddrs { self.state = SocketAddrsState::Done; Some(s) } - SocketAddrsState::Done => None + SocketAddrsState::Done => None, } } } @@ -344,10 +395,12 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { // So instead we check if the input looks like a real number and only return // an error when it's an overflow. 
let valid_number = match r { - 8 => input.chars().all(|c| c >= '0' && c <='7'), - 10 => input.chars().all(|c| c >= '0' && c <='9'), - 16 => input.chars().all(|c| (c >= '0' && c <='9') || (c >='a' && c <= 'f') || (c >= 'A' && c <= 'F')), - _ => false + 8 => input.chars().all(|c| c >= '0' && c <= '7'), + 10 => input.chars().all(|c| c >= '0' && c <= '9'), + 16 => input + .chars() + .all(|c| (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')), + _ => false, }; if !valid_number { @@ -369,7 +422,7 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { /// fn parse_ipv4addr(input: &str) -> ParseResult> { if input.is_empty() { - return Ok(None) + return Ok(None); } let mut parts: Vec<&str> = input.split('.').collect(); if parts.last() == Some(&"") { @@ -387,7 +440,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult> { match parse_ipv4number(part) { Ok(Some(n)) => numbers.push(n), Ok(None) => return Ok(None), - Err(()) => overflow = true + Err(()) => overflow = true, }; } if overflow { @@ -395,7 +448,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult> { } let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) - if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { + if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { return Err(ParseError::InvalidIpv4Address); } if numbers.iter().any(|x| *x > 255) { @@ -418,12 +471,12 @@ fn parse_ipv6addr(input: &str) -> ParseResult { let mut i = 0; if len < 2 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } if input[0] == b':' { if input[1] != b':' { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } i = 2; piece_pointer = 1; @@ -432,16 +485,16 @@ fn parse_ipv6addr(input: &str) -> ParseResult { while i < len { if piece_pointer == 8 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } if input[i] == b':' { if 
compress_pointer.is_some() { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } i += 1; piece_pointer += 1; compress_pointer = Some(piece_pointer); - continue + continue; } let start = i; let end = cmp::min(len, start + 4); @@ -451,33 +504,33 @@ fn parse_ipv6addr(input: &str) -> ParseResult { Some(digit) => { value = value * 0x10 + digit as u16; i += 1; - }, - None => break + } + None => break, } } if i < len { match input[i] { b'.' => { if i == start { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } i = start; if piece_pointer > 6 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } is_ip_v4 = true; - }, + } b':' => { i += 1; if i == len { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } - }, - _ => return Err(ParseError::InvalidIpv6Address) + } + _ => return Err(ParseError::InvalidIpv6Address), } } if is_ip_v4 { - break + break; } pieces[piece_pointer] = value; piece_pointer += 1; @@ -485,7 +538,7 @@ fn parse_ipv6addr(input: &str) -> ParseResult { if is_ip_v4 { if piece_pointer > 6 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } let mut numbers_seen = 0; while i < len { @@ -493,23 +546,23 @@ fn parse_ipv6addr(input: &str) -> ParseResult { if numbers_seen < 4 && (i < len && input[i] == b'.') { i += 1 } else { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } } let mut ipv4_piece = None; while i < len { let digit = match input[i] { - c @ b'0' ..= b'9' => c - b'0', - _ => break + c @ b'0'..=b'9' => c - b'0', + _ => break, }; match ipv4_piece { None => ipv4_piece = Some(digit as u16), - Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero + Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero Some(ref mut v) => { *v = *v * 10 + digit as u16; if *v > 255 { - return 
Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } } } @@ -519,7 +572,7 @@ fn parse_ipv6addr(input: &str) -> ParseResult { pieces[piece_pointer] = if let Some(v) = ipv4_piece { pieces[piece_pointer] * 0x100 + v } else { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); }; numbers_seen += 1; @@ -529,12 +582,12 @@ fn parse_ipv6addr(input: &str) -> ParseResult { } if numbers_seen != 4 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } } if i < len { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } match compress_pointer { @@ -547,10 +600,13 @@ fn parse_ipv6addr(input: &str) -> ParseResult { piece_pointer -= 1; } } - _ => if piece_pointer != 8 { - return Err(ParseError::InvalidIpv6Address) + _ => { + if piece_pointer != 8 { + return Err(ParseError::InvalidIpv6Address); + } } } - Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3], - pieces[4], pieces[5], pieces[6], pieces[7])) + Ok(Ipv6Addr::new( + pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7], + )) } diff --git a/src/lib.rs b/src/lib.rs index 80cad8445..4a195ceea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,47 +107,56 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); #![doc(html_root_url = "https://docs.rs/url/1.7.0")] -#[macro_use] extern crate matches; -#[cfg(feature="serde")] extern crate serde; -#[cfg(feature="heapsize")] #[macro_use] extern crate heapsize; +#[macro_use] +extern crate matches; +#[cfg(feature = "serde")] +extern crate serde; +#[cfg(feature = "heapsize")] +#[macro_use] +extern crate heapsize; pub extern crate idna; #[macro_use] pub extern crate percent_encoding; use encoding::EncodingOverride; -#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +#[cfg(feature = "heapsize")] +use heapsize::HeapSizeOf; use host::HostInternal; -use parser::{Parser, Context, 
SchemeType, to_u32}; -use percent_encoding::{PATH_SEGMENT_ENCODE_SET, USERINFO_ENCODE_SET, - percent_encode, percent_decode, utf8_percent_encode}; +use parser::{to_u32, Context, Parser, SchemeType}; +use percent_encoding::{ + percent_decode, percent_encode, utf8_percent_encode, PATH_SEGMENT_ENCODE_SET, + USERINFO_ENCODE_SET, +}; use std::borrow::Borrow; use std::cmp; -#[cfg(feature = "serde")] use std::error::Error; -use std::fmt::{self, Write, Debug, Formatter}; +#[cfg(feature = "serde")] +use std::error::Error; +use std::fmt::{self, Debug, Formatter, Write}; use std::hash; use std::io; use std::mem; -use std::net::{ToSocketAddrs, IpAddr}; +use std::net::{IpAddr, ToSocketAddrs}; use std::ops::{Range, RangeFrom, RangeTo}; use std::path::{Path, PathBuf}; use std::str; -pub use origin::{Origin, OpaqueOrigin}; pub use host::{Host, HostAndPort, SocketAddrs}; -pub use path_segments::PathSegmentsMut; +pub use origin::{OpaqueOrigin, Origin}; pub use parser::{ParseError, SyntaxViolation}; +pub use path_segments::PathSegmentsMut; pub use slicing::Position; mod encoding; mod host; mod origin; -mod path_segments; mod parser; +mod path_segments; mod slicing; pub mod form_urlencoded; -#[doc(hidden)] pub mod quirks; +#[doc(hidden)] +pub mod quirks; /// A parsed URL record. 
#[derive(Clone)] @@ -164,15 +173,15 @@ pub struct Url { serialization: String, // Components - scheme_end: u32, // Before ':' - username_end: u32, // Before ':' (if a password is given) or '@' (if not) + scheme_end: u32, // Before ':' + username_end: u32, // Before ':' (if a password is given) or '@' (if not) host_start: u32, host_end: u32, host: HostInternal, port: Option, - path_start: u32, // Before initial '/', if any - query_start: Option, // Before '?', unlike Position::QueryStart - fragment_start: Option, // Before '#', unlike Position::FragmentStart + path_start: u32, // Before initial '/', if any + query_start: Option, // Before '?', unlike Position::QueryStart + fragment_start: Option, // Before '#', unlike Position::FragmentStart } #[cfg(feature = "heapsize")] @@ -245,18 +254,21 @@ impl<'a> ParseOptions<'a> { query_encoding_override: self.encoding_override, violation_fn: self.violation_fn, context: Context::UrlParser, - }.parse_url(input) + } + .parse_url(input) } } impl<'a> Debug for ParseOptions<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, - "ParseOptions {{ base_url: {:?}, encoding_override: {:?}, \ - violation_fn: {:?} }}", - self.base_url, - self.encoding_override, - self.violation_fn.map(|_| "…")) + write!( + f, + "ParseOptions {{ base_url: {:?}, encoding_override: {:?}, \ + violation_fn: {:?} }}", + self.base_url, + self.encoding_override, + self.violation_fn.map(|_| "…") + ) } } @@ -313,10 +325,11 @@ impl Url { /// [`ParseError`]: enum.ParseError.html #[inline] pub fn parse_with_params(input: &str, iter: I) -> Result - where I: IntoIterator, - I::Item: Borrow<(K, V)>, - K: AsRef, - V: AsRef + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef, + V: AsRef, { let mut url = Url::options().parse(input); @@ -446,10 +459,13 @@ impl Url { macro_rules! 
assert { ($x: expr) => { if !$x { - return Err(format!("!( {} ) for URL {:?}", - stringify!($x), self.serialization)) + return Err(format!( + "!( {} ) for URL {:?}", + stringify!($x), + self.serialization + )); } - } + }; } macro_rules! assert_eq { @@ -468,11 +484,13 @@ impl Url { assert!(self.scheme_end >= 1); assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z')); - assert!(self.slice(1..self.scheme_end).chars() - .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.'))); + assert!(self + .slice(1..self.scheme_end) + .chars() + .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.'))); assert_eq!(self.byte_at(self.scheme_end), b':'); - if self.slice(self.scheme_end + 1 ..).starts_with("//") { + if self.slice(self.scheme_end + 1..).starts_with("//") { // URL with authority match self.byte_at(self.username_end) { b':' => { @@ -503,7 +521,10 @@ impl Url { } else { assert_eq!(self.byte_at(self.host_end), b':'); let port_str = self.slice(self.host_end + 1..self.path_start); - assert_eq!(self.port, Some(port_str.parse::().expect("Couldn't parse port?"))); + assert_eq!( + self.port, + Some(port_str.parse::().expect("Couldn't parse port?")) + ); } assert_eq!(self.byte_at(self.path_start), b'/'); } else { @@ -533,10 +554,12 @@ impl Url { assert_eq!(self.username_end, other.username_end); assert_eq!(self.host_start, other.host_start); assert_eq!(self.host_end, other.host_end); - assert!(self.host == other.host || + assert!( + self.host == other.host || // XXX No host round-trips to empty host. 
// See https://github.com/whatwg/url/issues/79 - (self.host_str(), other.host_str()) == (None, Some(""))); + (self.host_str(), other.host_str()) == (None, Some("")) + ); assert_eq!(self.port, other.port); assert_eq!(self.path_start, other.path_start); assert_eq!(self.query_start, other.query_start); @@ -982,15 +1005,19 @@ impl Url { /// } /// ``` pub fn with_default_port(&self, f: F) -> io::Result> - where F: FnOnce(&Url) -> Result { + where + F: FnOnce(&Url) -> Result, + { Ok(HostAndPort { - host: self.host() - .ok_or(()) - .or_else(|()| io_error("URL has no host"))?, - port: self.port_or_known_default() - .ok_or(()) - .or_else(|()| f(self)) - .or_else(|()| io_error("URL has no port number"))? + host: self + .host() + .ok_or(()) + .or_else(|()| io_error("URL has no host"))?, + port: self + .port_or_known_default() + .ok_or(()) + .or_else(|()| f(self)) + .or_else(|()| io_error("URL has no port number"))?, }) } @@ -1020,8 +1047,7 @@ impl Url { pub fn path(&self) -> &str { match (self.query_start, self.fragment_start) { (None, None) => self.slice(self.path_start..), - (Some(next_component_start), _) | - (None, Some(next_component_start)) => { + (Some(next_component_start), _) | (None, Some(next_component_start)) => { self.slice(self.path_start..next_component_start) } } @@ -1327,7 +1353,10 @@ impl Url { self.serialization.push('?'); } - let query = UrlQuery { url: Some(self), fragment: fragment }; + let query = UrlQuery { + url: Some(self), + fragment: fragment, + }; form_urlencoded::Serializer::for_suffix(query, query_start + "?".len()) } @@ -1337,7 +1366,7 @@ impl Url { let after_path = self.slice(i..).to_owned(); self.serialization.truncate(i as usize); after_path - }, + } (None, None) => String::new(), } } @@ -1378,7 +1407,7 @@ impl Url { } parser.parse_cannot_be_a_base_path(parser::Input::new(path)); } else { - let mut has_host = true; // FIXME + let mut has_host = true; // FIXME parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); } 
}); @@ -1402,8 +1431,12 @@ impl Url { *index -= old_after_path_position; *index += new_after_path_position; }; - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } self.serialization.push_str(after_path) } @@ -1452,7 +1485,7 @@ impl Url { pub fn set_port(&mut self, mut port: Option) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { - return Err(()) + return Err(()); } if port.is_some() && port == parser::default_port(self.scheme()) { port = None @@ -1465,11 +1498,16 @@ impl Url { match (self.port, port) { (None, None) => {} (Some(_), None) => { - self.serialization.drain(self.host_end as usize .. self.path_start as usize); + self.serialization + .drain(self.host_end as usize..self.path_start as usize); let offset = self.path_start - self.host_end; self.path_start = self.host_end; - if let Some(ref mut index) = self.query_start { *index -= offset } - if let Some(ref mut index) = self.fragment_start { *index -= offset } + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } } (Some(old), Some(new)) if old == new => {} (_, Some(new)) => { @@ -1483,8 +1521,12 @@ impl Url { *index -= old_path_start; *index += new_path_start; }; - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } self.serialization.push_str(&path_and_after); } } @@ -1575,7 +1617,7 @@ impl Url { /// [`ParseError`]: enum.ParseError.html pub fn set_host(&mut self, host: 
Option<&str>) -> Result<(), ParseError> { if self.cannot_be_a_base() { - return Err(ParseError::SetHostOnCannotBeABaseUrl) + return Err(ParseError::SetHostOnCannotBeABaseUrl); } if let Some(host) = host { @@ -1589,27 +1631,36 @@ impl Url { } } else if self.has_host() { if SchemeType::from(self.scheme()).is_special() { - return Err(ParseError::EmptyHost) + return Err(ParseError::EmptyHost); } debug_assert!(self.byte_at(self.scheme_end) == b':'); debug_assert!(self.byte_at(self.path_start) == b'/'); let new_path_start = self.scheme_end + 1; - self.serialization.drain(new_path_start as usize..self.path_start as usize); + self.serialization + .drain(new_path_start as usize..self.path_start as usize); let offset = self.path_start - new_path_start; self.path_start = new_path_start; self.username_end = new_path_start; self.host_start = new_path_start; self.host_end = new_path_start; self.port = None; - if let Some(ref mut index) = self.query_start { *index -= offset } - if let Some(ref mut index) = self.fragment_start { *index -= offset } + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } } Ok(()) } /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. 
fn set_host_internal(&mut self, host: Host, opt_new_port: Option>) { - let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { self.host_end }; + let old_suffix_pos = if opt_new_port.is_some() { + self.path_start + } else { + self.host_end + }; let suffix = self.slice(old_suffix_pos..).to_owned(); self.serialization.truncate(self.host_start as usize); if !self.has_authority() { @@ -1638,8 +1689,12 @@ impl Url { *index += new_suffix_pos; }; adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } } /// Change this URL’s host to the given IP address. @@ -1681,7 +1736,7 @@ impl Url { /// pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { if self.cannot_be_a_base() { - return Err(()) + return Err(()); } let address = match address { @@ -1721,13 +1776,14 @@ impl Url { pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { - return Err(()) + return Err(()); } if let Some(password) = password { let host_and_after = self.slice(self.host_start..).to_owned(); self.serialization.truncate(self.username_end as usize); self.serialization.push(':'); - self.serialization.extend(utf8_percent_encode(password, USERINFO_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(password, USERINFO_ENCODE_SET)); self.serialization.push('@'); let old_host_start = self.host_start; @@ -1739,28 +1795,37 @@ impl Url { self.host_start = new_host_start; adjust(&mut self.host_end); adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref 
mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } self.serialization.push_str(&host_and_after); - } else if self.byte_at(self.username_end) == b':' { // If there is a password to remove + } else if self.byte_at(self.username_end) == b':' { + // If there is a password to remove let has_username_or_password = self.byte_at(self.host_start - 1) == b'@'; debug_assert!(has_username_or_password); let username_start = self.scheme_end + 3; let empty_username = username_start == self.username_end; - let start = self.username_end; // Remove the ':' + let start = self.username_end; // Remove the ':' let end = if empty_username { self.host_start // Remove the '@' as well } else { - self.host_start - 1 // Keep the '@' to separate the username from the host + self.host_start - 1 // Keep the '@' to separate the username from the host }; - self.serialization.drain(start as usize .. end as usize); + self.serialization.drain(start as usize..end as usize); let offset = end - start; self.host_start -= offset; self.host_end -= offset; self.path_start -= offset; - if let Some(ref mut index) = self.query_start { *index -= offset } - if let Some(ref mut index) = self.fragment_start { *index -= offset } + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } } Ok(()) } @@ -1803,16 +1868,17 @@ impl Url { pub fn set_username(&mut self, username: &str) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { - return Err(()) + return Err(()); } let username_start = self.scheme_end + 3; debug_assert!(self.slice(self.scheme_end..username_start) == "://"); if self.slice(username_start..self.username_end) == username { - return Ok(()) + return Ok(()); } let after_username = self.slice(self.username_end..).to_owned(); 
self.serialization.truncate(username_start as usize); - self.serialization.extend(utf8_percent_encode(username, USERINFO_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(username, USERINFO_ENCODE_SET)); let mut removed_bytes = self.username_end; self.username_end = to_u32(self.serialization.len()).unwrap(); @@ -1841,8 +1907,12 @@ impl Url { adjust(&mut self.host_start); adjust(&mut self.host_end); adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } Ok(()) } @@ -1907,9 +1977,10 @@ impl Url { pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { let mut parser = Parser::for_setter(String::new()); let remaining = parser.parse_scheme(parser::Input::new(scheme))?; - if !remaining.is_empty() || - (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) { - return Err(()) + if !remaining.is_empty() + || (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) + { + return Err(()); } let old_scheme_end = self.scheme_end; let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); @@ -1923,8 +1994,12 @@ impl Url { adjust(&mut self.host_start); adjust(&mut self.host_end); adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } parser.serialization.push_str(self.slice(old_scheme_end..)); self.serialization = parser.serialization; @@ -1958,7 +2033,7 @@ impl Url { /// # run().unwrap(); /// # } /// ``` - #[cfg(any(unix, windows, target_os="redox"))] + #[cfg(any(unix, windows, target_os = "redox"))] pub fn 
from_file_path>(path: P) -> Result { let mut serialization = "file://".to_owned(); let host_start = serialization.len() as u32; @@ -1994,7 +2069,7 @@ impl Url { /// /// Note that `std::path` does not consider trailing slashes significant /// and usually does not include them (e.g. in `Path::parent()`). - #[cfg(any(unix, windows, target_os="redox"))] + #[cfg(any(unix, windows, target_os = "redox"))] pub fn from_directory_path>(path: P) -> Result { let mut url = Url::from_file_path(path)?; if !url.serialization.ends_with('/') { @@ -2011,18 +2086,38 @@ impl Url { /// This method is only available if the `serde` Cargo feature is enabled. #[cfg(feature = "serde")] #[deny(unused)] - pub fn serialize_internal(&self, serializer: S) -> Result where S: serde::Serializer { + pub fn serialize_internal(&self, serializer: S) -> Result + where + S: serde::Serializer, + { use serde::Serialize; // Destructuring first lets us ensure that adding or removing fields forces this method // to be updated - let Url { ref serialization, ref scheme_end, - ref username_end, ref host_start, - ref host_end, ref host, ref port, - ref path_start, ref query_start, - ref fragment_start} = *self; - (serialization, scheme_end, username_end, - host_start, host_end, host, port, path_start, - query_start, fragment_start).serialize(serializer) + let Url { + ref serialization, + ref scheme_end, + ref username_end, + ref host_start, + ref host_end, + ref host, + ref port, + ref path_start, + ref query_start, + ref fragment_start, + } = *self; + ( + serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + ) + .serialize(serializer) } /// Serialize with Serde using the internal representation of the `Url` struct. @@ -2033,11 +2128,23 @@ impl Url { /// This method is only available if the `serde` Cargo feature is enabled. 
#[cfg(feature = "serde")] #[deny(unused)] - pub fn deserialize_internal<'de, D>(deserializer: D) -> Result where D: serde::Deserializer<'de> { + pub fn deserialize_internal<'de, D>(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { use serde::de::{Deserialize, Error, Unexpected}; - let (serialization, scheme_end, username_end, - host_start, host_end, host, port, path_start, - query_start, fragment_start) = Deserialize::deserialize(deserializer)?; + let ( + serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + ) = Deserialize::deserialize(deserializer)?; let url = Url { serialization: serialization, scheme_end: scheme_end, @@ -2048,7 +2155,7 @@ impl Url { port: port, path_start: path_start, query_start: query_start, - fragment_start: fragment_start + fragment_start: fragment_start, }; if cfg!(debug_assertions) { url.check_invariants().map_err(|reason| { @@ -2059,7 +2166,6 @@ impl Url { Ok(url) } - /// Assuming the URL is in the `file` scheme or similar, /// convert its path to an absolute `std::path::Path`. /// @@ -2079,15 +2185,15 @@ impl Url { /// (That is, if the percent-decoded path contains a NUL byte or, /// for a Windows path, is not UTF-8.) #[inline] - #[cfg(any(unix, windows, target_os="redox"))] + #[cfg(any(unix, windows, target_os = "redox"))] pub fn to_file_path(&self) -> Result { if let Some(segments) = self.path_segments() { let host = match self.host() { None | Some(Host::Domain("localhost")) => None, Some(_) if cfg!(windows) && self.scheme() == "file" => { - Some(&self.serialization[self.host_start as usize .. 
self.host_end as usize]) - }, - _ => return Err(()) + Some(&self.serialization[self.host_start as usize..self.host_end as usize]) + } + _ => return Err(()), }; return file_url_segments_to_pathbuf(host, segments); @@ -2098,7 +2204,10 @@ impl Url { // Private helper methods: #[inline] - fn slice(&self, range: R) -> &str where R: RangeArg { + fn slice(&self, range: R) -> &str + where + R: RangeArg, + { range.slice_of(&self.serialization) } @@ -2173,7 +2282,10 @@ impl PartialOrd for Url { /// URLs hash like their serialization. impl hash::Hash for Url { #[inline] - fn hash(&self, state: &mut H) where H: hash::Hasher { + fn hash(&self, state: &mut H) + where + H: hash::Hasher, + { hash::Hash::hash(&self.serialization, state) } } @@ -2193,30 +2305,33 @@ trait RangeArg { impl RangeArg for Range { #[inline] fn slice_of<'a>(&self, s: &'a str) -> &'a str { - &s[self.start as usize .. self.end as usize] + &s[self.start as usize..self.end as usize] } } impl RangeArg for RangeFrom { #[inline] fn slice_of<'a>(&self, s: &'a str) -> &'a str { - &s[self.start as usize ..] + &s[self.start as usize..] } } impl RangeArg for RangeTo { #[inline] fn slice_of<'a>(&self, s: &'a str) -> &'a str { - &s[.. self.end as usize] + &s[..self.end as usize] } } /// Serializes this URL into a `serde` stream. /// /// This implementation is only available if the `serde` Cargo feature is enabled. -#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl serde::Serialize for Url { - fn serialize(&self, serializer: S) -> Result where S: serde::Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { serializer.serialize_str(self.as_str()) } } @@ -2224,10 +2339,13 @@ impl serde::Serialize for Url { /// Deserializes this URL from a `serde` stream. /// /// This implementation is only available if the `serde` Cargo feature is enabled. 
-#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl<'de> serde::Deserialize<'de> for Url { - fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de> { - use serde::de::{Unexpected, Error}; + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{Error, Unexpected}; let string_representation: String = serde::Deserialize::deserialize(deserializer)?; Url::parse(&string_representation).map_err(|err| { Error::invalid_value(Unexpected::Str(&string_representation), &err.description()) @@ -2236,11 +2354,13 @@ impl<'de> serde::Deserialize<'de> for Url { } #[cfg(any(unix, target_os = "redox"))] -fn path_to_file_url_segments(path: &Path, serialization: &mut String) - -> Result<(u32, HostInternal), ()> { +fn path_to_file_url_segments( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { use std::os::unix::prelude::OsStrExt; if !path.is_absolute() { - return Err(()) + return Err(()); } let host_end = to_u32(serialization.len()).unwrap(); let mut empty = true; @@ -2249,7 +2369,9 @@ fn path_to_file_url_segments(path: &Path, serialization: &mut String) empty = false; serialization.push('/'); serialization.extend(percent_encode( - component.as_os_str().as_bytes(), PATH_SEGMENT_ENCODE_SET)); + component.as_os_str().as_bytes(), + PATH_SEGMENT_ENCODE_SET, + )); } if empty { // An URL’s path must not be empty. 
@@ -2259,18 +2381,22 @@ fn path_to_file_url_segments(path: &Path, serialization: &mut String) } #[cfg(windows)] -fn path_to_file_url_segments(path: &Path, serialization: &mut String) - -> Result<(u32, HostInternal), ()> { +fn path_to_file_url_segments( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { path_to_file_url_segments_windows(path, serialization) } // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 #[cfg_attr(not(windows), allow(dead_code))] -fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) - -> Result<(u32, HostInternal), ()> { - use std::path::{Prefix, Component}; +fn path_to_file_url_segments_windows( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { + use std::path::{Component, Prefix}; if !path.is_absolute() { - return Err(()) + return Err(()); } let mut components = path.components(); @@ -2284,7 +2410,7 @@ fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) serialization.push('/'); serialization.push(letter as char); serialization.push(':'); - }, + } Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; write!(serialization, "{}", host).unwrap(); @@ -2293,26 +2419,33 @@ fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) serialization.push('/'); let share = share.to_str().ok_or(())?; serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT_ENCODE_SET)); - }, - _ => return Err(()) + } + _ => return Err(()), }, - _ => return Err(()) + _ => return Err(()), } for component in components { - if component == Component::RootDir { continue } + if component == Component::RootDir { + continue; + } // FIXME: somehow work with non-unicode? 
let component = component.as_os_str().to_str().ok_or(())?; serialization.push('/'); - serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT_ENCODE_SET)); + serialization.extend(percent_encode( + component.as_bytes(), + PATH_SEGMENT_ENCODE_SET, + )); } Ok((host_end, host_internal)) } - #[cfg(any(unix, target_os = "redox"))] -fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split) -> Result { +fn file_url_segments_to_pathbuf( + host: Option<&str>, + segments: str::Split, +) -> Result { use std::ffi::OsStr; use std::os::unix::prelude::OsStrExt; @@ -2331,20 +2464,27 @@ fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split) } let os_str = OsStr::from_bytes(&bytes); let path = PathBuf::from(os_str); - debug_assert!(path.is_absolute(), - "to_file_path() failed to produce an absolute Path"); + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); Ok(path) } #[cfg(windows)] -fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split) -> Result { +fn file_url_segments_to_pathbuf( + host: Option<&str>, + segments: str::Split, +) -> Result { file_url_segments_to_pathbuf_windows(host, segments) } // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 #[cfg_attr(not(windows), allow(dead_code))] -fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::Split) -> Result { - +fn file_url_segments_to_pathbuf_windows( + host: Option<&str>, + mut segments: str::Split, +) -> Result { let mut string = if let Some(host) = host { r"\\".to_owned() + host } else { @@ -2353,23 +2493,23 @@ fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::S match first.len() { 2 => { if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { - return Err(()) + return Err(()); } first.to_owned() - }, + } 4 => { if !first.starts_with(parser::ascii_alpha) { - return Err(()) + return Err(()); } let bytes = 
first.as_bytes(); if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') { - return Err(()) + return Err(()); } first[0..1].to_owned() + ":" - }, + } _ => return Err(()), } @@ -2385,8 +2525,10 @@ fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::S } } let path = PathBuf::from(string); - debug_assert!(path.is_absolute(), - "to_file_path() failed to produce an absolute Path"); + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); Ok(path) } @@ -2409,7 +2551,6 @@ impl<'a> Drop for UrlQuery<'a> { } } - /// Define a new struct /// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait, /// for use in [`percent_decode()`](percent_encoding/fn.percent_encode.html) diff --git a/src/origin.rs b/src/origin.rs index 358fe7684..718d25ebe 100644 --- a/src/origin.rs +++ b/src/origin.rs @@ -6,7 +6,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +#[cfg(feature = "heapsize")] +use heapsize::HeapSizeOf; use host::Host; use idna::domain_to_unicode; use parser::default_port; @@ -20,16 +21,17 @@ pub fn url_origin(url: &Url) -> Origin { let result = Url::parse(url.path()); match result { Ok(ref url) => url_origin(url), - Err(_) => Origin::new_opaque() + Err(_) => Origin::new_opaque(), } - }, - "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => { - Origin::Tuple(scheme.to_owned(), url.host().unwrap().to_owned(), - url.port_or_known_default().unwrap()) - }, + } + "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => Origin::Tuple( + scheme.to_owned(), + url.host().unwrap().to_owned(), + url.port_or_known_default().unwrap(), + ), // TODO: Figure out what to do if the scheme is a file "file" => Origin::new_opaque(), - _ => Origin::new_opaque() + _ => Origin::new_opaque(), } } @@ -56,7 +58,7 @@ pub enum Origin { Opaque(OpaqueOrigin), /// Consists of the URL's scheme, host and port - Tuple(String, Host, u16) + Tuple(String, Host, u16), } #[cfg(feature = "heapsize")] @@ -64,15 +66,13 @@ impl HeapSizeOf for Origin { fn heap_size_of_children(&self) -> usize { match *self { Origin::Tuple(ref scheme, ref host, _) => { - scheme.heap_size_of_children() + - host.heap_size_of_children() - }, + scheme.heap_size_of_children() + host.heap_size_of_children() + } _ => 0, } } } - impl Origin { /// Creates a new opaque origin that is only equal to itself. 
pub fn new_opaque() -> Origin { @@ -110,7 +110,7 @@ impl Origin { let (domain, _errors) = domain_to_unicode(domain); Host::Domain(domain) } - _ => host.clone() + _ => host.clone(), }; if default_port(scheme) == Some(port) { format!("{}://{}", scheme, host) diff --git a/src/parser.rs b/src/parser.rs index 3ebf1b965..f72c22b48 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -10,14 +10,13 @@ use std::error::Error; use std::fmt::{self, Formatter, Write}; use std::str; -use Url; use encoding::EncodingOverride; use host::{Host, HostInternal}; use percent_encoding::{ - utf8_percent_encode, percent_encode, - SIMPLE_ENCODE_SET, DEFAULT_ENCODE_SET, USERINFO_ENCODE_SET, QUERY_ENCODE_SET, - PATH_SEGMENT_ENCODE_SET + percent_encode, utf8_percent_encode, DEFAULT_ENCODE_SET, PATH_SEGMENT_ENCODE_SET, + QUERY_ENCODE_SET, SIMPLE_ENCODE_SET, USERINFO_ENCODE_SET, }; +use Url; define_encode_set! { // The backslash (\) character is treated as a path separator in special URLs @@ -72,7 +71,9 @@ impl fmt::Display for ParseError { } impl From<::idna::uts46::Errors> for ParseError { - fn from(_: ::idna::uts46::Errors) -> ParseError { ParseError::IdnaError } + fn from(_: ::idna::uts46::Errors) -> ParseError { + ParseError::IdnaError + } } macro_rules! 
syntax_violation_enum { @@ -178,7 +179,9 @@ impl<'i> Input<'i> { vfn(SyntaxViolation::TabOrNewlineIgnored) } } - Input { chars: input.chars() } + Input { + chars: input.chars(), + } } #[inline] @@ -217,7 +220,7 @@ impl<'i> Input<'i> { remaining = input; count += 1; } else { - return (count, remaining) + return (count, remaining); } } } @@ -229,10 +232,10 @@ impl<'i> Input<'i> { match self.chars.next() { Some(c) => { if !matches!(c, '\t' | '\n' | '\r') { - return Some((c, &utf8[..c.len_utf8()])) + return Some((c, &utf8[..c.len_utf8()])); } } - None => return None + None => return None, } } } @@ -243,14 +246,16 @@ pub trait Pattern { } impl Pattern for char { - fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next() == Some(self) } + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { + input.next() == Some(self) + } } impl<'a> Pattern for &'a str { fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { for c in self.chars() { if input.next() != Some(c) { - return false + return false; } } true @@ -258,13 +263,17 @@ impl<'a> Pattern for &'a str { } impl bool> Pattern for F { - fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next().map_or(false, self) } + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { + input.next().map_or(false, self) + } } impl<'i> Iterator for Input<'i> { type Item = char; fn next(&mut self) -> Option { - self.chars.by_ref().find(|&c| !matches!(c, '\t' | '\n' | '\r')) + self.chars + .by_ref() + .find(|&c| !matches!(c, '\t' | '\n' | '\r')) } } @@ -312,7 +321,7 @@ impl<'a> Parser<'a> { pub fn parse_url(mut self, input: &str) -> ParseResult { let input = Input::with_log(input, self.violation_fn); if let Ok(remaining) = self.parse_scheme(input.clone()) { - return self.parse_with_scheme(remaining) + return self.parse_with_scheme(remaining); } // No-scheme state @@ -336,7 +345,7 @@ impl<'a> Parser<'a> { pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result, ()> { if input.is_empty() || 
!input.starts_with(ascii_alpha) { - return Err(()) + return Err(()); } debug_assert!(self.serialization.is_empty()); while let Some(c) = input.next() { @@ -347,7 +356,7 @@ impl<'a> Parser<'a> { ':' => return Ok(input), _ => { self.serialization.clear(); - return Err(()) + return Err(()); } } } @@ -361,7 +370,7 @@ impl<'a> Parser<'a> { } fn parse_with_scheme(mut self, input: Input) -> ParseResult { - use SyntaxViolation::{ExpectedFileDoubleSlash, ExpectedDoubleSlash}; + use SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash}; let scheme_end = to_u32(self.serialization.len())?; let scheme_type = SchemeType::from(&self.serialization); self.serialization.push(':'); @@ -369,7 +378,11 @@ impl<'a> Parser<'a> { SchemeType::File => { self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//")); let base_file_url = self.base_url.and_then(|base| { - if base.scheme() == "file" { Some(base) } else { None } + if base.scheme() == "file" { + Some(base) + } else { + None + } }); self.serialization.clear(); self.parse_file(input, base_file_url) @@ -378,31 +391,39 @@ impl<'a> Parser<'a> { // special relative or authority state let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); if let Some(base_url) = self.base_url { - if slashes_count < 2 && - base_url.scheme() == &self.serialization[..scheme_end as usize] { + if slashes_count < 2 + && base_url.scheme() == &self.serialization[..scheme_end as usize] + { // "Cannot-be-a-base" URLs only happen with "not special" schemes. 
debug_assert!(!base_url.cannot_be_a_base()); self.serialization.clear(); - return self.parse_relative(input, scheme_type, base_url) + return self.parse_relative(input, scheme_type, base_url); } } // special authority slashes state self.log_violation_if(ExpectedDoubleSlash, || { - input.clone().take_while(|&c| matches!(c, '/' | '\\')) - .collect::() != "//" + input + .clone() + .take_while(|&c| matches!(c, '/' | '\\')) + .collect::() + != "//" }); self.after_double_slash(remaining, scheme_type, scheme_end) } - SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end) + SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end), } } /// Scheme other than file, http, https, ws, ws, ftp, gopher. - fn parse_non_special(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) - -> ParseResult { + fn parse_non_special( + mut self, + input: Input, + scheme_type: SchemeType, + scheme_end: u32, + ) -> ParseResult { // path or authority state ( if let Some(input) = input.split_prefix("//") { - return self.after_double_slash(input, scheme_type, scheme_end) + return self.after_double_slash(input, scheme_type, scheme_end); } // Anarchist URL (no authority) let path_start = to_u32(self.serialization.len())?; @@ -418,8 +439,16 @@ impl<'a> Parser<'a> { } else { self.parse_cannot_be_a_base_path(input) }; - self.with_query_and_fragment(scheme_end, username_end, host_start, - host_end, host, port, path_start, remaining) + self.with_query_and_fragment( + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + remaining, + ) } fn parse_file(mut self, input: Input, mut base_file_url: Option<&Url>) -> ParseResult { @@ -458,14 +487,13 @@ impl<'a> Parser<'a> { fragment_start: None, }) } - }, + } Some('?') => { if let Some(base_url) = base_file_url { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - 
(Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); let (query_start, fragment_start) = @@ -495,7 +523,7 @@ impl<'a> Parser<'a> { fragment_start: fragment_start, }) } - }, + } Some('#') => { if let Some(base_url) = base_file_url { self.fragment_only(base_url, input) @@ -544,7 +572,8 @@ impl<'a> Parser<'a> { // For file URLs that have a host and whose path starts // with the windows drive letter we just remove the host. if !has_host { - self.serialization.drain(host_start as usize..host_end as usize); + self.serialization + .drain(host_start as usize..host_end as usize); host_end = host_start; host = HostInternal::None; } @@ -575,7 +604,11 @@ impl<'a> Parser<'a> { } } let remaining = self.parse_path( - SchemeType::File, &mut false, path_start, input_after_first_char); + SchemeType::File, + &mut false, + path_start, + input_after_first_char, + ); let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_end, remaining)?; let path_start = path_start as u32; @@ -600,22 +633,32 @@ impl<'a> Parser<'a> { if let Some(base_url) = base_file_url { let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - (Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); self.pop_path(SchemeType::File, base_url.path_start as usize); let remaining = self.parse_path( - SchemeType::File, &mut true, base_url.path_start as usize, input); + SchemeType::File, + &mut true, + base_url.path_start as usize, + input, + ); self.with_query_and_fragment( - base_url.scheme_end, base_url.username_end, base_url.host_start, - base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + 
base_url.port, + base_url.path_start, + remaining, + ) } else { self.serialization.push_str("file:///"); let scheme_end = "file".len() as u32; let path_start = "file://".len(); - let remaining = self.parse_path( - SchemeType::File, &mut false, path_start, input); + let remaining = + self.parse_path(SchemeType::File, &mut false, path_start, input); let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_end, remaining)?; let path_start = path_start as u32; @@ -636,8 +679,12 @@ impl<'a> Parser<'a> { } } - fn parse_relative(mut self, input: Input, scheme_type: SchemeType, base_url: &Url) - -> ParseResult { + fn parse_relative( + mut self, + input: Input, + scheme_type: SchemeType, + base_url: &Url, + ) -> ParseResult { // relative state debug_assert!(self.serialization.is_empty()); let (first_char, input_after_first_char) = input.split_first(); @@ -654,13 +701,12 @@ impl<'a> Parser<'a> { fragment_start: None, ..*base_url }) - }, + } Some('?') => { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - (Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); let (query_start, fragment_start) = @@ -671,49 +717,75 @@ impl<'a> Parser<'a> { fragment_start: fragment_start, ..*base_url }) - }, + } Some('#') => self.fragment_only(base_url, input), Some('/') | Some('\\') => { let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); if slashes_count >= 2 { self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || { - input.clone().take_while(|&c| matches!(c, '/' | '\\')) - .collect::() != "//" + input + .clone() + .take_while(|&c| matches!(c, '/' | '\\')) + .collect::() + != "//" }); let scheme_end = base_url.scheme_end; debug_assert!(base_url.byte_at(scheme_end) == b':'); - 
self.serialization.push_str(base_url.slice(..scheme_end + 1)); - return self.after_double_slash(remaining, scheme_type, scheme_end) + self.serialization + .push_str(base_url.slice(..scheme_end + 1)); + return self.after_double_slash(remaining, scheme_type, scheme_end); } let path_start = base_url.path_start; debug_assert!(base_url.byte_at(path_start) == b'/'); - self.serialization.push_str(base_url.slice(..path_start + 1)); + self.serialization + .push_str(base_url.slice(..path_start + 1)); let remaining = self.parse_path( - scheme_type, &mut true, path_start as usize, input_after_first_char); + scheme_type, + &mut true, + path_start as usize, + input_after_first_char, + ); self.with_query_and_fragment( - base_url.scheme_end, base_url.username_end, base_url.host_start, - base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) } _ => { let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - (Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); // FIXME spec says just "remove last entry", not the "pop" algorithm self.pop_path(scheme_type, base_url.path_start as usize); - let remaining = self.parse_path( - scheme_type, &mut true, base_url.path_start as usize, input); + let remaining = + self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input); self.with_query_and_fragment( - base_url.scheme_end, base_url.username_end, base_url.host_start, - base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) } } } - fn 
after_double_slash(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) - -> ParseResult { + fn after_double_slash( + mut self, + input: Input, + scheme_type: SchemeType, + scheme_end: u32, + ) -> ParseResult { self.serialization.push('/'); self.serialization.push('/'); // authority state @@ -724,15 +796,25 @@ impl<'a> Parser<'a> { self.parse_host_and_port(remaining, scheme_end, scheme_type)?; // path state let path_start = to_u32(self.serialization.len())?; - let remaining = self.parse_path_start( - scheme_type, &mut true, remaining); - self.with_query_and_fragment(scheme_end, username_end, host_start, - host_end, host, port, path_start, remaining) + let remaining = self.parse_path_start(scheme_type, &mut true, remaining); + self.with_query_and_fragment( + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + remaining, + ) } /// Return (username_end, remaining) - fn parse_userinfo<'i>(&mut self, mut input: Input<'i>, scheme_type: SchemeType) - -> ParseResult<(u32, Input<'i>)> { + fn parse_userinfo<'i>( + &mut self, + mut input: Input<'i>, + scheme_type: SchemeType, + ) -> ParseResult<(u32, Input<'i>)> { let mut last_at = None; let mut remaining = input.clone(); let mut char_count = 0; @@ -745,7 +827,7 @@ impl<'a> Parser<'a> { self.log_violation(SyntaxViolation::EmbeddedCredentials) } last_at = Some((char_count, remaining.clone())) - }, + } '/' | '?' 
| '#' => break, '\\' if scheme_type.is_special() => break, _ => (), @@ -755,7 +837,7 @@ impl<'a> Parser<'a> { let (mut userinfo_char_count, remaining) = match last_at { None => return Ok((to_u32(self.serialization.len())?, input)), Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)), - Some(x) => x + Some(x) => x, }; let mut username_end = None; @@ -777,7 +859,8 @@ impl<'a> Parser<'a> { has_username = true; } self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode(utf8_c, USERINFO_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, USERINFO_ENCODE_SET)); } } let username_end = match username_end { @@ -790,9 +873,12 @@ impl<'a> Parser<'a> { Ok((username_end, remaining)) } - fn parse_host_and_port<'i>(&mut self, input: Input<'i>, - scheme_end: u32, scheme_type: SchemeType) - -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { + fn parse_host_and_port<'i>( + &mut self, + input: Input<'i>, + scheme_end: u32, + scheme_type: SchemeType, + ) -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { let (host, remaining) = Parser::parse_host(input, scheme_type)?; write!(&mut self.serialization, "{}", host).unwrap(); let host_end = to_u32(self.serialization.len())?; @@ -808,8 +894,10 @@ impl<'a> Parser<'a> { Ok((host_end, host.into(), port, remaining)) } - pub fn parse_host(mut input: Input, scheme_type: SchemeType) - -> ParseResult<(Host, Input)> { + pub fn parse_host( + mut input: Input, + scheme_type: SchemeType, + ) -> ParseResult<(Host, Input)> { // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); @@ -833,7 +921,7 @@ impl<'a> Parser<'a> { inside_square_brackets = false; non_ignored_chars += 1 } - _ => non_ignored_chars += 1 + _ => non_ignored_chars += 1, } bytes += c.len_utf8(); } @@ -850,7 +938,7 @@ impl<'a> Parser<'a> { } } if scheme_type.is_special() && 
host_str.is_empty() { - return Err(ParseError::EmptyHost) + return Err(ParseError::EmptyHost); } if !scheme_type.is_special() { let host = Host::parse_opaque(host_str)?; @@ -860,8 +948,10 @@ impl<'a> Parser<'a> { Ok((host, input)) } - pub fn parse_file_host<'i>(&mut self, input: Input<'i>) - -> ParseResult<(bool, HostInternal, Input<'i>)> { + pub fn parse_file_host<'i>( + &mut self, + input: Input<'i>, + ) -> ParseResult<(bool, HostInternal, Input<'i>)> { // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); @@ -890,7 +980,7 @@ impl<'a> Parser<'a> { } } if is_windows_drive_letter(host_str) { - return Ok((false, HostInternal::None, input)) + return Ok((false, HostInternal::None, input)); } let host = if host_str.is_empty() { HostInternal::None @@ -906,23 +996,27 @@ impl<'a> Parser<'a> { Ok((true, host, remaining)) } - pub fn parse_port

(mut input: Input, default_port: P, - context: Context) - -> ParseResult<(Option, Input)> - where P: Fn() -> Option { + pub fn parse_port

( + mut input: Input, + default_port: P, + context: Context, + ) -> ParseResult<(Option, Input)> + where + P: Fn() -> Option, + { let mut port: u32 = 0; let mut has_any_digit = false; while let (Some(c), remaining) = input.split_first() { if let Some(digit) = c.to_digit(10) { port = port * 10 + digit; if port > ::std::u16::MAX as u32 { - return Err(ParseError::InvalidPort) + return Err(ParseError::InvalidPort); } has_any_digit = true; } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') { - return Err(ParseError::InvalidPort) + return Err(ParseError::InvalidPort); } else { - break + break; } input = remaining; } @@ -933,16 +1027,21 @@ impl<'a> Parser<'a> { Ok((opt_port, input)) } - pub fn parse_path_start<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool, - mut input: Input<'i>) - -> Input<'i> { + pub fn parse_path_start<'i>( + &mut self, + scheme_type: SchemeType, + has_host: &mut bool, + mut input: Input<'i>, + ) -> Input<'i> { // Path start state match input.split_first() { (Some('/'), remaining) => input = remaining, - (Some('\\'), remaining) => if scheme_type.is_special() { - self.log_violation(SyntaxViolation::Backslash); - input = remaining - }, + (Some('\\'), remaining) => { + if scheme_type.is_special() { + self.log_violation(SyntaxViolation::Backslash); + input = remaining + } + } _ => {} } let path_start = self.serialization.len(); @@ -950,9 +1049,13 @@ impl<'a> Parser<'a> { self.parse_path(scheme_type, has_host, path_start, input) } - pub fn parse_path<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool, - path_start: usize, mut input: Input<'i>) - -> Input<'i> { + pub fn parse_path<'i>( + &mut self, + scheme_type: SchemeType, + has_host: &mut bool, + path_start: usize, + mut input: Input<'i>, + ) -> Input<'i> { // Relative path state debug_assert!(self.serialization.ends_with('/')); loop { @@ -960,62 +1063,70 @@ impl<'a> Parser<'a> { let mut ends_with_slash = false; loop { let input_before_c = input.clone(); 
- let (c, utf8_c) = if let Some(x) = input.next_utf8() { x } else { break }; + let (c, utf8_c) = if let Some(x) = input.next_utf8() { + x + } else { + break; + }; match c { '/' if self.context != Context::PathSegmentSetter => { ends_with_slash = true; - break - }, - '\\' if self.context != Context::PathSegmentSetter && - scheme_type.is_special() => { + break; + } + '\\' if self.context != Context::PathSegmentSetter + && scheme_type.is_special() => + { self.log_violation(SyntaxViolation::Backslash); ends_with_slash = true; - break - }, + break; + } '?' | '#' if self.context == Context::UrlParser => { input = input_before_c; - break - }, + break; + } _ => { self.check_url_code_point(c, &input); if self.context == Context::PathSegmentSetter { if scheme_type.is_special() { self.serialization.extend(utf8_percent_encode( - utf8_c, SPECIAL_PATH_SEGMENT_ENCODE_SET)); + utf8_c, + SPECIAL_PATH_SEGMENT_ENCODE_SET, + )); } else { - self.serialization.extend(utf8_percent_encode( - utf8_c, PATH_SEGMENT_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT_ENCODE_SET)); } } else { - self.serialization.extend(utf8_percent_encode( - utf8_c, DEFAULT_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, DEFAULT_ENCODE_SET)); } } } } match &self.serialization[segment_start..] { - ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" | ".%2E" => { + ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" + | ".%2E" => { debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/'); - self.serialization.truncate(segment_start - 1); // Truncate "/.." + self.serialization.truncate(segment_start - 1); // Truncate "/.." self.pop_path(scheme_type, path_start); if !self.serialization[path_start..].ends_with('/') { self.serialization.push('/') } - }, + } "." 
| "%2e" | "%2E" => { self.serialization.truncate(segment_start); - }, + } _ => { - if scheme_type.is_file() && is_windows_drive_letter( - &self.serialization[path_start + 1..] - ) { + if scheme_type.is_file() + && is_windows_drive_letter(&self.serialization[path_start + 1..]) + { if self.serialization.ends_with('|') { self.serialization.pop(); self.serialization.push(':'); } if *has_host { self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive); - *has_host = false; // FIXME account for this in callers + *has_host = false; // FIXME account for this in callers } } if ends_with_slash { @@ -1024,7 +1135,7 @@ impl<'a> Parser<'a> { } } if !ends_with_slash { - break + break; } } input @@ -1038,14 +1149,12 @@ impl<'a> Parser<'a> { let segment_start = path_start + slash_position + 1; // Don’t pop a Windows drive letter // FIXME: *normalized* Windows drive letter - if !( - scheme_type.is_file() && - is_windows_drive_letter(&self.serialization[segment_start..]) - ) { + if !(scheme_type.is_file() + && is_windows_drive_letter(&self.serialization[segment_start..])) + { self.serialization.truncate(segment_start); } } - } pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> { @@ -1057,20 +1166,26 @@ impl<'a> Parser<'a> { } Some((c, utf8_c)) => { self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode( - utf8_c, SIMPLE_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, SIMPLE_ENCODE_SET)); } - None => return input + None => return input, } } } - fn with_query_and_fragment(mut self, scheme_end: u32, username_end: u32, - host_start: u32, host_end: u32, host: HostInternal, - port: Option, path_start: u32, remaining: Input) - -> ParseResult { - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_end, remaining)?; + fn with_query_and_fragment( + mut self, + scheme_end: u32, + username_end: u32, + host_start: u32, + host_end: u32, + host: HostInternal, + port: Option, + 
path_start: u32, + remaining: Input, + ) -> ParseResult { + let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_end, remaining)?; Ok(Url { serialization: self.serialization, scheme_end: scheme_end, @@ -1081,13 +1196,16 @@ impl<'a> Parser<'a> { port: port, path_start: path_start, query_start: query_start, - fragment_start: fragment_start + fragment_start: fragment_start, }) } /// Return (query_start, fragment_start) - fn parse_query_and_fragment(&mut self, scheme_end: u32, mut input: Input) - -> ParseResult<(Option, Option)> { + fn parse_query_and_fragment( + &mut self, + scheme_end: u32, + mut input: Input, + ) -> ParseResult<(Option, Option)> { let mut query_start = None; match input.next() { Some('#') => {} @@ -1098,11 +1216,11 @@ impl<'a> Parser<'a> { if let Some(remaining) = remaining { input = remaining } else { - return Ok((query_start, None)) + return Ok((query_start, None)); } } None => return Ok((None, None)), - _ => panic!("Programming error. parse_query_and_fragment() called without ? or #") + _ => panic!("Programming error. parse_query_and_fragment() called without ? 
or #"), } let fragment_start = to_u32(self.serialization.len())?; @@ -1111,14 +1229,13 @@ impl<'a> Parser<'a> { Ok((query_start, Some(fragment_start))) } - pub fn parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>) - -> Option> { - let mut query = String::new(); // FIXME: use a streaming decoder instead + pub fn parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>) -> Option> { + let mut query = String::new(); // FIXME: use a streaming decoder instead let mut remaining = None; while let Some(c) = input.next() { if c == '#' && self.context == Context::UrlParser { remaining = Some(input); - break + break; } else { self.check_url_code_point(c, &input); query.push(c); @@ -1130,7 +1247,8 @@ impl<'a> Parser<'a> { _ => EncodingOverride::utf8(), }; let query_bytes = encoding.encode(query.into()); - self.serialization.extend(percent_encode(&query_bytes, QUERY_ENCODE_SET)); + self.serialization + .extend(percent_encode(&query_bytes, QUERY_ENCODE_SET)); remaining } @@ -1140,7 +1258,8 @@ impl<'a> Parser<'a> { None => &*base_url.serialization, }; debug_assert!(self.serialization.is_empty()); - self.serialization.reserve(before_fragment.len() + input.chars.as_str().len()); + self.serialization + .reserve(before_fragment.len() + input.chars.as_str().len()); self.serialization.push_str(before_fragment); self.serialization.push('#'); let next = input.next(); @@ -1155,12 +1274,12 @@ impl<'a> Parser<'a> { pub fn parse_fragment(&mut self, mut input: Input) { while let Some((c, utf8_c)) = input.next_utf8() { - if c == '\0' { + if c == '\0' { self.log_violation(SyntaxViolation::NullInFragment) } else { self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode(utf8_c, - SIMPLE_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, SIMPLE_ENCODE_SET)); } } } @@ -1170,7 +1289,8 @@ impl<'a> Parser<'a> { if c == '%' { let mut input = input.clone(); if !matches!((input.next(), input.next()), (Some(a), Some(b)) - if 
is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) { + if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) + { vfn(SyntaxViolation::PercentDecode) } } else if !is_url_code_point(c) { @@ -1214,7 +1334,7 @@ fn is_url_code_point(c: char) -> bool { /// https://url.spec.whatwg.org/#c0-controls-and-space #[inline] fn c0_control_or_space(ch: char) -> bool { - ch <= ' ' // U+0000 to U+0020 + ch <= ' ' // U+0000 to U+0020 } /// https://url.spec.whatwg.org/#ascii-alpha @@ -1235,13 +1355,11 @@ pub fn to_u32(i: usize) -> ParseResult { /// Wether the scheme is file:, the path has a single segment, and that segment /// is a Windows drive letter fn is_windows_drive_letter(segment: &str) -> bool { - segment.len() == 2 - && starts_with_windows_drive_letter(segment) + segment.len() == 2 && starts_with_windows_drive_letter(segment) } fn starts_with_windows_drive_letter(s: &str) -> bool { - ascii_alpha(s.as_bytes()[0] as char) - && matches!(s.as_bytes()[1], b':' | b'|') + ascii_alpha(s.as_bytes()[0] as char) && matches!(s.as_bytes()[1], b':' | b'|') } fn starts_with_windows_drive_letter_segment(input: &Input) -> bool { diff --git a/src/path_segments.rs b/src/path_segments.rs index f5b7d51f9..459d088db 100644 --- a/src/path_segments.rs +++ b/src/path_segments.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use parser::{self, SchemeType, to_u32}; +use parser::{self, to_u32, SchemeType}; use std::str; use Url; @@ -56,7 +56,8 @@ pub fn new(url: &mut Url) -> PathSegmentsMut { impl<'a> Drop for PathSegmentsMut<'a> { fn drop(&mut self) { - self.url.restore_after_path(self.old_after_path_position, &self.after_path) + self.url + .restore_after_path(self.old_after_path_position, &self.after_path) } } @@ -126,8 +127,12 @@ impl<'a> PathSegmentsMut<'a> { /// /// Returns `&mut Self` so that method calls can be chained. 
pub fn pop(&mut self) -> &mut Self { - let last_slash = self.url.serialization[self.after_first_slash..].rfind('/').unwrap_or(0); - self.url.serialization.truncate(self.after_first_slash + last_slash); + let last_slash = self.url.serialization[self.after_first_slash..] + .rfind('/') + .unwrap_or(0); + self.url + .serialization + .truncate(self.after_first_slash + last_slash); self } @@ -194,7 +199,10 @@ impl<'a> PathSegmentsMut<'a> { /// # run().unwrap(); /// ``` pub fn extend(&mut self, segments: I) -> &mut Self - where I: IntoIterator, I::Item: AsRef { + where + I: IntoIterator, + I::Item: AsRef, + { let scheme_type = SchemeType::from(self.url.scheme()); let path_start = self.url.path_start as usize; self.url.mutate(|parser| { @@ -202,14 +210,18 @@ impl<'a> PathSegmentsMut<'a> { for segment in segments { let segment = segment.as_ref(); if matches!(segment, "." | "..") { - continue + continue; } if parser.serialization.len() > path_start + 1 { parser.serialization.push('/'); } - let mut has_host = true; // FIXME account for this? - parser.parse_path(scheme_type, &mut has_host, path_start, - parser::Input::new(segment)); + let mut has_host = true; // FIXME account for this? + parser.parse_path( + scheme_type, + &mut has_host, + path_start, + parser::Input::new(segment), + ); } }); self diff --git a/src/quirks.rs b/src/quirks.rs index 0c7aaa894..285ee21b6 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -11,8 +11,8 @@ //! Unless you need to be interoperable with web browsers, //! you probably want to use `Url` method instead. 
-use {Url, Position, Host, ParseError, idna}; -use parser::{Parser, SchemeType, default_port, Context, Input}; +use parser::{default_port, Context, Input, Parser, SchemeType}; +use {idna, Host, ParseError, Position, Url}; /// https://url.spec.whatwg.org/#dom-url-domaintoascii pub fn domain_to_ascii(domain: &str) -> String { @@ -84,7 +84,11 @@ pub fn password(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-password pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> { - url.set_password(if new_password.is_empty() { None } else { Some(new_password) }) + url.set_password(if new_password.is_empty() { + None + } else { + Some(new_password) + }) } /// Getter for https://url.spec.whatwg.org/#dom-url-host @@ -96,7 +100,7 @@ pub fn host(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-host pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { if url.cannot_be_a_base() { - return Err(()) + return Err(()); } let host; let opt_port; @@ -108,12 +112,13 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { host = h; opt_port = if let Some(remaining) = remaining.split_prefix(':') { Parser::parse_port(remaining, || default_port(scheme), Context::Setter) - .ok().map(|(port, _remaining)| port) + .ok() + .map(|(port, _remaining)| port) } else { None }; } - Err(_) => return Err(()) + Err(_) => return Err(()), } } url.set_host_internal(host, opt_port); @@ -129,7 +134,7 @@ pub fn hostname(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-hostname pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { if url.cannot_be_a_base() { - return Err(()) + return Err(()); } let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme())); if let Ok((host, _remaining)) = result { @@ -153,9 +158,13 @@ pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { // has_host implies !cannot_be_a_base let scheme = url.scheme(); if 
!url.has_host() || url.host() == Some(Host::Domain("")) || scheme == "file" { - return Err(()) + return Err(()); } - result = Parser::parse_port(Input::new(new_port), || default_port(scheme), Context::Setter) + result = Parser::parse_port( + Input::new(new_port), + || default_port(scheme), + Context::Setter, + ) } if let Ok((new_port, _remaining)) = result { url.set_port_internal(new_port); @@ -168,7 +177,7 @@ pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { /// Getter for https://url.spec.whatwg.org/#dom-url-pathname #[inline] pub fn pathname(url: &Url) -> &str { - url.path() + url.path() } /// Setter for https://url.spec.whatwg.org/#dom-url-pathname diff --git a/src/slicing.rs b/src/slicing.rs index 926f3c796..2d7f78e6f 100644 --- a/src/slicing.rs +++ b/src/slicing.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::ops::{Range, RangeFrom, RangeTo, RangeFull, Index}; +use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo}; use Url; impl Index for Url { @@ -94,7 +94,7 @@ pub enum Position { BeforeQuery, AfterQuery, BeforeFragment, - AfterFragment + AfterFragment, } impl Url { @@ -105,43 +105,49 @@ impl Url { Position::AfterScheme => self.scheme_end as usize, - Position::BeforeUsername => if self.has_authority() { - self.scheme_end as usize + "://".len() - } else { - debug_assert!(self.byte_at(self.scheme_end) == b':'); - debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end); - self.scheme_end as usize + ":".len() - }, + Position::BeforeUsername => { + if self.has_authority() { + self.scheme_end as usize + "://".len() + } else { + debug_assert!(self.byte_at(self.scheme_end) == b':'); + debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end); + self.scheme_end as usize + ":".len() + } + } Position::AfterUsername => self.username_end as usize, - Position::BeforePassword => if self.has_authority() && - self.byte_at(self.username_end) == 
b':' { - self.username_end as usize + ":".len() - } else { - debug_assert!(self.username_end == self.host_start); - self.username_end as usize - }, - - Position::AfterPassword => if self.has_authority() && - self.byte_at(self.username_end) == b':' { - debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@'); - self.host_start as usize - "@".len() - } else { - debug_assert!(self.username_end == self.host_start); - self.host_start as usize - }, + Position::BeforePassword => { + if self.has_authority() && self.byte_at(self.username_end) == b':' { + self.username_end as usize + ":".len() + } else { + debug_assert!(self.username_end == self.host_start); + self.username_end as usize + } + } + + Position::AfterPassword => { + if self.has_authority() && self.byte_at(self.username_end) == b':' { + debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@'); + self.host_start as usize - "@".len() + } else { + debug_assert!(self.username_end == self.host_start); + self.host_start as usize + } + } Position::BeforeHost => self.host_start as usize, Position::AfterHost => self.host_end as usize, - Position::BeforePort => if self.port.is_some() { - debug_assert!(self.byte_at(self.host_end) == b':'); - self.host_end as usize + ":".len() - } else { - self.host_end as usize - }, + Position::BeforePort => { + if self.port.is_some() { + debug_assert!(self.byte_at(self.host_end) == b':'); + self.host_end as usize + ":".len() + } else { + self.host_end as usize + } + } Position::AfterPort => self.path_start as usize, @@ -179,4 +185,3 @@ impl Url { } } } - diff --git a/tests/data.rs b/tests/data.rs index 29d2e0deb..1981814e6 100644 --- a/tests/data.rs +++ b/tests/data.rs @@ -8,29 +8,29 @@ //! 
Data-driven tests -extern crate serde_json; extern crate rustc_test as test; +extern crate serde_json; extern crate url; use serde_json::Value; -use url::{Url, quirks}; use std::str::FromStr; +use url::{quirks, Url}; fn check_invariants(url: &Url) { url.check_invariants().unwrap(); - #[cfg(feature="serde")] { + #[cfg(feature = "serde")] + { let bytes = serde_json::to_vec(url).unwrap(); let new_url: Url = serde_json::from_slice(&bytes).unwrap(); assert_eq!(url, &new_url); } } - fn run_parsing(input: &str, base: &str, expected: Result) { let base = match Url::parse(&base) { Ok(base) => base, Err(_) if expected.is_err() => return, - Err(message) => panic!("Error parsing base {:?}: {}", base, message) + Err(message) => panic!("Error parsing base {:?}: {}", base, message), }; let (url, expected) = match (base.join(&input), expected) { (Ok(url), Ok(expected)) => (url, expected), @@ -42,14 +42,18 @@ fn run_parsing(input: &str, base: &str, expected: Result check_invariants(&url); macro_rules! assert_eq { - ($expected: expr, $got: expr) => { - { - let expected = $expected; - let got = $got; - assert!(expected == got, "{:?} != {} {:?} for URL {:?}", - got, stringify!($expected), expected, url); - } - } + ($expected: expr, $got: expr) => {{ + let expected = $expected; + let got = $got; + assert!( + expected == got, + "{:?} != {} {:?} for URL {:?}", + got, + stringify!($expected), + expected, + url + ); + }}; } macro_rules! 
assert_attributes { @@ -95,7 +99,11 @@ impl JsonExt for Value { } fn string(self) -> String { - if let Value::String(s) = self { s } else { panic!("Not a Value::String") } + if let Value::String(s) = self { + s + } else { + panic!("Not a Value::String") + } } fn take_string(&mut self, key: &str) -> String { @@ -109,7 +117,7 @@ fn collect_parsing(add_test: &mut F) { .expect("JSON parse error in urltestdata.json"); for entry in json.as_array_mut().unwrap() { if entry.is_string() { - continue // ignore comments + continue; // ignore comments } let base = entry.take_string("base"); let input = entry.take_string("input"); @@ -118,8 +126,7 @@ fn collect_parsing(add_test: &mut F) { } else { Ok(ExpectedAttributes { href: entry.take_string("href"), - origin: entry.take_key("origin") - .map(|s| s.string()), + origin: entry.take_key("origin").map(|s| s.string()), protocol: entry.take_string("protocol"), username: entry.take_string("username"), password: entry.take_string("password"), @@ -131,12 +138,17 @@ fn collect_parsing(add_test: &mut F) { hash: entry.take_string("hash"), }) }; - add_test(format!("{:?} @ base {:?}", input, base), - test::TestFn::dyn_test_fn(move || run_parsing(&input, &base, expected))); + add_test( + format!("{:?} @ base {:?}", input, base), + test::TestFn::dyn_test_fn(move || run_parsing(&input, &base, expected)), + ); } } -fn collect_setters(add_test: &mut F) where F: FnMut(String, test::TestFn) { +fn collect_setters(add_test: &mut F) +where + F: FnMut(String, test::TestFn), +{ let mut json = Value::from_str(include_str!("setters_tests.json")) .expect("JSON parse error in setters_tests.json"); diff --git a/tests/unit.rs b/tests/unit.rs index 708c4af31..bbe67e6ae 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -15,7 +15,7 @@ use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::net::{Ipv4Addr, Ipv6Addr}; use std::path::{Path, PathBuf}; -use url::{Host, HostAndPort, Url, form_urlencoded}; +use url::{form_urlencoded, Host, HostAndPort, Url}; 
#[test] fn size() { @@ -24,7 +24,9 @@ fn size() { } macro_rules! assert_from_file_path { - ($path: expr) => { assert_from_file_path!($path, $path) }; + ($path: expr) => { + assert_from_file_path!($path, $path) + }; ($path: expr, $url_path: expr) => {{ let url = Url::from_file_path(Path::new($path)).unwrap(); assert_eq!(url.host(), None); @@ -33,8 +35,6 @@ macro_rules! assert_from_file_path { }}; } - - #[test] fn new_file_paths() { if cfg!(unix) { @@ -73,7 +73,10 @@ fn new_path_windows_fun() { assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r"); // Invalid UTF-8 - assert!(Url::parse("file:///C:/foo/ba%80r").unwrap().to_file_path().is_err()); + assert!(Url::parse("file:///C:/foo/ba%80r") + .unwrap() + .to_file_path() + .is_err()); // test windows canonicalized path let path = PathBuf::from(r"\\?\C:\foo\bar"); @@ -85,7 +88,6 @@ fn new_path_windows_fun() { } } - #[test] fn new_directory_paths() { if cfg!(unix) { @@ -99,7 +101,10 @@ fn new_directory_paths() { if cfg!(windows) { assert_eq!(Url::from_directory_path(Path::new("relative")), Err(())); assert_eq!(Url::from_directory_path(Path::new(r"..\relative")), Err(())); - assert_eq!(Url::from_directory_path(Path::new(r"\drive-relative")), Err(())); + assert_eq!( + Url::from_directory_path(Path::new(r"\drive-relative")), + Err(()) + ); assert_eq!(Url::from_directory_path(Path::new(r"\\ucn\")), Err(())); let url = Url::from_directory_path(Path::new(r"C:\foo\bar")).unwrap(); @@ -126,10 +131,16 @@ fn from_str() { #[test] fn parse_with_params() { - let url = Url::parse_with_params("http://testing.com/this?dont=clobberme", - &[("lang", "rust")]).unwrap(); + let url = Url::parse_with_params( + "http://testing.com/this?dont=clobberme", + &[("lang", "rust")], + ) + .unwrap(); - assert_eq!(url.as_str(), "http://testing.com/this?dont=clobberme&lang=rust"); + assert_eq!( + url.as_str(), + "http://testing.com/this?dont=clobberme&lang=rust" + ); } #[test] @@ -144,8 +155,8 @@ fn issue_124() { #[test] fn test_equality() { - use 
std::hash::{Hash, Hasher}; use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; fn check_eq(a: &Url, b: &Url) { assert_eq!(a, b); @@ -195,13 +206,29 @@ fn host() { assert_eq!(Url::parse(input).unwrap().host(), Some(host)); } assert_host("http://www.mozilla.org", Host::Domain("www.mozilla.org")); - assert_host("http://1.35.33.49", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); - assert_host("http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", Host::Ipv6(Ipv6Addr::new( - 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344))); + assert_host( + "http://1.35.33.49", + Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), + ); + assert_host( + "http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + Host::Ipv6(Ipv6Addr::new( + 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344, + )), + ); assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49")); - assert_host("http://[::]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0))); - assert_host("http://[::1]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1))); - assert_host("http://0x1.0X23.0x21.061", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); + assert_host( + "http://[::]", + Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), + ); + assert_host( + "http://[::1]", + Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)), + ); + assert_host( + "http://0x1.0X23.0x21.061", + Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), + ); assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111))); assert_host("http://2..2.3", Host::Domain("2..2.3")); @@ -216,15 +243,26 @@ fn host_serialization() { // but https://url.spec.whatwg.org/#concept-ipv6-serializer specifies not to. 
// Not [::0.0.0.2] / [::ffff:0.0.0.2] - assert_eq!(Url::parse("http://[0::2]").unwrap().host_str(), Some("[::2]")); - assert_eq!(Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), Some("[::ffff:0:2]")); + assert_eq!( + Url::parse("http://[0::2]").unwrap().host_str(), + Some("[::2]") + ); + assert_eq!( + Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), + Some("[::ffff:0:2]") + ); } #[test] fn test_idna() { assert!("http://goșu.ro".parse::().is_ok()); - assert_eq!(Url::parse("http://☃.net/").unwrap().host(), Some(Host::Domain("xn--n3h.net"))); - assert!("https://r2---sn-huoa-cvhl.googlevideo.com/crossdomain.xml".parse::().is_ok()); + assert_eq!( + Url::parse("http://☃.net/").unwrap().host(), + Some(Host::Domain("xn--n3h.net")) + ); + assert!("https://r2---sn-huoa-cvhl.googlevideo.com/crossdomain.xml" + .parse::() + .is_ok()); } #[test] @@ -235,9 +273,18 @@ fn test_serialization() { ("http://@emptyuser.com/", "http://emptyuser.com/"), ("http://:@emptypass.com/", "http://emptypass.com/"), ("http://user@user.com/", "http://user@user.com/"), - ("http://user:pass@userpass.com/", "http://user:pass@userpass.com/"), - ("http://slashquery.com/path/?q=something", "http://slashquery.com/path/?q=something"), - ("http://noslashquery.com/path?q=something", "http://noslashquery.com/path?q=something") + ( + "http://user:pass@userpass.com/", + "http://user:pass@userpass.com/", + ), + ( + "http://slashquery.com/path/?q=something", + "http://slashquery.com/path/?q=something", + ), + ( + "http://noslashquery.com/path?q=something", + "http://noslashquery.com/path?q=something", + ), ]; for &(input, result) in &data { let url = Url::parse(input).unwrap(); @@ -250,11 +297,16 @@ fn test_form_urlencoded() { let pairs: &[(Cow, Cow)] = &[ ("foo".into(), "é&".into()), ("bar".into(), "".into()), - ("foo".into(), "#".into()) + ("foo".into(), "#".into()), ]; - let encoded = form_urlencoded::Serializer::new(String::new()).extend_pairs(pairs).finish(); + let encoded = 
form_urlencoded::Serializer::new(String::new()) + .extend_pairs(pairs) + .finish(); assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23"); - assert_eq!(form_urlencoded::parse(encoded.as_bytes()).collect::>(), pairs.to_vec()); + assert_eq!( + form_urlencoded::parse(encoded.as_bytes()).collect::>(), + pairs.to_vec() + ); } #[test] @@ -281,27 +333,33 @@ fn host_and_port_display() { assert_eq!( format!( "{}", - HostAndPort{ host: Host::Domain("www.mozilla.org"), port: 80} + HostAndPort { + host: Host::Domain("www.mozilla.org"), + port: 80 + } ), "www.mozilla.org:80" ); assert_eq!( format!( "{}", - HostAndPort::{ host: Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), port: 65535 } + HostAndPort:: { + host: Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), + port: 65535 + } ), "1.35.33.49:65535" ); assert_eq!( format!( "{}", - HostAndPort::{ + HostAndPort:: { host: Host::Ipv6(Ipv6Addr::new( 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344 )), port: 1337 - }) - , + } + ), "[2001:db8:85a3:8d3:1319:8a2e:370:7344]:1337" ) } @@ -322,8 +380,13 @@ fn issue_61() { fn issue_197() { let mut url = Url::from_file_path("/").expect("Failed to parse path"); url.check_invariants().unwrap(); - assert_eq!(url, Url::parse("file:///").expect("Failed to parse path + protocol")); - url.path_segments_mut().expect("path_segments_mut").pop_if_empty(); + assert_eq!( + url, + Url::parse("file:///").expect("Failed to parse path + protocol") + ); + url.path_segments_mut() + .expect("path_segments_mut") + .pop_if_empty(); } #[test] @@ -345,12 +408,19 @@ fn append_trailing_slash() { /// https://github.com/servo/rust-url/issues/227 fn extend_query_pairs_then_mutate() { let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap(); - url.query_pairs_mut().extend_pairs(vec![ ("auth", "my-token") ].into_iter()); + url.query_pairs_mut() + .extend_pairs(vec![("auth", "my-token")].into_iter()); url.check_invariants().unwrap(); - assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?auth=my-token"); + 
assert_eq!( + url.to_string(), + "http://localhost:6767/foo/bar?auth=my-token" + ); url.path_segments_mut().unwrap().push("some_other_path"); url.check_invariants().unwrap(); - assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/some_other_path?auth=my-token"); + assert_eq!( + url.to_string(), + "http://localhost:6767/foo/bar/some_other_path?auth=my-token" + ); } #[test] @@ -387,7 +457,10 @@ fn test_set_host() { #[test] // https://github.com/servo/rust-url/issues/166 fn test_leading_dots() { - assert_eq!(Host::parse(".org").unwrap(), Host::Domain(".org".to_owned())); + assert_eq!( + Host::parse(".org").unwrap(), + Host::Domain(".org".to_owned()) + ); assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some(".")); } @@ -402,7 +475,10 @@ fn define_encode_set_scopes() { pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} } - assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), "foo%20bar"); + assert_eq!( + utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), + "foo%20bar" + ); mod m { use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; @@ -413,7 +489,10 @@ fn define_encode_set_scopes() { } pub fn test() { - assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), "foo%20bar"); + assert_eq!( + utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), + "foo%20bar" + ); } } @@ -423,8 +502,8 @@ fn define_encode_set_scopes() { #[test] /// https://github.com/servo/rust-url/issues/302 fn test_origin_hash() { - use std::hash::{Hash,Hasher}; use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; fn hash(value: &T) -> u64 { let mut hasher = DefaultHasher::new(); @@ -443,7 +522,9 @@ fn test_origin_hash() { Url::parse("ftp://example.net").unwrap().origin(), Url::parse("file://example.net").unwrap().origin(), Url::parse("http://user@example.net/").unwrap().origin(), - Url::parse("http://user:pass@example.net/").unwrap().origin(), + 
Url::parse("http://user:pass@example.net/") + .unwrap() + .origin(), ]; for origin_to_compare in &origins_to_compare { @@ -465,7 +546,7 @@ fn test_origin_hash() { #[test] fn test_windows_unc_path() { if !cfg!(windows) { - return + return; } let url = Url::from_file_path(Path::new(r"\\host\share\path\file.txt")).unwrap(); @@ -495,7 +576,8 @@ fn test_syntax_violation_callback() { let violation = Cell::new(None); let url = Url::options() .syntax_violation_callback(Some(&|v| violation.set(Some(v)))) - .parse("http:////mozilla.org:42").unwrap(); + .parse("http:////mozilla.org:42") + .unwrap(); assert_eq!(url.port(), Some(42)); let v = violation.take().unwrap(); @@ -511,13 +593,15 @@ fn test_syntax_violation_callback_lifetimes() { let url = Url::options() .syntax_violation_callback(Some(&vfn)) - .parse("http:////mozilla.org:42").unwrap(); + .parse("http:////mozilla.org:42") + .unwrap(); assert_eq!(url.port(), Some(42)); assert_eq!(violation.take(), Some(ExpectedDoubleSlash)); let url = Url::options() .syntax_violation_callback(Some(&vfn)) - .parse("http://mozilla.org\\path").unwrap(); + .parse("http://mozilla.org\\path") + .unwrap(); assert_eq!(url.path(), "/path"); assert_eq!(violation.take(), Some(Backslash)); } @@ -528,13 +612,11 @@ fn test_options_reuse() { let violations = RefCell::new(Vec::new()); let vfn = |v| violations.borrow_mut().push(v); - let options = Url::options() - .syntax_violation_callback(Some(&vfn)); + let options = Url::options().syntax_violation_callback(Some(&vfn)); let url = options.parse("http:////mozilla.org").unwrap(); let options = options.base_url(Some(&url)); let url = options.parse("/sub\\path").unwrap(); assert_eq!(url.as_str(), "http://mozilla.org/sub/path"); - assert_eq!(*violations.borrow(), - vec!(ExpectedDoubleSlash, Backslash)); + assert_eq!(*violations.borrow(), vec!(ExpectedDoubleSlash, Backslash)); } diff --git a/url_serde/src/lib.rs b/url_serde/src/lib.rs index ce8c47466..5a582ccb7 100644 --- a/url_serde/src/lib.rs +++ 
b/url_serde/src/lib.rs @@ -67,25 +67,30 @@ ipc::channel::>() #![deny(unsafe_code)] extern crate serde; -#[cfg(test)] #[macro_use] extern crate serde_derive; -#[cfg(test)] extern crate serde_json; +#[cfg(test)] +#[macro_use] +extern crate serde_derive; +#[cfg(test)] +extern crate serde_json; extern crate url; -use serde::{Deserialize, Serialize, Serializer, Deserializer}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::cmp::PartialEq; use std::error::Error; use std::fmt; use std::io::Write; use std::ops::{Deref, DerefMut}; use std::str; -use url::{Url, Host}; +use url::{Host, Url}; /// Serialises `value` with a given serializer. /// /// This is useful to serialize `rust-url` types used in structure fields or /// tuple members with `#[serde(serialize_with = "url_serde::serialize")]`. pub fn serialize(value: &T, serializer: S) -> Result - where S: Serializer, for<'a> Ser<'a, T>: Serialize +where + S: Serializer, + for<'a> Ser<'a, T>: Serialize, { Ser::new(value).serialize(serializer) } @@ -98,7 +103,10 @@ pub fn serialize(value: &T, serializer: S) -> Result #[derive(Debug)] pub struct Ser<'a, T: 'a>(&'a T); -impl<'a, T> Ser<'a, T> where Ser<'a, T>: Serialize { +impl<'a, T> Ser<'a, T> +where + Ser<'a, T>: Serialize, +{ /// Returns a new `Ser` wrapper. #[inline(always)] pub fn new(value: &'a T) -> Self { @@ -108,14 +116,20 @@ impl<'a, T> Ser<'a, T> where Ser<'a, T>: Serialize { /// Serializes this URL into a `serde` stream. impl<'a> Serialize for Ser<'a, Url> { - fn serialize(&self, serializer: S) -> Result where S: Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { serializer.serialize_str(self.0.as_str()) } } /// Serializes this Option into a `serde` stream. 
impl<'a> Serialize for Ser<'a, Option> { - fn serialize(&self, serializer: S) -> Result where S: Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { if let Some(url) = self.0.as_ref() { serializer.serialize_some(url.as_str()) } else { @@ -124,8 +138,14 @@ impl<'a> Serialize for Ser<'a, Option> { } } -impl<'a, String> Serialize for Ser<'a, Host> where String: AsRef { - fn serialize(&self, serializer: S) -> Result where S: Serializer { +impl<'a, String> Serialize for Ser<'a, Host> +where + String: AsRef, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { match *self.0 { Host::Domain(ref s) => serializer.serialize_str(s.as_ref()), Host::Ipv4(_) | Host::Ipv6(_) => { @@ -166,7 +186,9 @@ fn display_into_buffer<'a, T: fmt::Display>(value: &T, buffer: &'a mut [u8]) -> /// This is useful to deserialize Url types used in structure fields or /// tuple members with `#[serde(deserialize_with = "url_serde::deserialize")]`. pub fn deserialize<'de, T, D>(deserializer: D) -> Result - where D: Deserializer<'de>, De: Deserialize<'de> +where + D: Deserializer<'de>, + De: Deserialize<'de>, { De::deserialize(deserializer).map(De::into_inner) } @@ -180,7 +202,10 @@ pub fn deserialize<'de, T, D>(deserializer: D) -> Result #[derive(Debug)] pub struct De(T); -impl<'de, T> De where De: serde::Deserialize<'de> { +impl<'de, T> De +where + De: serde::Deserialize<'de>, +{ /// Consumes this wrapper, returning the deserialized value. #[inline(always)] pub fn into_inner(self) -> T { @@ -190,35 +215,43 @@ impl<'de, T> De where De: serde::Deserialize<'de> { /// Deserializes this URL from a `serde` stream. 
impl<'de> Deserialize<'de> for De { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { let string_representation: String = Deserialize::deserialize(deserializer)?; - Url::parse(&string_representation).map(De).map_err(|err| { - serde::de::Error::custom(err.description()) - }) + Url::parse(&string_representation) + .map(De) + .map_err(|err| serde::de::Error::custom(err.description())) } } /// Deserializes this Option from a `serde` stream. impl<'de> Deserialize<'de> for De> { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { let option_representation: Option = Deserialize::deserialize(deserializer)?; if let Some(s) = option_representation { return Url::parse(&s) .map(Some) .map(De) - .map_err(|err| {serde::de::Error::custom(err.description())}); + .map_err(|err| serde::de::Error::custom(err.description())); } Ok(De(None)) - } } impl<'de> Deserialize<'de> for De { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { let string_representation: String = Deserialize::deserialize(deserializer)?; - Host::parse(&string_representation).map(De).map_err(|err| { - serde::de::Error::custom(err.description()) - }) + Host::parse(&string_representation) + .map(De) + .map_err(|err| serde::de::Error::custom(err.description())) } } @@ -231,7 +264,9 @@ pub struct Serde(pub T); pub type SerdeUrl = Serde; impl<'de, T> Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize +where + De: Deserialize<'de>, + for<'a> Ser<'a, T>: Serialize, { /// Consumes this wrapper, returning the inner value. 
#[inline(always)] @@ -241,7 +276,10 @@ where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize } impl<'de, T> fmt::Debug for Serde -where T: fmt::Debug, De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize +where + T: fmt::Debug, + De: Deserialize<'de>, + for<'a> Ser<'a, T>: Serialize, { fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { self.0.fmt(formatter) @@ -249,7 +287,9 @@ where T: fmt::Debug, De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize } impl<'de, T> Deref for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize +where + De: Deserialize<'de>, + for<'a> Ser<'a, T>: Serialize, { type Target = T; @@ -259,7 +299,9 @@ where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize } impl<'de, T> DerefMut for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize +where + De: Deserialize<'de>, + for<'a> Ser<'a, T>: Serialize, { fn deref_mut(&mut self) -> &mut T { &mut self.0 @@ -267,7 +309,9 @@ where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize } impl<'de, T: PartialEq> PartialEq for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize +where + De: Deserialize<'de>, + for<'a> Ser<'a, T>: Serialize, { fn eq(&self, other: &T) -> bool { self.0 == *other @@ -275,20 +319,26 @@ where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize } impl<'de, T> Deserialize<'de> for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize +where + De: Deserialize<'de>, + for<'a> Ser<'a, T>: Serialize, { fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> + where + D: Deserializer<'de>, { De::deserialize(deserializer).map(De::into_inner).map(Serde) } } impl<'de, T> Serialize for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize +where + De: Deserialize<'de>, + for<'a> Ser<'a, T>: Serialize, { fn serialize(&self, serializer: S) -> Result - where S: Serializer + where + S: Serializer, { Ser(&self.0).serialize(serializer) } @@ -307,18 +357,17 @@ fn 
test_derive_deserialize_with_for_url() { #[derive(Deserialize, Debug, Eq, PartialEq)] struct Test { #[serde(deserialize_with = "deserialize", rename = "_url_")] - url: Url + url: Url, } let url_str = "http://www.test.com/foo/bar?$param=bazz"; let expected = Test { - url: Url::parse(url_str).unwrap() + url: Url::parse(url_str).unwrap(), }; let json_string = format!(r#"{{"_url_": "{}"}}"#, url_str); let got: Test = serde_json::from_str(&json_string).unwrap(); assert_eq!(expected, got); - } #[test] @@ -326,21 +375,19 @@ fn test_derive_deserialize_with_for_option_url() { #[derive(Deserialize, Debug, Eq, PartialEq)] struct Test { #[serde(deserialize_with = "deserialize", rename = "_url_")] - url: Option + url: Option, } let url_str = "http://www.test.com/foo/bar?$param=bazz"; let expected = Test { - url: Some(Url::parse(url_str).unwrap()) + url: Some(Url::parse(url_str).unwrap()), }; let json_string = format!(r#"{{"_url_": "{}"}}"#, url_str); let got: Test = serde_json::from_str(&json_string).unwrap(); assert_eq!(expected, got); - let expected = Test { - url: None - }; + let expected = Test { url: None }; let json_string = r#"{"_url_": null}"#; let got: Test = serde_json::from_str(&json_string).unwrap(); assert_eq!(expected, got); @@ -351,13 +398,15 @@ fn test_derive_serialize_with_for_url() { #[derive(Serialize, Debug, Eq, PartialEq)] struct Test { #[serde(serialize_with = "serialize", rename = "_url_")] - url: Url + url: Url, } let url_str = "http://www.test.com/foo/bar?$param=bazz"; let expected = format!(r#"{{"_url_":"{}"}}"#, url_str); - let input = Test {url: Url::parse(url_str).unwrap()}; + let input = Test { + url: Url::parse(url_str).unwrap(), + }; let got = serde_json::to_string(&input).unwrap(); assert_eq!(expected, got); } @@ -367,18 +416,20 @@ fn test_derive_serialize_with_for_option_url() { #[derive(Serialize, Debug, Eq, PartialEq)] struct Test { #[serde(serialize_with = "serialize", rename = "_url_")] - url: Option + url: Option, } let url_str = 
"http://www.test.com/foo/bar?$param=bazz"; let expected = format!(r#"{{"_url_":"{}"}}"#, url_str); - let input = Test {url: Some(Url::parse(url_str).unwrap())}; + let input = Test { + url: Some(Url::parse(url_str).unwrap()), + }; let got = serde_json::to_string(&input).unwrap(); assert_eq!(expected, got); let expected = format!(r#"{{"_url_":null}}"#); - let input = Test {url: None}; + let input = Test { url: None }; let got = serde_json::to_string(&input).unwrap(); assert_eq!(expected, got); } @@ -388,7 +439,7 @@ fn test_derive_with_for_url() { #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] struct Test { #[serde(with = "self", rename = "_url_")] - url: Url + url: Url, } let url_str = "http://www.test.com/foo/bar?$param=bazz"; @@ -396,13 +447,15 @@ fn test_derive_with_for_url() { // test deserialization let expected = Test { - url: Url::parse(url_str).unwrap() + url: Url::parse(url_str).unwrap(), }; let got: Test = serde_json::from_str(&json_string).unwrap(); assert_eq!(expected, got); // test serialization - let input = Test {url: Url::parse(url_str).unwrap()}; + let input = Test { + url: Url::parse(url_str).unwrap(), + }; let got = serde_json::to_string(&input).unwrap(); assert_eq!(json_string, got); } From 394e63a7518e1bfe8e106ebc7938706b10cfa1aa Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 14 Jul 2019 14:41:48 +0200 Subject: [PATCH 20/35] Remove support for the heapsize crate --- Cargo.toml | 2 -- src/host.rs | 15 --------------- src/lib.rs | 12 ------------ src/origin.rs | 17 ----------------- src/parser.rs | 6 ------ 5 files changed, 52 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b28fab832..13d3c8ecc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,11 +38,9 @@ bencher = "0.1" [features] query_encoding = ["encoding"] -heap_size = ["heapsize"] [dependencies] encoding = {version = "0.2", optional = true} -heapsize = {version = ">=0.4.1, <0.5", optional = true} idna = { version = "0.1.0", path = "./idna" } matches = "0.1" 
percent-encoding = { version = "1.0.0", path = "./percent_encoding" } diff --git a/src/host.rs b/src/host.rs index 72b9d0a77..79e9bb89c 100644 --- a/src/host.rs +++ b/src/host.rs @@ -6,8 +6,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[cfg(feature = "heapsize")] -use heapsize::HeapSizeOf; use idna; use parser::{ParseError, ParseResult}; use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET}; @@ -25,9 +23,6 @@ pub enum HostInternal { Ipv6(Ipv6Addr), } -#[cfg(feature = "heapsize")] -known_heap_size!(0, HostInternal); - #[cfg(feature = "serde")] impl ::serde::Serialize for HostInternal { fn serialize(&self, serializer: S) -> Result @@ -130,16 +125,6 @@ impl<'de, S: ::serde::Deserialize<'de>> ::serde::Deserialize<'de> for Host { } } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Host { - fn heap_size_of_children(&self) -> usize { - match *self { - Host::Domain(ref s) => s.heap_size_of_children(), - _ => 0, - } - } -} - impl<'a> Host<&'a str> { /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. 
pub fn to_owned(&self) -> Host { diff --git a/src/lib.rs b/src/lib.rs index 4a195ceea..ca2446557 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -111,17 +111,12 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); extern crate matches; #[cfg(feature = "serde")] extern crate serde; -#[cfg(feature = "heapsize")] -#[macro_use] -extern crate heapsize; pub extern crate idna; #[macro_use] pub extern crate percent_encoding; use encoding::EncodingOverride; -#[cfg(feature = "heapsize")] -use heapsize::HeapSizeOf; use host::HostInternal; use parser::{to_u32, Context, Parser, SchemeType}; use percent_encoding::{ @@ -184,13 +179,6 @@ pub struct Url { fragment_start: Option, // Before '#', unlike Position::FragmentStart } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Url { - fn heap_size_of_children(&self) -> usize { - self.serialization.heap_size_of_children() - } -} - /// Full configuration for the URL parser. #[derive(Copy, Clone)] pub struct ParseOptions<'a> { diff --git a/src/origin.rs b/src/origin.rs index 718d25ebe..3223709dd 100644 --- a/src/origin.rs +++ b/src/origin.rs @@ -6,8 +6,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[cfg(feature = "heapsize")] -use heapsize::HeapSizeOf; use host::Host; use idna::domain_to_unicode; use parser::default_port; @@ -61,18 +59,6 @@ pub enum Origin { Tuple(String, Host, u16), } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Origin { - fn heap_size_of_children(&self) -> usize { - match *self { - Origin::Tuple(ref scheme, ref host, _) => { - scheme.heap_size_of_children() + host.heap_size_of_children() - } - _ => 0, - } - } -} - impl Origin { /// Creates a new opaque origin that is only equal to itself. 
pub fn new_opaque() -> Origin { @@ -125,6 +111,3 @@ impl Origin { /// Opaque identifier for URLs that have file or other schemes #[derive(Eq, PartialEq, Hash, Clone, Debug)] pub struct OpaqueOrigin(usize); - -#[cfg(feature = "heapsize")] -known_heap_size!(0, OpaqueOrigin); diff --git a/src/parser.rs b/src/parser.rs index f72c22b48..7f55eb186 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -61,9 +61,6 @@ simple_enum_error! { Overflow => "URLs more than 4 GB are not supported", } -#[cfg(feature = "heapsize")] -known_heap_size!(0, ParseError); - impl fmt::Display for ParseError { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { self.description().fmt(fmt) @@ -115,9 +112,6 @@ syntax_violation_enum! { UnencodedAtSign => "unencoded @ sign in username or password", } -#[cfg(feature = "heapsize")] -known_heap_size!(0, SyntaxViolation); - impl fmt::Display for SyntaxViolation { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { self.description().fmt(fmt) From fe74a60bd0636c5e5da920674b9bbffc22f3c384 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 18:36:52 +0200 Subject: [PATCH 21/35] =?UTF-8?q?Don=E2=80=99t=20reexport=20idna=20and=20p?= =?UTF-8?q?ercent=5Fencoding=20in=20the=20url=20crate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 49 ++----------------------------------------------- tests/unit.rs | 7 ++++--- 2 files changed, 6 insertions(+), 50 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ca2446557..7828d2458 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -109,12 +109,11 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); #[macro_use] extern crate matches; +extern crate idna; #[cfg(feature = "serde")] extern crate serde; - -pub extern crate idna; #[macro_use] -pub extern crate percent_encoding; +extern crate percent_encoding; use encoding::EncodingOverride; use host::HostInternal; @@ -2538,47 +2537,3 @@ impl<'a> Drop for UrlQuery<'a> { } } } - -/// 
Define a new struct -/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait, -/// for use in [`percent_decode()`](percent_encoding/fn.percent_encode.html) -/// and related functions. -/// -/// Parameters are characters to include in the set in addition to those of the base set. -/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set). -/// -/// Example -/// ======= -/// -/// ```rust -/// #[macro_use] extern crate url; -/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; -/// define_encode_set! { -/// /// This encode set is used in the URL parser for query strings. -/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} -/// } -/// # fn main() { -/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), "foo%20bar"); -/// # } -/// ``` -#[macro_export] -macro_rules! define_encode_set { - ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => { - $(#[$attr])* - #[derive(Copy, Clone)] - #[allow(non_camel_case_types)] - pub struct $name; - - impl $crate::percent_encoding::EncodeSet for $name { - #[inline] - fn contains(&self, byte: u8) -> bool { - match byte as char { - $( - $ch => true, - )* - _ => $base_set.contains(byte) - } - } - } - } -} diff --git a/tests/unit.rs b/tests/unit.rs index bbe67e6ae..ff2033d3d 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -8,8 +8,9 @@ //! Unit tests -#[macro_use] extern crate url; +#[macro_use] +extern crate percent_encoding; use std::borrow::Cow; use std::cell::{Cell, RefCell}; @@ -468,7 +469,7 @@ fn test_leading_dots() { // inside both a module and a function #[test] fn define_encode_set_scopes() { - use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; + use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; define_encode_set! { /// This encode set is used in the URL parser for query strings. 
@@ -481,7 +482,7 @@ fn define_encode_set_scopes() { ); mod m { - use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; + use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; define_encode_set! { /// This encode set is used in the URL parser for query strings. From 568425aa59431b5065c310e6bd3f3ab71cbd6409 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 18:38:28 +0200 Subject: [PATCH 22/35] Bump url to 2.0.0 --- Cargo.toml | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 13d3c8ecc..a3a469c16 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "url" # When updating version, also modify html_root_url in the lib.rs -version = "1.7.2" +version = "2.0.0" authors = ["The rust-url developers"] description = "URL library for Rust, based on the WHATWG URL Standard" diff --git a/src/lib.rs b/src/lib.rs index 7828d2458..cf3de0ce1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,7 +105,7 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); # run().unwrap(); */ -#![doc(html_root_url = "https://docs.rs/url/1.7.0")] +#![doc(html_root_url = "https://docs.rs/url/2.0.0")] #[macro_use] extern crate matches; From 51d6b33f717d29880cb53a1f5bf0d061d846ad35 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 19:20:16 +0200 Subject: [PATCH 23/35] Remove url_serde --- Cargo.toml | 2 +- url_serde/Cargo.toml | 23 -- url_serde/LICENSE-APACHE | 1 - url_serde/LICENSE-MIT | 1 - url_serde/README.md | 11 - url_serde/src/lib.rs | 474 --------------------------------------- 6 files changed, 1 insertion(+), 511 deletions(-) delete mode 100644 url_serde/Cargo.toml delete mode 120000 url_serde/LICENSE-APACHE delete mode 120000 url_serde/LICENSE-MIT delete mode 100644 url_serde/README.md delete mode 100644 url_serde/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index a3a469c16..8a55f9cbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ 
travis-ci = { repository = "servo/rust-url" } appveyor = { repository = "Manishearth/rust-url" } [workspace] -members = [".", "idna", "percent_encoding", "url_serde", "data-url"] +members = [".", "idna", "percent_encoding", "data-url"] [[test]] name = "unit" diff --git a/url_serde/Cargo.toml b/url_serde/Cargo.toml deleted file mode 100644 index ad57e79b1..000000000 --- a/url_serde/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] - -name = "url_serde" -version = "0.2.0" -authors = ["The rust-url developers"] - -description = "Serde support for URL types" -documentation = "https://docs.rs/url_serde/" -repository = "https://github.com/servo/rust-url" -readme = "README.md" -keywords = ["url", "serde"] -license = "MIT/Apache-2.0" - -[dependencies] -serde = "1.0" -url = {version = "1.0.0", path = ".."} - -[dev-dependencies] -serde_json = "1.0" -serde_derive = "1.0" - -[lib] -doctest = false diff --git a/url_serde/LICENSE-APACHE b/url_serde/LICENSE-APACHE deleted file mode 120000 index 965b606f3..000000000 --- a/url_serde/LICENSE-APACHE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-APACHE \ No newline at end of file diff --git a/url_serde/LICENSE-MIT b/url_serde/LICENSE-MIT deleted file mode 120000 index 76219eb72..000000000 --- a/url_serde/LICENSE-MIT +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-MIT \ No newline at end of file diff --git a/url_serde/README.md b/url_serde/README.md deleted file mode 100644 index dea503946..000000000 --- a/url_serde/README.md +++ /dev/null @@ -1,11 +0,0 @@ -Serde support for rust-url types -================================ - -This crate provides wrappers and convenience functions to make `rust-url` and `serde` -work hand in hand. - -Version `0.2` or newer of this crate offer support for `serde 1.0`. -Version `0.1` of this crate offer support for `serde 0.9`. -Versions of `serde` older than `0.9` are natively supported by `rust-url` crate directly. - -For more details, see the crate [documentation](https://docs.rs/url_serde/). 
\ No newline at end of file diff --git a/url_serde/src/lib.rs b/url_serde/src/lib.rs deleted file mode 100644 index 5a582ccb7..000000000 --- a/url_serde/src/lib.rs +++ /dev/null @@ -1,474 +0,0 @@ -// Copyright 2017 The rust-url developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/*! - -This crate provides wrappers and convenience functions to make rust-url -and Serde work hand in hand. - -The supported types are: - -* `url::Url` - -# How do I use a data type with a `Url` member with Serde? - -Use the serde attributes `deserialize_with` and `serialize_with`. - -``` -#[derive(serde::Serialize, serde::Deserialize)] -struct MyStruct { - #[serde(serialize_with = "serialize")] - url: Url, -} -``` - -# How do I use a data type with an unnamed `Url` member with serde? - -Same problem, same solution. - -``` -#[derive(serde::Serialize, serde::Deserialize)] -enum MyEnum { - A(#[serde(with = "url_serde")] Url, OtherType), -} -``` - -# How do I encode a `Url` value with `serde_json::to_string`? - -Use the `Ser` wrapper. - -``` -serde_json::to_string(&Ser::new(&url)) -``` - -# How do I decode a `Url` value with `serde_json::parse`? - -Use the `De` wrapper. - -``` -serde_json::from_str(r"http:://www.rust-lang.org").map(De::into_inner) -``` - -# How do I send `Url` values as part of an IPC channel? - -Use the `Serde` wrapper. It implements `Deref` and `DerefMut` for convenience. 
- -``` -ipc::channel::>() -``` -*/ - -#![deny(missing_docs)] -#![deny(unsafe_code)] - -extern crate serde; -#[cfg(test)] -#[macro_use] -extern crate serde_derive; -#[cfg(test)] -extern crate serde_json; -extern crate url; - -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::cmp::PartialEq; -use std::error::Error; -use std::fmt; -use std::io::Write; -use std::ops::{Deref, DerefMut}; -use std::str; -use url::{Host, Url}; - -/// Serialises `value` with a given serializer. -/// -/// This is useful to serialize `rust-url` types used in structure fields or -/// tuple members with `#[serde(serialize_with = "url_serde::serialize")]`. -pub fn serialize(value: &T, serializer: S) -> Result -where - S: Serializer, - for<'a> Ser<'a, T>: Serialize, -{ - Ser::new(value).serialize(serializer) -} - -/// A wrapper to serialize `rust-url` types. -/// -/// This is useful with functions such as `serde_json::to_string`. -/// -/// Values of this type can only be passed to the `serde::Serialize` trait. -#[derive(Debug)] -pub struct Ser<'a, T: 'a>(&'a T); - -impl<'a, T> Ser<'a, T> -where - Ser<'a, T>: Serialize, -{ - /// Returns a new `Ser` wrapper. - #[inline(always)] - pub fn new(value: &'a T) -> Self { - Ser(value) - } -} - -/// Serializes this URL into a `serde` stream. -impl<'a> Serialize for Ser<'a, Url> { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(self.0.as_str()) - } -} - -/// Serializes this Option into a `serde` stream. 
-impl<'a> Serialize for Ser<'a, Option> { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - if let Some(url) = self.0.as_ref() { - serializer.serialize_some(url.as_str()) - } else { - serializer.serialize_none() - } - } -} - -impl<'a, String> Serialize for Ser<'a, Host> -where - String: AsRef, -{ - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match *self.0 { - Host::Domain(ref s) => serializer.serialize_str(s.as_ref()), - Host::Ipv4(_) | Host::Ipv6(_) => { - // max("101.102.103.104".len(), - // "[1000:1002:1003:1004:1005:1006:101.102.103.104]".len()) - const MAX_LEN: usize = 47; - let mut buffer = [0; MAX_LEN]; - serializer.serialize_str(display_into_buffer(&self.0, &mut buffer)) - } - } - } -} - -/// Like .to_string(), but doesn’t allocate memory for a `String`. -/// -/// Panics if `buffer` is too small. -fn display_into_buffer<'a, T: fmt::Display>(value: &T, buffer: &'a mut [u8]) -> &'a str { - let remaining_len; - { - let mut remaining = &mut *buffer; - write!(remaining, "{}", value).unwrap(); - remaining_len = remaining.len() - } - let written_len = buffer.len() - remaining_len; - let written = &buffer[..written_len]; - - // write! only provides std::fmt::Formatter to Display implementations, - // which has methods write_str and write_char but no method to write arbitrary bytes. - // Therefore, `written` is well-formed in UTF-8. - #[allow(unsafe_code)] - unsafe { - str::from_utf8_unchecked(written) - } -} - -/// Deserialises a `T` value with a given deserializer. -/// -/// This is useful to deserialize Url types used in structure fields or -/// tuple members with `#[serde(deserialize_with = "url_serde::deserialize")]`. -pub fn deserialize<'de, T, D>(deserializer: D) -> Result -where - D: Deserializer<'de>, - De: Deserialize<'de>, -{ - De::deserialize(deserializer).map(De::into_inner) -} - -/// A wrapper to deserialize `rust-url` types. 
-/// -/// This is useful with functions such as `serde_json::from_str`. -/// -/// Values of this type can only be obtained through -/// the `serde::Deserialize` trait. -#[derive(Debug)] -pub struct De(T); - -impl<'de, T> De -where - De: serde::Deserialize<'de>, -{ - /// Consumes this wrapper, returning the deserialized value. - #[inline(always)] - pub fn into_inner(self) -> T { - self.0 - } -} - -/// Deserializes this URL from a `serde` stream. -impl<'de> Deserialize<'de> for De { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let string_representation: String = Deserialize::deserialize(deserializer)?; - Url::parse(&string_representation) - .map(De) - .map_err(|err| serde::de::Error::custom(err.description())) - } -} - -/// Deserializes this Option from a `serde` stream. -impl<'de> Deserialize<'de> for De> { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let option_representation: Option = Deserialize::deserialize(deserializer)?; - if let Some(s) = option_representation { - return Url::parse(&s) - .map(Some) - .map(De) - .map_err(|err| serde::de::Error::custom(err.description())); - } - Ok(De(None)) - } -} - -impl<'de> Deserialize<'de> for De { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let string_representation: String = Deserialize::deserialize(deserializer)?; - Host::parse(&string_representation) - .map(De) - .map_err(|err| serde::de::Error::custom(err.description())) - } -} - -/// A convenience wrapper to be used as a type parameter, for example when -/// a `Vec` or an `HashMap` need to be passed to serde. -#[derive(Clone, Eq, Hash, PartialEq)] -pub struct Serde(pub T); - -/// A convenience type alias for Serde. -pub type SerdeUrl = Serde; - -impl<'de, T> Serde -where - De: Deserialize<'de>, - for<'a> Ser<'a, T>: Serialize, -{ - /// Consumes this wrapper, returning the inner value. 
- #[inline(always)] - pub fn into_inner(self) -> T { - self.0 - } -} - -impl<'de, T> fmt::Debug for Serde -where - T: fmt::Debug, - De: Deserialize<'de>, - for<'a> Ser<'a, T>: Serialize, -{ - fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { - self.0.fmt(formatter) - } -} - -impl<'de, T> Deref for Serde -where - De: Deserialize<'de>, - for<'a> Ser<'a, T>: Serialize, -{ - type Target = T; - - fn deref(&self) -> &T { - &self.0 - } -} - -impl<'de, T> DerefMut for Serde -where - De: Deserialize<'de>, - for<'a> Ser<'a, T>: Serialize, -{ - fn deref_mut(&mut self) -> &mut T { - &mut self.0 - } -} - -impl<'de, T: PartialEq> PartialEq for Serde -where - De: Deserialize<'de>, - for<'a> Ser<'a, T>: Serialize, -{ - fn eq(&self, other: &T) -> bool { - self.0 == *other - } -} - -impl<'de, T> Deserialize<'de> for Serde -where - De: Deserialize<'de>, - for<'a> Ser<'a, T>: Serialize, -{ - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - De::deserialize(deserializer).map(De::into_inner).map(Serde) - } -} - -impl<'de, T> Serialize for Serde -where - De: Deserialize<'de>, - for<'a> Ser<'a, T>: Serialize, -{ - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - Ser(&self.0).serialize(serializer) - } -} - -#[test] -fn test_ser_de_url() { - let url = Url::parse("http://www.test.com/foo/bar?$param=bazz").unwrap(); - let s = serde_json::to_string(&Ser::new(&url)).unwrap(); - let new_url: Url = serde_json::from_str(&s).map(De::into_inner).unwrap(); - assert_eq!(url, new_url); -} - -#[test] -fn test_derive_deserialize_with_for_url() { - #[derive(Deserialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(deserialize_with = "deserialize", rename = "_url_")] - url: Url, - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = Test { - url: Url::parse(url_str).unwrap(), - }; - let json_string = format!(r#"{{"_url_": "{}"}}"#, url_str); - let got: Test = 
serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); -} - -#[test] -fn test_derive_deserialize_with_for_option_url() { - #[derive(Deserialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(deserialize_with = "deserialize", rename = "_url_")] - url: Option, - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = Test { - url: Some(Url::parse(url_str).unwrap()), - }; - let json_string = format!(r#"{{"_url_": "{}"}}"#, url_str); - let got: Test = serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); - - let expected = Test { url: None }; - let json_string = r#"{"_url_": null}"#; - let got: Test = serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); -} - -#[test] -fn test_derive_serialize_with_for_url() { - #[derive(Serialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(serialize_with = "serialize", rename = "_url_")] - url: Url, - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = format!(r#"{{"_url_":"{}"}}"#, url_str); - let input = Test { - url: Url::parse(url_str).unwrap(), - }; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(expected, got); -} - -#[test] -fn test_derive_serialize_with_for_option_url() { - #[derive(Serialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(serialize_with = "serialize", rename = "_url_")] - url: Option, - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = format!(r#"{{"_url_":"{}"}}"#, url_str); - let input = Test { - url: Some(Url::parse(url_str).unwrap()), - }; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(expected, got); - - let expected = format!(r#"{{"_url_":null}}"#); - let input = Test { url: None }; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(expected, got); -} - -#[test] -fn test_derive_with_for_url() { - #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(with = "self", rename = 
"_url_")] - url: Url, - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - let json_string = format!(r#"{{"_url_":"{}"}}"#, url_str); - - // test deserialization - let expected = Test { - url: Url::parse(url_str).unwrap(), - }; - let got: Test = serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); - - // test serialization - let input = Test { - url: Url::parse(url_str).unwrap(), - }; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(json_string, got); -} - -#[test] -fn test_host() { - for host in &[ - Host::Domain("foo.com".to_owned()), - Host::Ipv4("127.0.0.1".parse().unwrap()), - Host::Ipv6("::1".parse().unwrap()), - ] { - let json = serde_json::to_string(&Ser(host)).unwrap(); - let de: De = serde_json::from_str(&json).unwrap(); - assert_eq!(de.into_inner(), *host) - } -} From 6e082014827061a79a27be1d7712e53a84c28280 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 19:36:44 +0200 Subject: [PATCH 24/35] Remove DNS-related functionality --- src/host.rs | 79 +-------------------------------------------------- src/lib.rs | 63 ++-------------------------------------- tests/unit.rs | 38 +------------------------ 3 files changed, 4 insertions(+), 176 deletions(-) diff --git a/src/host.rs b/src/host.rs index 79e9bb89c..6aa820911 100644 --- a/src/host.rs +++ b/src/host.rs @@ -11,9 +11,7 @@ use parser::{ParseError, ParseResult}; use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET}; use std::cmp; use std::fmt::{self, Formatter}; -use std::io; -use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs}; -use std::vec; +use std::net::{Ipv4Addr, Ipv6Addr}; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum HostInternal { @@ -228,81 +226,6 @@ impl> fmt::Display for Host { } } -/// This mostly exists because coherence rules don’t allow us to implement -/// `ToSocketAddrs for (Host, u16)`. 
-#[derive(Clone, Debug)] -pub struct HostAndPort { - pub host: Host, - pub port: u16, -} - -impl<'a> HostAndPort<&'a str> { - /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. - pub fn to_owned(&self) -> HostAndPort { - HostAndPort { - host: self.host.to_owned(), - port: self.port, - } - } -} - -impl> fmt::Display for HostAndPort { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.host.fmt(f)?; - f.write_str(":")?; - self.port.fmt(f) - } -} - -impl> ToSocketAddrs for HostAndPort { - type Iter = SocketAddrs; - - fn to_socket_addrs(&self) -> io::Result { - let port = self.port; - match self.host { - Host::Domain(ref domain) => Ok(SocketAddrs { - // FIXME: use std::net::lookup_host when it’s stable. - state: SocketAddrsState::Domain((domain.as_ref(), port).to_socket_addrs()?), - }), - Host::Ipv4(address) => Ok(SocketAddrs { - state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port))), - }), - Host::Ipv6(address) => Ok(SocketAddrs { - state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new( - address, port, 0, 0, - ))), - }), - } - } -} - -/// Socket addresses for an URL. 
-#[derive(Debug)] -pub struct SocketAddrs { - state: SocketAddrsState, -} - -#[derive(Debug)] -enum SocketAddrsState { - Domain(vec::IntoIter), - One(SocketAddr), - Done, -} - -impl Iterator for SocketAddrs { - type Item = SocketAddr; - fn next(&mut self) -> Option { - match self.state { - SocketAddrsState::Domain(ref mut iter) => iter.next(), - SocketAddrsState::One(s) => { - self.state = SocketAddrsState::Done; - Some(s) - } - SocketAddrsState::Done => None, - } - } -} - fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result { let segments = addr.segments(); let (compress_start, compress_end) = longest_zero_sequence(&segments); diff --git a/src/lib.rs b/src/lib.rs index cf3de0ce1..8d13807b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -128,14 +128,13 @@ use std::cmp; use std::error::Error; use std::fmt::{self, Debug, Formatter, Write}; use std::hash; -use std::io; use std::mem; -use std::net::{IpAddr, ToSocketAddrs}; +use std::net::IpAddr; use std::ops::{Range, RangeFrom, RangeTo}; use std::path::{Path, PathBuf}; use std::str; -pub use host::{Host, HostAndPort, SocketAddrs}; +pub use host::Host; pub use origin::{OpaqueOrigin, Origin}; pub use parser::{ParseError, SyntaxViolation}; pub use path_segments::PathSegmentsMut; @@ -963,51 +962,6 @@ impl Url { self.port.or_else(|| parser::default_port(self.scheme())) } - /// If the URL has a host, return something that implements `ToSocketAddrs`. - /// - /// If the URL has no port number and the scheme’s default port number is not known - /// (see `Url::port_or_known_default`), - /// the closure is called to obtain a port number. - /// Typically, this closure can match on the result `Url::scheme` - /// to have per-scheme default port numbers, - /// and panic for schemes it’s not prepared to handle. 
- /// For example: - /// - /// ```rust - /// # use url::Url; - /// # use std::net::TcpStream; - /// # use std::io; - /// fn connect(url: &Url) -> io::Result { - /// TcpStream::connect(url.with_default_port(default_port)?) - /// } - /// - /// fn default_port(url: &Url) -> Result { - /// match url.scheme() { - /// "git" => Ok(9418), - /// "git+ssh" => Ok(22), - /// "git+https" => Ok(443), - /// "git+http" => Ok(80), - /// _ => Err(()), - /// } - /// } - /// ``` - pub fn with_default_port(&self, f: F) -> io::Result> - where - F: FnOnce(&Url) -> Result, - { - Ok(HostAndPort { - host: self - .host() - .ok_or(()) - .or_else(|()| io_error("URL has no host"))?, - port: self - .port_or_known_default() - .ok_or(()) - .or_else(|()| f(self)) - .or_else(|()| io_error("URL has no port number"))?, - }) - } - /// Return the path for this URL, as a percent-encoded ASCII string. /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. /// For other URLs, this starts with a '/' slash @@ -2204,15 +2158,6 @@ impl Url { } } -/// Return an error if `Url::host` or `Url::port_or_known_default` return `None`. -impl ToSocketAddrs for Url { - type Iter = SocketAddrs; - - fn to_socket_addrs(&self) -> io::Result { - self.with_default_port(|_| Err(()))?.to_socket_addrs() - } -} - /// Parse a string as an URL, without a base URL or encoding override. impl str::FromStr for Url { type Err = ParseError; @@ -2519,10 +2464,6 @@ fn file_url_segments_to_pathbuf_windows( Ok(path) } -fn io_error(reason: &str) -> io::Result { - Err(io::Error::new(io::ErrorKind::InvalidData, reason)) -} - /// Implementation detail of `Url::query_pairs_mut`. Typically not used directly. 
#[derive(Debug)] pub struct UrlQuery<'a> { diff --git a/tests/unit.rs b/tests/unit.rs index ff2033d3d..a326953dd 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -16,7 +16,7 @@ use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::net::{Ipv4Addr, Ipv6Addr}; use std::path::{Path, PathBuf}; -use url::{form_urlencoded, Host, HostAndPort, Url}; +use url::{form_urlencoded, Host, Url}; #[test] fn size() { @@ -329,42 +329,6 @@ fn form_urlencoded_custom_encoding_override() { assert_eq!(encoded, "FOO=BAR"); } -#[test] -fn host_and_port_display() { - assert_eq!( - format!( - "{}", - HostAndPort { - host: Host::Domain("www.mozilla.org"), - port: 80 - } - ), - "www.mozilla.org:80" - ); - assert_eq!( - format!( - "{}", - HostAndPort:: { - host: Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), - port: 65535 - } - ), - "1.35.33.49:65535" - ); - assert_eq!( - format!( - "{}", - HostAndPort:: { - host: Host::Ipv6(Ipv6Addr::new( - 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344 - )), - port: 1337 - } - ), - "[2001:db8:85a3:8d3:1319:8a2e:370:7344]:1337" - ) -} - #[test] /// https://github.com/servo/rust-url/issues/61 fn issue_61() { From 47e2286ff32359879e69651409ed08385949eb8c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 20:25:33 +0200 Subject: [PATCH 25/35] Remove _charset_ support CC https://github.com/whatwg/url/commit/3fe969679f78c92c353047661b0c4b6797f099f6 --- src/encoding.rs | 32 +---------------------- src/form_urlencoded.rs | 59 ++++-------------------------------------- 2 files changed, 6 insertions(+), 85 deletions(-) diff --git a/src/encoding.rs b/src/encoding.rs index e6c10f0a2..5d04bd30c 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -17,11 +17,9 @@ use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; #[cfg(feature = "query_encoding")] -use self::encoding::label::encoding_from_whatwg_label; +use self::encoding::types::EncoderTrap; #[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef; 
-#[cfg(feature = "query_encoding")] -use self::encoding::types::{DecoderTrap, EncoderTrap}; #[cfg(feature = "query_encoding")] #[derive(Copy, Clone)] @@ -51,15 +49,6 @@ impl EncodingOverride { EncodingOverride { encoding: None } } - pub fn lookup(label: &[u8]) -> Option { - // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD - // https://encoding.spec.whatwg.org/#names-and-labels - ::std::str::from_utf8(label) - .ok() - .and_then(encoding_from_whatwg_label) - .map(Self::from_encoding) - } - /// https://encoding.spec.whatwg.org/#get-an-output-encoding pub fn to_output_encoding(self) -> Self { if let Some(encoding) = self.encoding { @@ -70,10 +59,6 @@ impl EncodingOverride { self } - pub fn is_utf8(&self) -> bool { - self.encoding.is_none() - } - pub fn name(&self) -> &'static str { match self.encoding { Some(encoding) => encoding.name(), @@ -81,17 +66,6 @@ impl EncodingOverride { } } - pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { - match self.encoding { - // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace` - Some(encoding) => encoding - .decode(&input, DecoderTrap::Replace) - .unwrap() - .into(), - None => decode_utf8_lossy(input), - } - } - pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { match self.encoding { // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape` @@ -123,10 +97,6 @@ impl EncodingOverride { EncodingOverride } - pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { - decode_utf8_lossy(input) - } - pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { encode_utf8(input) } diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs index 75cb6b7e7..5408317b2 100644 --- a/src/form_urlencoded.rs +++ b/src/form_urlencoded.rs @@ -13,7 +13,7 @@ //! Converts between a string (such as an URL’s query string) //! and a sequence of (name, value) pairs. 
-use encoding::EncodingOverride; +use encoding::{decode_utf8_lossy, EncodingOverride}; use percent_encoding::{percent_decode, percent_encode_byte}; use std::borrow::{Borrow, Cow}; use std::fmt; @@ -28,61 +28,12 @@ use std::str; /// converted to `[("#first", "%try%")]`. #[inline] pub fn parse(input: &[u8]) -> Parse { - Parse { - input: input, - encoding: EncodingOverride::utf8(), - } + Parse { input: input } } - -/// Convert a byte string in the `application/x-www-form-urlencoded` syntax -/// into a iterator of (name, value) pairs. -/// -/// Use `parse(input.as_bytes())` to parse a `&str` string. -/// -/// This function is only available if the `query_encoding` -/// [feature](http://doc.crates.io/manifest.html#the-features-section]) is enabled. -/// -/// Arguments: -/// -/// * `encoding_override`: The character encoding each name and values is decoded as -/// after percent-decoding. Defaults to UTF-8. -/// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding). -/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`. -#[cfg(feature = "query_encoding")] -pub fn parse_with_encoding<'a>( - input: &'a [u8], - encoding_override: Option<::encoding::EncodingRef>, - use_charset: bool, -) -> Result, ()> { - let mut encoding = EncodingOverride::from_opt_encoding(encoding_override); - if !(encoding.is_utf8() || input.is_ascii()) { - return Err(()); - } - if use_charset { - for sequence in input.split(|&b| b == b'&') { - // No '+' in "_charset_" to replace with ' '. - if sequence.starts_with(b"_charset_=") { - let value = &sequence[b"_charset_=".len()..]; - // Skip replacing '+' with ' ' in value since no encoding label contains either: - // https://encoding.spec.whatwg.org/#names-and-labels - if let Some(e) = EncodingOverride::lookup(value) { - encoding = e; - break; - } - } - } - } - Ok(Parse { - input: input, - encoding: encoding, - }) -} - /// The return type of `parse()`. 
#[derive(Copy, Clone, Debug)] pub struct Parse<'a> { input: &'a [u8], - encoding: EncodingOverride, } impl<'a> Iterator for Parse<'a> { @@ -102,14 +53,14 @@ impl<'a> Iterator for Parse<'a> { let mut split2 = sequence.splitn(2, |&b| b == b'='); let name = split2.next().unwrap(); let value = split2.next().unwrap_or(&[][..]); - return Some((decode(name, self.encoding), decode(value, self.encoding))); + return Some((decode(name), decode(value))); } } } -fn decode(input: &[u8], encoding: EncodingOverride) -> Cow { +fn decode(input: &[u8]) -> Cow { let replaced = replace_plus(input); - encoding.decode(match percent_decode(&replaced).if_any() { + decode_utf8_lossy(match percent_decode(&replaced).if_any() { Some(vec) => Cow::Owned(vec), None => replaced, }) From b567a51e784bae1fdad1d1e5d7e4dcb00b406080 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 20:44:22 +0200 Subject: [PATCH 26/35] Remove rust-encoding support --- Cargo.toml | 7 --- src/encoding.rs | 127 ----------------------------------------- src/form_urlencoded.rs | 80 ++++---------------------- src/lib.rs | 33 +++-------- src/parser.rs | 10 ++-- src/query_encoding.rs | 38 ++++++++++++ tests/unit.rs | 4 +- 7 files changed, 64 insertions(+), 235 deletions(-) delete mode 100644 src/encoding.rs create mode 100644 src/query_encoding.rs diff --git a/Cargo.toml b/Cargo.toml index 8a55f9cbf..b9b4aff38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,11 +36,7 @@ serde_json = "1.0" bencher = "0.1" -[features] -query_encoding = ["encoding"] - [dependencies] -encoding = {version = "0.2", optional = true} idna = { version = "0.1.0", path = "./idna" } matches = "0.1" percent-encoding = { version = "1.0.0", path = "./percent_encoding" } @@ -49,6 +45,3 @@ serde = {version = "1.0", optional = true} [[bench]] name = "parse_url" harness = false - -[package.metadata.docs.rs] -features = ["query_encoding"] diff --git a/src/encoding.rs b/src/encoding.rs deleted file mode 100644 index 5d04bd30c..000000000 --- 
a/src/encoding.rs +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2013-2014 The rust-url developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Abstraction that conditionally compiles either to rust-encoding, -//! or to only support UTF-8. - -#[cfg(feature = "query_encoding")] -extern crate encoding; - -use std::borrow::Cow; -#[cfg(feature = "query_encoding")] -use std::fmt::{self, Debug, Formatter}; - -#[cfg(feature = "query_encoding")] -use self::encoding::types::EncoderTrap; -#[cfg(feature = "query_encoding")] -pub use self::encoding::types::EncodingRef; - -#[cfg(feature = "query_encoding")] -#[derive(Copy, Clone)] -pub struct EncodingOverride { - /// `None` means UTF-8. - encoding: Option, -} - -#[cfg(feature = "query_encoding")] -impl EncodingOverride { - pub fn from_opt_encoding(encoding: Option) -> Self { - encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8) - } - - pub fn from_encoding(encoding: EncodingRef) -> Self { - EncodingOverride { - encoding: if encoding.name() == "utf-8" { - None - } else { - Some(encoding) - }, - } - } - - #[inline] - pub fn utf8() -> Self { - EncodingOverride { encoding: None } - } - - /// https://encoding.spec.whatwg.org/#get-an-output-encoding - pub fn to_output_encoding(self) -> Self { - if let Some(encoding) = self.encoding { - if matches!(encoding.name(), "utf-16le" | "utf-16be") { - return Self::utf8(); - } - } - self - } - - pub fn name(&self) -> &'static str { - match self.encoding { - Some(encoding) => encoding.name(), - None => "utf-8", - } - } - - pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { - match self.encoding { - // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape` - Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()), - None => encode_utf8(input), - } - } -} - 
-#[cfg(feature = "query_encoding")] -impl Debug for EncodingOverride { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "EncodingOverride {{ encoding: ")?; - match self.encoding { - Some(e) => write!(f, "{} }}", e.name()), - None => write!(f, "None }}"), - } - } -} - -#[cfg(not(feature = "query_encoding"))] -#[derive(Copy, Clone, Debug)] -pub struct EncodingOverride; - -#[cfg(not(feature = "query_encoding"))] -impl EncodingOverride { - #[inline] - pub fn utf8() -> Self { - EncodingOverride - } - - pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { - encode_utf8(input) - } -} - -pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { - match input { - Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), - Cow::Owned(bytes) => { - let raw_utf8: *const [u8]; - match String::from_utf8_lossy(&bytes) { - Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), - Cow::Owned(s) => return s.into(), - } - // from_utf8_lossy returned a borrow of `bytes` unchanged. - debug_assert!(raw_utf8 == &*bytes as *const [u8]); - // Reuse the existing `Vec` allocation. - unsafe { String::from_utf8_unchecked(bytes) }.into() - } - } -} - -pub fn encode_utf8(input: Cow) -> Cow<[u8]> { - match input { - Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), - Cow::Owned(s) => Cow::Owned(s.into_bytes()), - } -} diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs index 5408317b2..176ffb750 100644 --- a/src/form_urlencoded.rs +++ b/src/form_urlencoded.rs @@ -13,10 +13,9 @@ //! Converts between a string (such as an URL’s query string) //! and a sequence of (name, value) pairs. 
-use encoding::{decode_utf8_lossy, EncodingOverride}; use percent_encoding::{percent_decode, percent_encode_byte}; +use query_encoding::{self, decode_utf8_lossy, EncodingOverride}; use std::borrow::{Borrow, Cow}; -use std::fmt; use std::str; /// Convert a byte string in the `application/x-www-form-urlencoded` syntax @@ -31,7 +30,7 @@ pub fn parse(input: &[u8]) -> Parse { Parse { input: input } } /// The return type of `parse()`. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone)] pub struct Parse<'a> { input: &'a [u8], } @@ -91,7 +90,6 @@ impl<'a> Parse<'a> { } /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow`. -#[derive(Debug)] pub struct ParseIntoOwned<'a> { inner: Parse<'a>, } @@ -161,20 +159,10 @@ impl<'a> Iterator for ByteSerialize<'a> { /// The [`application/x-www-form-urlencoded` serializer]( /// https://url.spec.whatwg.org/#concept-urlencoded-serializer). -#[derive(Debug)] -pub struct Serializer { +pub struct Serializer<'a, T: Target> { target: Option, start_position: usize, - encoding: EncodingOverride, - custom_encoding: Option Cow<[u8]>>>>, -} - -struct SilentDebug(T); - -impl fmt::Debug for SilentDebug { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str("…") - } + encoding: EncodingOverride<'a>, } pub trait Target { @@ -227,7 +215,7 @@ impl<'a> Target for ::UrlQuery<'a> { type Finished = &'a mut ::Url; } -impl Serializer { +impl<'a, T: Target> Serializer<'a, T> { /// Create a new `application/x-www-form-urlencoded` serializer for the given target. /// /// If the target is non-empty, @@ -246,8 +234,7 @@ impl Serializer { Serializer { target: Some(target), start_position: start_position, - encoding: EncodingOverride::utf8(), - custom_encoding: None, + encoding: None, } } @@ -260,18 +247,8 @@ impl Serializer { } /// Set the character encoding to be used for names and values before percent-encoding. 
- #[cfg(feature = "query_encoding")] - pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self { - self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding(); - self - } - - /// Set the character encoding to be used for names and values before percent-encoding. - pub fn custom_encoding_override(&mut self, encode: F) -> &mut Self - where - F: FnMut(&str) -> Cow<[u8]> + 'static, - { - self.custom_encoding = Some(SilentDebug(Box::new(encode))); + pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self { + self.encoding = new; self } @@ -283,7 +260,6 @@ impl Serializer { string(&mut self.target), self.start_position, self.encoding, - &mut self.custom_encoding, name, value, ); @@ -312,7 +288,6 @@ impl Serializer { string, self.start_position, self.encoding, - &mut self.custom_encoding, k.as_ref(), v.as_ref(), ); @@ -321,26 +296,6 @@ impl Serializer { self } - /// Add a name/value pair whose name is `_charset_` - /// and whose value is the character encoding’s name. - /// (See the `encoding_override()` method.) - /// - /// Panics if called after `.finish()`. - #[cfg(feature = "query_encoding")] - pub fn append_charset(&mut self) -> &mut Self { - assert!( - self.custom_encoding.is_none(), - "Cannot use both custom_encoding_override() and append_charset()" - ); - { - let string = string(&mut self.target); - append_separator_if_needed(string, self.start_position); - string.push_str("_charset_="); - string.push_str(self.encoding.name()); - } - self - } - /// If this serializer was constructed with a string, take and return that string. 
/// /// ```rust @@ -378,26 +333,15 @@ fn append_pair( string: &mut String, start_position: usize, encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>, name: &str, value: &str, ) { append_separator_if_needed(string, start_position); - append_encoded(name, string, encoding, custom_encoding); + append_encoded(name, string, encoding); string.push('='); - append_encoded(value, string, encoding, custom_encoding); + append_encoded(value, string, encoding); } -fn append_encoded( - s: &str, - string: &mut String, - encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>, -) { - let bytes = if let Some(SilentDebug(ref mut custom)) = *custom_encoding { - custom(s) - } else { - encoding.encode(s.into()) - }; - string.extend(byte_serialize(&bytes)); +fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride) { + string.extend(byte_serialize(&query_encoding::encode(encoding, s.into()))) } diff --git a/src/lib.rs b/src/lib.rs index d6ebf444c..dca527916 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -115,7 +115,6 @@ extern crate serde; #[macro_use] extern crate percent_encoding; -use encoding::EncodingOverride; use host::HostInternal; use parser::{to_u32, Context, Parser, SchemeType}; use percent_encoding::{ @@ -126,7 +125,7 @@ use std::borrow::Borrow; use std::cmp; #[cfg(feature = "serde")] use std::error::Error; -use std::fmt::{self, Debug, Formatter, Write}; +use std::fmt::{self, Write}; use std::hash; use std::mem; use std::net::IpAddr; @@ -139,13 +138,14 @@ pub use origin::{OpaqueOrigin, Origin}; pub use parser::{ParseError, SyntaxViolation}; pub use path_segments::PathSegmentsMut; pub use slicing::Position; +pub use query_encoding::EncodingOverride; -mod encoding; mod host; mod origin; mod parser; mod path_segments; mod slicing; +mod query_encoding; pub mod form_urlencoded; #[doc(hidden)] @@ -181,7 +181,7 @@ pub struct Url { #[derive(Copy, Clone)] pub struct ParseOptions<'a> { base_url: Option<&'a Url>, - encoding_override: 
encoding::EncodingOverride, + encoding_override: EncodingOverride<'a>, violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, } @@ -194,14 +194,8 @@ impl<'a> ParseOptions<'a> { /// Override the character encoding of query strings. /// This is a legacy concept only relevant for HTML. - /// - /// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding). - /// - /// This method is only available if the `query_encoding` - /// [feature](http://doc.crates.io/manifest.html#the-features-section]) is enabled. - #[cfg(feature = "query_encoding")] - pub fn encoding_override(mut self, new: Option) -> Self { - self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding(); + pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { + self.encoding_override = new; self } @@ -245,19 +239,6 @@ impl<'a> ParseOptions<'a> { } } -impl<'a> Debug for ParseOptions<'a> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "ParseOptions {{ base_url: {:?}, encoding_override: {:?}, \ - violation_fn: {:?} }}", - self.base_url, - self.encoding_override, - self.violation_fn.map(|_| "…") - ) - } -} - impl Url { /// Parse an absolute URL from a string. /// @@ -384,7 +365,7 @@ impl Url { pub fn options<'a>() -> ParseOptions<'a> { ParseOptions { base_url: None, - encoding_override: EncodingOverride::utf8(), + encoding_override: None, violation_fn: None, } } diff --git a/src/parser.rs b/src/parser.rs index 7f55eb186..7df36e24d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -10,12 +10,12 @@ use std::error::Error; use std::fmt::{self, Formatter, Write}; use std::str; -use encoding::EncodingOverride; use host::{Host, HostInternal}; use percent_encoding::{ percent_encode, utf8_percent_encode, DEFAULT_ENCODE_SET, PATH_SEGMENT_ENCODE_SET, QUERY_ENCODE_SET, SIMPLE_ENCODE_SET, USERINFO_ENCODE_SET, }; +use query_encoding::EncodingOverride; use Url; define_encode_set! 
{ @@ -274,7 +274,7 @@ impl<'i> Iterator for Input<'i> { pub struct Parser<'a> { pub serialization: String, pub base_url: Option<&'a Url>, - pub query_encoding_override: EncodingOverride, + pub query_encoding_override: EncodingOverride<'a>, pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, pub context: Context, } @@ -305,7 +305,7 @@ impl<'a> Parser<'a> { Parser { serialization: serialization, base_url: None, - query_encoding_override: EncodingOverride::utf8(), + query_encoding_override: None, violation_fn: None, context: Context::Setter, } @@ -1238,9 +1238,9 @@ impl<'a> Parser<'a> { let encoding = match &self.serialization[..scheme_end as usize] { "http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override, - _ => EncodingOverride::utf8(), + _ => None, }; - let query_bytes = encoding.encode(query.into()); + let query_bytes = ::query_encoding::encode(encoding, &query); self.serialization .extend(percent_encode(&query_bytes, QUERY_ENCODE_SET)); remaining diff --git a/src/query_encoding.rs b/src/query_encoding.rs new file mode 100644 index 000000000..ef9967997 --- /dev/null +++ b/src/query_encoding.rs @@ -0,0 +1,38 @@ +// Copyright 2019 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use std::borrow::Cow; + +pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<[u8]>>; + +pub(crate) fn encode<'a>( + encoding_override: EncodingOverride, + input: &'a str, +) -> Cow<'a, [u8]> { + if let Some(o) = encoding_override { + return o(input); + } + input.as_bytes().into() +} + +pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { + match input { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => { + let raw_utf8: *const [u8]; + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), + Cow::Owned(s) => return s.into(), + } + // from_utf8_lossy returned a borrow of `bytes` unchanged. + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + // Reuse the existing `Vec` allocation. + unsafe { String::from_utf8_unchecked(bytes) }.into() + } + } +} diff --git a/tests/unit.rs b/tests/unit.rs index a326953dd..9f3764911 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -321,9 +321,9 @@ fn test_form_serialize() { } #[test] -fn form_urlencoded_custom_encoding_override() { +fn form_urlencoded_encoding_override() { let encoded = form_urlencoded::Serializer::new(String::new()) - .custom_encoding_override(|s| s.as_bytes().to_ascii_uppercase().into()) + .encoding_override(Some(&|s| s.as_bytes().to_ascii_uppercase().into())) .append_pair("foo", "bar") .finish(); assert_eq!(encoded, "FOO=BAR"); From 9ab946f3419ed14142227e9e1dfea9bbb6ac5c17 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jul 2019 21:19:49 +0200 Subject: [PATCH 27/35] `#[rustfmt::skip]` requires 1.30 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 40e34584c..f3417d793 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ script: cargo test --all-features --all jobs: include: - - rust: 1.27.0 + - rust: 1.30.0 - rust: stable - rust: beta - rust: nightly From 5453f6f26caf25ef38100049a86db509d2b85025 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 
16 Jul 2019 07:53:52 +0200 Subject: [PATCH 28/35] Move docs/ to the gh-pages branch --- docs/.nojekyll | 0 docs/404.html | 3 --- docs/index.html | 3 --- 3 files changed, 6 deletions(-) delete mode 100644 docs/.nojekyll delete mode 100644 docs/404.html delete mode 100644 docs/index.html diff --git a/docs/.nojekyll b/docs/.nojekyll deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/404.html b/docs/404.html deleted file mode 100644 index b13eac0ee..000000000 --- a/docs/404.html +++ /dev/null @@ -1,3 +0,0 @@ - - -Moved to docs.rs diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index b13eac0ee..000000000 --- a/docs/index.html +++ /dev/null @@ -1,3 +0,0 @@ - - -Moved to docs.rs From 449c10afbba978dbf6c4072375ce28993f4cb191 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 17 Jul 2019 11:55:13 +0200 Subject: [PATCH 29/35] Kill idna::uts46::Flags --- idna/src/lib.rs | 16 ++------------ idna/src/uts46.rs | 55 +++++++++++++---------------------------------- 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 7f6fd1037..2c713998e 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -47,12 +47,7 @@ pub mod uts46; /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result { - let flags = uts46::Flags { - use_std3_ascii_rules: false, - transitional_processing: false, - verify_dns_length: false, - }; - uts46::Config::from(flags).check_hyphens(false).to_ascii(domain) + uts46::Config::default().to_ascii(domain) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -64,12 +59,5 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. 
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - let flags = uts46::Flags { - use_std3_ascii_rules: false, - - // Unused: - transitional_processing: false, - verify_dns_length: false, - }; - uts46::Config::from(flags).check_hyphens(false).to_unicode(domain) + uts46::Config::default().to_unicode(domain) } diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index 2c981e099..325bfc226 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -83,13 +83,13 @@ fn find_char(codepoint: char) -> &'static Mapping { }).unwrap() } -fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec) { +fn map_char(codepoint: char, config: Config, output: &mut String, errors: &mut Vec) { match *find_char(codepoint) { Mapping::Valid => output.push(codepoint), Mapping::Ignored => {}, Mapping::Mapped(ref slice) => output.push_str(decode_slice(slice)), Mapping::Deviation(ref slice) => { - if flags.transitional_processing { + if config.transitional_processing { output.push_str(decode_slice(slice)) } else { output.push(codepoint) @@ -100,13 +100,13 @@ fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec output.push(codepoint); } Mapping::DisallowedStd3Valid => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedByStd3AsciiRules); } output.push(codepoint) } Mapping::DisallowedStd3Mapped(ref slice) => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedMappedInStd3); } output.push_str(decode_slice(slice)) @@ -271,8 +271,8 @@ fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec< // V6: Check against Mapping Table else if label.chars().any(|c| match *find_char(c) { Mapping::Valid => false, - Mapping::Deviation(_) => config.flags.transitional_processing, - Mapping::DisallowedStd3Valid => config.flags.use_std3_ascii_rules, + Mapping::Deviation(_) => config.transitional_processing, + 
Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules, _ => true, }) { errors.push(Error::ValidityCriteria); @@ -295,7 +295,7 @@ fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec< fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { let mut mapped = String::with_capacity(domain.len()); for c in domain.chars() { - map_char(c, config.flags, &mut mapped, errors) + map_char(c, config, &mut mapped, errors) } let mut normalized = String::with_capacity(mapped.len()); normalized.extend(mapped.nfc()); @@ -351,35 +351,30 @@ fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { validated } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Default)] pub struct Config { - flags: Flags, + use_std3_ascii_rules: bool, + transitional_processing: bool, + verify_dns_length: bool, check_hyphens: bool, } -impl From for Config { - #[inline] - fn from(flags: Flags) -> Self { - Self { flags, check_hyphens: true } - } -} - impl Config { #[inline] pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { - self.flags.use_std3_ascii_rules = value; + self.use_std3_ascii_rules = value; self } #[inline] pub fn transitional_processing(mut self, value: bool) -> Self { - self.flags.transitional_processing = value; + self.transitional_processing = value; self } #[inline] pub fn verify_dns_length(mut self, value: bool) -> Self { - self.flags.verify_dns_length = value; + self.verify_dns_length = value; self } @@ -412,7 +407,7 @@ impl Config { } } - if self.flags.verify_dns_length { + if self.verify_dns_length { let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { errors.push(Error::TooShortForDns) @@ -442,13 +437,6 @@ impl Config { } -#[derive(Copy, Clone)] -pub struct Flags { - pub use_std3_ascii_rules: bool, - pub transitional_processing: bool, - pub verify_dns_length: bool, -} - #[derive(PartialEq, Eq, Clone, Copy, 
Debug)] enum Error { PunycodeError, @@ -466,16 +454,3 @@ enum Error { /// More details may be exposed in the future. #[derive(Debug)] pub struct Errors(Vec); - -/// http://www.unicode.org/reports/tr46/#ToASCII -pub fn to_ascii(domain: &str, flags: Flags) -> Result { - Config::from(flags).to_ascii(domain) -} - -/// http://www.unicode.org/reports/tr46/#ToUnicode -/// -/// Only `use_std3_ascii_rules` is used in `flags`. -pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) { - flags.transitional_processing = false; - Config::from(flags).to_unicode(domain) -} From 38cfea4d629d8f57050406d9aaff4189b620ed8f Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 17 Jul 2019 11:55:51 +0200 Subject: [PATCH 30/35] Bump idna to 0.2 --- Cargo.toml | 2 +- idna/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 40e6b64f5..2d13c70ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ heap_size = ["heapsize"] [dependencies] encoding = {version = "0.2", optional = true} heapsize = {version = ">=0.4.1, <0.5", optional = true} -idna = { version = "0.1.0", path = "./idna" } +idna = { version = "0.2.0", path = "./idna" } matches = "0.1" percent-encoding = { version = "1.0.0", path = "./percent_encoding" } rustc-serialize = {version = "0.3", optional = true} diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 1050ad35b..860faa2e6 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "idna" -version = "0.1.5" +version = "0.2.0" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." 
repository = "https://github.com/servo/rust-url/" From e3441b313685f3914c70d3764436a2e32c350f72 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 17 Jul 2019 12:00:52 +0200 Subject: [PATCH 31/35] rustfmt --- idna/src/uts46.rs | 11 +++++++---- src/lib.rs | 4 ++-- src/query_encoding.rs | 5 +---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index 40b2d23e0..b67935ead 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -421,14 +421,18 @@ impl Config { Some(x) => { result.push_str(PUNYCODE_PREFIX); result.push_str(&x); - }, - None => errors.push(Error::PunycodeError) + } + None => errors.push(Error::PunycodeError), } } } if self.verify_dns_length { - let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; + let domain = if result.ends_with(".") { + &result[..result.len() - 1] + } else { + &*result + }; if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { errors.push(Error::TooShortForDns) } @@ -454,7 +458,6 @@ impl Config { }; (domain, errors) } - } #[derive(PartialEq, Eq, Clone, Copy, Debug)] diff --git a/src/lib.rs b/src/lib.rs index dca527916..4cbac60ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -137,15 +137,15 @@ pub use host::Host; pub use origin::{OpaqueOrigin, Origin}; pub use parser::{ParseError, SyntaxViolation}; pub use path_segments::PathSegmentsMut; -pub use slicing::Position; pub use query_encoding::EncodingOverride; +pub use slicing::Position; mod host; mod origin; mod parser; mod path_segments; -mod slicing; mod query_encoding; +mod slicing; pub mod form_urlencoded; #[doc(hidden)] diff --git a/src/query_encoding.rs b/src/query_encoding.rs index ef9967997..76aed15a7 100644 --- a/src/query_encoding.rs +++ b/src/query_encoding.rs @@ -10,10 +10,7 @@ use std::borrow::Cow; pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<[u8]>>; -pub(crate) fn encode<'a>( - encoding_override: EncodingOverride, - input: &'a str, -) -> Cow<'a, [u8]> 
{ +pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) -> Cow<'a, [u8]> { if let Some(o) = encoding_override { return o(input); } From 5aeaf89afe43c78eef7c958b1089bd586f68c271 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 17 Jul 2019 12:08:36 +0200 Subject: [PATCH 32/35] Make the uts46 module private --- idna/src/lib.rs | 4 +++- idna/src/uts46.rs | 2 +- src/parser.rs | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 7aa4cac89..b560969de 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -38,7 +38,9 @@ extern crate unicode_bidi; extern crate unicode_normalization; pub mod punycode; -pub mod uts46; +mod uts46; + +pub use uts46::{Config, Errors}; /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm. /// diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index b67935ead..311b58905 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -18,7 +18,7 @@ use unicode_normalization::UnicodeNormalization; include!("uts46_mapping_table.rs"); -pub static PUNYCODE_PREFIX: &'static str = "xn--"; +const PUNYCODE_PREFIX: &'static str = "xn--"; #[derive(Debug)] struct StringTableSlice { diff --git a/src/parser.rs b/src/parser.rs index 7df36e24d..7a6eaad4f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -67,8 +67,8 @@ impl fmt::Display for ParseError { } } -impl From<::idna::uts46::Errors> for ParseError { - fn from(_: ::idna::uts46::Errors) -> ParseError { +impl From<::idna::Errors> for ParseError { + fn from(_: ::idna::Errors) -> ParseError { ParseError::IdnaError } } From 62cc6147d5159494a658e0b8551e422a16191247 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 17 Jul 2019 13:24:08 +0200 Subject: [PATCH 33/35] Have an explicit `impl Default for Config` in idna --- idna/src/uts46.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index 311b58905..b860acc42 100644 --- 
a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -371,7 +371,7 @@ fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { validated } -#[derive(Clone, Copy, Default)] +#[derive(Clone, Copy)] pub struct Config { use_std3_ascii_rules: bool, transitional_processing: bool, @@ -379,6 +379,22 @@ pub struct Config { check_hyphens: bool, } +/// The defaults are that of https://url.spec.whatwg.org/#idna +impl Default for Config { + fn default() -> Self { + Config { + use_std3_ascii_rules: false, + transitional_processing: false, + check_hyphens: false, + // check_bidi: true, + // check_joiners: true, + + // Only use for to_ascii, not to_unicode + verify_dns_length: false, + } + } +} + impl Config { #[inline] pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { From 7371286087b32d358610df1ad3a3b1f55f6836df Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 17 Jul 2019 13:27:59 +0200 Subject: [PATCH 34/35] Add idna::domain_to_ascii_strict --- idna/src/lib.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index b560969de..b1d858fe8 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -50,7 +50,16 @@ pub use uts46::{Config, Errors}; /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result { - uts46::Config::default().to_ascii(domain) + Config::default().to_ascii(domain) +} + +/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm, +/// with the `beStrict` flag set. +pub fn domain_to_ascii_strict(domain: &str) -> Result { + Config::default() + .use_std3_ascii_rules(true) + .verify_dns_length(true) + .to_ascii(domain) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -62,5 +71,5 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. 
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - uts46::Config::default().to_unicode(domain) + Config::default().to_unicode(domain) } From 50c3efe92f02d952d3e0528274d6eefa515b93e0 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 17 Jul 2019 13:31:26 +0200 Subject: [PATCH 35/35] Fix idna tests --- idna/tests/unit.rs | 15 +++++---------- idna/tests/uts46.rs | 15 ++++++--------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/idna/tests/unit.rs b/idna/tests/unit.rs index 83f6b6576..e24e2a319 100644 --- a/idna/tests/unit.rs +++ b/idna/tests/unit.rs @@ -1,18 +1,13 @@ extern crate idna; extern crate unicode_normalization; -use idna::uts46; use unicode_normalization::char::is_combining_mark; -fn _to_ascii(domain: &str) -> Result { - uts46::to_ascii( - domain, - uts46::Flags { - transitional_processing: false, - use_std3_ascii_rules: true, - verify_dns_length: true, - }, - ) +fn _to_ascii(domain: &str) -> Result { + idna::Config::default() + .verify_dns_length(true) + .use_std3_ascii_rules(true) + .to_ascii(domain) } #[test] diff --git a/idna/tests/uts46.rs b/idna/tests/uts46.rs index 976c9e921..b3a5ff334 100644 --- a/idna/tests/uts46.rs +++ b/idna/tests/uts46.rs @@ -6,7 +6,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use idna::uts46; use std::char; use test::TestFn; @@ -49,14 +48,12 @@ pub fn collect_tests(add_test: &mut F) { add_test( test_name, TestFn::dyn_test_fn(move || { - let result = uts46::to_ascii( - &source, - uts46::Flags { - use_std3_ascii_rules: true, - transitional_processing: test_type == "T", - verify_dns_length: true, - }, - ); + let result = idna::Config::default() + .use_std3_ascii_rules(true) + .verify_dns_length(true) + .check_hyphens(true) + .transitional_processing(test_type == "T") + .to_ascii(&source); if to_ascii.starts_with("[") { if to_ascii.starts_with("[C") {