From 667c89646a898cc9067afa77b0d503019eb3ba41 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:16:02 +0100 Subject: [PATCH 1/6] Add a Config struct following the builder pattern to idna --- idna/src/uts46.rs | 129 ++++++++++++++++++++++++++++++---------------- 1 file changed, 86 insertions(+), 43 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index ac348d1fa..65cf5685b 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -353,6 +353,90 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { validated } +#[derive(Clone, Copy)] +pub struct Config { + flags: Flags, +} + +impl From for Config { + #[inline] + fn from(flags: Flags) -> Self { + Self { flags } + } +} + +impl Config { + #[inline] + pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { + self.flags.use_std3_ascii_rules = value; + self + } + + #[inline] + pub fn transitional_processing(mut self, value: bool) -> Self { + self.flags.transitional_processing = value; + self + } + + #[inline] + pub fn verify_dns_length(mut self, value: bool) -> Self { + self.flags.verify_dns_length = value; + self + } + + /// http://www.unicode.org/reports/tr46/#ToASCII + pub fn to_ascii(self, domain: &str) -> Result { + let mut errors = Vec::new(); + let mut result = String::new(); + let mut first = true; + for label in processing(domain, self.flags, &mut errors).split('.') { + if !first { + result.push('.'); + } + first = false; + if label.is_ascii() { + result.push_str(label); + } else { + match punycode::encode_str(label) { + Some(x) => { + result.push_str(PUNYCODE_PREFIX); + result.push_str(&x); + }, + None => errors.push(Error::PunycodeError) + } + } + } + + if self.flags.verify_dns_length { + let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; + if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { + errors.push(Error::TooShortForDns) + } + if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { + errors.push(Error::TooLongForDns) + } + } + if errors.is_empty() { + Ok(result) + } else { + Err(Errors(errors)) + } + } + + /// http://www.unicode.org/reports/tr46/#ToUnicode + pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { + let mut errors = Vec::new(); + let domain = processing(domain, self.flags, &mut errors); + let errors = if errors.is_empty() { + Ok(()) + } else { + Err(Errors(errors)) + }; + (domain, errors) + } + +} + #[derive(Copy, Clone)] pub struct Flags { pub use_std3_ascii_rules: bool, @@ -380,41 +464,7 @@ pub struct Errors(Vec); /// http://www.unicode.org/reports/tr46/#ToASCII pub fn to_ascii(domain: &str, flags: Flags) -> Result { - let mut errors = Vec::new(); - let mut result = String::new(); - let mut first = true; - for label in processing(domain, flags, &mut errors).split('.') { - if !first { - result.push('.'); - } - first = false; - if label.is_ascii() { - result.push_str(label); - } else { - match punycode::encode_str(label) { - Some(x) => { - result.push_str(PUNYCODE_PREFIX); - result.push_str(&x); - }, - None => errors.push(Error::PunycodeError) - } - } - } - - if flags.verify_dns_length { - let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; - if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { - errors.push(Error::TooShortForDns) - } - if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { - errors.push(Error::TooLongForDns) - } - } - if errors.is_empty() { - Ok(result) - } else { - Err(Errors(errors)) - } + Config::from(flags).to_ascii(domain) } /// http://www.unicode.org/reports/tr46/#ToUnicode @@ -422,12 +472,5 @@ pub fn to_ascii(domain: &str, flags: Flags) -> Result { /// Only `use_std3_ascii_rules` is used in `flags`. pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) { flags.transitional_processing = false; - let mut errors = Vec::new(); - let domain = processing(domain, flags, &mut errors); - let errors = if errors.is_empty() { - Ok(()) - } else { - Err(Errors(errors)) - }; - (domain, errors) + Config::from(flags).to_unicode(domain) } From 946c29897f085d61727e4f691b60713f22742d31 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:25:23 +0100 Subject: [PATCH 2/6] Pass a Config value to validate in uts46 --- idna/src/uts46.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index 65cf5685b..b4f78b0a5 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -233,16 +233,16 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { } /// http://www.unicode.org/reports/tr46/#Validity_Criteria -fn validate_full(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate_full(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { // V1: Must be in NFC form. if label.nfc().ne(label.chars()) { errors.push(Error::ValidityCriteria); } else { - validate(label, is_bidi_domain, flags, errors); + validate(label, is_bidi_domain, config, errors); } } -fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { let first_char = label.chars().next(); if first_char == None { // Empty string, pass @@ -273,8 +273,8 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec false, - Mapping::Deviation(_) => flags.transitional_processing, - Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules, + Mapping::Deviation(_) => config.flags.transitional_processing, + Mapping::DisallowedStd3Valid => config.flags.use_std3_ascii_rules, _ => true, }) { errors.push(Error::ValidityCriteria); @@ -294,10 +294,10 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) -> String { +fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { let mut mapped = String::with_capacity(domain.len()); for c in domain.chars() { - map_char(c, flags, &mut mapped, errors) + map_char(c, config.flags, &mut mapped, errors) } let mut normalized = String::with_capacity(mapped.len()); normalized.extend(mapped.nfc()); @@ -338,15 +338,15 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { if label.starts_with(PUNYCODE_PREFIX) { match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) { Some(decoded_label) => { - let flags = Flags { transitional_processing: false, ..flags }; - validate_full(&decoded_label, is_bidi_domain, flags, errors); + let config = config.transitional_processing(false); + validate_full(&decoded_label, is_bidi_domain, config, errors); validated.push_str(&decoded_label) } None => errors.push(Error::PunycodeError) } } else { // `normalized` is already `NFC` so we can skip that check - validate(label, is_bidi_domain, flags, errors); + validate(label, is_bidi_domain, config, errors); validated.push_str(label) } } @@ -389,7 +389,7 @@ impl Config { let mut errors = Vec::new(); let mut result = String::new(); let mut first = true; - for label in processing(domain, self.flags, &mut errors).split('.') { + for label in processing(domain, self, &mut errors).split('.') { if !first { result.push('.'); } @@ -426,7 +426,7 @@ impl Config { /// http://www.unicode.org/reports/tr46/#ToUnicode pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { let mut errors = Vec::new(); - let domain = processing(domain, self.flags, &mut errors); + let domain = processing(domain, self, &mut errors); let errors = if errors.is_empty() { Ok(()) } else { From 6e18952bc7bf3197bb7027729c9ae2e135bd21b4 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:28:37 +0100 Subject: [PATCH 3/6] Introduce Config::check_hyphens in uts46 Hyphens are checked by default. --- idna/src/uts46.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index b4f78b0a5..2c981e099 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -253,11 +253,9 @@ fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec< // NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the // third and fourth positions. But nobody follows this criteria. See the spec issue below: // https://github.com/whatwg/url/issues/53 - // - // TODO: Add *CheckHyphens* flag. // V3: neither begin nor end with a U+002D HYPHEN-MINUS - else if label.starts_with("-") || label.ends_with("-") { + else if config.check_hyphens && (label.starts_with("-") || label.ends_with("-")) { errors.push(Error::ValidityCriteria); } @@ -356,12 +354,13 @@ fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { #[derive(Clone, Copy)] pub struct Config { flags: Flags, + check_hyphens: bool, } impl From for Config { #[inline] fn from(flags: Flags) -> Self { - Self { flags } + Self { flags, check_hyphens: true } } } @@ -384,6 +383,12 @@ impl Config { self } + #[inline] + pub fn check_hyphens(mut self, value: bool) -> Self { + self.check_hyphens = value; + self + } + /// http://www.unicode.org/reports/tr46/#ToASCII pub fn to_ascii(self, domain: &str) -> Result { let mut errors = Vec::new(); From fb3b9573372c10b4ff5dcec701d91e5d013e43c0 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 11 Feb 2019 13:38:03 +0100 Subject: [PATCH 4/6] Don't check hyphens in domain_to_ascii and domain_to_unicode (fixes #483) --- idna/src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 92b5df230..7f6fd1037 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -47,11 +47,12 @@ pub mod uts46; /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result { - uts46::to_ascii(domain, uts46::Flags { + let flags = uts46::Flags { use_std3_ascii_rules: false, transitional_processing: false, verify_dns_length: false, - }) + }; + uts46::Config::from(flags).check_hyphens(false).to_ascii(domain) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -63,11 +64,12 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - uts46::to_unicode(domain, uts46::Flags { + let flags = uts46::Flags { use_std3_ascii_rules: false, // Unused: transitional_processing: false, verify_dns_length: false, - }) + }; + uts46::Config::from(flags).check_hyphens(false).to_unicode(domain) } From 449c10afbba978dbf6c4072375ce28993f4cb191 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 17 Jul 2019 11:55:13 +0200 Subject: [PATCH 5/6] Kill idna::uts46::Flags --- idna/src/lib.rs | 16 ++------------ idna/src/uts46.rs | 55 +++++++++++++---------------------------------- 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 7f6fd1037..2c713998e 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -47,12 +47,7 @@ pub mod uts46; /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result { - let flags = uts46::Flags { - use_std3_ascii_rules: false, - transitional_processing: false, - verify_dns_length: false, - }; - uts46::Config::from(flags).check_hyphens(false).to_ascii(domain) + uts46::Config::default().to_ascii(domain) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -64,12 +59,5 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - let flags = uts46::Flags { - use_std3_ascii_rules: false, - - // Unused: - transitional_processing: false, - verify_dns_length: false, - }; - uts46::Config::from(flags).check_hyphens(false).to_unicode(domain) + uts46::Config::default().to_unicode(domain) } diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index 2c981e099..325bfc226 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -83,13 +83,13 @@ fn find_char(codepoint: char) -> &'static Mapping { }).unwrap() } -fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec) { +fn map_char(codepoint: char, config: Config, output: &mut String, errors: &mut Vec) { match *find_char(codepoint) { Mapping::Valid => output.push(codepoint), Mapping::Ignored => {}, Mapping::Mapped(ref slice) => output.push_str(decode_slice(slice)), Mapping::Deviation(ref slice) => { - if flags.transitional_processing { + if config.transitional_processing { output.push_str(decode_slice(slice)) } else { output.push(codepoint) @@ -100,13 +100,13 @@ fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec output.push(codepoint); } Mapping::DisallowedStd3Valid => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedByStd3AsciiRules); } output.push(codepoint) } Mapping::DisallowedStd3Mapped(ref slice) => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedMappedInStd3); } output.push_str(decode_slice(slice)) @@ -271,8 +271,8 @@ fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec< // V6: Check against Mapping Table else if label.chars().any(|c| match *find_char(c) { Mapping::Valid => false, - Mapping::Deviation(_) => config.flags.transitional_processing, - Mapping::DisallowedStd3Valid => config.flags.use_std3_ascii_rules, + Mapping::Deviation(_) => config.transitional_processing, + Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules, _ => true, }) { errors.push(Error::ValidityCriteria); @@ -295,7 +295,7 @@ fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec< fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { let mut mapped = String::with_capacity(domain.len()); for c in domain.chars() { - map_char(c, config.flags, &mut mapped, errors) + map_char(c, config, &mut mapped, errors) } let mut normalized = String::with_capacity(mapped.len()); normalized.extend(mapped.nfc()); @@ -351,35 +351,30 @@ fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { validated } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Default)] pub struct Config { - flags: Flags, + use_std3_ascii_rules: bool, + transitional_processing: bool, + verify_dns_length: bool, check_hyphens: bool, } -impl From for Config { - #[inline] - fn from(flags: Flags) -> Self { - Self { flags, check_hyphens: true } - } -} - impl Config { #[inline] pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { - self.flags.use_std3_ascii_rules = value; + self.use_std3_ascii_rules = value; self } #[inline] pub fn transitional_processing(mut self, value: bool) -> Self { - self.flags.transitional_processing = value; + self.transitional_processing = value; self } #[inline] pub fn verify_dns_length(mut self, value: bool) -> Self { - self.flags.verify_dns_length = value; + self.verify_dns_length = value; self } @@ -412,7 +407,7 @@ impl Config { } } - if self.flags.verify_dns_length { + if self.verify_dns_length { let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { errors.push(Error::TooShortForDns) @@ -442,13 +437,6 @@ impl Config { } -#[derive(Copy, Clone)] -pub struct Flags { - pub use_std3_ascii_rules: bool, - pub transitional_processing: bool, - pub verify_dns_length: bool, -} - #[derive(PartialEq, Eq, Clone, Copy, Debug)] enum Error { PunycodeError, @@ -466,16 +454,3 @@ enum Error { /// More details may be exposed in the future. #[derive(Debug)] pub struct Errors(Vec); - -/// http://www.unicode.org/reports/tr46/#ToASCII -pub fn to_ascii(domain: &str, flags: Flags) -> Result { - Config::from(flags).to_ascii(domain) -} - -/// http://www.unicode.org/reports/tr46/#ToUnicode -/// -/// Only `use_std3_ascii_rules` is used in `flags`. -pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) { - flags.transitional_processing = false; - Config::from(flags).to_unicode(domain) -} From 38cfea4d629d8f57050406d9aaff4189b620ed8f Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 17 Jul 2019 11:55:51 +0200 Subject: [PATCH 6/6] Bump idna to 0.2 --- Cargo.toml | 2 +- idna/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 40e6b64f5..2d13c70ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ heap_size = ["heapsize"] [dependencies] encoding = {version = "0.2", optional = true} heapsize = {version = ">=0.4.1, <0.5", optional = true} -idna = { version = "0.1.0", path = "./idna" } +idna = { version = "0.2.0", path = "./idna" } matches = "0.1" percent-encoding = { version = "1.0.0", path = "./percent_encoding" } rustc-serialize = {version = "0.3", optional = true} diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 1050ad35b..860faa2e6 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "idna" -version = "0.1.5" +version = "0.2.0" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." repository = "https://github.com/servo/rust-url/"