diff --git a/Cargo.toml b/Cargo.toml index 40e6b64f5..2d13c70ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ heap_size = ["heapsize"] [dependencies] encoding = {version = "0.2", optional = true} heapsize = {version = ">=0.4.1, <0.5", optional = true} -idna = { version = "0.1.0", path = "./idna" } +idna = { version = "0.2.0", path = "./idna" } matches = "0.1" percent-encoding = { version = "1.0.0", path = "./percent_encoding" } rustc-serialize = {version = "0.3", optional = true} diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 1050ad35b..860faa2e6 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "idna" -version = "0.1.5" +version = "0.2.0" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." repository = "https://github.com/servo/rust-url/" diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 92b5df230..2c713998e 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -47,11 +47,7 @@ pub mod uts46; /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result { - uts46::to_ascii(domain, uts46::Flags { - use_std3_ascii_rules: false, - transitional_processing: false, - verify_dns_length: false, - }) + uts46::Config::default().to_ascii(domain) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -63,11 +59,5 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - uts46::to_unicode(domain, uts46::Flags { - use_std3_ascii_rules: false, - - // Unused: - transitional_processing: false, - verify_dns_length: false, - }) + uts46::Config::default().to_unicode(domain) } diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index ac348d1fa..325bfc226 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -83,13 +83,13 @@ fn find_char(codepoint: char) -> &'static Mapping { }).unwrap() } -fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec) { +fn map_char(codepoint: char, config: Config, output: &mut String, errors: &mut Vec) { match *find_char(codepoint) { Mapping::Valid => output.push(codepoint), Mapping::Ignored => {}, Mapping::Mapped(ref slice) => output.push_str(decode_slice(slice)), Mapping::Deviation(ref slice) => { - if flags.transitional_processing { + if config.transitional_processing { output.push_str(decode_slice(slice)) } else { output.push(codepoint) @@ -100,13 +100,13 @@ fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec output.push(codepoint); } Mapping::DisallowedStd3Valid => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedByStd3AsciiRules); } output.push(codepoint) } Mapping::DisallowedStd3Mapped(ref slice) => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedMappedInStd3); } output.push_str(decode_slice(slice)) @@ -233,16 +233,16 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { } /// http://www.unicode.org/reports/tr46/#Validity_Criteria -fn validate_full(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate_full(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { // V1: Must be in NFC form. if label.nfc().ne(label.chars()) { errors.push(Error::ValidityCriteria); } else { - validate(label, is_bidi_domain, flags, errors); + validate(label, is_bidi_domain, config, errors); } } -fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { let first_char = label.chars().next(); if first_char == None { // Empty string, pass @@ -253,11 +253,9 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec false, - Mapping::Deviation(_) => flags.transitional_processing, - Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules, + Mapping::Deviation(_) => config.transitional_processing, + Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules, _ => true, }) { errors.push(Error::ValidityCriteria); @@ -294,10 +292,10 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) -> String { +fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { let mut mapped = String::with_capacity(domain.len()); for c in domain.chars() { - map_char(c, flags, &mut mapped, errors) + map_char(c, config, &mut mapped, errors) } let mut normalized = String::with_capacity(mapped.len()); normalized.extend(mapped.nfc()); @@ -338,26 +336,105 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { if label.starts_with(PUNYCODE_PREFIX) { match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) { Some(decoded_label) => { - let flags = Flags { transitional_processing: false, ..flags }; - validate_full(&decoded_label, is_bidi_domain, flags, errors); + let config = config.transitional_processing(false); + validate_full(&decoded_label, is_bidi_domain, config, errors); validated.push_str(&decoded_label) } None => errors.push(Error::PunycodeError) } } else { // `normalized` is already `NFC` so we can skip that check - validate(label, is_bidi_domain, flags, errors); + validate(label, is_bidi_domain, config, errors); validated.push_str(label) } } validated } -#[derive(Copy, Clone)] -pub struct Flags { - pub use_std3_ascii_rules: bool, - pub transitional_processing: bool, - pub verify_dns_length: bool, +#[derive(Clone, Copy, Default)] +pub struct Config { + use_std3_ascii_rules: bool, + transitional_processing: bool, + verify_dns_length: bool, + check_hyphens: bool, +} + +impl Config { + #[inline] + pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { + self.use_std3_ascii_rules = value; + self + } + + #[inline] + pub fn transitional_processing(mut self, value: bool) -> Self { + self.transitional_processing = value; + self + } + + #[inline] + pub fn verify_dns_length(mut self, value: bool) -> Self { + self.verify_dns_length = value; + self + } + + #[inline] + pub fn check_hyphens(mut self, value: bool) -> Self { + self.check_hyphens = value; + self + } + + /// http://www.unicode.org/reports/tr46/#ToASCII + pub fn to_ascii(self, domain: &str) -> Result { + let mut errors = Vec::new(); + let mut result = String::new(); + let mut first = true; + for label in processing(domain, self, &mut errors).split('.') { + if !first { + result.push('.'); + } + first = false; + if label.is_ascii() { + result.push_str(label); + } else { + match punycode::encode_str(label) { + Some(x) => { + result.push_str(PUNYCODE_PREFIX); + result.push_str(&x); + }, + None => errors.push(Error::PunycodeError) + } + } + } + + if self.verify_dns_length { + let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; + if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { + errors.push(Error::TooShortForDns) + } + if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { + errors.push(Error::TooLongForDns) + } + } + if errors.is_empty() { + Ok(result) + } else { + Err(Errors(errors)) + } + } + + /// http://www.unicode.org/reports/tr46/#ToUnicode + pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { + let mut errors = Vec::new(); + let domain = processing(domain, self, &mut errors); + let errors = if errors.is_empty() { + Ok(()) + } else { + Err(Errors(errors)) + }; + (domain, errors) + } + } #[derive(PartialEq, Eq, Clone, Copy, Debug)] @@ -377,57 +454,3 @@ enum Error { /// More details may be exposed in the future. #[derive(Debug)] pub struct Errors(Vec); - -/// http://www.unicode.org/reports/tr46/#ToASCII -pub fn to_ascii(domain: &str, flags: Flags) -> Result { - let mut errors = Vec::new(); - let mut result = String::new(); - let mut first = true; - for label in processing(domain, flags, &mut errors).split('.') { - if !first { - result.push('.'); - } - first = false; - if label.is_ascii() { - result.push_str(label); - } else { - match punycode::encode_str(label) { - Some(x) => { - result.push_str(PUNYCODE_PREFIX); - result.push_str(&x); - }, - None => errors.push(Error::PunycodeError) - } - } - } - - if flags.verify_dns_length { - let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; - if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { - errors.push(Error::TooShortForDns) - } - if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { - errors.push(Error::TooLongForDns) - } - } - if errors.is_empty() { - Ok(result) - } else { - Err(Errors(errors)) - } -} - -/// http://www.unicode.org/reports/tr46/#ToUnicode -/// -/// Only `use_std3_ascii_rules` is used in `flags`. -pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) { - flags.transitional_processing = false; - let mut errors = Vec::new(); - let domain = processing(domain, flags, &mut errors); - let errors = if errors.is_empty() { - Ok(()) - } else { - Err(Errors(errors)) - }; - (domain, errors) -}