Skip to content

Commit ed77d29

Browse files
author
bors-servo
committed
Auto merge of #172 - servo:to_unicode, r=valenting
Add `idna::domain_to_unicode`. This is used by the "Unicode serialisation of an origin", which in turn is used by the getter of the `URL::origin` WebIDL attribute. r? @valenting <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.svg" height="40" alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/rust-url/172) <!-- Reviewable:end -->
2 parents 3a3c850 + 5916b5e commit ed77d29

File tree

4 files changed

+60
-43
lines changed

4 files changed

+60
-43
lines changed

make_idna_table.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,7 @@
2222
2323
// Generated by make_idna_table.py
2424
25-
use idna::Mapping::*;
26-
use idna::Range;
27-
28-
pub static TABLE: &'static [Range] = &[
25+
static TABLE: &'static [Range] = &[
2926
''')
3027

3128
txt = open("IdnaMappingTable.txt")

src/idna.rs

Lines changed: 58 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,17 @@
22
//!
33
//! https://url.spec.whatwg.org/#idna
44
5-
use idna_mapping::TABLE;
5+
use self::Mapping::*;
66
use punycode;
77
use std::ascii::AsciiExt;
88
use unicode_normalization::UnicodeNormalization;
99
use unicode_normalization::char::is_combining_mark;
1010
use unicode_bidi::{BidiClass, bidi_class};
1111

12+
include!("idna_mapping.rs");
13+
1214
#[derive(Debug)]
13-
pub enum Mapping {
15+
enum Mapping {
1416
Valid,
1517
Ignored,
1618
Mapped(&'static str),
@@ -20,7 +22,7 @@ pub enum Mapping {
2022
DisallowedStd3Mapped(&'static str),
2123
}
2224

23-
pub struct Range {
25+
struct Range {
2426
pub from: char,
2527
pub to: char,
2628
pub mapping: Mapping,
@@ -43,7 +45,7 @@ fn find_char(codepoint: char) -> &'static Mapping {
4345
&TABLE[min].mapping
4446
}
4547

46-
fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String) -> Result<(), Error> {
48+
fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String, errors: &mut Vec<Error>) {
4749
match *find_char(codepoint) {
4850
Mapping::Valid => output.push(codepoint),
4951
Mapping::Ignored => {},
@@ -55,23 +57,23 @@ fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String) -> Result<(
5557
output.push(codepoint)
5658
}
5759
}
58-
Mapping::Disallowed => return Err(Error::DissallowedCharacter),
60+
Mapping::Disallowed => {
61+
errors.push(Error::DissallowedCharacter);
62+
output.push(codepoint);
63+
}
5964
Mapping::DisallowedStd3Valid => {
6065
if flags.use_std3_ascii_rules {
61-
return Err(Error::DissallowedByStd3AsciiRules);
62-
} else {
63-
output.push(codepoint)
66+
errors.push(Error::DissallowedByStd3AsciiRules);
6467
}
68+
output.push(codepoint)
6569
}
6670
Mapping::DisallowedStd3Mapped(mapping) => {
6771
if flags.use_std3_ascii_rules {
68-
return Err(Error::DissallowedMappedInStd3);
69-
} else {
70-
output.push_str(mapping)
72+
errors.push(Error::DissallowedMappedInStd3);
7173
}
74+
output.push_str(mapping)
7275
}
7376
}
74-
Ok(())
7577
}
7678

7779
// http://tools.ietf.org/html/rfc5893#section-2
@@ -183,9 +185,9 @@ fn passes_bidi(label: &str, transitional_processing: bool) -> bool {
183185
}
184186

185187
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
186-
fn validate(label: &str, flags: Uts46Flags) -> Result<(), Error> {
188+
fn validate(label: &str, flags: Uts46Flags, errors: &mut Vec<Error>) {
187189
if label.nfc().ne(label.chars()) {
188-
return Err(Error::ValidityCriteria);
190+
errors.push(Error::ValidityCriteria);
189191
}
190192

191193
// Can not contain '.' since the input is from .split('.')
@@ -205,17 +207,15 @@ fn validate(label: &str, flags: Uts46Flags) -> Result<(), Error> {
205207
})
206208
|| !passes_bidi(label, flags.transitional_processing)
207209
{
208-
Err(Error::ValidityCriteria)
209-
} else {
210-
Ok(())
210+
errors.push(Error::ValidityCriteria)
211211
}
212212
}
213213

214214
/// http://www.unicode.org/reports/tr46/#Processing
215-
fn uts46_processing(domain: &str, flags: Uts46Flags) -> Result<String, Error> {
215+
fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec<Error>) -> String {
216216
let mut mapped = String::new();
217217
for c in domain.chars() {
218-
try!(map_char(c, flags, &mut mapped))
218+
map_char(c, flags, &mut mapped, errors)
219219
}
220220
let normalized: String = mapped.nfc().collect();
221221
let mut validated = String::new();
@@ -225,21 +225,19 @@ fn uts46_processing(domain: &str, flags: Uts46Flags) -> Result<String, Error> {
225225
}
226226
if label.starts_with("xn--") {
227227
match punycode::decode_to_string(&label["xn--".len()..]) {
228-
Some(label) => {
229-
try!(validate(&label, Uts46Flags {
230-
transitional_processing: false,
231-
..flags
232-
}));
233-
validated.push_str(&label)
228+
Some(decoded_label) => {
229+
let flags = Uts46Flags { transitional_processing: false, ..flags };
230+
validate(&decoded_label, flags, errors);
231+
validated.push_str(&decoded_label)
234232
}
235-
None => return Err(Error::PunycodeError),
233+
None => errors.push(Error::PunycodeError)
236234
}
237235
} else {
238-
try!(validate(label, flags));
236+
validate(label, flags, errors);
239237
validated.push_str(label)
240238
}
241239
}
242-
Ok(validated)
240+
validated
243241
}
244242

245243
#[derive(Copy, Clone)]
@@ -260,9 +258,10 @@ pub enum Error {
260258
}
261259

262260
/// http://www.unicode.org/reports/tr46/#ToASCII
263-
pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Error> {
261+
pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Vec<Error>> {
262+
let mut errors = Vec::new();
264263
let mut result = String::new();
265-
for label in try!(uts46_processing(domain, flags)).split('.') {
264+
for label in uts46_processing(domain, flags, &mut errors).split('.') {
266265
if result.len() > 0 {
267266
result.push('.');
268267
}
@@ -274,7 +273,7 @@ pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Error>
274273
result.push_str("xn--");
275274
result.push_str(&x);
276275
},
277-
None => return Err(Error::PunycodeError)
276+
None => errors.push(Error::PunycodeError)
278277
}
279278
}
280279
}
@@ -283,17 +282,42 @@ pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Error>
283282
let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
284283
if domain.len() < 1 || domain.len() > 253 ||
285284
domain.split('.').any(|label| label.len() < 1 || label.len() > 63) {
286-
return Err(Error::TooLongForDns)
285+
errors.push(Error::TooLongForDns)
287286
}
288287
}
289-
Ok(result)
288+
if errors.is_empty() {
289+
Ok(result)
290+
} else {
291+
Err(errors)
292+
}
290293
}
291294

292295
/// https://url.spec.whatwg.org/#concept-domain-to-ascii
293-
pub fn domain_to_ascii(domain: &str) -> Result<String, Error> {
296+
pub fn domain_to_ascii(domain: &str) -> Result<String, Vec<Error>> {
294297
uts46_to_ascii(domain, Uts46Flags {
295298
use_std3_ascii_rules: false,
296299
transitional_processing: true, // XXX: switch when Firefox does
297300
verify_dns_length: false,
298301
})
299302
}
303+
304+
/// http://www.unicode.org/reports/tr46/#ToUnicode
305+
///
306+
/// Only `use_std3_ascii_rules` is used in `flags`.
307+
pub fn uts46_to_unicode(domain: &str, mut flags: Uts46Flags) -> (String, Vec<Error>) {
308+
flags.transitional_processing = false;
309+
let mut errors = Vec::new();
310+
let domain = uts46_processing(domain, flags, &mut errors);
311+
(domain, errors)
312+
}
313+
314+
/// https://url.spec.whatwg.org/#concept-domain-to-unicode
315+
pub fn domain_to_unicode(domain: &str) -> (String, Vec<Error>) {
316+
uts46_to_unicode(domain, Uts46Flags {
317+
use_std3_ascii_rules: false,
318+
319+
// Unused:
320+
transitional_processing: true,
321+
verify_dns_length: false,
322+
})
323+
}

src/idna_mapping.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@
88

99
// Generated by make_idna_table.py
1010

11-
use idna::Mapping::*;
12-
use idna::Range;
13-
14-
pub static TABLE: &'static [Range] = &[
11+
static TABLE: &'static [Range] = &[
1512

1613
Range { from: '\0', to: ',', mapping: DisallowedStd3Valid },
1714
Range { from: '-', to: '.', mapping: Valid },

src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ pub mod form_urlencoded;
173173
pub mod punycode;
174174
pub mod format;
175175
pub mod idna;
176-
mod idna_mapping;
177176

178177
/// The parsed representation of an absolute URL.
179178
#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]

0 commit comments

Comments
 (0)