/rust/registry/src/index.crates.io-6f17d22bba15001f/idna-0.5.0/src/uts46.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2013-2014 The rust-url developers. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | | // option. This file may not be copied, modified, or distributed |
7 | | // except according to those terms. |
8 | | |
9 | | //! [*Unicode IDNA Compatibility Processing* |
10 | | //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) |
11 | | |
12 | | use self::Mapping::*; |
13 | | use crate::punycode; |
14 | | |
15 | | use alloc::string::String; |
16 | | use core::fmt; |
17 | | use unicode_bidi::{bidi_class, BidiClass}; |
18 | | use unicode_normalization::char::is_combining_mark; |
19 | | use unicode_normalization::{is_nfc, UnicodeNormalization}; |
20 | | |
21 | | include!("uts46_mapping_table.rs"); |
22 | | |
23 | | const PUNYCODE_PREFIX: &str = "xn--"; |
24 | | |
/// Locates one replacement string inside `STRING_TABLE`.
///
/// `decode_slice` rebuilds the start offset as `(byte_start_hi << 8) | byte_start_lo`
/// and reads `byte_len` bytes from there.
#[derive(Debug)]
struct StringTableSlice {
    // Store these as separate fields so the structure will have an
    // alignment of 1 and thus pack better into the Mapping enum, below.
    /// Low byte of the start offset into `STRING_TABLE`.
    byte_start_lo: u8,
    /// High byte of the start offset into `STRING_TABLE`.
    byte_start_hi: u8,
    /// Length of the replacement string, in bytes.
    byte_len: u8,
}
33 | | |
34 | 0 | fn decode_slice(slice: &StringTableSlice) -> &'static str { |
35 | 0 | let lo = slice.byte_start_lo as usize; |
36 | 0 | let hi = slice.byte_start_hi as usize; |
37 | 0 | let start = (hi << 8) | lo; |
38 | 0 | let len = slice.byte_len as usize; |
39 | 0 | &STRING_TABLE[start..(start + len)] |
40 | 0 | } |
41 | | |
/// Status of a code point in the UTS #46 mapping table.
///
/// The per-variant notes describe how `Mapper::next` treats each status.
#[repr(u8)]
#[derive(Debug)]
enum Mapping {
    /// Emitted unchanged.
    Valid,
    /// Dropped from the output.
    Ignored,
    /// Replaced by the referenced string.
    Mapped(StringTableSlice),
    /// Replaced by the referenced string under transitional processing,
    /// otherwise emitted unchanged.
    Deviation(StringTableSlice),
    /// Emitted unchanged, and `Errors::disallowed_character` is set.
    Disallowed,
    /// Emitted unchanged; flagged as an error only when STD3 ASCII rules are enabled.
    DisallowedStd3Valid,
    /// Replaced by the referenced string; flagged as an error only when STD3
    /// ASCII rules are enabled.
    DisallowedStd3Mapped(StringTableSlice),
    /// Emitted unchanged; flagged as an error only when IDNA 2008 rules are enabled.
    DisallowedIdna2008,
}
54 | | |
55 | 0 | fn find_char(codepoint: char) -> &'static Mapping { |
56 | 0 | let idx = match TABLE.binary_search_by_key(&codepoint, |&val| val.0) { |
57 | 0 | Ok(idx) => idx, |
58 | 0 | Err(idx) => idx - 1, |
59 | | }; |
60 | | |
61 | | const SINGLE_MARKER: u16 = 1 << 15; |
62 | | |
63 | 0 | let (base, x) = TABLE[idx]; |
64 | 0 | let single = (x & SINGLE_MARKER) != 0; |
65 | 0 | let offset = !SINGLE_MARKER & x; |
66 | 0 |
|
67 | 0 | if single { |
68 | 0 | &MAPPING_TABLE[offset as usize] |
69 | | } else { |
70 | 0 | &MAPPING_TABLE[(offset + (codepoint as u16 - base as u16)) as usize] |
71 | | } |
72 | 0 | } |
73 | | |
/// Iterator that applies the UTS #46 character mapping to a domain string,
/// recording any problems in `errors` as it goes.
struct Mapper<'a> {
    /// Remaining input characters.
    chars: core::str::Chars<'a>,
    /// Options that decide how deviation / STD3 / IDNA 2008 statuses are treated.
    config: Config,
    /// Error flags set as a side effect of iteration.
    errors: &'a mut Errors,
    /// Replacement text currently being emitted for a mapped character, if any.
    slice: Option<core::str::Chars<'static>>,
}
80 | | |
81 | | impl<'a> Iterator for Mapper<'a> { |
82 | | type Item = char; |
83 | | |
84 | 0 | fn next(&mut self) -> Option<Self::Item> { |
85 | | loop { |
86 | 0 | if let Some(s) = &mut self.slice { |
87 | 0 | match s.next() { |
88 | 0 | Some(c) => return Some(c), |
89 | 0 | None => { |
90 | 0 | self.slice = None; |
91 | 0 | } |
92 | | } |
93 | 0 | } |
94 | | |
95 | 0 | let codepoint = self.chars.next()?; |
96 | 0 | if let '.' | '-' | 'a'..='z' | '0'..='9' = codepoint { |
97 | 0 | return Some(codepoint); |
98 | 0 | } |
99 | 0 |
|
100 | 0 | return Some(match *find_char(codepoint) { |
101 | 0 | Mapping::Valid => codepoint, |
102 | 0 | Mapping::Ignored => continue, |
103 | 0 | Mapping::Mapped(ref slice) => { |
104 | 0 | self.slice = Some(decode_slice(slice).chars()); |
105 | 0 | continue; |
106 | | } |
107 | 0 | Mapping::Deviation(ref slice) => { |
108 | 0 | if self.config.transitional_processing { |
109 | 0 | self.slice = Some(decode_slice(slice).chars()); |
110 | 0 | continue; |
111 | | } else { |
112 | 0 | codepoint |
113 | | } |
114 | | } |
115 | | Mapping::Disallowed => { |
116 | 0 | self.errors.disallowed_character = true; |
117 | 0 | codepoint |
118 | | } |
119 | | Mapping::DisallowedStd3Valid => { |
120 | 0 | if self.config.use_std3_ascii_rules { |
121 | 0 | self.errors.disallowed_by_std3_ascii_rules = true; |
122 | 0 | }; |
123 | 0 | codepoint |
124 | | } |
125 | 0 | Mapping::DisallowedStd3Mapped(ref slice) => { |
126 | 0 | if self.config.use_std3_ascii_rules { |
127 | 0 | self.errors.disallowed_mapped_in_std3 = true; |
128 | 0 | }; |
129 | 0 | self.slice = Some(decode_slice(slice).chars()); |
130 | 0 | continue; |
131 | | } |
132 | | Mapping::DisallowedIdna2008 => { |
133 | 0 | if self.config.use_idna_2008_rules { |
134 | 0 | self.errors.disallowed_in_idna_2008 = true; |
135 | 0 | } |
136 | 0 | codepoint |
137 | | } |
138 | | }); |
139 | | } |
140 | 0 | } |
141 | | } |
142 | | |
143 | | // http://tools.ietf.org/html/rfc5893#section-2 |
144 | 0 | fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { |
145 | 0 | // Rule 0: Bidi Rules apply to Bidi Domain Names: a name with at least one RTL label. A label |
146 | 0 | // is RTL if it contains at least one character of bidi class R, AL or AN. |
147 | 0 | if !is_bidi_domain { |
148 | 0 | return true; |
149 | 0 | } |
150 | 0 |
|
151 | 0 | let mut chars = label.chars(); |
152 | 0 | let first_char_class = match chars.next() { |
153 | 0 | Some(c) => bidi_class(c), |
154 | 0 | None => return true, // empty string |
155 | | }; |
156 | | |
157 | 0 | match first_char_class { |
158 | | // LTR label |
159 | | BidiClass::L => { |
160 | | // Rule 5 |
161 | 0 | for c in chars.by_ref() { |
162 | 0 | if !matches!( |
163 | 0 | bidi_class(c), |
164 | | BidiClass::L |
165 | | | BidiClass::EN |
166 | | | BidiClass::ES |
167 | | | BidiClass::CS |
168 | | | BidiClass::ET |
169 | | | BidiClass::ON |
170 | | | BidiClass::BN |
171 | | | BidiClass::NSM |
172 | | ) { |
173 | 0 | return false; |
174 | 0 | } |
175 | | } |
176 | | |
177 | | // Rule 6 |
178 | | // must end in L or EN followed by 0 or more NSM |
179 | 0 | let mut rev_chars = label.chars().rev(); |
180 | 0 | let mut last_non_nsm = rev_chars.next(); |
181 | | loop { |
182 | 0 | match last_non_nsm { |
183 | 0 | Some(c) if bidi_class(c) == BidiClass::NSM => { |
184 | 0 | last_non_nsm = rev_chars.next(); |
185 | 0 | continue; |
186 | | } |
187 | | _ => { |
188 | 0 | break; |
189 | | } |
190 | | } |
191 | | } |
192 | 0 | match last_non_nsm { |
193 | 0 | Some(c) if bidi_class(c) == BidiClass::L || bidi_class(c) == BidiClass::EN => {} |
194 | | Some(_) => { |
195 | 0 | return false; |
196 | | } |
197 | 0 | _ => {} |
198 | | } |
199 | | } |
200 | | |
201 | | // RTL label |
202 | | BidiClass::R | BidiClass::AL => { |
203 | 0 | let mut found_en = false; |
204 | 0 | let mut found_an = false; |
205 | | |
206 | | // Rule 2 |
207 | 0 | for c in chars { |
208 | 0 | let char_class = bidi_class(c); |
209 | 0 | if char_class == BidiClass::EN { |
210 | 0 | found_en = true; |
211 | 0 | } else if char_class == BidiClass::AN { |
212 | 0 | found_an = true; |
213 | 0 | } |
214 | | |
215 | 0 | if !matches!( |
216 | 0 | char_class, |
217 | | BidiClass::R |
218 | | | BidiClass::AL |
219 | | | BidiClass::AN |
220 | | | BidiClass::EN |
221 | | | BidiClass::ES |
222 | | | BidiClass::CS |
223 | | | BidiClass::ET |
224 | | | BidiClass::ON |
225 | | | BidiClass::BN |
226 | | | BidiClass::NSM |
227 | | ) { |
228 | 0 | return false; |
229 | 0 | } |
230 | | } |
231 | | // Rule 3 |
232 | 0 | let mut rev_chars = label.chars().rev(); |
233 | 0 | let mut last = rev_chars.next(); |
234 | | loop { |
235 | | // must end in L or EN followed by 0 or more NSM |
236 | 0 | match last { |
237 | 0 | Some(c) if bidi_class(c) == BidiClass::NSM => { |
238 | 0 | last = rev_chars.next(); |
239 | 0 | continue; |
240 | | } |
241 | | _ => { |
242 | 0 | break; |
243 | | } |
244 | | } |
245 | | } |
246 | 0 | match last { |
247 | 0 | Some(c) |
248 | 0 | if matches!( |
249 | 0 | bidi_class(c), |
250 | | BidiClass::R | BidiClass::AL | BidiClass::EN | BidiClass::AN |
251 | 0 | ) => {} |
252 | | _ => { |
253 | 0 | return false; |
254 | | } |
255 | | } |
256 | | |
257 | | // Rule 4 |
258 | 0 | if found_an && found_en { |
259 | 0 | return false; |
260 | 0 | } |
261 | | } |
262 | | |
263 | | // Rule 1: Should start with L or R/AL |
264 | | _ => { |
265 | 0 | return false; |
266 | | } |
267 | | } |
268 | | |
269 | 0 | true |
270 | 0 | } |
271 | | |
272 | | /// Check the validity criteria for the given label |
273 | | /// |
274 | | /// V1 (NFC) and V8 (Bidi) are checked inside `processing()` to prevent doing duplicate work. |
275 | | /// |
276 | | /// http://www.unicode.org/reports/tr46/#Validity_Criteria |
277 | 0 | fn check_validity(label: &str, config: Config, errors: &mut Errors) { |
278 | 0 | let first_char = label.chars().next(); |
279 | 0 | if first_char.is_none() { |
280 | | // Empty string, pass |
281 | 0 | return; |
282 | 0 | } |
283 | 0 |
|
284 | 0 | // V2: No U+002D HYPHEN-MINUS in both third and fourth positions. |
285 | 0 | // |
286 | 0 | // NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the |
287 | 0 | // third and fourth positions. But nobody follows this criteria. See the spec issue below: |
288 | 0 | // https://github.com/whatwg/url/issues/53 |
289 | 0 |
|
290 | 0 | // V3: neither begin nor end with a U+002D HYPHEN-MINUS |
291 | 0 | if config.check_hyphens && (label.starts_with('-') || label.ends_with('-')) { |
292 | 0 | errors.check_hyphens = true; |
293 | 0 | return; |
294 | 0 | } |
295 | 0 |
|
296 | 0 | // V4: not contain a U+002E FULL STOP |
297 | 0 | // |
298 | 0 | // Here, label can't contain '.' since the input is from .split('.') |
299 | 0 |
|
300 | 0 | // V5: not begin with a GC=Mark |
301 | 0 | if is_combining_mark(first_char.unwrap()) { |
302 | 0 | errors.start_combining_mark = true; |
303 | 0 | return; |
304 | 0 | } |
305 | 0 |
|
306 | 0 | // V6: Check against Mapping Table |
307 | 0 | if label.chars().any(|c| match *find_char(c) { |
308 | 0 | Mapping::Valid | Mapping::DisallowedIdna2008 => false, |
309 | 0 | Mapping::Deviation(_) => config.transitional_processing, |
310 | 0 | Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules, |
311 | 0 | _ => true, |
312 | 0 | }) { |
313 | 0 | errors.invalid_mapping = true; |
314 | 0 | } |
315 | | |
316 | | // V7: ContextJ rules |
317 | | // |
318 | | // TODO: Implement rules and add *CheckJoiners* flag. |
319 | | |
320 | | // V8: Bidi rules are checked inside `processing()` |
321 | 0 | } |
322 | | |
// Detect simple cases: domains made only of lowercase ASCII letters, digits,
// hyphens and dots, where no label starts with the Punycode prefix ("xn--")
// and no label starts or ends with a hyphen.
//
// Returns `false` for the empty string. Empty labels ("a..b", ".") are still
// reported as simple.
//
// Previously every '-' fell through to the final alphanumeric check and was
// rejected, so the leading/trailing-hyphen and "xn--" tests were unreachable
// and any hyphenated domain was sent down the slow path. Interior hyphens are
// now accepted here.
fn is_simple(domain: &str) -> bool {
    const PREFIX: [char; 4] = ['x', 'n', '-', '-'];

    if domain.is_empty() {
        return false;
    }

    // `matched` is the number of leading characters of the current label that
    // agree with PREFIX so far, or `None` once the label has diverged from it.
    let mut matched = Some(0);
    let mut prev = '?';
    for c in domain.chars() {
        if c == '.' {
            // The label that just ended must not end with a hyphen.
            if prev == '-' {
                return false;
            }
            matched = Some(0);
            continue;
        }

        match c {
            'a'..='z' | '0'..='9' => {}
            // A hyphen is fine anywhere except at the start of a label.
            '-' if matched != Some(0) => {}
            _ => return false,
        }

        if let Some(n) = matched {
            matched = if c == PREFIX[n] {
                if n + 1 == PREFIX.len() {
                    // The label starts with "xn--": needs Punycode decoding.
                    return false;
                }
                Some(n + 1)
            } else {
                None
            };
        }
        prev = c;
    }

    // The final label must not end with a hyphen either.
    prev != '-'
}
357 | | |
358 | | /// http://www.unicode.org/reports/tr46/#Processing |
359 | 0 | fn processing( |
360 | 0 | domain: &str, |
361 | 0 | config: Config, |
362 | 0 | normalized: &mut String, |
363 | 0 | output: &mut String, |
364 | 0 | ) -> Errors { |
365 | 0 | normalized.clear(); |
366 | 0 | let mut errors = Errors::default(); |
367 | 0 | let offset = output.len(); |
368 | 0 |
|
369 | 0 | let iter = Mapper { |
370 | 0 | chars: domain.chars(), |
371 | 0 | config, |
372 | 0 | errors: &mut errors, |
373 | 0 | slice: None, |
374 | 0 | }; |
375 | 0 |
|
376 | 0 | normalized.extend(iter.nfc()); |
377 | 0 |
|
378 | 0 | let mut decoder = punycode::Decoder::default(); |
379 | 0 | let non_transitional = config.transitional_processing(false); |
380 | 0 | let (mut first, mut has_bidi_labels) = (true, false); |
381 | 0 | for label in normalized.split('.') { |
382 | 0 | if !first { |
383 | 0 | output.push('.'); |
384 | 0 | } |
385 | 0 | first = false; |
386 | 0 | if let Some(remainder) = label.strip_prefix(PUNYCODE_PREFIX) { |
387 | 0 | match decoder.decode(remainder) { |
388 | 0 | Ok(decode) => { |
389 | 0 | let start = output.len(); |
390 | 0 | output.extend(decode); |
391 | 0 | let decoded_label = &output[start..]; |
392 | 0 |
|
393 | 0 | if !has_bidi_labels { |
394 | 0 | has_bidi_labels |= is_bidi_domain(decoded_label); |
395 | 0 | } |
396 | | |
397 | 0 | if !errors.is_err() { |
398 | 0 | if !is_nfc(decoded_label) { |
399 | 0 | errors.nfc = true; |
400 | 0 | } else { |
401 | 0 | check_validity(decoded_label, non_transitional, &mut errors); |
402 | 0 | } |
403 | 0 | } |
404 | | } |
405 | 0 | Err(()) => { |
406 | 0 | has_bidi_labels = true; |
407 | 0 | errors.punycode = true; |
408 | 0 | } |
409 | | } |
410 | | } else { |
411 | 0 | if !has_bidi_labels { |
412 | 0 | has_bidi_labels |= is_bidi_domain(label); |
413 | 0 | } |
414 | | |
415 | | // `normalized` is already `NFC` so we can skip that check |
416 | 0 | check_validity(label, config, &mut errors); |
417 | 0 | output.push_str(label) |
418 | | } |
419 | | } |
420 | | |
421 | 0 | for label in output[offset..].split('.') { |
422 | | // V8: Bidi rules |
423 | | // |
424 | | // TODO: Add *CheckBidi* flag |
425 | 0 | if !passes_bidi(label, has_bidi_labels) { |
426 | 0 | errors.check_bidi = true; |
427 | 0 | break; |
428 | 0 | } |
429 | | } |
430 | | |
431 | 0 | errors |
432 | 0 | } |
433 | | |
/// UTS #46 converter that keeps its configuration and two scratch strings
/// between calls.
#[derive(Default)]
pub struct Idna {
    /// Options applied to every conversion.
    config: Config,
    /// Scratch buffer handed to `processing()` for the mapped, normalized input.
    normalized: String,
    /// Scratch buffer holding the processed domain while `to_ascii_inner`
    /// re-encodes it label by label.
    output: String,
}
440 | | |
441 | | impl Idna { |
442 | 0 | pub fn new(config: Config) -> Self { |
443 | 0 | Self { |
444 | 0 | config, |
445 | 0 | normalized: String::new(), |
446 | 0 | output: String::new(), |
447 | 0 | } |
448 | 0 | } |
449 | | |
450 | 0 | pub fn to_ascii_inner(&mut self, domain: &str, out: &mut String) -> Errors { |
451 | 0 | if is_simple(domain) { |
452 | 0 | out.push_str(domain); |
453 | 0 | return Errors::default(); |
454 | 0 | } |
455 | 0 | let mut errors = processing(domain, self.config, &mut self.normalized, out); |
456 | 0 | self.output = core::mem::replace(out, String::with_capacity(out.len())); |
457 | 0 | let mut first = true; |
458 | 0 | for label in self.output.split('.') { |
459 | 0 | if !first { |
460 | 0 | out.push('.'); |
461 | 0 | } |
462 | 0 | first = false; |
463 | 0 |
|
464 | 0 | if label.is_ascii() { |
465 | 0 | out.push_str(label); |
466 | 0 | } else { |
467 | 0 | let offset = out.len(); |
468 | 0 | out.push_str(PUNYCODE_PREFIX); |
469 | 0 | if let Err(()) = punycode::encode_into(label.chars(), out) { |
470 | 0 | errors.punycode = true; |
471 | 0 | out.truncate(offset); |
472 | 0 | } |
473 | | } |
474 | | } |
475 | 0 | errors |
476 | 0 | } |
477 | | |
478 | | /// http://www.unicode.org/reports/tr46/#ToASCII |
479 | | #[allow(clippy::wrong_self_convention)] |
480 | 0 | pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { |
481 | 0 | let mut errors = self.to_ascii_inner(domain, out); |
482 | 0 |
|
483 | 0 | if self.config.verify_dns_length { |
484 | 0 | let domain = if out.ends_with('.') { |
485 | 0 | &out[..out.len() - 1] |
486 | | } else { |
487 | 0 | &*out |
488 | | }; |
489 | 0 | if domain.is_empty() || domain.split('.').any(|label| label.is_empty()) { |
490 | 0 | errors.too_short_for_dns = true; |
491 | 0 | } |
492 | 0 | if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { |
493 | 0 | errors.too_long_for_dns = true; |
494 | 0 | } |
495 | 0 | } |
496 | | |
497 | 0 | errors.into() |
498 | 0 | } |
499 | | |
500 | | /// http://www.unicode.org/reports/tr46/#ToUnicode |
501 | | #[allow(clippy::wrong_self_convention)] |
502 | 0 | pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { |
503 | 0 | if is_simple(domain) { |
504 | 0 | out.push_str(domain); |
505 | 0 | return Errors::default().into(); |
506 | 0 | } |
507 | 0 | processing(domain, self.config, &mut self.normalized, out).into() |
508 | 0 | } |
509 | | } |
510 | | |
/// Options controlling UTS #46 processing.
///
/// Values are built by chaining the setter methods of the same names, each of
/// which returns the updated `Config`.
#[derive(Clone, Copy)]
#[must_use]
pub struct Config {
    /// Report characters with a "disallowed STD3" status as errors.
    use_std3_ascii_rules: bool,
    /// Replace deviation characters by their mapping instead of keeping them.
    transitional_processing: bool,
    /// Have `Idna::to_ascii` flag empty or over-long domains and labels.
    verify_dns_length: bool,
    /// Report labels that begin or end with a hyphen as errors.
    check_hyphens: bool,
    /// Report characters with the "disallowed in IDNA 2008" status as errors.
    use_idna_2008_rules: bool,
}
520 | | |
521 | | /// The defaults are that of https://url.spec.whatwg.org/#idna |
522 | | impl Default for Config { |
523 | 0 | fn default() -> Self { |
524 | 0 | Config { |
525 | 0 | use_std3_ascii_rules: false, |
526 | 0 | transitional_processing: false, |
527 | 0 | check_hyphens: false, |
528 | 0 | // check_bidi: true, |
529 | 0 | // check_joiners: true, |
530 | 0 |
|
531 | 0 | // Only use for to_ascii, not to_unicode |
532 | 0 | verify_dns_length: false, |
533 | 0 | use_idna_2008_rules: false, |
534 | 0 | } |
535 | 0 | } |
536 | | } |
537 | | |
538 | | impl Config { |
539 | | #[inline] |
540 | 0 | pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { |
541 | 0 | self.use_std3_ascii_rules = value; |
542 | 0 | self |
543 | 0 | } |
544 | | |
545 | | #[inline] |
546 | 0 | pub fn transitional_processing(mut self, value: bool) -> Self { |
547 | 0 | self.transitional_processing = value; |
548 | 0 | self |
549 | 0 | } |
550 | | |
551 | | #[inline] |
552 | 0 | pub fn verify_dns_length(mut self, value: bool) -> Self { |
553 | 0 | self.verify_dns_length = value; |
554 | 0 | self |
555 | 0 | } |
556 | | |
557 | | #[inline] |
558 | 0 | pub fn check_hyphens(mut self, value: bool) -> Self { |
559 | 0 | self.check_hyphens = value; |
560 | 0 | self |
561 | 0 | } |
562 | | |
563 | | #[inline] |
564 | 0 | pub fn use_idna_2008_rules(mut self, value: bool) -> Self { |
565 | 0 | self.use_idna_2008_rules = value; |
566 | 0 | self |
567 | 0 | } |
568 | | |
569 | | /// http://www.unicode.org/reports/tr46/#ToASCII |
570 | 0 | pub fn to_ascii(self, domain: &str) -> Result<String, Errors> { |
571 | 0 | let mut result = String::with_capacity(domain.len()); |
572 | 0 | let mut codec = Idna::new(self); |
573 | 0 | codec.to_ascii(domain, &mut result).map(|()| result) |
574 | 0 | } |
575 | | |
576 | | /// http://www.unicode.org/reports/tr46/#ToUnicode |
577 | 0 | pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { |
578 | 0 | let mut codec = Idna::new(self); |
579 | 0 | let mut out = String::with_capacity(domain.len()); |
580 | 0 | let result = codec.to_unicode(domain, &mut out); |
581 | 0 | (out, result) |
582 | 0 | } |
583 | | } |
584 | | |
585 | 0 | fn is_bidi_domain(s: &str) -> bool { |
586 | 0 | for c in s.chars() { |
587 | 0 | if c.is_ascii_graphic() { |
588 | 0 | continue; |
589 | 0 | } |
590 | 0 | match bidi_class(c) { |
591 | 0 | BidiClass::R | BidiClass::AL | BidiClass::AN => return true, |
592 | 0 | _ => {} |
593 | | } |
594 | | } |
595 | 0 | false |
596 | 0 | } |
597 | | |
/// Errors recorded during UTS #46 processing.
///
/// This is opaque for now, indicating what types of errors have been encountered at least once.
/// More details may be exposed in the future.
#[derive(Default)]
pub struct Errors {
    /// A label could not be Punycode-decoded or Punycode-encoded.
    punycode: bool,
    /// A label begins or ends with a hyphen while hyphen checking is enabled.
    check_hyphens: bool,
    /// A label fails the Bidi rule.
    check_bidi: bool,
    /// A label begins with a combining mark.
    start_combining_mark: bool,
    /// A label contains a character whose mapping status is not acceptable.
    invalid_mapping: bool,
    /// A decoded Punycode label is not in NFC.
    nfc: bool,
    /// A "disallowed STD3 valid" character was seen while STD3 rules are enabled.
    disallowed_by_std3_ascii_rules: bool,
    /// A "disallowed STD3 mapped" character was seen while STD3 rules are enabled.
    disallowed_mapped_in_std3: bool,
    /// A disallowed character was seen.
    disallowed_character: bool,
    /// The domain exceeds 253 bytes or a label exceeds 63 bytes.
    too_long_for_dns: bool,
    /// The domain or one of its labels is empty.
    too_short_for_dns: bool,
    /// A character disallowed in IDNA 2008 was seen while those rules are enabled.
    disallowed_in_idna_2008: bool,
}
617 | | |
618 | | impl Errors { |
619 | 0 | fn is_err(&self) -> bool { |
620 | 0 | let Errors { |
621 | 0 | punycode, |
622 | 0 | check_hyphens, |
623 | 0 | check_bidi, |
624 | 0 | start_combining_mark, |
625 | 0 | invalid_mapping, |
626 | 0 | nfc, |
627 | 0 | disallowed_by_std3_ascii_rules, |
628 | 0 | disallowed_mapped_in_std3, |
629 | 0 | disallowed_character, |
630 | 0 | too_long_for_dns, |
631 | 0 | too_short_for_dns, |
632 | 0 | disallowed_in_idna_2008, |
633 | 0 | } = *self; |
634 | 0 | punycode |
635 | 0 | || check_hyphens |
636 | 0 | || check_bidi |
637 | 0 | || start_combining_mark |
638 | 0 | || invalid_mapping |
639 | 0 | || nfc |
640 | 0 | || disallowed_by_std3_ascii_rules |
641 | 0 | || disallowed_mapped_in_std3 |
642 | 0 | || disallowed_character |
643 | 0 | || too_long_for_dns |
644 | 0 | || too_short_for_dns |
645 | 0 | || disallowed_in_idna_2008 |
646 | 0 | } |
647 | | } |
648 | | |
649 | | impl fmt::Debug for Errors { |
650 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
651 | 0 | let Errors { |
652 | 0 | punycode, |
653 | 0 | check_hyphens, |
654 | 0 | check_bidi, |
655 | 0 | start_combining_mark, |
656 | 0 | invalid_mapping, |
657 | 0 | nfc, |
658 | 0 | disallowed_by_std3_ascii_rules, |
659 | 0 | disallowed_mapped_in_std3, |
660 | 0 | disallowed_character, |
661 | 0 | too_long_for_dns, |
662 | 0 | too_short_for_dns, |
663 | 0 | disallowed_in_idna_2008, |
664 | 0 | } = *self; |
665 | 0 |
|
666 | 0 | let fields = [ |
667 | 0 | ("punycode", punycode), |
668 | 0 | ("check_hyphens", check_hyphens), |
669 | 0 | ("check_bidi", check_bidi), |
670 | 0 | ("start_combining_mark", start_combining_mark), |
671 | 0 | ("invalid_mapping", invalid_mapping), |
672 | 0 | ("nfc", nfc), |
673 | 0 | ( |
674 | 0 | "disallowed_by_std3_ascii_rules", |
675 | 0 | disallowed_by_std3_ascii_rules, |
676 | 0 | ), |
677 | 0 | ("disallowed_mapped_in_std3", disallowed_mapped_in_std3), |
678 | 0 | ("disallowed_character", disallowed_character), |
679 | 0 | ("too_long_for_dns", too_long_for_dns), |
680 | 0 | ("too_short_for_dns", too_short_for_dns), |
681 | 0 | ("disallowed_in_idna_2008", disallowed_in_idna_2008), |
682 | 0 | ]; |
683 | 0 |
|
684 | 0 | let mut empty = true; |
685 | 0 | f.write_str("Errors { ")?; |
686 | 0 | for (name, val) in &fields { |
687 | 0 | if *val { |
688 | 0 | if !empty { |
689 | 0 | f.write_str(", ")?; |
690 | 0 | } |
691 | 0 | f.write_str(name)?; |
692 | 0 | empty = false; |
693 | 0 | } |
694 | | } |
695 | | |
696 | 0 | if !empty { |
697 | 0 | f.write_str(" }") |
698 | | } else { |
699 | 0 | f.write_str("}") |
700 | | } |
701 | 0 | } |
702 | | } |
703 | | |
704 | | impl From<Errors> for Result<(), Errors> { |
705 | 0 | fn from(e: Errors) -> Result<(), Errors> { |
706 | 0 | if !e.is_err() { |
707 | 0 | Ok(()) |
708 | | } else { |
709 | 0 | Err(e) |
710 | | } |
711 | 0 | } |
712 | | } |
713 | | |
// Only compiled when the `std` feature is enabled. The empty body relies on
// the trait's default methods together with the `Debug` and `Display` impls
// in this file.
#[cfg(feature = "std")]
impl std::error::Error for Errors {}
716 | | |
717 | | impl fmt::Display for Errors { |
718 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
719 | 0 | fmt::Debug::fmt(self, f) |
720 | 0 | } |
721 | | } |
722 | | |
#[cfg(test)]
mod tests {
    use super::{find_char, Mapping};

    /// Every character that `Mapper::next` returns without a table lookup must
    /// be `Valid` in the table, otherwise the shortcut would change results.
    #[test]
    fn mapping_fast_path() {
        let fast_path = "-."
            .chars()
            .chain("0123456789".chars())
            .chain("abcdefghijklmnopqrstuvwxyz".chars());
        for c in fast_path {
            assert_matches!(find_char(c), &Mapping::Valid);
        }
    }
}