/rust/registry/src/index.crates.io-6f17d22bba15001f/idna-1.0.3/src/uts46.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright The rust-url developers. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | | // option. This file may not be copied, modified, or distributed |
7 | | // except according to those terms. |
8 | | |
9 | | //! This module provides the lower-level API for UTS 46. |
10 | | //! |
11 | | //! [`Uts46::process`] is the core that the other convenience |
12 | | //! methods build on. |
13 | | //! |
14 | | //! UTS 46 flags map to this API as follows: |
15 | | //! |
16 | | //! * _CheckHyphens_ - _true_: [`Hyphens::Check`], _false_: [`Hyphens::Allow`]; the WHATWG URL Standard sets this to _false_ for normal (non-conformance-checker) user agents. |
17 | | //! * _CheckBidi_ - Always _true_; cannot be configured, since this flag is _true_ even when WHATWG URL Standard _beStrict_ is _false_. |
18 | | //! * _CheckJoiners_ - Always _true_; cannot be configured, since this flag is _true_ even when WHATWG URL Standard _beStrict_ is _false_. |
19 | | //! * _UseSTD3ASCIIRules_ - _true_: [`AsciiDenyList::STD3`], _false_: [`AsciiDenyList::EMPTY`]; however, the check the WHATWG URL Standard performs right after the UTS 46 invocation corresponds to [`AsciiDenyList::URL`]. |
20 | | //! * _Transitional_Processing_ - Always _false_ but could be implemented as a preprocessing step. This flag is deprecated and for Web purposes the transition is over in the sense that all of Firefox, Safari, and Chrome set this flag to _false_. |
21 | | //! * _VerifyDnsLength_ - _true_: [`DnsLength::Verify`], _false_: [`DnsLength::Ignore`]; the WHATWG URL Standard sets this to _false_ for normal (non-conformance-checker) user agents. |
22 | | //! * _IgnoreInvalidPunycode_ - Always _false_; cannot be configured. (Not yet covered by the WHATWG URL Standard, but 2 out of 3 major browsers clearly behave as if this were _false_). |
23 | | |
24 | | use crate::punycode::Decoder; |
25 | | use crate::punycode::InternalCaller; |
26 | | use alloc::borrow::Cow; |
27 | | use alloc::string::String; |
28 | | use core::fmt::Write; |
29 | | use idna_adapter::*; |
30 | | use smallvec::SmallVec; |
31 | | use utf8_iter::Utf8CharsEx; |
32 | | |
/// Input-length cap for Punycode decoding, chosen for ICU4C compatibility.
///
/// https://unicode-org.atlassian.net/browse/ICU-13727
const PUNYCODE_DECODE_MAX_INPUT_LENGTH: usize = 2_000;

/// Input-length cap for Punycode encoding, chosen for ICU4C compatibility.
///
/// ICU4C measures the input in UTF-16 whereas this crate measures it in
/// UTF-32, so longer non-BMP inputs are allowed here. For this
/// implementation the denial-of-service scaling depends only on the
/// number of scalar values, not on BMP vs. non-BMP.
///
/// https://unicode-org.atlassian.net/browse/ICU-13727
const PUNYCODE_ENCODE_MAX_INPUT_LENGTH: usize = 1_000;
45 | | |
/// Records which kind of numerals has been encountered so far in an
/// RTL label.
#[derive(Debug, PartialEq, Eq)]
enum RtlNumeralState {
    /// No numeral kind recorded (presumably the starting state; the
    /// consumer of this enum is outside this view).
    Undecided,
    /// European numerals were seen.
    European,
    /// Arabic numerals were seen.
    Arabic,
}
54 | | |
/// Builds a 128-bit set in which bit `b` is set exactly when the ASCII
/// byte `b` is an upper-case letter (`A`..=`Z`).
const fn upper_case_mask() -> u128 {
    let mut mask = 0u128;
    // `for` loops are not available in `const fn`, hence the manual walk
    // over just the letters of interest.
    let mut letter = b'A';
    while letter <= b'Z' {
        mask |= 1u128 << letter;
        letter += 1;
    }
    mask
}

/// Bit set of the upper-case ASCII letters, evaluated at compile time.
const UPPER_CASE_MASK: u128 = upper_case_mask();
70 | | |
/// Builds a 128-bit set in which bit `b` is set exactly when the ASCII
/// byte `b` is glyphless: U+0020 SPACE and everything below it, plus
/// U+007F DELETE.
const fn glyphless_mask() -> u128 {
    // Start from DELETE, then add the controls and the space.
    let mut mask = 1u128 << 0x7Fu8;
    let mut byte = 0u8;
    while byte <= b' ' {
        mask |= 1u128 << byte;
        byte += 1;
    }
    mask
}

/// Bit set of the glyphless ASCII bytes, evaluated at compile time.
const GLYPHLESS_MASK: u128 = glyphless_mask();
86 | | |
/// Single-bit mask selecting the ASCII dot (`.`) in a 128-bit ASCII set.
const DOT_MASK: u128 = 1u128 << b'.';
89 | | |
/// Builds the ASCII deny list for the STD3 ASCII rules.
///
/// Bit `b` of the result is set for every ASCII byte `b` that is *not*
/// a lower-case letter, a digit, the hyphen, or the dot.
const fn ldh_mask() -> u128 {
    let mut denied = 0u128;
    let mut byte = 0u8;
    while byte < 128 {
        match byte {
            // Permitted: lower-case letters, digits, hyphen, dot.
            b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' => {}
            _ => denied |= 1u128 << byte,
        }
        byte += 1;
    }
    denied
}
102 | | |
/// The bytes `X`, `N`, `-`, `-` packed into a `u32` with the first byte
/// in the least-significant position.
const PUNYCODE_PREFIX: u32 = u32::from_le_bytes([b'X', b'N', b'-', b'-']);

/// Mask to apply before comparing against `PUNYCODE_PREFIX`: clears bit
/// 0x20 (the ASCII case bit) in the two letter positions and keeps the
/// two hyphen positions intact.
const PUNYCODE_PREFIX_MASK: u32 = u32::from_le_bytes([0xDF, 0xDF, 0xFF, 0xFF]);
107 | | |
108 | 0 | fn write_punycode_label<W: Write + ?Sized>( |
109 | 0 | label: &[char], |
110 | 0 | sink: &mut W, |
111 | 0 | ) -> Result<(), ProcessingError> { |
112 | 0 | sink.write_str("xn--")?; |
113 | 0 | crate::punycode::encode_into::<_, _, InternalCaller>(label.iter().copied(), sink)?; |
114 | 0 | Ok(()) |
115 | 0 | } Unexecuted instantiation: idna::uts46::write_punycode_label::<alloc::string::String> Unexecuted instantiation: idna::uts46::write_punycode_label::<alloc::string::String> Unexecuted instantiation: idna::uts46::write_punycode_label::<alloc::string::String> |
116 | | |
117 | | #[inline(always)] |
118 | 16.3k | fn has_punycode_prefix(slice: &[u8]) -> bool { |
119 | 16.3k | if slice.len() < 4 { |
120 | 2.89k | return false; |
121 | 13.4k | } |
122 | 13.4k | // Sadly, the optimizer doesn't figure out that more idiomatic code |
123 | 13.4k | // should compile to masking on 32-bit value. |
124 | 13.4k | let a = slice[0]; |
125 | 13.4k | let b = slice[1]; |
126 | 13.4k | let c = slice[2]; |
127 | 13.4k | let d = slice[3]; |
128 | 13.4k | let u = (u32::from(d) << 24) | (u32::from(c) << 16) | (u32::from(b) << 8) | u32::from(a); |
129 | 13.4k | (u & PUNYCODE_PREFIX_MASK) == PUNYCODE_PREFIX |
130 | 16.3k | } idna::uts46::has_punycode_prefix Line | Count | Source | 118 | 16.3k | fn has_punycode_prefix(slice: &[u8]) -> bool { | 119 | 16.3k | if slice.len() < 4 { | 120 | 2.89k | return false; | 121 | 13.4k | } | 122 | 13.4k | // Sadly, the optimizer doesn't figure out that more idiomatic code | 123 | 13.4k | // should compile to masking on 32-bit value. | 124 | 13.4k | let a = slice[0]; | 125 | 13.4k | let b = slice[1]; | 126 | 13.4k | let c = slice[2]; | 127 | 13.4k | let d = slice[3]; | 128 | 13.4k | let u = (u32::from(d) << 24) | (u32::from(c) << 16) | (u32::from(b) << 8) | u32::from(a); | 129 | 13.4k | (u & PUNYCODE_PREFIX_MASK) == PUNYCODE_PREFIX | 130 | 16.3k | } |
Unexecuted instantiation: idna::uts46::has_punycode_prefix Unexecuted instantiation: idna::uts46::has_punycode_prefix |
131 | | |
/// Tests `start <= u <= end` with a single comparison.
///
/// The wrapping subtraction maps values below `start` to large numbers,
/// so one unsigned compare covers both bounds. Expects `start <= end`.
#[inline(always)]
fn in_inclusive_range8(u: u8, start: u8, end: u8) -> bool {
    let offset = u.wrapping_sub(start);
    let width = end - start;
    offset <= width
}
136 | | |
/// Tests `start <= c <= end` on scalar values with a single comparison.
///
/// Same wrapping-subtraction trick as the byte variant, performed on the
/// `u32` code points. Expects `start <= end`.
#[inline(always)]
fn in_inclusive_range_char(c: char, start: char, end: char) -> bool {
    let low = u32::from(start);
    let offset = u32::from(c).wrapping_sub(low);
    let width = u32::from(end) - low;
    offset <= width
}
141 | | |
142 | | #[inline(always)] |
143 | 52.0k | fn is_passthrough_ascii_label(label: &[u8]) -> bool { |
144 | 52.0k | // XXX if we aren't performing _CheckHyphens_, this could |
145 | 52.0k | // check for "xn--" and pass through YouTube CDN node names. |
146 | 52.0k | if label.len() >= 4 && label[2] == b'-' && label[3] == b'-' { |
147 | 9.43k | return false; |
148 | 42.6k | } |
149 | 42.6k | if let Some((&first, tail)) = label.split_first() { |
150 | | // We need to check the first and last character |
151 | | // more strictly in case this turns out to be a |
152 | | // label in a bidi domain name. This has the side |
153 | | // effect that this function only accepts labels |
154 | | // that also conform to the STD3 rules. |
155 | | // |
156 | | // XXX: If we are in the fail-fast mode (i.e. we don't need |
157 | | // to be able to overwrite anything with U+FFFD), we could |
158 | | // merely record that we've seen a digit here and error out |
159 | | // if we later discover that the domain name is a bidi |
160 | | // domain name. |
161 | 42.6k | if !in_inclusive_range8(first, b'a', b'z') { |
162 | 6.83k | return false; |
163 | 35.8k | } |
164 | 519k | for &b in tail { |
165 | | // If we used LDH_MASK, we'd have to check |
166 | | // the bytes for the ASCII range anyhow. |
167 | 483k | if in_inclusive_range8(b, b'a', b'z') { |
168 | 301k | continue; |
169 | 181k | } |
170 | 181k | if in_inclusive_range8(b, b'0', b'9') { |
171 | 141k | continue; |
172 | 40.8k | } |
173 | 40.8k | if b == b'-' { |
174 | 40.8k | continue; |
175 | 66 | } |
176 | 66 | return false; |
177 | | } |
178 | 35.7k | label.last() != Some(&b'-') |
179 | | } else { |
180 | | // empty |
181 | 0 | true |
182 | | } |
183 | 52.0k | } idna::uts46::is_passthrough_ascii_label Line | Count | Source | 143 | 52.0k | fn is_passthrough_ascii_label(label: &[u8]) -> bool { | 144 | 52.0k | // XXX if we aren't performing _CheckHyphens_, this could | 145 | 52.0k | // check for "xn--" and pass through YouTube CDN node names. | 146 | 52.0k | if label.len() >= 4 && label[2] == b'-' && label[3] == b'-' { | 147 | 9.43k | return false; | 148 | 42.6k | } | 149 | 42.6k | if let Some((&first, tail)) = label.split_first() { | 150 | | // We need to check the first and last character | 151 | | // more strictly in case this turns out to be a | 152 | | // label in a bidi domain name. This has the side | 153 | | // effect that this function only accepts labels | 154 | | // that also conform to the STD3 rules. | 155 | | // | 156 | | // XXX: If we are in the fail-fast mode (i.e. we don't need | 157 | | // to be able to overwrite anything with U+FFFD), we could | 158 | | // merely record that we've seen a digit here and error out | 159 | | // if we later discover that the domain name is a bidi | 160 | | // domain name. | 161 | 42.6k | if !in_inclusive_range8(first, b'a', b'z') { | 162 | 6.83k | return false; | 163 | 35.8k | } | 164 | 519k | for &b in tail { | 165 | | // If we used LDH_MASK, we'd have to check | 166 | | // the bytes for the ASCII range anyhow. | 167 | 483k | if in_inclusive_range8(b, b'a', b'z') { | 168 | 301k | continue; | 169 | 181k | } | 170 | 181k | if in_inclusive_range8(b, b'0', b'9') { | 171 | 141k | continue; | 172 | 40.8k | } | 173 | 40.8k | if b == b'-' { | 174 | 40.8k | continue; | 175 | 66 | } | 176 | 66 | return false; | 177 | | } | 178 | 35.7k | label.last() != Some(&b'-') | 179 | | } else { | 180 | | // empty | 181 | 0 | true | 182 | | } | 183 | 52.0k | } |
Unexecuted instantiation: idna::uts46::is_passthrough_ascii_label Unexecuted instantiation: idna::uts46::is_passthrough_ascii_label |
184 | | |
/// Splits `label` into an ASCII head and the remaining tail.
///
/// * All-ASCII input: the head is the whole label and the tail is empty.
/// * Otherwise the split point is one byte *before* the first non-ASCII
///   byte (or at the very start if the first byte is non-ASCII), so that
///   the tail keeps one ASCII character in case it is a letter that a
///   combining character combines with.
#[inline(always)]
fn split_ascii_fast_path_prefix(label: &[u8]) -> (&[u8], &[u8]) {
    match label.iter().position(|b| !b.is_ascii()) {
        // All ASCII
        None => (label, &[]),
        // First is non-ASCII
        Some(0) => (&[], label),
        // Leave one ASCII character in the tail.
        Some(first_non_ascii) => label.split_at(first_non_ascii - 1),
    }
}
203 | | |
// Input known to be lower-case, but may contain non-ASCII.
//
// Returns U+FFFD if `c` is an ASCII character whose bit is set in
// `deny_list`; returns `c` unchanged otherwise. Non-ASCII characters
// have no bit in the 128-bit set (the checked shift yields `None`) and
// therefore always pass through.
#[inline(always)]
fn apply_ascii_deny_list_to_lower_cased_unicode(c: char, deny_list: u128) -> char {
    match 1u128.checked_shl(u32::from(c)) {
        Some(bit) if (deny_list & bit) != 0 => '\u{FFFD}',
        _ => c,
    }
}
217 | | |
218 | | // Input known to be ASCII, but may contain upper case ASCII. |
219 | | #[inline(always)] |
220 | 52.4k | fn apply_ascii_deny_list_to_potentially_upper_case_ascii(b: u8, deny_list: u128) -> char { |
221 | 52.4k | if (deny_list & (1u128 << b)) == 0 { |
222 | 52.3k | return char::from(b); |
223 | 84 | } |
224 | 84 | if in_inclusive_range8(b, b'A', b'Z') { |
225 | 0 | return char::from(b + 0x20); |
226 | 84 | } |
227 | 84 | '\u{FFFD}' |
228 | 52.4k | } idna::uts46::apply_ascii_deny_list_to_potentially_upper_case_ascii Line | Count | Source | 220 | 52.4k | fn apply_ascii_deny_list_to_potentially_upper_case_ascii(b: u8, deny_list: u128) -> char { | 221 | 52.4k | if (deny_list & (1u128 << b)) == 0 { | 222 | 52.3k | return char::from(b); | 223 | 84 | } | 224 | 84 | if in_inclusive_range8(b, b'A', b'Z') { | 225 | 0 | return char::from(b + 0x20); | 226 | 84 | } | 227 | 84 | '\u{FFFD}' | 228 | 52.4k | } |
Unexecuted instantiation: idna::uts46::apply_ascii_deny_list_to_potentially_upper_case_ascii Unexecuted instantiation: idna::uts46::apply_ascii_deny_list_to_potentially_upper_case_ascii |
229 | | |
/// Returns `true` if every `char` in `label` is ASCII (vacuously `true`
/// for an empty label).
#[inline(always)]
fn is_ascii(label: &[char]) -> bool {
    label.iter().all(char::is_ascii)
}
239 | | |
/// Outcome of inspecting a label's characters ahead of Punycode handling
/// (see `classify_for_punycode`).
#[derive(PartialEq, Eq, Copy, Clone)]
enum PunycodeClassification {
    /// Every character is ASCII.
    Ascii,
    /// At least one character is non-ASCII and none is U+FFFD.
    Unicode,
    /// The label contains U+FFFD REPLACEMENT CHARACTER.
    Error,
}
246 | | |
247 | | #[inline(always)] |
248 | 0 | fn classify_for_punycode(label: &[char]) -> PunycodeClassification { |
249 | 0 | let mut iter = label.iter().copied(); |
250 | | loop { |
251 | 0 | if let Some(c) = iter.next() { |
252 | 0 | if c.is_ascii() { |
253 | 0 | continue; |
254 | 0 | } |
255 | 0 | if c == '\u{FFFD}' { |
256 | 0 | return PunycodeClassification::Error; |
257 | 0 | } |
258 | 0 | for c in iter { |
259 | 0 | if c == '\u{FFFD}' { |
260 | 0 | return PunycodeClassification::Error; |
261 | 0 | } |
262 | | } |
263 | 0 | return PunycodeClassification::Unicode; |
264 | 0 | } |
265 | 0 | return PunycodeClassification::Ascii; |
266 | | } |
267 | 0 | } Unexecuted instantiation: idna::uts46::classify_for_punycode Unexecuted instantiation: idna::uts46::classify_for_punycode Unexecuted instantiation: idna::uts46::classify_for_punycode |
268 | | |
/// The ASCII deny list to be applied.
///
/// Stored as a 128-bit set: bit `b` set means that the ASCII byte `b`
/// is denied.
#[derive(PartialEq, Eq, Copy, Clone)]
#[repr(transparent)]
pub struct AsciiDenyList {
    /// One bit per ASCII code point.
    bits: u128,
}
275 | | |
276 | | impl AsciiDenyList { |
277 | | /// Computes (preferably at compile time) an ASCII deny list. |
278 | | /// |
279 | | /// Setting `deny_glyphless` to `true` denies U+0020 SPACE and below |
280 | | /// as well as U+007F DELETE for convenience without having to list |
281 | | /// these characters in the `deny_list` string. |
282 | | /// |
283 | | /// `deny_list` is the list of ASCII characters to deny. This |
284 | | /// list must not contain any of: |
285 | | /// * Letters |
286 | | /// * Digits |
287 | | /// * Hyphen |
288 | | /// * Dot (period / full-stop) |
289 | | /// * Non-ASCII |
290 | | /// |
291 | | /// # Panics |
292 | | /// |
293 | | /// If the deny list contains characters listed as prohibited above. |
294 | 0 | pub const fn new(deny_glyphless: bool, deny_list: &str) -> Self { |
295 | 0 | let mut bits = UPPER_CASE_MASK; |
296 | 0 | if deny_glyphless { |
297 | 0 | bits |= GLYPHLESS_MASK; |
298 | 0 | } |
299 | 0 | let mut i = 0; |
300 | 0 | let bytes = deny_list.as_bytes(); |
301 | 0 | while i < bytes.len() { |
302 | 0 | let b = bytes[i]; |
303 | 0 | assert!(b < 0x80, "ASCII deny list must be ASCII."); |
304 | | // assert_ne not yet available in const context. |
305 | 0 | assert!(b != b'.', "ASCII deny list must not contain the dot."); |
306 | 0 | assert!(b != b'-', "ASCII deny list must not contain the hyphen."); |
307 | 0 | assert!( |
308 | 0 | !((b >= b'0') && (b <= b'9')), |
309 | | "ASCII deny list must not contain digits." |
310 | | ); |
311 | 0 | assert!( |
312 | 0 | !((b >= b'a') && (b <= b'z')), |
313 | | "ASCII deny list must not contain letters." |
314 | | ); |
315 | 0 | assert!( |
316 | 0 | !((b >= b'A') && (b <= b'Z')), |
317 | | "ASCII deny list must not contain letters." |
318 | | ); |
319 | 0 | bits |= 1u128 << b; |
320 | 0 | i += 1; |
321 | | } |
322 | 0 | AsciiDenyList { bits } |
323 | 0 | } Unexecuted instantiation: <idna::uts46::AsciiDenyList>::new Unexecuted instantiation: <idna::uts46::AsciiDenyList>::new Unexecuted instantiation: <idna::uts46::AsciiDenyList>::new |
324 | | |
325 | | /// No ASCII deny list. This corresponds to _UseSTD3ASCIIRules=false_. |
326 | | /// |
327 | | /// Equivalent to `AsciiDenyList::new(false, "")`. |
328 | | /// |
329 | | /// Note: Not denying the space and control characters can result in |
330 | | /// strange behavior. Without a deny list provided to the UTS 46 |
331 | | /// operation, the caller is expected perform filtering afterwards, |
332 | | /// but it's more efficient to use `AsciiDenyList` than post-processing, |
333 | | /// because the internals of this crate can optimize away checks in |
334 | | /// certain cases. |
335 | | pub const EMPTY: AsciiDenyList = AsciiDenyList::new(false, ""); |
336 | | |
337 | | /// The STD3 deny list. This corresponds to _UseSTD3ASCIIRules=true_. |
338 | | /// |
339 | | /// Note that this deny list rejects the underscore, which occurs in |
340 | | /// pseudo-hosts used by various TXT record-based protocols, and also |
341 | | /// characters that may occurs in non-DNS naming, such as NetBIOS. |
342 | | pub const STD3: AsciiDenyList = AsciiDenyList { bits: ldh_mask() }; |
343 | | |
344 | | /// [Forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point) from the WHATWG URL Standard. |
345 | | /// |
346 | | /// Equivalent to `AsciiDenyList::new(true, "%#/:<>?@[\\]^|")`. |
347 | | /// |
348 | | /// Note that this deny list rejects IPv6 addresses, so (as in URL |
349 | | /// parsing) you need to check for IPv6 addresses first and not |
350 | | /// put them through UTS 46 processing. |
351 | | pub const URL: AsciiDenyList = AsciiDenyList::new(true, "%#/:<>?@[\\]^|"); |
352 | | } |
353 | | |
/// Selects how the UTS 46 _CheckHyphens_ flag is applied.
#[derive(PartialEq, Eq, Copy, Clone)]
#[non_exhaustive] // non_exhaustive in case a middle mode that prohibits only first and last position needs to be added
pub enum Hyphens {
    /// _CheckHyphens=false_: hyphens may appear in any position.
    ///
    /// The WHATWG URL Standard uses this mode for normal User Agent
    /// processing (i.e. not conformance checking).
    Allow,

    /// Hyphens are prohibited in the first and last position of a label
    /// but allowed in the third and fourth position.
    ///
    /// Note that this mode rejects real-world names, including some GitHub user pages.
    CheckFirstLast,

    /// _CheckHyphens=true_: hyphens are prohibited in the first, third,
    /// fourth, and last position of a label.
    ///
    /// Note that this mode rejects real-world names, including YouTube CDN nodes
    /// and some GitHub user pages.
    Check,
}
377 | | |
/// Selects how the UTS 46 _VerifyDNSLength_ flag is applied.
#[derive(PartialEq, Eq, Copy, Clone)]
#[non_exhaustive]
pub enum DnsLength {
    /// _VerifyDNSLength=false_: no length check. (Possibly relevant for
    /// allowing non-DNS naming systems.)
    Ignore,
    /// _VerifyDNSLength=true_, except that the trailing root label dot
    /// is allowed.
    VerifyAllowRootDot,
    /// _VerifyDNSLength=true_. (The trailing root label dot is not allowed.)
    Verify,
}
390 | | |
/// Chooses what processing does when it encounters an error.
#[derive(PartialEq, Eq, Copy, Clone)]
#[non_exhaustive]
pub enum ErrorPolicy {
    /// Return as early as possible, without producing output, in case of
    /// error.
    FailFast,
    /// In case of error, mark errors with the REPLACEMENT CHARACTER. (Output
    /// containing REPLACEMENT CHARACTERs may be shown to the user to
    /// illustrate what was wrong but must not be used for naming in a
    /// network protocol.)
    MarkErrors,
}
402 | | |
/// The success outcome of [`Uts46::process`]
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum ProcessingSuccess {
    /// No errors occurred and the output is the input itself; the caller
    /// must treat the input as the output.
    ///
    /// This asserts that the input can be safely passed to [`core::str::from_utf8_unchecked`].
    ///
    /// (Kept separate from `WroteToSink` so that `Cow` behavior can be
    /// implemented on top of [`Uts46::process`].)
    Passthrough,

    /// No errors occurred and the output is what was written to the sink;
    /// the caller must treat the sink contents as the output.
    ///
    /// (Kept separate from `Passthrough` so that `Cow` behavior can be
    /// implemented on top of [`Uts46::process`].)
    WroteToSink,
}
420 | | |
/// The failure outcome of [`Uts46::process`]
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum ProcessingError {
    /// The input was invalid under the chosen options.
    ///
    /// In case of `Operation::ToAscii`, there is no output. Otherwise, output was written to the
    /// sink and it contains at least one U+FFFD REPLACEMENT CHARACTER to denote an error.
    ValidityError,

    /// The sink emitted [`core::fmt::Error`]. Whatever was partially
    /// written to the sink must not be used.
    SinkError,
}
434 | | |
435 | | impl From<core::fmt::Error> for ProcessingError { |
436 | 0 | fn from(_: core::fmt::Error) -> Self { |
437 | 0 | ProcessingError::SinkError |
438 | 0 | } Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<core::fmt::Error>>::from Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<core::fmt::Error>>::from Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<core::fmt::Error>>::from |
439 | | } |
440 | | |
441 | | impl From<crate::punycode::PunycodeEncodeError> for ProcessingError { |
442 | 0 | fn from(_: crate::punycode::PunycodeEncodeError) -> Self { |
443 | 0 | unreachable!( |
444 | 0 | "Punycode overflows should not be possible due to PUNYCODE_ENCODE_MAX_INPUT_LENGTH" |
445 | 0 | ); Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<idna::punycode::PunycodeEncodeError>>::from Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<idna::punycode::PunycodeEncodeError>>::from Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<idna::punycode::PunycodeEncodeError>>::from |
446 | | } |
447 | | } |
448 | | |
/// Classification of a label that is already ASCII.
///
/// NOTE(review): the code that builds and consumes this enum is outside
/// this view; the variant descriptions below are derived from the names
/// only and should be confirmed against the usage site.
#[derive(Debug, Clone, Copy)]
enum AlreadyAsciiLabel<'a> {
    /// Presumably an ASCII label that may contain upper-case letters;
    /// carries the label bytes.
    MixedCaseAscii(&'a [u8]),
    /// Presumably a Punycode label that may contain upper-case letters;
    /// carries label bytes.
    MixedCasePunycode(&'a [u8]),
    /// Any other case; carries no data.
    Other,
}
455 | | |
/// Performs the _VerifyDNSLength_ check on the output of the _ToASCII_ operation.
///
/// # Arguments
///
/// * `domain_name` - The ASCII domain name to check.
/// * `allow_trailing_dot` - If `true`, a single trailing root label dot
///   is allowed and is not counted towards the length limit. If `false`,
///   a domain name ending in a dot is rejected.
///
/// # Returns
///
/// `true` if the domain name (without the permitted trailing dot) is at
/// most 253 bytes long and every dot-separated label is between 1 and 63
/// bytes long. The empty domain name is rejected, since it consists of
/// one empty label.
///
/// # Panics
///
/// Panics in debug mode if the argument isn't ASCII.
pub fn verify_dns_length(domain_name: &str, allow_trailing_dot: bool) -> bool {
    let bytes = domain_name.as_bytes();
    debug_assert!(bytes.is_ascii());
    let without_root_dot = match bytes.strip_suffix(b".") {
        // A trailing dot is present but the caller does not permit it.
        Some(_) if !allow_trailing_dot => return false,
        Some(stripped) => stripped,
        None => bytes,
    };
    without_root_dot.len() <= 253
        && without_root_dot
            .split(|b| *b == b'.')
            .all(|label| (1..=63).contains(&label.len()))
}
487 | | |
488 | | /// An implementation of UTS #46. |
489 | | pub struct Uts46 { |
490 | | data: idna_adapter::Adapter, |
491 | | } |
492 | | |
/// `Default` simply forwards to [`Uts46::new`]; only available with the
/// `compiled_data` feature.
#[cfg(feature = "compiled_data")]
impl Default for Uts46 {
    fn default() -> Self {
        Uts46::new()
    }
}
499 | | |
500 | | impl Uts46 { |
501 | | /// Constructor using data compiled into the binary. |
502 | | #[cfg(feature = "compiled_data")] |
503 | 90.0k | pub const fn new() -> Self { |
504 | 90.0k | Self { |
505 | 90.0k | data: idna_adapter::Adapter::new(), |
506 | 90.0k | } |
507 | 90.0k | } <idna::uts46::Uts46>::new Line | Count | Source | 503 | 90.0k | pub const fn new() -> Self { | 504 | 90.0k | Self { | 505 | 90.0k | data: idna_adapter::Adapter::new(), | 506 | 90.0k | } | 507 | 90.0k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::new Unexecuted instantiation: <idna::uts46::Uts46>::new |
508 | | |
509 | | // XXX Should there be an `icu_provider` feature for enabling |
510 | | // a constructor for run-time data loading? |
511 | | |
512 | | /// Performs the [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) operation |
513 | | /// from UTS #46 with the options indicated. |
514 | | /// |
515 | | /// # Arguments |
516 | | /// |
517 | | /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by |
518 | | /// this method and input that is not well-formed UTF-8 is treated as an error. If you |
519 | | /// already have a `&str`, call `.as_bytes()` on it.) |
520 | | /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46 |
521 | | /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point |
522 | | /// processing is handled via this argument. Most callers are probably the best off |
523 | | /// by using [`AsciiDenyList::URL`] here. |
524 | | /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best |
525 | | /// off by using [`Hyphens::Allow`] here. |
526 | | /// * `dns_length` - The UTS 46 _VerifyDNSLength_ flag. |
527 | 90.0k | pub fn to_ascii<'a>( |
528 | 90.0k | &self, |
529 | 90.0k | domain_name: &'a [u8], |
530 | 90.0k | ascii_deny_list: AsciiDenyList, |
531 | 90.0k | hyphens: Hyphens, |
532 | 90.0k | dns_length: DnsLength, |
533 | 90.0k | ) -> Result<Cow<'a, str>, crate::Errors> { |
534 | 90.0k | let mut s = String::new(); |
535 | 90.0k | match self.process( |
536 | 90.0k | domain_name, |
537 | 90.0k | ascii_deny_list, |
538 | 90.0k | hyphens, |
539 | 90.0k | ErrorPolicy::FailFast, |
540 | 90.0k | |_, _, _| false, <idna::uts46::Uts46>::to_ascii::{closure#0} Line | Count | Source | 540 | 5.06k | |_, _, _| false, |
Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii::{closure#0} |
541 | 90.0k | &mut s, |
542 | 90.0k | None, |
543 | 90.0k | ) { |
544 | | // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII. |
545 | | Ok(ProcessingSuccess::Passthrough) => { |
546 | 85.6k | let cow = Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) }); |
547 | 85.6k | if dns_length != DnsLength::Ignore |
548 | 0 | && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot) |
549 | | { |
550 | 0 | Err(crate::Errors::default()) |
551 | | } else { |
552 | 85.6k | Ok(cow) |
553 | | } |
554 | | } |
555 | | Ok(ProcessingSuccess::WroteToSink) => { |
556 | 0 | let cow: Cow<'_, str> = Cow::Owned(s); |
557 | 0 | if dns_length != DnsLength::Ignore |
558 | 0 | && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot) |
559 | | { |
560 | 0 | Err(crate::Errors::default()) |
561 | | } else { |
562 | 0 | Ok(cow) |
563 | | } |
564 | | } |
565 | 4.37k | Err(ProcessingError::ValidityError) => Err(crate::Errors::default()), |
566 | 0 | Err(ProcessingError::SinkError) => unreachable!(), |
567 | | } |
568 | 90.0k | } <idna::uts46::Uts46>::to_ascii Line | Count | Source | 527 | 90.0k | pub fn to_ascii<'a>( | 528 | 90.0k | &self, | 529 | 90.0k | domain_name: &'a [u8], | 530 | 90.0k | ascii_deny_list: AsciiDenyList, | 531 | 90.0k | hyphens: Hyphens, | 532 | 90.0k | dns_length: DnsLength, | 533 | 90.0k | ) -> Result<Cow<'a, str>, crate::Errors> { | 534 | 90.0k | let mut s = String::new(); | 535 | 90.0k | match self.process( | 536 | 90.0k | domain_name, | 537 | 90.0k | ascii_deny_list, | 538 | 90.0k | hyphens, | 539 | 90.0k | ErrorPolicy::FailFast, | 540 | 90.0k | |_, _, _| false, | 541 | 90.0k | &mut s, | 542 | 90.0k | None, | 543 | 90.0k | ) { | 544 | | // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII. | 545 | | Ok(ProcessingSuccess::Passthrough) => { | 546 | 85.6k | let cow = Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) }); | 547 | 85.6k | if dns_length != DnsLength::Ignore | 548 | 0 | && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot) | 549 | | { | 550 | 0 | Err(crate::Errors::default()) | 551 | | } else { | 552 | 85.6k | Ok(cow) | 553 | | } | 554 | | } | 555 | | Ok(ProcessingSuccess::WroteToSink) => { | 556 | 0 | let cow: Cow<'_, str> = Cow::Owned(s); | 557 | 0 | if dns_length != DnsLength::Ignore | 558 | 0 | && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot) | 559 | | { | 560 | 0 | Err(crate::Errors::default()) | 561 | | } else { | 562 | 0 | Ok(cow) | 563 | | } | 564 | | } | 565 | 4.37k | Err(ProcessingError::ValidityError) => Err(crate::Errors::default()), | 566 | 0 | Err(ProcessingError::SinkError) => unreachable!(), | 567 | | } | 568 | 90.0k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii |
569 | | |
570 | | /// Performs the [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode) operation |
571 | | /// from UTS #46 according to the options given. When there |
572 | | /// are errors, there is still output, which may be rendered to the user, even though |
573 | | /// the output must not be used in networking protocols. Errors are denoted |
574 | | /// by U+FFFD REPLACEMENT CHARACTERs in the output. (That is, if the second item of the |
575 | | /// return tuple is `Err`, the first item of the return tuple is guaranteed to contain |
576 | | /// at least one U+FFFD.) |
577 | | /// |
578 | | /// Most applications probably shouldn't use this method and should be using |
579 | | /// [`Uts46::to_user_interface`] instead. |
580 | | /// |
581 | | /// # Arguments |
582 | | /// |
583 | | /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by |
584 | | /// this method and input that is not well-formed UTF-8 is treated as an error. If you |
585 | | /// already have a `&str`, call `.as_bytes()` on it.) |
586 | | /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46 |
587 | | /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point |
588 | | /// processing is handled via this argument. Most callers are probably the best off |
589 | | /// by using [`AsciiDenyList::URL`] here. |
590 | | /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best |
591 | | /// off by using [`Hyphens::Allow`] here. |
592 | 0 | pub fn to_unicode<'a>( |
593 | 0 | &self, |
594 | 0 | domain_name: &'a [u8], |
595 | 0 | ascii_deny_list: AsciiDenyList, |
596 | 0 | hyphens: Hyphens, |
597 | 0 | ) -> (Cow<'a, str>, Result<(), crate::Errors>) { |
598 | 0 | self.to_user_interface(domain_name, ascii_deny_list, hyphens, |_, _, _| true) Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode::{closure#0} |
599 | 0 | } Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode |
600 | | |
601 | | /// Performs the [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode) operation |
602 | | /// from UTS #46 according to options given with some |
603 | | /// error-free Unicode labels output according to |
604 | | /// [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) instead as decided by |
605 | | /// application policy implemented via the `output_as_unicode` closure. The purpose |
606 | | /// is to convert user-visible domains to the Unicode form in general but to render |
607 | | /// potentially misleading labels as Punycode. |
608 | | /// |
609 | | /// This is an imperfect security mechanism, because [the Punycode form itself may |
610 | | /// resemble a user-recognizable name](https://www.unicode.org/reports/tr36/#TablePunycodeSpoofing). |
611 | | /// However, since this mechanism is common practice, this API provides support for |
612 | | /// the mechanism. |
613 | | /// |
614 | | /// ASCII labels always pass through as ASCII and labels with errors always pass through |
615 | | /// as Unicode. For non-erroneous labels that contain at least one non-ASCII character |
616 | | /// (implies non-empty), `output_as_unicode` is called with the Unicode form of the label, |
617 | | /// the TLD (potentially empty), and a flag indicating whether the domain name as a whole |
618 | | /// is a bidi domain name. If the return value is `true`, the label passes through as |
619 | | /// Unicode. If the return value is `false`, the label is converted to Punycode. |
620 | | /// |
621 | | /// When there are errors, there is still output, which may be rendered to the user, even though |
622 | | /// the output must not be used in networking protocols. Errors are denoted by |
623 | | /// U+FFFD REPLACEMENT CHARACTERs in the output. (That is, if the second item |
624 | | /// of the return tuple is `Err`, the first item of the return tuple is guaranteed to contain |
625 | | /// at least one U+FFFD.) Labels that contain errors are not converted to Punycode. |
626 | | /// |
627 | | /// # Arguments |
628 | | /// |
629 | | /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by |
630 | | /// this method and input that is not well-formed UTF-8 is treated as an error. If you |
631 | | /// already have a `&str`, call `.as_bytes()` on it.) |
632 | | /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46 |
633 | | /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point |
634 | | /// processing is handled via this argument. Most callers are probably the best off |
635 | | /// by using [`AsciiDenyList::URL`] here. |
636 | | /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best |
637 | | /// off by using [`Hyphens::Allow`] here. |
638 | | /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode |
639 | | /// (as opposed to Punycode). The first argument is the label for which a decision is |
640 | | /// needed (always non-empty slice). The second argument is the TLD (potentially empty). |
641 | | /// The third argument is `true` iff the domain name as a whole is a bidi domain name. |
642 | | /// Only non-erroneous labels that contain at least one non-ASCII character are passed |
643 | | /// to the closure as the first argument. The second and third argument values are |
644 | | /// guaranteed to remain the same during a single call to `process`, and the closure |
645 | | /// may cache computations derived from the second and third argument (hence the |
646 | | /// `FnMut` type). |
647 | 0 | pub fn to_user_interface<'a, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>( |
648 | 0 | &self, |
649 | 0 | domain_name: &'a [u8], |
650 | 0 | ascii_deny_list: AsciiDenyList, |
651 | 0 | hyphens: Hyphens, |
652 | 0 | output_as_unicode: OutputUnicode, |
653 | 0 | ) -> (Cow<'a, str>, Result<(), crate::Errors>) { |
654 | 0 | let mut s = String::new(); |
655 | 0 | match self.process( |
656 | 0 | domain_name, |
657 | 0 | ascii_deny_list, |
658 | 0 | hyphens, |
659 | 0 | ErrorPolicy::MarkErrors, |
660 | 0 | output_as_unicode, |
661 | 0 | &mut s, |
662 | 0 | None, |
663 | 0 | ) { |
664 | | // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII. |
665 | 0 | Ok(ProcessingSuccess::Passthrough) => ( |
666 | 0 | Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) }), |
667 | 0 | Ok(()), |
668 | 0 | ), |
669 | 0 | Ok(ProcessingSuccess::WroteToSink) => (Cow::Owned(s), Ok(())), |
670 | 0 | Err(ProcessingError::ValidityError) => (Cow::Owned(s), Err(crate::Errors::default())), |
671 | 0 | Err(ProcessingError::SinkError) => unreachable!(), |
672 | | } |
673 | 0 | } Unexecuted instantiation: <idna::uts46::Uts46>::to_user_interface::<<idna::uts46::Uts46>::to_unicode::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::to_user_interface::<<idna::uts46::Uts46>::to_unicode::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::to_user_interface::<<idna::uts46::Uts46>::to_unicode::{closure#0}> |
674 | | |
675 | | /// The lower-level function that [`Uts46::to_ascii`], [`Uts46::to_unicode`], and |
676 | | /// [`Uts46::to_user_interface`] are built on to allow support for output types other |
677 | | /// than `Cow<'a, str>` (e.g. string types in a non-Rust programming language). |
678 | | /// |
679 | | /// # Arguments |
680 | | /// |
681 | | /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by |
682 | | /// this method and input that is not well-formed UTF-8 is treated as an error. If you |
683 | | /// already have a `&str`, call `.as_bytes()` on it.) |
684 | | /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46 |
685 | | /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point |
686 | | /// processing is handled via this argument. Most callers are probably the best off |
687 | | /// by using [`AsciiDenyList::URL`] here. |
688 | | /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best |
689 | | /// off by using [`Hyphens::Allow`] here. |
690 | | /// * `error_policy` - Whether to fail fast or to produce output that may be rendered |
691 | | /// for the user to examine in case of errors. |
692 | | /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode |
693 | | /// (as opposed to Punycode). The first argument is the label for which a decision is |
694 | | /// needed (always non-empty slice). The second argument is the TLD (potentially empty). |
695 | | /// The third argument is `true` iff the domain name as a whole is a bidi domain name. |
696 | | /// Only non-erroneous labels that contain at least one non-ASCII character are passed |
697 | | /// to the closure as the first argument. The second and third argument values are |
698 | | /// guaranteed to remain the same during a single call to `process`, and the closure |
699 | | /// may cache computations derived from the second and third argument (hence the |
700 | | /// `FnMut` type). To perform the _ToASCII_ operation, `|_, _, _| false` must be |
701 | | /// passed as the closure. To perform the _ToUnicode_ operation, `|_, _, _| true` must |
702 | | /// be passed as the closure. A more complex closure may be used to prepare a domain |
703 | | /// name for display in a user interface so that labels are converted to the Unicode |
704 | | /// form in general but potentially misleading labels are converted to the Punycode |
705 | | /// form. |
706 | | /// * `sink` - The object that receives the output (in the non-passthrough case). |
707 | | /// * `ascii_sink` - A second sink that receives the _ToASCII_ form only if there |
708 | | /// were no errors and `sink` received at least one character of non-ASCII output. |
709 | | /// The purpose of this argument is to enable a user interface display form of the |
710 | | /// domain and the _ToASCII_ form of the domain to be computed efficiently together. |
711 | | /// This argument is useless when `output_as_unicode` always returns `false`, in |
712 | | /// which case the _ToASCII_ form ends up in `sink` already. If `ascii_sink` receives |
713 | | /// no output and the return value is `Ok(ProcessingSuccess::WroteToSink)`, use the |
714 | | /// output received by `sink` also as the _ToASCII_ result. |
715 | | /// |
716 | | /// # Return value |
717 | | /// |
718 | | /// * `Ok(ProcessingSuccess::Passthrough)` - The caller must treat |
719 | | /// `unsafe { core::str::from_utf8_unchecked(domain_name) }` as the output. (This |
720 | | /// return value asserts that calling `core::str::from_utf8_unchecked(domain_name)` |
721 | | /// is safe.) |
722 | | /// * `Ok(ProcessingSuccess::WroteToSink)` - The caller must treat what was written |
723 | | /// to `sink` as the output. If another sink was passed as `ascii_sink` but it did |
724 | | /// not receive output, the caller must treat what was written to `sink` also as |
725 | | /// the _ToASCII_ output. Otherwise, if `ascii_sink` received output, the caller |
726 | | /// must treat what was written to `ascii_sink` as the _ToASCII_ output. |
727 | | /// * `Err(ProcessingError::ValidityError)` - The input was in error and must |
728 | | /// not be used for DNS lookup or otherwise in a network protocol. If `error_policy` |
729 | | /// was `ErrorPolicy::MarkErrors`, the output written to `sink` may be displayed |
730 | | /// to the user as an illustration of where the error was or the errors were. |
731 | | /// * `Err(ProcessingError::SinkError)` - Either `sink` or `ascii_sink` returned |
732 | | /// [`core::fmt::Error`]. The partial output written to `sink` or `ascii_sink` must not |
733 | | /// be used. If `W` never returns [`core::fmt::Error`], this method never returns |
734 | | /// `Err(ProcessingError::SinkError)`. |
735 | | /// |
736 | | /// # Safety-usable invariant |
737 | | /// |
738 | | /// If the return value is `Ok(ProcessingSuccess::Passthrough)`, `domain_name` is |
739 | | /// ASCII and `core::str::from_utf8_unchecked(domain_name)` is safe. (Note: |
740 | | /// Other return values do _not_ imply that `domain_name` wasn't ASCII!) |
741 | | /// |
742 | | /// # Security considerations |
743 | | /// |
744 | | /// Showing labels whose Unicode form might mislead the user as Punycode instead is |
745 | | /// an imperfect security mechanism, because [the Punycode form itself may resemble |
746 | | /// a user-recognizable name](https://www.unicode.org/reports/tr36/#TablePunycodeSpoofing). |
747 | | /// However, since this mechanism is common practice, this API provides support for |
748 | | /// the mechanism. |
749 | | /// |
750 | | /// Punycode processing is quadratic, so to avoid denial of service, this method imposes |
751 | | /// length limits on Punycode treating especially long inputs as being in error. These |
752 | | /// limits are well higher than the DNS length limits and are not more restrictive than |
753 | | /// the limits imposed by ICU4C. |
754 | | #[allow(clippy::too_many_arguments)] |
755 | 90.0k | pub fn process<W: Write + ?Sized, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>( |
756 | 90.0k | &self, |
757 | 90.0k | domain_name: &[u8], |
758 | 90.0k | ascii_deny_list: AsciiDenyList, |
759 | 90.0k | hyphens: Hyphens, |
760 | 90.0k | error_policy: ErrorPolicy, |
761 | 90.0k | mut output_as_unicode: OutputUnicode, |
762 | 90.0k | sink: &mut W, |
763 | 90.0k | ascii_sink: Option<&mut W>, |
764 | 90.0k | ) -> Result<ProcessingSuccess, ProcessingError> { |
765 | 90.0k | let fail_fast = error_policy == ErrorPolicy::FailFast; |
766 | 90.0k | let mut domain_buffer = SmallVec::<[char; 253]>::new(); |
767 | 90.0k | let mut already_punycode = SmallVec::<[AlreadyAsciiLabel; 8]>::new(); |
768 | 90.0k | // `process_inner` could be pasted inline here, but it's out of line in order |
769 | 90.0k | // to avoid duplicating that code when monomorphizing over `W` and `OutputUnicode`. |
770 | 90.0k | let (passthrough_up_to, is_bidi, had_errors) = self.process_inner( |
771 | 90.0k | domain_name, |
772 | 90.0k | ascii_deny_list, |
773 | 90.0k | hyphens, |
774 | 90.0k | fail_fast, |
775 | 90.0k | &mut domain_buffer, |
776 | 90.0k | &mut already_punycode, |
777 | 90.0k | ); |
778 | 90.0k | if passthrough_up_to == domain_name.len() { |
779 | 73.7k | debug_assert!(!had_errors); |
780 | 73.7k | return Ok(ProcessingSuccess::Passthrough); |
781 | 16.3k | } |
782 | 16.3k | // Checked only after passthrough as a micro optimization. |
783 | 16.3k | if fail_fast && had_errors { |
784 | 4.37k | return Err(ProcessingError::ValidityError); |
785 | 11.9k | } |
786 | 11.9k | debug_assert_eq!(had_errors, domain_buffer.contains(&'\u{FFFD}')); |
787 | 11.9k | let without_dot = if let Some(without_dot) = domain_buffer.strip_suffix(&['.']) { |
788 | 0 | without_dot |
789 | | } else { |
790 | 11.9k | &domain_buffer[..] |
791 | | }; |
792 | | // unwrap is OK, because we always have at least one label |
793 | 108k | let tld = without_dot.rsplit(|c| *c == '.').next().unwrap(); Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#0} <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#0} Line | Count | Source | 793 | 108k | let tld = without_dot.rsplit(|c| *c == '.').next().unwrap(); |
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#0} |
794 | 11.9k | let mut had_unicode_output = false; |
795 | 11.9k | let mut seen_label = false; |
796 | 11.9k | let mut already_punycode_iter = already_punycode.iter(); |
797 | 11.9k | let mut passthrough_up_to_extended = passthrough_up_to; |
798 | 11.9k | let mut flushed_prefix = false; |
799 | 108k | for label in domain_buffer.split(|c| *c == '.') { Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#1} <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#1} Line | Count | Source | 799 | 108k | for label in domain_buffer.split(|c| *c == '.') { |
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#1} |
800 | | // Unwrap is OK, because there are supposed to be as many items in |
801 | | // `already_punycode` as there are labels. |
802 | 11.9k | let input_punycode = *already_punycode_iter.next().unwrap(); |
803 | 11.9k | if seen_label { |
804 | 0 | if flushed_prefix { |
805 | 0 | sink.write_char('.')?; |
806 | | } else { |
807 | 0 | debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.'); |
808 | 0 | passthrough_up_to_extended += 1; |
809 | 0 | if passthrough_up_to_extended == domain_name.len() { |
810 | 0 | debug_assert!(!had_errors); |
811 | 0 | return Ok(ProcessingSuccess::Passthrough); |
812 | 0 | } |
813 | | } |
814 | 11.9k | } |
815 | 11.9k | seen_label = true; |
816 | | |
817 | 11.9k | if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode { |
818 | 0 | if let Some(first_upper_case) = |
819 | 52.2k | mixed_case.iter().position(|c| c.is_ascii_uppercase()) Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#2} <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#2} Line | Count | Source | 819 | 52.2k | mixed_case.iter().position(|c| c.is_ascii_uppercase()) |
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#2} |
820 | | { |
821 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); |
822 | 0 | let slice_to_write = if flushed_prefix { |
823 | 0 | head |
824 | | } else { |
825 | 0 | flushed_prefix = true; |
826 | 0 | passthrough_up_to_extended += head.len(); |
827 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); |
828 | 0 | &domain_name[..passthrough_up_to_extended] |
829 | | }; |
830 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. |
831 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?; |
832 | 0 | for c in tail.iter() { |
833 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; |
834 | | } |
835 | 6.89k | } else if flushed_prefix { |
836 | | // SAFETY: `mixed_case` is known to be ASCII. |
837 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; |
838 | | } else { |
839 | 6.89k | passthrough_up_to_extended += mixed_case.len(); |
840 | 6.89k | if passthrough_up_to_extended == domain_name.len() { |
841 | 6.89k | debug_assert!(!had_errors); |
842 | 6.89k | return Ok(ProcessingSuccess::Passthrough); |
843 | 0 | } |
844 | | } |
845 | 0 | continue; |
846 | 5.06k | } |
847 | | |
848 | 5.06k | let potentially_punycode = if fail_fast { |
849 | 5.06k | debug_assert!(classify_for_punycode(label) != PunycodeClassification::Error); |
850 | 5.06k | !is_ascii(label) |
851 | | } else { |
852 | 0 | classify_for_punycode(label) == PunycodeClassification::Unicode |
853 | | }; |
854 | 5.06k | let passthrough = if potentially_punycode { |
855 | 5.06k | let unicode = output_as_unicode(label, tld, is_bidi); |
856 | 5.06k | had_unicode_output |= unicode; |
857 | 5.06k | unicode |
858 | | } else { |
859 | 0 | true |
860 | | }; |
861 | 5.06k | if passthrough { |
862 | 0 | if !flushed_prefix { |
863 | 0 | flushed_prefix = true; |
864 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. |
865 | 0 | sink.write_str(unsafe { |
866 | 0 | core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended]) |
867 | 0 | })?; |
868 | 0 | } |
869 | 0 | for c in label.iter().copied() { |
870 | 0 | sink.write_char(c)?; |
871 | | } |
872 | 5.06k | } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode { |
873 | 0 | if let Some(first_upper_case) = |
874 | 92.5k | mixed_case.iter().position(|c| c.is_ascii_uppercase()) Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#3} <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#3} Line | Count | Source | 874 | 92.5k | mixed_case.iter().position(|c| c.is_ascii_uppercase()) |
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#3} |
875 | | { |
876 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); |
877 | 0 | let slice_to_write = if flushed_prefix { |
878 | 0 | head |
879 | | } else { |
880 | 0 | flushed_prefix = true; |
881 | 0 | passthrough_up_to_extended += head.len(); |
882 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); |
883 | 0 | &domain_name[..passthrough_up_to_extended] |
884 | | }; |
885 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. |
886 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?; |
887 | 0 | for c in tail.iter() { |
888 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; |
889 | | } |
890 | 5.06k | } else if flushed_prefix { |
891 | | // SAFETY: `mixed_case` is known to be ASCII. |
892 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; |
893 | | } else { |
894 | 5.06k | passthrough_up_to_extended += mixed_case.len(); |
895 | 5.06k | if passthrough_up_to_extended == domain_name.len() { |
896 | 5.06k | debug_assert!(!had_errors); |
897 | 5.06k | return Ok(ProcessingSuccess::Passthrough); |
898 | 0 | } |
899 | | } |
900 | | } else { |
901 | 0 | if !flushed_prefix { |
902 | 0 | flushed_prefix = true; |
903 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. |
904 | 0 | sink.write_str(unsafe { |
905 | 0 | core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended]) |
906 | 0 | })?; |
907 | 0 | } |
908 | 0 | write_punycode_label(label, sink)?; |
909 | | } |
910 | | } |
911 | | |
912 | 0 | if had_errors { |
913 | 0 | return Err(ProcessingError::ValidityError); |
914 | 0 | } |
915 | 0 |
|
916 | 0 | if had_unicode_output { |
917 | 0 | if let Some(sink) = ascii_sink { |
918 | 0 | let mut seen_label = false; |
919 | 0 | let mut already_punycode_iter = already_punycode.iter(); |
920 | 0 | let mut passthrough_up_to_extended = passthrough_up_to; |
921 | 0 | let mut flushed_prefix = false; |
922 | 0 | for label in domain_buffer.split(|c| *c == '.') { Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#4} |
923 | | // Unwrap is OK, because there are supposed to be as many items in |
924 | | // `already_punycode` as there are labels. |
925 | 0 | let input_punycode = *already_punycode_iter.next().unwrap(); |
926 | 0 | if seen_label { |
927 | 0 | if flushed_prefix { |
928 | 0 | sink.write_char('.')?; |
929 | | } else { |
930 | 0 | debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.'); |
931 | 0 | passthrough_up_to_extended += 1; |
932 | | } |
933 | 0 | } |
934 | 0 | seen_label = true; |
935 | | |
936 | 0 | if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode { |
937 | 0 | if let Some(first_upper_case) = |
938 | 0 | mixed_case.iter().position(|c| c.is_ascii_uppercase()) Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#5} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#5} |
939 | | { |
940 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); |
941 | 0 | let slice_to_write = if flushed_prefix { |
942 | 0 | head |
943 | | } else { |
944 | 0 | flushed_prefix = true; |
945 | 0 | passthrough_up_to_extended += head.len(); |
946 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); |
947 | 0 | &domain_name[..passthrough_up_to_extended] |
948 | | }; |
949 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. |
950 | 0 | sink.write_str(unsafe { |
951 | 0 | core::str::from_utf8_unchecked(slice_to_write) |
952 | 0 | })?; |
953 | 0 | for c in tail.iter() { |
954 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; |
955 | | } |
956 | 0 | } else if flushed_prefix { |
957 | | // SAFETY: `mixed_case` is known to be ASCII. |
958 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; |
959 | 0 | } else { |
960 | 0 | passthrough_up_to_extended += mixed_case.len(); |
961 | 0 | } |
962 | 0 | continue; |
963 | 0 | } |
964 | 0 |
|
965 | 0 | if is_ascii(label) { |
966 | 0 | if !flushed_prefix { |
967 | 0 | flushed_prefix = true; |
968 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. |
969 | 0 | sink.write_str(unsafe { |
970 | 0 | core::str::from_utf8_unchecked( |
971 | 0 | &domain_name[..passthrough_up_to_extended], |
972 | 0 | ) |
973 | 0 | })?; |
974 | 0 | } |
975 | 0 | for c in label.iter().copied() { |
976 | 0 | sink.write_char(c)?; |
977 | | } |
978 | 0 | } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode |
979 | | { |
980 | 0 | if let Some(first_upper_case) = |
981 | 0 | mixed_case.iter().position(|c| c.is_ascii_uppercase()) Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#6} Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#6} |
982 | | { |
983 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); |
984 | 0 | let slice_to_write = if flushed_prefix { |
985 | 0 | head |
986 | | } else { |
987 | 0 | flushed_prefix = true; |
988 | 0 | passthrough_up_to_extended += head.len(); |
989 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); |
990 | 0 | &domain_name[..passthrough_up_to_extended] |
991 | | }; |
992 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. |
993 | 0 | sink.write_str(unsafe { |
994 | 0 | core::str::from_utf8_unchecked(slice_to_write) |
995 | 0 | })?; |
996 | 0 | for c in tail.iter() { |
997 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; |
998 | | } |
999 | 0 | } else if flushed_prefix { |
1000 | | // SAFETY: `mixed_case` is known to be ASCII. |
1001 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; |
1002 | 0 | } else { |
1003 | 0 | passthrough_up_to_extended += mixed_case.len(); |
1004 | 0 | } |
1005 | | } else { |
1006 | 0 | if !flushed_prefix { |
1007 | 0 | flushed_prefix = true; |
1008 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. |
1009 | 0 | sink.write_str(unsafe { |
1010 | 0 | core::str::from_utf8_unchecked( |
1011 | 0 | &domain_name[..passthrough_up_to_extended], |
1012 | 0 | ) |
1013 | 0 | })?; |
1014 | 0 | } |
1015 | 0 | write_punycode_label(label, sink)?; |
1016 | | } |
1017 | | } |
1018 | 0 | if !flushed_prefix { |
1019 | | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. |
1020 | 0 | sink.write_str(unsafe { |
1021 | 0 | core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended]) |
1022 | 0 | })?; |
1023 | 0 | } |
1024 | 0 | } |
1025 | 0 | } |
1026 | 0 | Ok(ProcessingSuccess::WroteToSink) |
1027 | 90.0k | } Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}> <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}> Line | Count | Source | 755 | 90.0k | pub fn process<W: Write + ?Sized, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>( | 756 | 90.0k | &self, | 757 | 90.0k | domain_name: &[u8], | 758 | 90.0k | ascii_deny_list: AsciiDenyList, | 759 | 90.0k | hyphens: Hyphens, | 760 | 90.0k | error_policy: ErrorPolicy, | 761 | 90.0k | mut output_as_unicode: OutputUnicode, | 762 | 90.0k | sink: &mut W, | 763 | 90.0k | ascii_sink: Option<&mut W>, | 764 | 90.0k | ) -> Result<ProcessingSuccess, ProcessingError> { | 765 | 90.0k | let fail_fast = error_policy == ErrorPolicy::FailFast; | 766 | 90.0k | let mut domain_buffer = SmallVec::<[char; 253]>::new(); | 767 | 90.0k | let mut already_punycode = SmallVec::<[AlreadyAsciiLabel; 8]>::new(); | 768 | 90.0k | // `process_inner` could be pasted inline here, but it's out of line in order | 769 | 90.0k | // to avoid duplicating that code when monomorphizing over `W` and `OutputUnicode`. | 770 | 90.0k | let (passthrough_up_to, is_bidi, had_errors) = self.process_inner( | 771 | 90.0k | domain_name, | 772 | 90.0k | ascii_deny_list, | 773 | 90.0k | hyphens, | 774 | 90.0k | fail_fast, | 775 | 90.0k | &mut domain_buffer, | 776 | 90.0k | &mut already_punycode, | 777 | 90.0k | ); | 778 | 90.0k | if passthrough_up_to == domain_name.len() { | 779 | 73.7k | debug_assert!(!had_errors); | 780 | 73.7k | return Ok(ProcessingSuccess::Passthrough); | 781 | 16.3k | } | 782 | 16.3k | // Checked only after passthrough as a micro optimization. 
| 783 | 16.3k | if fail_fast && had_errors { | 784 | 4.37k | return Err(ProcessingError::ValidityError); | 785 | 11.9k | } | 786 | 11.9k | debug_assert_eq!(had_errors, domain_buffer.contains(&'\u{FFFD}')); | 787 | 11.9k | let without_dot = if let Some(without_dot) = domain_buffer.strip_suffix(&['.']) { | 788 | 0 | without_dot | 789 | | } else { | 790 | 11.9k | &domain_buffer[..] | 791 | | }; | 792 | | // unwrap is OK, because we always have at least one label | 793 | 11.9k | let tld = without_dot.rsplit(|c| *c == '.').next().unwrap(); | 794 | 11.9k | let mut had_unicode_output = false; | 795 | 11.9k | let mut seen_label = false; | 796 | 11.9k | let mut already_punycode_iter = already_punycode.iter(); | 797 | 11.9k | let mut passthrough_up_to_extended = passthrough_up_to; | 798 | 11.9k | let mut flushed_prefix = false; | 799 | 11.9k | for label in domain_buffer.split(|c| *c == '.') { | 800 | | // Unwrap is OK, because there are supposed to be as many items in | 801 | | // `already_punycode` as there are labels. 
| 802 | 11.9k | let input_punycode = *already_punycode_iter.next().unwrap(); | 803 | 11.9k | if seen_label { | 804 | 0 | if flushed_prefix { | 805 | 0 | sink.write_char('.')?; | 806 | | } else { | 807 | 0 | debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.'); | 808 | 0 | passthrough_up_to_extended += 1; | 809 | 0 | if passthrough_up_to_extended == domain_name.len() { | 810 | 0 | debug_assert!(!had_errors); | 811 | 0 | return Ok(ProcessingSuccess::Passthrough); | 812 | 0 | } | 813 | | } | 814 | 11.9k | } | 815 | 11.9k | seen_label = true; | 816 | | | 817 | 11.9k | if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode { | 818 | 0 | if let Some(first_upper_case) = | 819 | 6.89k | mixed_case.iter().position(|c| c.is_ascii_uppercase()) | 820 | | { | 821 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); | 822 | 0 | let slice_to_write = if flushed_prefix { | 823 | 0 | head | 824 | | } else { | 825 | 0 | flushed_prefix = true; | 826 | 0 | passthrough_up_to_extended += head.len(); | 827 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); | 828 | 0 | &domain_name[..passthrough_up_to_extended] | 829 | | }; | 830 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. | 831 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?; | 832 | 0 | for c in tail.iter() { | 833 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; | 834 | | } | 835 | 6.89k | } else if flushed_prefix { | 836 | | // SAFETY: `mixed_case` is known to be ASCII. 
| 837 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; | 838 | | } else { | 839 | 6.89k | passthrough_up_to_extended += mixed_case.len(); | 840 | 6.89k | if passthrough_up_to_extended == domain_name.len() { | 841 | 6.89k | debug_assert!(!had_errors); | 842 | 6.89k | return Ok(ProcessingSuccess::Passthrough); | 843 | 0 | } | 844 | | } | 845 | 0 | continue; | 846 | 5.06k | } | 847 | | | 848 | 5.06k | let potentially_punycode = if fail_fast { | 849 | 5.06k | debug_assert!(classify_for_punycode(label) != PunycodeClassification::Error); | 850 | 5.06k | !is_ascii(label) | 851 | | } else { | 852 | 0 | classify_for_punycode(label) == PunycodeClassification::Unicode | 853 | | }; | 854 | 5.06k | let passthrough = if potentially_punycode { | 855 | 5.06k | let unicode = output_as_unicode(label, tld, is_bidi); | 856 | 5.06k | had_unicode_output |= unicode; | 857 | 5.06k | unicode | 858 | | } else { | 859 | 0 | true | 860 | | }; | 861 | 5.06k | if passthrough { | 862 | 0 | if !flushed_prefix { | 863 | 0 | flushed_prefix = true; | 864 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. 
| 865 | 0 | sink.write_str(unsafe { | 866 | 0 | core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended]) | 867 | 0 | })?; | 868 | 0 | } | 869 | 0 | for c in label.iter().copied() { | 870 | 0 | sink.write_char(c)?; | 871 | | } | 872 | 5.06k | } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode { | 873 | 0 | if let Some(first_upper_case) = | 874 | 5.06k | mixed_case.iter().position(|c| c.is_ascii_uppercase()) | 875 | | { | 876 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); | 877 | 0 | let slice_to_write = if flushed_prefix { | 878 | 0 | head | 879 | | } else { | 880 | 0 | flushed_prefix = true; | 881 | 0 | passthrough_up_to_extended += head.len(); | 882 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); | 883 | 0 | &domain_name[..passthrough_up_to_extended] | 884 | | }; | 885 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. | 886 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?; | 887 | 0 | for c in tail.iter() { | 888 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; | 889 | | } | 890 | 5.06k | } else if flushed_prefix { | 891 | | // SAFETY: `mixed_case` is known to be ASCII. | 892 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; | 893 | | } else { | 894 | 5.06k | passthrough_up_to_extended += mixed_case.len(); | 895 | 5.06k | if passthrough_up_to_extended == domain_name.len() { | 896 | 5.06k | debug_assert!(!had_errors); | 897 | 5.06k | return Ok(ProcessingSuccess::Passthrough); | 898 | 0 | } | 899 | | } | 900 | | } else { | 901 | 0 | if !flushed_prefix { | 902 | 0 | flushed_prefix = true; | 903 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. 
| 904 | 0 | sink.write_str(unsafe { | 905 | 0 | core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended]) | 906 | 0 | })?; | 907 | 0 | } | 908 | 0 | write_punycode_label(label, sink)?; | 909 | | } | 910 | | } | 911 | | | 912 | 0 | if had_errors { | 913 | 0 | return Err(ProcessingError::ValidityError); | 914 | 0 | } | 915 | 0 |
| 916 | 0 | if had_unicode_output { | 917 | 0 | if let Some(sink) = ascii_sink { | 918 | 0 | let mut seen_label = false; | 919 | 0 | let mut already_punycode_iter = already_punycode.iter(); | 920 | 0 | let mut passthrough_up_to_extended = passthrough_up_to; | 921 | 0 | let mut flushed_prefix = false; | 922 | 0 | for label in domain_buffer.split(|c| *c == '.') { | 923 | | // Unwrap is OK, because there are supposed to be as many items in | 924 | | // `already_punycode` as there are labels. | 925 | 0 | let input_punycode = *already_punycode_iter.next().unwrap(); | 926 | 0 | if seen_label { | 927 | 0 | if flushed_prefix { | 928 | 0 | sink.write_char('.')?; | 929 | | } else { | 930 | 0 | debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.'); | 931 | 0 | passthrough_up_to_extended += 1; | 932 | | } | 933 | 0 | } | 934 | 0 | seen_label = true; | 935 | | | 936 | 0 | if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode { | 937 | 0 | if let Some(first_upper_case) = | 938 | 0 | mixed_case.iter().position(|c| c.is_ascii_uppercase()) | 939 | | { | 940 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); | 941 | 0 | let slice_to_write = if flushed_prefix { | 942 | 0 | head | 943 | | } else { | 944 | 0 | flushed_prefix = true; | 945 | 0 | passthrough_up_to_extended += head.len(); | 946 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); | 947 | 0 | &domain_name[..passthrough_up_to_extended] | 948 | | }; | 949 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. | 950 | 0 | sink.write_str(unsafe { | 951 | 0 | core::str::from_utf8_unchecked(slice_to_write) | 952 | 0 | })?; | 953 | 0 | for c in tail.iter() { | 954 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; | 955 | | } | 956 | 0 | } else if flushed_prefix { | 957 | | // SAFETY: `mixed_case` is known to be ASCII. 
| 958 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; | 959 | 0 | } else { | 960 | 0 | passthrough_up_to_extended += mixed_case.len(); | 961 | 0 | } | 962 | 0 | continue; | 963 | 0 | } | 964 | 0 |
| 965 | 0 | if is_ascii(label) { | 966 | 0 | if !flushed_prefix { | 967 | 0 | flushed_prefix = true; | 968 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. | 969 | 0 | sink.write_str(unsafe { | 970 | 0 | core::str::from_utf8_unchecked( | 971 | 0 | &domain_name[..passthrough_up_to_extended], | 972 | 0 | ) | 973 | 0 | })?; | 974 | 0 | } | 975 | 0 | for c in label.iter().copied() { | 976 | 0 | sink.write_char(c)?; | 977 | | } | 978 | 0 | } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode | 979 | | { | 980 | 0 | if let Some(first_upper_case) = | 981 | 0 | mixed_case.iter().position(|c| c.is_ascii_uppercase()) | 982 | | { | 983 | 0 | let (head, tail) = mixed_case.split_at(first_upper_case); | 984 | 0 | let slice_to_write = if flushed_prefix { | 985 | 0 | head | 986 | | } else { | 987 | 0 | flushed_prefix = true; | 988 | 0 | passthrough_up_to_extended += head.len(); | 989 | 0 | debug_assert_ne!(passthrough_up_to_extended, domain_name.len()); | 990 | 0 | &domain_name[..passthrough_up_to_extended] | 991 | | }; | 992 | | // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII. | 993 | 0 | sink.write_str(unsafe { | 994 | 0 | core::str::from_utf8_unchecked(slice_to_write) | 995 | 0 | })?; | 996 | 0 | for c in tail.iter() { | 997 | 0 | sink.write_char(char::from(c.to_ascii_lowercase()))?; | 998 | | } | 999 | 0 | } else if flushed_prefix { | 1000 | | // SAFETY: `mixed_case` is known to be ASCII. | 1001 | 0 | sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?; | 1002 | 0 | } else { | 1003 | 0 | passthrough_up_to_extended += mixed_case.len(); | 1004 | 0 | } | 1005 | | } else { | 1006 | 0 | if !flushed_prefix { | 1007 | 0 | flushed_prefix = true; | 1008 | 0 | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. 
| 1009 | 0 | sink.write_str(unsafe { | 1010 | 0 | core::str::from_utf8_unchecked( | 1011 | 0 | &domain_name[..passthrough_up_to_extended], | 1012 | 0 | ) | 1013 | 0 | })?; | 1014 | 0 | } | 1015 | 0 | write_punycode_label(label, sink)?; | 1016 | | } | 1017 | | } | 1018 | 0 | if !flushed_prefix { | 1019 | | // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII. | 1020 | 0 | sink.write_str(unsafe { | 1021 | 0 | core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended]) | 1022 | 0 | })?; | 1023 | 0 | } | 1024 | 0 | } | 1025 | 0 | } | 1026 | 0 | Ok(ProcessingSuccess::WroteToSink) | 1027 | 90.0k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}> Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}> |
1028 | | |
1029 | | /// The part of `process` that doesn't need to be generic over the sink. Walks `domain_name` byte by byte while each byte is either accepted by `in_inclusive_range8(b, b'a', b'z')` or is `.`; if the input ends that way, returns `(domain_name.len(), false, false)`. On the first other byte, returns the result of `process_innermost`, giving it the part of `domain_name` that begins at the most recent label start (the start of the input or the byte right after the latest `.`). `process` destructures the returned tuple as `(passthrough_up_to, is_bidi, had_errors)`. |
1030 | | #[inline(always)] |
1031 | 90.0k | fn process_inner<'a>( |
1032 | 90.0k | &self, |
1033 | 90.0k | domain_name: &'a [u8], |
1034 | 90.0k | ascii_deny_list: AsciiDenyList, |
1035 | 90.0k | hyphens: Hyphens, |
1036 | 90.0k | fail_fast: bool, |
1037 | 90.0k | domain_buffer: &mut SmallVec<[char; 253]>, |
1038 | 90.0k | already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>, |
1039 | 90.0k | ) -> (usize, bool, bool) { |
1040 | 90.0k | // Sadly, this even faster-path ASCII tier is needed to avoid regressing |
1041 | 90.0k | // performance. This tier only reads bytes and tracks where the current label starts; `domain_buffer` and `already_punycode` are not modified here, only forwarded to `process_innermost`. |
1042 | 90.0k | let mut iter = domain_name.iter(); |
1043 | 90.0k | let mut most_recent_label_start = iter.clone(); |
1044 | | loop { |
1045 | 380k | if let Some(&b) = iter.next() { |
1046 | 342k | if in_inclusive_range8(b, b'a', b'z') { |
1047 | 290k | continue; |
1048 | 52.0k | } |
1049 | 52.0k | if b == b'.' { |
1050 | 0 | most_recent_label_start = iter.clone(); |
1051 | 0 | continue; |
1052 | 52.0k | } |
1053 | 52.0k | return self.process_innermost( |
1054 | 52.0k | domain_name, |
1055 | 52.0k | ascii_deny_list, |
1056 | 52.0k | hyphens, |
1057 | 52.0k | fail_fast, |
1058 | 52.0k | domain_buffer, |
1059 | 52.0k | already_punycode, |
1060 | 52.0k | most_recent_label_start.as_slice(), |
1061 | 52.0k | ); |
1062 | | } else { |
1063 | | // Success! The whole input passes through on the fastest path! The iterator ran out without meeting a byte that needed `process_innermost`, so the full input length is reported with both flags `false`. |
1064 | 37.9k | return (domain_name.len(), false, false); |
1065 | | } |
1066 | | } |
1067 | 90.0k | } <idna::uts46::Uts46>::process_inner Line | Count | Source | 1031 | 90.0k | fn process_inner<'a>( | 1032 | 90.0k | &self, | 1033 | 90.0k | domain_name: &'a [u8], | 1034 | 90.0k | ascii_deny_list: AsciiDenyList, | 1035 | 90.0k | hyphens: Hyphens, | 1036 | 90.0k | fail_fast: bool, | 1037 | 90.0k | domain_buffer: &mut SmallVec<[char; 253]>, | 1038 | 90.0k | already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>, | 1039 | 90.0k | ) -> (usize, bool, bool) { | 1040 | 90.0k | // Sadly, this even faster-path ASCII tier is needed to avoid regressing | 1041 | 90.0k | // performance. | 1042 | 90.0k | let mut iter = domain_name.iter(); | 1043 | 90.0k | let mut most_recent_label_start = iter.clone(); | 1044 | | loop { | 1045 | 380k | if let Some(&b) = iter.next() { | 1046 | 342k | if in_inclusive_range8(b, b'a', b'z') { | 1047 | 290k | continue; | 1048 | 52.0k | } | 1049 | 52.0k | if b == b'.' { | 1050 | 0 | most_recent_label_start = iter.clone(); | 1051 | 0 | continue; | 1052 | 52.0k | } | 1053 | 52.0k | return self.process_innermost( | 1054 | 52.0k | domain_name, | 1055 | 52.0k | ascii_deny_list, | 1056 | 52.0k | hyphens, | 1057 | 52.0k | fail_fast, | 1058 | 52.0k | domain_buffer, | 1059 | 52.0k | already_punycode, | 1060 | 52.0k | most_recent_label_start.as_slice(), | 1061 | 52.0k | ); | 1062 | | } else { | 1063 | | // Success! The whole input passes through on the fastest path! | 1064 | 37.9k | return (domain_name.len(), false, false); | 1065 | | } | 1066 | | } | 1067 | 90.0k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::process_inner Unexecuted instantiation: <idna::uts46::Uts46>::process_inner |
1068 | | |
1069 | | /// The part of `process` that doesn't need to be generic over the sink and |
1070 | | /// can avoid monomorphizing in the interest of code size. |
1071 | | /// Separating this into a different stack frame compared to `process_inner` |
1072 | | /// improves performance in the ICU4X case. |
1073 | | #[allow(clippy::too_many_arguments)] |
1074 | | #[inline(never)] |
1075 | 52.0k | fn process_innermost<'a>( |
1076 | 52.0k | &self, |
1077 | 52.0k | domain_name: &'a [u8], |
1078 | 52.0k | ascii_deny_list: AsciiDenyList, |
1079 | 52.0k | hyphens: Hyphens, |
1080 | 52.0k | fail_fast: bool, |
1081 | 52.0k | domain_buffer: &mut SmallVec<[char; 253]>, |
1082 | 52.0k | already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>, |
1083 | 52.0k | tail: &'a [u8], |
1084 | 52.0k | ) -> (usize, bool, bool) { |
1085 | 52.0k | let deny_list = ascii_deny_list.bits; |
1086 | 52.0k | let deny_list_deny_dot = deny_list | DOT_MASK; |
1087 | 52.0k | |
1088 | 52.0k | let mut had_errors = false; |
1089 | 52.0k | |
1090 | 52.0k | let mut passthrough_up_to = domain_name.len() - tail.len(); // Index into `domain_name` |
1091 | 52.0k | // 253 ASCII characters is the max length for a valid domain name |
1092 | 52.0k | // (excluding the root dot). |
1093 | 52.0k | let mut current_label_start; // Index into `domain_buffer` |
1094 | 52.0k | let mut seen_label = false; |
1095 | 52.0k | let mut in_prefix = true; |
1096 | 734k | for label in tail.split(|b| *b == b'.') { <idna::uts46::Uts46>::process_innermost::{closure#0} Line | Count | Source | 1096 | 734k | for label in tail.split(|b| *b == b'.') { |
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#0} |
1097 | | // We check for passthrough only for the prefix. That is, if we |
1098 | | // haven't moved on and started filling `domain_buffer`. Keeping |
1099 | | // this stuff in one loop where the first items keep being skipped |
1100 | | // once they have been skipped at least once instead of working |
1101 | | // this into a fancier loop structure in order to make sure that |
1102 | | // no item from the iterator is lost or processed twice. |
1103 | | // Furthermore, after the passthrough fails, restarting the |
1104 | | // normalization process after each pre-existing ASCII dot also |
1105 | | // provides an opportunity for the processing to get back onto |
1106 | | // an ASCII fast path that bypasses the normalizer for ASCII |
1107 | | // after a pre-existing ASCII dot (pre-existing in the sense |
1108 | | // of not coming from e.g. normalizing an ideographic dot). |
1109 | 52.0k | if in_prefix && is_passthrough_ascii_label(label) { |
1110 | 35.7k | if seen_label { |
1111 | 0 | debug_assert_eq!(domain_name[passthrough_up_to], b'.'); |
1112 | 0 | passthrough_up_to += 1; |
1113 | 35.7k | } |
1114 | 35.7k | seen_label = true; |
1115 | 35.7k | |
1116 | 35.7k | passthrough_up_to += label.len(); |
1117 | 35.7k | continue; |
1118 | 16.3k | } |
1119 | 16.3k | if seen_label { |
1120 | 0 | if in_prefix { |
1121 | 0 | debug_assert_eq!(domain_name[passthrough_up_to], b'.'); |
1122 | 0 | passthrough_up_to += 1; |
1123 | 0 | } else { |
1124 | 0 | domain_buffer.push('.'); |
1125 | 0 | } |
1126 | 16.3k | } |
1127 | 16.3k | seen_label = true; |
1128 | 16.3k | in_prefix = false; |
1129 | 16.3k | current_label_start = domain_buffer.len(); |
1130 | 16.3k | if !label.is_empty() { |
1131 | 16.3k | let (ascii, non_ascii) = split_ascii_fast_path_prefix(label); |
1132 | 16.3k | let non_punycode_ascii_label = if non_ascii.is_empty() { |
1133 | 16.3k | if has_punycode_prefix(ascii) { |
1134 | 9.35k | if (ascii.last() != Some(&b'-')) |
1135 | 9.35k | && (ascii.len() - 4 <= PUNYCODE_DECODE_MAX_INPUT_LENGTH) |
1136 | | { |
1137 | 9.12k | if let Ok(decode) = |
1138 | 9.35k | Decoder::default().decode::<u8, InternalCaller>(&ascii[4..]) |
1139 | | { |
1140 | | // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4 |
1141 | | // characters. |
1142 | 9.12k | let mut label_buffer = SmallVec::<[char; 59]>::new(); |
1143 | 9.12k | label_buffer.extend(decode); |
1144 | 9.12k | |
1145 | 9.12k | if self.after_punycode_decode( |
1146 | 9.12k | domain_buffer, |
1147 | 9.12k | current_label_start, |
1148 | 9.12k | &label_buffer, |
1149 | 9.12k | deny_list_deny_dot, |
1150 | 9.12k | fail_fast, |
1151 | 9.12k | &mut had_errors, |
1152 | 9.12k | ) { |
1153 | 3.10k | return (0, false, true); |
1154 | 6.01k | } |
1155 | 6.01k | |
1156 | 6.01k | if self.check_label( |
1157 | 6.01k | hyphens, |
1158 | 6.01k | &mut domain_buffer[current_label_start..], |
1159 | 6.01k | fail_fast, |
1160 | 6.01k | &mut had_errors, |
1161 | 6.01k | true, |
1162 | 6.01k | true, |
1163 | 6.01k | ) { |
1164 | 844 | return (0, false, true); |
1165 | 5.16k | } |
1166 | | } else { |
1167 | | // Punycode failed |
1168 | 238 | if fail_fast { |
1169 | 238 | return (0, false, true); |
1170 | 0 | } |
1171 | 0 | had_errors = true; |
1172 | 0 | domain_buffer.push('\u{FFFD}'); |
1173 | 0 | let mut iter = ascii.iter(); |
1174 | 0 | // Discard the first character that we replaced. |
1175 | 0 | let _ = iter.next(); |
1176 | 0 | domain_buffer.extend(iter.map(|c| { |
1177 | 0 | // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does |
1178 | 0 | // not matter. |
1179 | 0 | apply_ascii_deny_list_to_potentially_upper_case_ascii( |
1180 | 0 | *c, deny_list, |
1181 | 0 | ) |
1182 | 0 | })); Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#1} |
1183 | 0 | }; |
1184 | | // If there were errors, we won't be trying to use this |
1185 | | // anyway later, so it's fine to put it here unconditionally. |
1186 | 5.16k | already_punycode.push(AlreadyAsciiLabel::MixedCasePunycode(label)); |
1187 | 5.16k | continue; |
1188 | 0 | } else if fail_fast { |
1189 | 0 | return (0, false, true); |
1190 | 0 | } |
1191 | 0 | // Else fall through to the complex path and rediscover error |
1192 | 0 | // there. |
1193 | 0 | false |
1194 | | } else { |
1195 | 6.97k | true |
1196 | | } |
1197 | | } else { |
1198 | 0 | false |
1199 | | }; |
1200 | 52.4k | for c in ascii.iter().map(|c| { |
1201 | 52.4k | // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does |
1202 | 52.4k | // not matter. |
1203 | 52.4k | apply_ascii_deny_list_to_potentially_upper_case_ascii(*c, deny_list) |
1204 | 52.4k | }) { <idna::uts46::Uts46>::process_innermost::{closure#2} Line | Count | Source | 1200 | 52.4k | for c in ascii.iter().map(|c| { | 1201 | 52.4k | // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does | 1202 | 52.4k | // not matter. | 1203 | 52.4k | apply_ascii_deny_list_to_potentially_upper_case_ascii(*c, deny_list) | 1204 | 52.4k | }) { |
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#2} Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#2} |
1205 | 52.4k | if c == '\u{FFFD}' { |
1206 | 84 | if fail_fast { |
1207 | 84 | return (0, false, true); |
1208 | 0 | } |
1209 | 0 | had_errors = true; |
1210 | 52.3k | } |
1211 | 52.3k | domain_buffer.push(c); |
1212 | | } |
1213 | 6.89k | if non_punycode_ascii_label { |
1214 | 6.89k | if hyphens != Hyphens::Allow |
1215 | 0 | && check_hyphens( |
1216 | 0 | &mut domain_buffer[current_label_start..], |
1217 | 0 | hyphens == Hyphens::CheckFirstLast, |
1218 | 0 | fail_fast, |
1219 | 0 | &mut had_errors, |
1220 | 0 | ) |
1221 | | { |
1222 | 0 | return (0, false, true); |
1223 | 6.89k | } |
1224 | 6.89k | already_punycode.push(if had_errors { |
1225 | 0 | AlreadyAsciiLabel::Other |
1226 | | } else { |
1227 | 6.89k | AlreadyAsciiLabel::MixedCaseAscii(label) |
1228 | | }); |
1229 | 6.89k | continue; |
1230 | 0 | } |
1231 | 0 | already_punycode.push(AlreadyAsciiLabel::Other); |
1232 | 0 | let mut first_needs_combining_mark_check = ascii.is_empty(); |
1233 | 0 | let mut needs_contextj_check = !non_ascii.is_empty(); |
1234 | 0 | let mut mapping = self |
1235 | 0 | .data |
1236 | 0 | .map_normalize(non_ascii.chars()) |
1237 | 0 | .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list)); Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#3} Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#3} |
1238 | | loop { |
1239 | 0 | let n = mapping.next(); |
1240 | 0 | match n { |
1241 | | None | Some('.') => { |
1242 | 0 | if domain_buffer[current_label_start..] |
1243 | 0 | .starts_with(&['x', 'n', '-', '-']) |
1244 | | { |
1245 | 0 | let mut punycode_precondition_failed = false; |
1246 | 0 | for c in domain_buffer[current_label_start + 4..].iter_mut() { |
1247 | 0 | if !c.is_ascii() { |
1248 | 0 | if fail_fast { |
1249 | 0 | return (0, false, true); |
1250 | 0 | } |
1251 | 0 | had_errors = true; |
1252 | 0 | *c = '\u{FFFD}'; |
1253 | 0 | punycode_precondition_failed = true; |
1254 | 0 | } |
1255 | | } |
1256 | | |
1257 | 0 | if let Some(last) = domain_buffer.last_mut() { |
1258 | 0 | if *last == '-' { |
1259 | | // Either there's nothing after the "xn--" prefix |
1260 | | // and we got the last hyphen of "xn--", or there |
1261 | | // are no Punycode digits after the last delimiter |
1262 | | // which would result in Punycode decode outputting |
1263 | | // ASCII only. |
1264 | 0 | if fail_fast { |
1265 | 0 | return (0, false, true); |
1266 | 0 | } |
1267 | 0 | had_errors = true; |
1268 | 0 | *last = '\u{FFFD}'; |
1269 | 0 | punycode_precondition_failed = true; |
1270 | 0 | } |
1271 | | } else { |
1272 | 0 | unreachable!(); |
1273 | | } |
1274 | | |
1275 | | // Reject excessively long input |
1276 | | // https://github.com/whatwg/url/issues/824 |
1277 | | // https://unicode-org.atlassian.net/browse/ICU-13727 |
1278 | 0 | if domain_buffer.len() - current_label_start - 4 |
1279 | 0 | > PUNYCODE_DECODE_MAX_INPUT_LENGTH |
1280 | | { |
1281 | 0 | if fail_fast { |
1282 | 0 | return (0, false, true); |
1283 | 0 | } |
1284 | 0 | had_errors = true; |
1285 | 0 | domain_buffer[current_label_start |
1286 | 0 | + 4 |
1287 | 0 | + PUNYCODE_DECODE_MAX_INPUT_LENGTH] = '\u{FFFD}'; |
1288 | 0 | punycode_precondition_failed = true; |
1289 | 0 | } |
1290 | | |
1291 | 0 | if !punycode_precondition_failed { |
1292 | 0 | if let Ok(decode) = Decoder::default() |
1293 | 0 | .decode::<char, InternalCaller>( |
1294 | 0 | &domain_buffer[current_label_start + 4..], |
1295 | 0 | ) |
1296 | | { |
1297 | 0 | first_needs_combining_mark_check = true; |
1298 | 0 | needs_contextj_check = true; |
1299 | 0 | // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4 |
1300 | 0 | // characters. |
1301 | 0 | let mut label_buffer = SmallVec::<[char; 59]>::new(); |
1302 | 0 | label_buffer.extend(decode); |
1303 | 0 |
|
1304 | 0 | domain_buffer.truncate(current_label_start); |
1305 | 0 | if self.after_punycode_decode( |
1306 | 0 | domain_buffer, |
1307 | 0 | current_label_start, |
1308 | 0 | &label_buffer, |
1309 | 0 | deny_list_deny_dot, |
1310 | 0 | fail_fast, |
1311 | 0 | &mut had_errors, |
1312 | 0 | ) { |
1313 | 0 | return (0, false, true); |
1314 | 0 | } |
1315 | | } else { |
1316 | | // Punycode failed |
1317 | 0 | if fail_fast { |
1318 | 0 | return (0, false, true); |
1319 | 0 | } |
1320 | 0 | had_errors = true; |
1321 | 0 | domain_buffer[current_label_start] = '\u{FFFD}'; |
1322 | 0 | needs_contextj_check = false; // ASCII label |
1323 | 0 | first_needs_combining_mark_check = false; |
1324 | | }; |
1325 | 0 | } else { |
1326 | 0 | first_needs_combining_mark_check = false; |
1327 | 0 | needs_contextj_check = false; // Non-ASCII already turned to U+FFFD. |
1328 | 0 | } |
1329 | 0 | } |
1330 | 0 | if self.check_label( |
1331 | 0 | hyphens, |
1332 | 0 | &mut domain_buffer[current_label_start..], |
1333 | 0 | fail_fast, |
1334 | 0 | &mut had_errors, |
1335 | 0 | first_needs_combining_mark_check, |
1336 | 0 | needs_contextj_check, |
1337 | 0 | ) { |
1338 | 0 | return (0, false, true); |
1339 | 0 | } |
1340 | 0 |
|
1341 | 0 | if n.is_none() { |
1342 | 0 | break; |
1343 | 0 | } |
1344 | 0 | domain_buffer.push('.'); |
1345 | 0 | current_label_start = domain_buffer.len(); |
1346 | 0 | first_needs_combining_mark_check = true; |
1347 | 0 | needs_contextj_check = true; |
1348 | 0 | already_punycode.push(AlreadyAsciiLabel::Other); |
1349 | | } |
1350 | 0 | Some(c) => { |
1351 | 0 | if c == '\u{FFFD}' { |
1352 | 0 | if fail_fast { |
1353 | 0 | return (0, false, true); |
1354 | 0 | } |
1355 | 0 | had_errors = true; |
1356 | 0 | } |
1357 | 0 | domain_buffer.push(c); |
1358 | | } |
1359 | | } |
1360 | | } |
1361 | 0 | } else { |
1362 | 0 | // Empty label |
1363 | 0 | already_punycode.push(AlreadyAsciiLabel::MixedCaseAscii(label)); |
1364 | 0 | } |
1365 | | } |
1366 | | |
1367 | 47.8k | let is_bidi = self.is_bidi(domain_buffer); |
1368 | 47.8k | if is_bidi { |
1369 | 9.16k | for label in domain_buffer.split_mut(|c| *c == '.') { <idna::uts46::Uts46>::process_innermost::{closure#4} Line | Count | Source | 1369 | 9.16k | for label in domain_buffer.split_mut(|c| *c == '.') { |
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#4} Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#4} |
1370 | 1.28k | if let Some((first, tail)) = label.split_first_mut() { |
1371 | 1.28k | let first_bc = self.data.bidi_class(*first); |
1372 | 1.28k | if !FIRST_BC_MASK.intersects(first_bc.to_mask()) { |
1373 | | // Neither RTL label nor LTR label |
1374 | 4 | if fail_fast { |
1375 | 4 | return (0, false, true); |
1376 | 0 | } |
1377 | 0 | had_errors = true; |
1378 | 0 | *first = '\u{FFFD}'; |
1379 | 0 | continue; |
1380 | 1.28k | } |
1381 | 1.28k | let is_ltr = first_bc.is_ltr(); |
1382 | 1.28k | // Trim NSM |
1383 | 1.28k | let mut middle = tail; |
1384 | | #[allow(clippy::while_let_loop)] |
1385 | | loop { |
1386 | 3.65k | if let Some((last, prior)) = middle.split_last_mut() { |
1387 | 3.28k | let last_bc = self.data.bidi_class(*last); |
1388 | 3.28k | if last_bc.is_nonspacing_mark() { |
1389 | 2.36k | middle = prior; |
1390 | 2.36k | continue; |
1391 | 912 | } |
1392 | 912 | let last_mask = if is_ltr { LAST_LTR_MASK } else { LAST_RTL_MASK }; |
1393 | 912 | if !last_mask.intersects(last_bc.to_mask()) { |
1394 | 100 | if fail_fast { |
1395 | 100 | return (0, false, true); |
1396 | 0 | } |
1397 | 0 | had_errors = true; |
1398 | 0 | *last = '\u{FFFD}'; |
1399 | 812 | } |
1400 | 812 | if is_ltr { |
1401 | 0 | for c in prior.iter_mut() { |
1402 | 0 | let bc = self.data.bidi_class(*c); |
1403 | 0 | if !MIDDLE_LTR_MASK.intersects(bc.to_mask()) { |
1404 | 0 | if fail_fast { |
1405 | 0 | return (0, false, true); |
1406 | 0 | } |
1407 | 0 | had_errors = true; |
1408 | 0 | *c = '\u{FFFD}'; |
1409 | 0 | } |
1410 | | } |
1411 | | } else { |
1412 | 812 | let mut numeral_state = RtlNumeralState::Undecided; |
1413 | 3.95k | for c in prior.iter_mut() { |
1414 | 3.95k | let bc = self.data.bidi_class(*c); |
1415 | 3.95k | if !MIDDLE_RTL_MASK.intersects(bc.to_mask()) { |
1416 | 0 | if fail_fast { |
1417 | 0 | return (0, false, true); |
1418 | 0 | } |
1419 | 0 | had_errors = true; |
1420 | 0 | *c = '\u{FFFD}'; |
1421 | | } else { |
1422 | 3.95k | match numeral_state { |
1423 | | RtlNumeralState::Undecided => { |
1424 | 3.02k | if bc.is_european_number() { |
1425 | 94 | numeral_state = RtlNumeralState::European; |
1426 | 2.93k | } else if bc.is_arabic_number() { |
1427 | 8 | numeral_state = RtlNumeralState::Arabic; |
1428 | 2.92k | } |
1429 | | } |
1430 | | RtlNumeralState::European => { |
1431 | 696 | if bc.is_arabic_number() { |
1432 | 0 | if fail_fast { |
1433 | 0 | return (0, false, true); |
1434 | 0 | } |
1435 | 0 | had_errors = true; |
1436 | 0 | *c = '\u{FFFD}'; |
1437 | 696 | } |
1438 | | } |
1439 | | RtlNumeralState::Arabic => { |
1440 | 234 | if bc.is_european_number() { |
1441 | 0 | if fail_fast { |
1442 | 0 | return (0, false, true); |
1443 | 0 | } |
1444 | 0 | had_errors = true; |
1445 | 0 | *c = '\u{FFFD}'; |
1446 | 234 | } |
1447 | | } |
1448 | | } |
1449 | | } |
1450 | | } |
1451 | 812 | if (numeral_state == RtlNumeralState::European |
1452 | 94 | && last_bc.is_arabic_number()) |
1453 | 812 | || (numeral_state == RtlNumeralState::Arabic |
1454 | 8 | && last_bc.is_european_number()) |
1455 | | { |
1456 | 0 | if fail_fast { |
1457 | 0 | return (0, false, true); |
1458 | 0 | } |
1459 | 0 | had_errors = true; |
1460 | 0 | *last = '\u{FFFD}'; |
1461 | 812 | } |
1462 | | } |
1463 | 812 | break; |
1464 | | } else { |
1465 | | // One-character label or label where |
1466 | | // everything after the first character |
1467 | | // is just non-spacing marks. |
1468 | 372 | break; |
1469 | | } |
1470 | | } |
1471 | 0 | } |
1472 | | } |
1473 | 46.5k | } |
1474 | | |
1475 | 47.7k | (passthrough_up_to, is_bidi, had_errors) |
1476 | 52.0k | } <idna::uts46::Uts46>::process_innermost Line | Count | Source | 1075 | 52.0k | fn process_innermost<'a>( | 1076 | 52.0k | &self, | 1077 | 52.0k | domain_name: &'a [u8], | 1078 | 52.0k | ascii_deny_list: AsciiDenyList, | 1079 | 52.0k | hyphens: Hyphens, | 1080 | 52.0k | fail_fast: bool, | 1081 | 52.0k | domain_buffer: &mut SmallVec<[char; 253]>, | 1082 | 52.0k | already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>, | 1083 | 52.0k | tail: &'a [u8], | 1084 | 52.0k | ) -> (usize, bool, bool) { | 1085 | 52.0k | let deny_list = ascii_deny_list.bits; | 1086 | 52.0k | let deny_list_deny_dot = deny_list | DOT_MASK; | 1087 | 52.0k | | 1088 | 52.0k | let mut had_errors = false; | 1089 | 52.0k | | 1090 | 52.0k | let mut passthrough_up_to = domain_name.len() - tail.len(); // Index into `domain_name` | 1091 | 52.0k | // 253 ASCII characters is the max length for a valid domain name | 1092 | 52.0k | // (excluding the root dot). | 1093 | 52.0k | let mut current_label_start; // Index into `domain_buffer` | 1094 | 52.0k | let mut seen_label = false; | 1095 | 52.0k | let mut in_prefix = true; | 1096 | 52.0k | for label in tail.split(|b| *b == b'.') { | 1097 | | // We check for passthrough only for the prefix. That is, if we | 1098 | | // haven't moved on and started filling `domain_buffer`. Keeping | 1099 | | // this stuff in one loop where the first items keep being skipped | 1100 | | // once they have been skipped at least once instead of working | 1101 | | // this into a fancier loop structure in order to make sure that | 1102 | | // no item from the iterator is lost or processed twice. 
| 1103 | | // Furthermore, after the passthrough fails, restarting the | 1104 | | // normalization process after each pre-existing ASCII dot also | 1105 | | // provides an opportunity for the processing to get back onto | 1106 | | // an ASCII fast path that bypasses the normalizer for ASCII | 1107 | | // after a pre-existing ASCII dot (pre-existing in the sense | 1108 | | // of not coming from e.g. normalizing an ideographic dot). | 1109 | 52.0k | if in_prefix && is_passthrough_ascii_label(label) { | 1110 | 35.7k | if seen_label { | 1111 | 0 | debug_assert_eq!(domain_name[passthrough_up_to], b'.'); | 1112 | 0 | passthrough_up_to += 1; | 1113 | 35.7k | } | 1114 | 35.7k | seen_label = true; | 1115 | 35.7k | | 1116 | 35.7k | passthrough_up_to += label.len(); | 1117 | 35.7k | continue; | 1118 | 16.3k | } | 1119 | 16.3k | if seen_label { | 1120 | 0 | if in_prefix { | 1121 | 0 | debug_assert_eq!(domain_name[passthrough_up_to], b'.'); | 1122 | 0 | passthrough_up_to += 1; | 1123 | 0 | } else { | 1124 | 0 | domain_buffer.push('.'); | 1125 | 0 | } | 1126 | 16.3k | } | 1127 | 16.3k | seen_label = true; | 1128 | 16.3k | in_prefix = false; | 1129 | 16.3k | current_label_start = domain_buffer.len(); | 1130 | 16.3k | if !label.is_empty() { | 1131 | 16.3k | let (ascii, non_ascii) = split_ascii_fast_path_prefix(label); | 1132 | 16.3k | let non_punycode_ascii_label = if non_ascii.is_empty() { | 1133 | 16.3k | if has_punycode_prefix(ascii) { | 1134 | 9.35k | if (ascii.last() != Some(&b'-')) | 1135 | 9.35k | && (ascii.len() - 4 <= PUNYCODE_DECODE_MAX_INPUT_LENGTH) | 1136 | | { | 1137 | 9.12k | if let Ok(decode) = | 1138 | 9.35k | Decoder::default().decode::<u8, InternalCaller>(&ascii[4..]) | 1139 | | { | 1140 | | // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4 | 1141 | | // characters. 
| 1142 | 9.12k | let mut label_buffer = SmallVec::<[char; 59]>::new(); | 1143 | 9.12k | label_buffer.extend(decode); | 1144 | 9.12k | | 1145 | 9.12k | if self.after_punycode_decode( | 1146 | 9.12k | domain_buffer, | 1147 | 9.12k | current_label_start, | 1148 | 9.12k | &label_buffer, | 1149 | 9.12k | deny_list_deny_dot, | 1150 | 9.12k | fail_fast, | 1151 | 9.12k | &mut had_errors, | 1152 | 9.12k | ) { | 1153 | 3.10k | return (0, false, true); | 1154 | 6.01k | } | 1155 | 6.01k | | 1156 | 6.01k | if self.check_label( | 1157 | 6.01k | hyphens, | 1158 | 6.01k | &mut domain_buffer[current_label_start..], | 1159 | 6.01k | fail_fast, | 1160 | 6.01k | &mut had_errors, | 1161 | 6.01k | true, | 1162 | 6.01k | true, | 1163 | 6.01k | ) { | 1164 | 844 | return (0, false, true); | 1165 | 5.16k | } | 1166 | | } else { | 1167 | | // Punycode failed | 1168 | 238 | if fail_fast { | 1169 | 238 | return (0, false, true); | 1170 | 0 | } | 1171 | 0 | had_errors = true; | 1172 | 0 | domain_buffer.push('\u{FFFD}'); | 1173 | 0 | let mut iter = ascii.iter(); | 1174 | 0 | // Discard the first character that we replaced. | 1175 | 0 | let _ = iter.next(); | 1176 | 0 | domain_buffer.extend(iter.map(|c| { | 1177 | | // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does | 1178 | | // not matter. | 1179 | | apply_ascii_deny_list_to_potentially_upper_case_ascii( | 1180 | | *c, deny_list, | 1181 | | ) | 1182 | 0 | })); | 1183 | 0 | }; | 1184 | | // If there were errors, we won't be trying to use this | 1185 | | // anyway later, so it's fine to put it here unconditionally. | 1186 | 5.16k | already_punycode.push(AlreadyAsciiLabel::MixedCasePunycode(label)); | 1187 | 5.16k | continue; | 1188 | 0 | } else if fail_fast { | 1189 | 0 | return (0, false, true); | 1190 | 0 | } | 1191 | 0 | // Else fall through to the complex path and rediscover error | 1192 | 0 | // there. 
| 1193 | 0 | false | 1194 | | } else { | 1195 | 6.97k | true | 1196 | | } | 1197 | | } else { | 1198 | 0 | false | 1199 | | }; | 1200 | 52.4k | for c in ascii.iter().map(|c| { | 1201 | | // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does | 1202 | | // not matter. | 1203 | | apply_ascii_deny_list_to_potentially_upper_case_ascii(*c, deny_list) | 1204 | 6.97k | }) { | 1205 | 52.4k | if c == '\u{FFFD}' { | 1206 | 84 | if fail_fast { | 1207 | 84 | return (0, false, true); | 1208 | 0 | } | 1209 | 0 | had_errors = true; | 1210 | 52.3k | } | 1211 | 52.3k | domain_buffer.push(c); | 1212 | | } | 1213 | 6.89k | if non_punycode_ascii_label { | 1214 | 6.89k | if hyphens != Hyphens::Allow | 1215 | 0 | && check_hyphens( | 1216 | 0 | &mut domain_buffer[current_label_start..], | 1217 | 0 | hyphens == Hyphens::CheckFirstLast, | 1218 | 0 | fail_fast, | 1219 | 0 | &mut had_errors, | 1220 | 0 | ) | 1221 | | { | 1222 | 0 | return (0, false, true); | 1223 | 6.89k | } | 1224 | 6.89k | already_punycode.push(if had_errors { | 1225 | 0 | AlreadyAsciiLabel::Other | 1226 | | } else { | 1227 | 6.89k | AlreadyAsciiLabel::MixedCaseAscii(label) | 1228 | | }); | 1229 | 6.89k | continue; | 1230 | 0 | } | 1231 | 0 | already_punycode.push(AlreadyAsciiLabel::Other); | 1232 | 0 | let mut first_needs_combining_mark_check = ascii.is_empty(); | 1233 | 0 | let mut needs_contextj_check = !non_ascii.is_empty(); | 1234 | 0 | let mut mapping = self | 1235 | 0 | .data | 1236 | 0 | .map_normalize(non_ascii.chars()) | 1237 | 0 | .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list)); | 1238 | | loop { | 1239 | 0 | let n = mapping.next(); | 1240 | 0 | match n { | 1241 | | None | Some('.') => { | 1242 | 0 | if domain_buffer[current_label_start..] 
| 1243 | 0 | .starts_with(&['x', 'n', '-', '-']) | 1244 | | { | 1245 | 0 | let mut punycode_precondition_failed = false; | 1246 | 0 | for c in domain_buffer[current_label_start + 4..].iter_mut() { | 1247 | 0 | if !c.is_ascii() { | 1248 | 0 | if fail_fast { | 1249 | 0 | return (0, false, true); | 1250 | 0 | } | 1251 | 0 | had_errors = true; | 1252 | 0 | *c = '\u{FFFD}'; | 1253 | 0 | punycode_precondition_failed = true; | 1254 | 0 | } | 1255 | | } | 1256 | | | 1257 | 0 | if let Some(last) = domain_buffer.last_mut() { | 1258 | 0 | if *last == '-' { | 1259 | | // Either there's nothing after the "xn--" prefix | 1260 | | // and we got the last hyphen of "xn--", or there | 1261 | | // are no Punycode digits after the last delimiter | 1262 | | // which would result in Punycode decode outputting | 1263 | | // ASCII only. | 1264 | 0 | if fail_fast { | 1265 | 0 | return (0, false, true); | 1266 | 0 | } | 1267 | 0 | had_errors = true; | 1268 | 0 | *last = '\u{FFFD}'; | 1269 | 0 | punycode_precondition_failed = true; | 1270 | 0 | } | 1271 | | } else { | 1272 | 0 | unreachable!(); | 1273 | | } | 1274 | | | 1275 | | // Reject excessively long input | 1276 | | // https://github.com/whatwg/url/issues/824 | 1277 | | // https://unicode-org.atlassian.net/browse/ICU-13727 | 1278 | 0 | if domain_buffer.len() - current_label_start - 4 | 1279 | 0 | > PUNYCODE_DECODE_MAX_INPUT_LENGTH | 1280 | | { | 1281 | 0 | if fail_fast { | 1282 | 0 | return (0, false, true); | 1283 | 0 | } | 1284 | 0 | had_errors = true; | 1285 | 0 | domain_buffer[current_label_start | 1286 | 0 | + 4 | 1287 | 0 | + PUNYCODE_DECODE_MAX_INPUT_LENGTH] = '\u{FFFD}'; | 1288 | 0 | punycode_precondition_failed = true; | 1289 | 0 | } | 1290 | | | 1291 | 0 | if !punycode_precondition_failed { | 1292 | 0 | if let Ok(decode) = Decoder::default() | 1293 | 0 | .decode::<char, InternalCaller>( | 1294 | 0 | &domain_buffer[current_label_start + 4..], | 1295 | 0 | ) | 1296 | | { | 1297 | 0 | first_needs_combining_mark_check = true; | 
1298 | 0 | needs_contextj_check = true; | 1299 | 0 | // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4 | 1300 | 0 | // characters. | 1301 | 0 | let mut label_buffer = SmallVec::<[char; 59]>::new(); | 1302 | 0 | label_buffer.extend(decode); | 1303 | 0 |
| 1304 | 0 | domain_buffer.truncate(current_label_start); | 1305 | 0 | if self.after_punycode_decode( | 1306 | 0 | domain_buffer, | 1307 | 0 | current_label_start, | 1308 | 0 | &label_buffer, | 1309 | 0 | deny_list_deny_dot, | 1310 | 0 | fail_fast, | 1311 | 0 | &mut had_errors, | 1312 | 0 | ) { | 1313 | 0 | return (0, false, true); | 1314 | 0 | } | 1315 | | } else { | 1316 | | // Punycode failed | 1317 | 0 | if fail_fast { | 1318 | 0 | return (0, false, true); | 1319 | 0 | } | 1320 | 0 | had_errors = true; | 1321 | 0 | domain_buffer[current_label_start] = '\u{FFFD}'; | 1322 | 0 | needs_contextj_check = false; // ASCII label | 1323 | 0 | first_needs_combining_mark_check = false; | 1324 | | }; | 1325 | 0 | } else { | 1326 | 0 | first_needs_combining_mark_check = false; | 1327 | 0 | needs_contextj_check = false; // Non-ASCII already turned to U+FFFD. | 1328 | 0 | } | 1329 | 0 | } | 1330 | 0 | if self.check_label( | 1331 | 0 | hyphens, | 1332 | 0 | &mut domain_buffer[current_label_start..], | 1333 | 0 | fail_fast, | 1334 | 0 | &mut had_errors, | 1335 | 0 | first_needs_combining_mark_check, | 1336 | 0 | needs_contextj_check, | 1337 | 0 | ) { | 1338 | 0 | return (0, false, true); | 1339 | 0 | } | 1340 | 0 |
| 1341 | 0 | if n.is_none() { | 1342 | 0 | break; | 1343 | 0 | } | 1344 | 0 | domain_buffer.push('.'); | 1345 | 0 | current_label_start = domain_buffer.len(); | 1346 | 0 | first_needs_combining_mark_check = true; | 1347 | 0 | needs_contextj_check = true; | 1348 | 0 | already_punycode.push(AlreadyAsciiLabel::Other); | 1349 | | } | 1350 | 0 | Some(c) => { | 1351 | 0 | if c == '\u{FFFD}' { | 1352 | 0 | if fail_fast { | 1353 | 0 | return (0, false, true); | 1354 | 0 | } | 1355 | 0 | had_errors = true; | 1356 | 0 | } | 1357 | 0 | domain_buffer.push(c); | 1358 | | } | 1359 | | } | 1360 | | } | 1361 | 0 | } else { | 1362 | 0 | // Empty label | 1363 | 0 | already_punycode.push(AlreadyAsciiLabel::MixedCaseAscii(label)); | 1364 | 0 | } | 1365 | | } | 1366 | | | 1367 | 47.8k | let is_bidi = self.is_bidi(domain_buffer); | 1368 | 47.8k | if is_bidi { | 1369 | 1.28k | for label in domain_buffer.split_mut(|c| *c == '.') { | 1370 | 1.28k | if let Some((first, tail)) = label.split_first_mut() { | 1371 | 1.28k | let first_bc = self.data.bidi_class(*first); | 1372 | 1.28k | if !FIRST_BC_MASK.intersects(first_bc.to_mask()) { | 1373 | | // Neither RTL label nor LTR label | 1374 | 4 | if fail_fast { | 1375 | 4 | return (0, false, true); | 1376 | 0 | } | 1377 | 0 | had_errors = true; | 1378 | 0 | *first = '\u{FFFD}'; | 1379 | 0 | continue; | 1380 | 1.28k | } | 1381 | 1.28k | let is_ltr = first_bc.is_ltr(); | 1382 | 1.28k | // Trim NSM | 1383 | 1.28k | let mut middle = tail; | 1384 | | #[allow(clippy::while_let_loop)] | 1385 | | loop { | 1386 | 3.65k | if let Some((last, prior)) = middle.split_last_mut() { | 1387 | 3.28k | let last_bc = self.data.bidi_class(*last); | 1388 | 3.28k | if last_bc.is_nonspacing_mark() { | 1389 | 2.36k | middle = prior; | 1390 | 2.36k | continue; | 1391 | 912 | } | 1392 | 912 | let last_mask = if is_ltr { LAST_LTR_MASK } else { LAST_RTL_MASK }; | 1393 | 912 | if !last_mask.intersects(last_bc.to_mask()) { | 1394 | 100 | if fail_fast { | 1395 | 100 | return (0, 
false, true); | 1396 | 0 | } | 1397 | 0 | had_errors = true; | 1398 | 0 | *last = '\u{FFFD}'; | 1399 | 812 | } | 1400 | 812 | if is_ltr { | 1401 | 0 | for c in prior.iter_mut() { | 1402 | 0 | let bc = self.data.bidi_class(*c); | 1403 | 0 | if !MIDDLE_LTR_MASK.intersects(bc.to_mask()) { | 1404 | 0 | if fail_fast { | 1405 | 0 | return (0, false, true); | 1406 | 0 | } | 1407 | 0 | had_errors = true; | 1408 | 0 | *c = '\u{FFFD}'; | 1409 | 0 | } | 1410 | | } | 1411 | | } else { | 1412 | 812 | let mut numeral_state = RtlNumeralState::Undecided; | 1413 | 3.95k | for c in prior.iter_mut() { | 1414 | 3.95k | let bc = self.data.bidi_class(*c); | 1415 | 3.95k | if !MIDDLE_RTL_MASK.intersects(bc.to_mask()) { | 1416 | 0 | if fail_fast { | 1417 | 0 | return (0, false, true); | 1418 | 0 | } | 1419 | 0 | had_errors = true; | 1420 | 0 | *c = '\u{FFFD}'; | 1421 | | } else { | 1422 | 3.95k | match numeral_state { | 1423 | | RtlNumeralState::Undecided => { | 1424 | 3.02k | if bc.is_european_number() { | 1425 | 94 | numeral_state = RtlNumeralState::European; | 1426 | 2.93k | } else if bc.is_arabic_number() { | 1427 | 8 | numeral_state = RtlNumeralState::Arabic; | 1428 | 2.92k | } | 1429 | | } | 1430 | | RtlNumeralState::European => { | 1431 | 696 | if bc.is_arabic_number() { | 1432 | 0 | if fail_fast { | 1433 | 0 | return (0, false, true); | 1434 | 0 | } | 1435 | 0 | had_errors = true; | 1436 | 0 | *c = '\u{FFFD}'; | 1437 | 696 | } | 1438 | | } | 1439 | | RtlNumeralState::Arabic => { | 1440 | 234 | if bc.is_european_number() { | 1441 | 0 | if fail_fast { | 1442 | 0 | return (0, false, true); | 1443 | 0 | } | 1444 | 0 | had_errors = true; | 1445 | 0 | *c = '\u{FFFD}'; | 1446 | 234 | } | 1447 | | } | 1448 | | } | 1449 | | } | 1450 | | } | 1451 | 812 | if (numeral_state == RtlNumeralState::European | 1452 | 94 | && last_bc.is_arabic_number()) | 1453 | 812 | || (numeral_state == RtlNumeralState::Arabic | 1454 | 8 | && last_bc.is_european_number()) | 1455 | | { | 1456 | 0 | if fail_fast { | 
1457 | 0 | return (0, false, true); | 1458 | 0 | } | 1459 | 0 | had_errors = true; | 1460 | 0 | *last = '\u{FFFD}'; | 1461 | 812 | } | 1462 | | } | 1463 | 812 | break; | 1464 | | } else { | 1465 | | // One-character label or label where | 1466 | | // everything after the first character | 1467 | | // is just non-spacing marks. | 1468 | 372 | break; | 1469 | | } | 1470 | | } | 1471 | 0 | } | 1472 | | } | 1473 | 46.5k | } | 1474 | | | 1475 | 47.7k | (passthrough_up_to, is_bidi, had_errors) | 1476 | 52.0k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost |
1477 | | |
1478 | | #[inline(never)] |
1479 | 9.12k | fn after_punycode_decode( |
1480 | 9.12k | &self, |
1481 | 9.12k | domain_buffer: &mut SmallVec<[char; 253]>, |
1482 | 9.12k | current_label_start: usize, |
1483 | 9.12k | label_buffer: &[char], |
1484 | 9.12k | deny_list_deny_dot: u128, |
1485 | 9.12k | fail_fast: bool, |
1486 | 9.12k | had_errors: &mut bool, |
1487 | 9.12k | ) -> bool { |
1488 | 73.3k | for c in self |
1489 | 9.12k | .data |
1490 | 9.12k | .normalize_validate(label_buffer.iter().copied()) |
1491 | 73.3k | .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot)) <idna::uts46::Uts46>::after_punycode_decode::{closure#0} Line | Count | Source | 1491 | 73.3k | .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot)) |
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#0} Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#0} |
1492 | | { |
1493 | 73.3k | if c == '\u{FFFD}' { |
1494 | 2.60k | if fail_fast { |
1495 | 2.60k | return true; |
1496 | 0 | } |
1497 | 0 | *had_errors = true; |
1498 | 70.7k | } |
1499 | 70.7k | domain_buffer.push(c); |
1500 | | } |
1501 | 6.51k | let normalized = &mut domain_buffer[current_label_start..]; |
1502 | 6.51k | if let Err(()) = |
1503 | 6.51k | normalized |
1504 | 6.51k | .iter_mut() |
1505 | 6.51k | .zip(label_buffer.iter()) |
1506 | 64.9k | .try_for_each(|(norm_c, decoded_c)| { |
1507 | 64.9k | if *norm_c == *decoded_c { |
1508 | 64.3k | Ok(()) |
1509 | | } else { |
1510 | | // Mark the first difference |
1511 | 506 | *norm_c = '\u{FFFD}'; |
1512 | 506 | Err(()) |
1513 | | } |
1514 | 64.9k | }) <idna::uts46::Uts46>::after_punycode_decode::{closure#1} Line | Count | Source | 1506 | 64.9k | .try_for_each(|(norm_c, decoded_c)| { | 1507 | 64.9k | if *norm_c == *decoded_c { | 1508 | 64.3k | Ok(()) | 1509 | | } else { | 1510 | | // Mark the first difference | 1511 | 506 | *norm_c = '\u{FFFD}'; | 1512 | 506 | Err(()) | 1513 | | } | 1514 | 64.9k | }) |
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#1} Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#1} |
1515 | | { |
1516 | 506 | if fail_fast { |
1517 | 506 | return true; |
1518 | 0 | } |
1519 | 0 | *had_errors = true; |
1520 | 6.01k | } |
1521 | 6.01k | false |
1522 | 9.12k | } <idna::uts46::Uts46>::after_punycode_decode Line | Count | Source | 1479 | 9.12k | fn after_punycode_decode( | 1480 | 9.12k | &self, | 1481 | 9.12k | domain_buffer: &mut SmallVec<[char; 253]>, | 1482 | 9.12k | current_label_start: usize, | 1483 | 9.12k | label_buffer: &[char], | 1484 | 9.12k | deny_list_deny_dot: u128, | 1485 | 9.12k | fail_fast: bool, | 1486 | 9.12k | had_errors: &mut bool, | 1487 | 9.12k | ) -> bool { | 1488 | 73.3k | for c in self | 1489 | 9.12k | .data | 1490 | 9.12k | .normalize_validate(label_buffer.iter().copied()) | 1491 | 9.12k | .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot)) | 1492 | | { | 1493 | 73.3k | if c == '\u{FFFD}' { | 1494 | 2.60k | if fail_fast { | 1495 | 2.60k | return true; | 1496 | 0 | } | 1497 | 0 | *had_errors = true; | 1498 | 70.7k | } | 1499 | 70.7k | domain_buffer.push(c); | 1500 | | } | 1501 | 6.51k | let normalized = &mut domain_buffer[current_label_start..]; | 1502 | 6.51k | if let Err(()) = | 1503 | 6.51k | normalized | 1504 | 6.51k | .iter_mut() | 1505 | 6.51k | .zip(label_buffer.iter()) | 1506 | 6.51k | .try_for_each(|(norm_c, decoded_c)| { | 1507 | | if *norm_c == *decoded_c { | 1508 | | Ok(()) | 1509 | | } else { | 1510 | | // Mark the first difference | 1511 | | *norm_c = '\u{FFFD}'; | 1512 | | Err(()) | 1513 | | } | 1514 | 6.51k | }) | 1515 | | { | 1516 | 506 | if fail_fast { | 1517 | 506 | return true; | 1518 | 0 | } | 1519 | 0 | *had_errors = true; | 1520 | 6.01k | } | 1521 | 6.01k | false | 1522 | 9.12k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode |
1523 | | |
1524 | | #[inline(never)] |
1525 | 6.01k | fn check_label( |
1526 | 6.01k | &self, |
1527 | 6.01k | hyphens: Hyphens, |
1528 | 6.01k | mut_label: &mut [char], |
1529 | 6.01k | fail_fast: bool, |
1530 | 6.01k | had_errors: &mut bool, |
1531 | 6.01k | first_needs_combining_mark_check: bool, |
1532 | 6.01k | needs_contextj_check: bool, |
1533 | 6.01k | ) -> bool { |
1534 | 6.01k | if hyphens != Hyphens::Allow |
1535 | 0 | && check_hyphens( |
1536 | 0 | mut_label, |
1537 | 0 | hyphens == Hyphens::CheckFirstLast, |
1538 | 0 | fail_fast, |
1539 | 0 | had_errors, |
1540 | 0 | ) |
1541 | | { |
1542 | 0 | return true; |
1543 | 6.01k | } |
1544 | 6.01k | if first_needs_combining_mark_check { |
1545 | 6.01k | if let Some(first) = mut_label.first_mut() { |
1546 | 6.01k | if self.data.is_mark(*first) { |
1547 | 840 | if fail_fast { |
1548 | 840 | return true; |
1549 | 0 | } |
1550 | 0 | *had_errors = true; |
1551 | 0 | *first = '\u{FFFD}'; |
1552 | 5.17k | } |
1553 | 0 | } |
1554 | 0 | } |
1555 | 5.17k | if needs_contextj_check { |
1556 | | // ContextJ |
1557 | 56.9k | for i in 0..mut_label.len() { |
1558 | 56.9k | let c = mut_label[i]; |
1559 | 56.9k | if !in_inclusive_range_char(c, '\u{200C}', '\u{200D}') { |
1560 | 56.9k | continue; |
1561 | 4 | } |
1562 | 4 | let (head, joiner_and_tail) = mut_label.split_at_mut(i); |
1563 | | |
1564 | 4 | if let Some((joiner, tail)) = joiner_and_tail.split_first_mut() { |
1565 | 4 | if let Some(previous) = head.last() { |
1566 | 0 | if self.data.is_virama(*previous) { |
1567 | 0 | continue; |
1568 | 0 | } |
1569 | | } else { |
1570 | | // No preceding character |
1571 | 4 | if fail_fast { |
1572 | 4 | return true; |
1573 | 0 | } |
1574 | 0 | *had_errors = true; |
1575 | 0 | *joiner = '\u{FFFD}'; |
1576 | 0 | continue; |
1577 | | } |
1578 | 0 | if c == '\u{200D}' { |
1579 | | // ZWJ only has the virama rule |
1580 | 0 | if fail_fast { |
1581 | 0 | return true; |
1582 | 0 | } |
1583 | 0 | *had_errors = true; |
1584 | 0 | *joiner = '\u{FFFD}'; |
1585 | 0 | continue; |
1586 | 0 | } |
1587 | 0 | debug_assert_eq!(c, '\u{200C}'); |
1588 | 0 | if !self.has_appropriately_joining_char( |
1589 | 0 | head.iter().rev().copied(), |
1590 | 0 | LEFT_OR_DUAL_JOINING_MASK, |
1591 | 0 | ) || !self.has_appropriately_joining_char( |
1592 | 0 | tail.iter().copied(), |
1593 | 0 | RIGHT_OR_DUAL_JOINING_MASK, |
1594 | 0 | ) { |
1595 | 0 | if fail_fast { |
1596 | 0 | return true; |
1597 | 0 | } |
1598 | 0 | *had_errors = true; |
1599 | 0 | *joiner = '\u{FFFD}'; |
1600 | 0 | } |
1601 | | } else { |
1602 | 0 | debug_assert!(false); |
1603 | | } |
1604 | | } |
1605 | 0 | } |
1606 | | |
1607 | 5.16k | if !is_ascii(mut_label) && mut_label.len() > PUNYCODE_ENCODE_MAX_INPUT_LENGTH { |
1608 | | // Limit quadratic behavior |
1609 | | // https://github.com/whatwg/url/issues/824 |
1610 | | // https://unicode-org.atlassian.net/browse/ICU-13727 |
1611 | 0 | if fail_fast { |
1612 | 0 | return true; |
1613 | 0 | } |
1614 | 0 | *had_errors = true; |
1615 | 0 | mut_label[PUNYCODE_ENCODE_MAX_INPUT_LENGTH] = '\u{FFFD}'; |
1616 | 5.16k | } |
1617 | 5.16k | false |
1618 | 6.01k | } <idna::uts46::Uts46>::check_label Line | Count | Source | 1525 | 6.01k | fn check_label( | 1526 | 6.01k | &self, | 1527 | 6.01k | hyphens: Hyphens, | 1528 | 6.01k | mut_label: &mut [char], | 1529 | 6.01k | fail_fast: bool, | 1530 | 6.01k | had_errors: &mut bool, | 1531 | 6.01k | first_needs_combining_mark_check: bool, | 1532 | 6.01k | needs_contextj_check: bool, | 1533 | 6.01k | ) -> bool { | 1534 | 6.01k | if hyphens != Hyphens::Allow | 1535 | 0 | && check_hyphens( | 1536 | 0 | mut_label, | 1537 | 0 | hyphens == Hyphens::CheckFirstLast, | 1538 | 0 | fail_fast, | 1539 | 0 | had_errors, | 1540 | 0 | ) | 1541 | | { | 1542 | 0 | return true; | 1543 | 6.01k | } | 1544 | 6.01k | if first_needs_combining_mark_check { | 1545 | 6.01k | if let Some(first) = mut_label.first_mut() { | 1546 | 6.01k | if self.data.is_mark(*first) { | 1547 | 840 | if fail_fast { | 1548 | 840 | return true; | 1549 | 0 | } | 1550 | 0 | *had_errors = true; | 1551 | 0 | *first = '\u{FFFD}'; | 1552 | 5.17k | } | 1553 | 0 | } | 1554 | 0 | } | 1555 | 5.17k | if needs_contextj_check { | 1556 | | // ContextJ | 1557 | 56.9k | for i in 0..mut_label.len() { | 1558 | 56.9k | let c = mut_label[i]; | 1559 | 56.9k | if !in_inclusive_range_char(c, '\u{200C}', '\u{200D}') { | 1560 | 56.9k | continue; | 1561 | 4 | } | 1562 | 4 | let (head, joiner_and_tail) = mut_label.split_at_mut(i); | 1563 | | | 1564 | 4 | if let Some((joiner, tail)) = joiner_and_tail.split_first_mut() { | 1565 | 4 | if let Some(previous) = head.last() { | 1566 | 0 | if self.data.is_virama(*previous) { | 1567 | 0 | continue; | 1568 | 0 | } | 1569 | | } else { | 1570 | | // No preceding character | 1571 | 4 | if fail_fast { | 1572 | 4 | return true; | 1573 | 0 | } | 1574 | 0 | *had_errors = true; | 1575 | 0 | *joiner = '\u{FFFD}'; | 1576 | 0 | continue; | 1577 | | } | 1578 | 0 | if c == '\u{200D}' { | 1579 | | // ZWJ only has the virama rule | 1580 | 0 | if fail_fast { | 1581 | 0 | return true; | 1582 | 0 | } | 1583 | 0 | 
*had_errors = true; | 1584 | 0 | *joiner = '\u{FFFD}'; | 1585 | 0 | continue; | 1586 | 0 | } | 1587 | 0 | debug_assert_eq!(c, '\u{200C}'); | 1588 | 0 | if !self.has_appropriately_joining_char( | 1589 | 0 | head.iter().rev().copied(), | 1590 | 0 | LEFT_OR_DUAL_JOINING_MASK, | 1591 | 0 | ) || !self.has_appropriately_joining_char( | 1592 | 0 | tail.iter().copied(), | 1593 | 0 | RIGHT_OR_DUAL_JOINING_MASK, | 1594 | 0 | ) { | 1595 | 0 | if fail_fast { | 1596 | 0 | return true; | 1597 | 0 | } | 1598 | 0 | *had_errors = true; | 1599 | 0 | *joiner = '\u{FFFD}'; | 1600 | 0 | } | 1601 | | } else { | 1602 | 0 | debug_assert!(false); | 1603 | | } | 1604 | | } | 1605 | 0 | } | 1606 | | | 1607 | 5.16k | if !is_ascii(mut_label) && mut_label.len() > PUNYCODE_ENCODE_MAX_INPUT_LENGTH { | 1608 | | // Limit quadratic behavior | 1609 | | // https://github.com/whatwg/url/issues/824 | 1610 | | // https://unicode-org.atlassian.net/browse/ICU-13727 | 1611 | 0 | if fail_fast { | 1612 | 0 | return true; | 1613 | 0 | } | 1614 | 0 | *had_errors = true; | 1615 | 0 | mut_label[PUNYCODE_ENCODE_MAX_INPUT_LENGTH] = '\u{FFFD}'; | 1616 | 5.16k | } | 1617 | 5.16k | false | 1618 | 6.01k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::check_label Unexecuted instantiation: <idna::uts46::Uts46>::check_label |
1619 | | |
1620 | | #[inline(always)] |
1621 | 0 | fn has_appropriately_joining_char<I: Iterator<Item = char>>( |
1622 | 0 | &self, |
1623 | 0 | iter: I, |
1624 | 0 | required_mask: JoiningTypeMask, |
1625 | 0 | ) -> bool { |
1626 | 0 | for c in iter { |
1627 | 0 | let jt = self.data.joining_type(c); |
1628 | 0 | if jt.to_mask().intersects(required_mask) { |
1629 | 0 | return true; |
1630 | 0 | } |
1631 | 0 | if jt.is_transparent() { |
1632 | 0 | continue; |
1633 | 0 | } |
1634 | 0 | return false; |
1635 | | } |
1636 | 0 | false |
1637 | 0 | } Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::iter::adapters::rev::Rev<core::slice::iter::Iter<char>>>> Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>> Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::iter::adapters::rev::Rev<core::slice::iter::Iter<char>>>> Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>> Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::iter::adapters::rev::Rev<core::slice::iter::Iter<char>>>> Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>> |
1638 | | |
1639 | | #[inline(always)] |
1640 | 47.8k | fn is_bidi(&self, buffer: &[char]) -> bool { |
1641 | 148k | for &c in buffer { |
1642 | 101k | if c < '\u{0590}' { |
1643 | | // Below Hebrew |
1644 | 82.3k | continue; |
1645 | 19.2k | } |
1646 | 19.2k | if in_inclusive_range_char(c, '\u{0900}', '\u{FB1C}') { |
1647 | 11.5k | debug_assert_ne!(c, '\u{200F}'); // disallowed |
1648 | 11.5k | continue; |
1649 | 7.70k | } |
1650 | 7.70k | if in_inclusive_range_char(c, '\u{1F000}', '\u{3FFFF}') { |
1651 | 656 | continue; |
1652 | 7.04k | } |
1653 | 7.04k | if in_inclusive_range_char(c, '\u{FF00}', '\u{107FF}') { |
1654 | 98 | continue; |
1655 | 6.95k | } |
1656 | 6.95k | if in_inclusive_range_char(c, '\u{11000}', '\u{1E7FF}') { |
1657 | 3.74k | continue; |
1658 | 3.21k | } |
1659 | 3.21k | if RTL_MASK.intersects(self.data.bidi_class(c).to_mask()) { |
1660 | 1.28k | return true; |
1661 | 1.92k | } |
1662 | | } |
1663 | 46.5k | false |
1664 | 47.8k | } <idna::uts46::Uts46>::is_bidi Line | Count | Source | 1640 | 47.8k | fn is_bidi(&self, buffer: &[char]) -> bool { | 1641 | 148k | for &c in buffer { | 1642 | 101k | if c < '\u{0590}' { | 1643 | | // Below Hebrew | 1644 | 82.3k | continue; | 1645 | 19.2k | } | 1646 | 19.2k | if in_inclusive_range_char(c, '\u{0900}', '\u{FB1C}') { | 1647 | 11.5k | debug_assert_ne!(c, '\u{200F}'); // disallowed | 1648 | 11.5k | continue; | 1649 | 7.70k | } | 1650 | 7.70k | if in_inclusive_range_char(c, '\u{1F000}', '\u{3FFFF}') { | 1651 | 656 | continue; | 1652 | 7.04k | } | 1653 | 7.04k | if in_inclusive_range_char(c, '\u{FF00}', '\u{107FF}') { | 1654 | 98 | continue; | 1655 | 6.95k | } | 1656 | 6.95k | if in_inclusive_range_char(c, '\u{11000}', '\u{1E7FF}') { | 1657 | 3.74k | continue; | 1658 | 3.21k | } | 1659 | 3.21k | if RTL_MASK.intersects(self.data.bidi_class(c).to_mask()) { | 1660 | 1.28k | return true; | 1661 | 1.92k | } | 1662 | | } | 1663 | 46.5k | false | 1664 | 47.8k | } |
Unexecuted instantiation: <idna::uts46::Uts46>::is_bidi Unexecuted instantiation: <idna::uts46::Uts46>::is_bidi |
1665 | | } |
1666 | | |
/// Checks a label for hyphens in forbidden positions.
///
/// Three positions are inspected, in this order: the first character, the
/// last character, and — unless `allow_third_fourth` is `true` — the
/// characters at indices 2 and 3 together (both must be `'-'` to count).
///
/// * `mut_label` - the label to inspect; offending hyphens are overwritten
///   with U+FFFD when `fail_fast` is `false`.
/// * `allow_third_fourth` - when `true`, the check on indices 2 and 3 is
///   skipped entirely.
/// * `fail_fast` - when `true`, the function returns `true` at the first
///   violation without touching `mut_label` or `had_errors`.
/// * `had_errors` - set to `true` whenever a violation is recorded in
///   non-fail-fast mode; never reset to `false` here.
///
/// Returns `true` only when `fail_fast` is set and a violation was found;
/// in every other case the return value is `false`.
fn check_hyphens(
    mut_label: &mut [char],
    allow_third_fourth: bool,
    fail_fast: bool,
    had_errors: &mut bool,
) -> bool {
    const HYPHEN: char = '-';
    const REPLACEMENT: char = '\u{FFFD}';

    // Leading hyphen.
    if let Some(leading) = mut_label.first_mut().filter(|c| **c == HYPHEN) {
        if fail_fast {
            return true;
        }
        *had_errors = true;
        *leading = REPLACEMENT;
    }

    // Trailing hyphen. For a one-character label this is the same slot as
    // above, which by now already holds the replacement character.
    if let Some(trailing) = mut_label.last_mut().filter(|c| **c == HYPHEN) {
        if fail_fast {
            return true;
        }
        *had_errors = true;
        *trailing = REPLACEMENT;
    }

    // Hyphens in both the third and fourth positions.
    if !allow_third_fourth {
        if let [_, _, third, fourth, ..] = mut_label {
            if *third == HYPHEN && *fourth == HYPHEN {
                if fail_fast {
                    return true;
                }
                *had_errors = true;
                *third = REPLACEMENT;
                *fourth = REPLACEMENT;
            }
        }
    }

    false
}