Coverage Report

Created: 2024-12-17 06:15

/rust/registry/src/index.crates.io-6f17d22bba15001f/idna-1.0.3/src/uts46.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright The rust-url developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
//! This module provides the lower-level API for UTS 46.
10
//!
11
//! [`Uts46::process`] is the core that the other convenience
12
//! methods build on.
13
//!
14
//! UTS 46 flags map to this API as follows:
15
//!
16
//! * _CheckHyphens_ - _true_: [`Hyphens::Check`], _false_: [`Hyphens::Allow`]; the WHATWG URL Standard sets this to _false_ for normal (non-conformance-checker) user agents.
17
//! * _CheckBidi_ - Always _true_; cannot be configured, since this flag is _true_ even when WHATWG URL Standard _beStrict_ is _false_.
18
//! * _CheckJoiners_ - Always _true_; cannot be configured, since this flag is _true_ even when WHATWG URL Standard _beStrict_ is _false_.
19
//! * _UseSTD3ASCIIRules_ - _true_: [`AsciiDenyList::STD3`], _false_: [`AsciiDenyList::EMPTY`]; however, the check the WHATWG URL Standard performs right after the UTS 46 invocation corresponds to [`AsciiDenyList::URL`].
20
//! * _Transitional_Processing_ - Always _false_ but could be implemented as a preprocessing step. This flag is deprecated and for Web purposes the transition is over in the sense that all of Firefox, Safari, and Chrome set this flag to _false_.
21
//! * _VerifyDnsLength_ - _true_: [`DnsLength::Verify`], _false_: [`DnsLength::Ignore`]; the WHATWG URL Standard sets this to _false_ for normal (non-conformance-checker) user agents.
22
//! * _IgnoreInvalidPunycode_ - Always _false_; cannot be configured. (Not yet covered by the WHATWG URL Standard, but 2 out of 3 major browsers clearly behave as if this was _false_.)
23
24
use crate::punycode::Decoder;
25
use crate::punycode::InternalCaller;
26
use alloc::borrow::Cow;
27
use alloc::string::String;
28
use core::fmt::Write;
29
use idna_adapter::*;
30
use smallvec::SmallVec;
31
use utf8_iter::Utf8CharsEx;
32
33
/// ICU4C-compatible constraint.
34
/// https://unicode-org.atlassian.net/browse/ICU-13727
35
const PUNYCODE_DECODE_MAX_INPUT_LENGTH: usize = 2000;
36
37
/// ICU4C-compatible constraint. (Note: ICU4C measures
38
/// UTF-16 and we measure UTF-32. This means that we
39
/// allow longer non-BMP inputs. For this implementation,
40
/// the denial-of-service scaling does not depend on BMP vs.
41
/// non-BMP: only the scalar values matter.)
42
///
43
/// https://unicode-org.atlassian.net/browse/ICU-13727
44
const PUNYCODE_ENCODE_MAX_INPUT_LENGTH: usize = 1000;
45
46
/// For keeping track of what kind of numerals have been
47
/// seen in an RTL label.
48
#[derive(Debug, PartialEq, Eq)]
49
enum RtlNumeralState {
50
    Undecided,
51
    European,
52
    Arabic,
53
}
54
55
/// Computes the mask for upper-case ASCII.
56
0
const fn upper_case_mask() -> u128 {
57
0
    let mut accu = 0u128;
58
0
    let mut b = 0u8;
59
0
    while b < 128 {
60
0
        if (b >= b'A') && (b <= b'Z') {
61
0
            accu |= 1u128 << b;
62
0
        }
63
0
        b += 1;
64
    }
65
0
    accu
66
0
}
Unexecuted instantiation: idna::uts46::upper_case_mask
Unexecuted instantiation: idna::uts46::upper_case_mask
Unexecuted instantiation: idna::uts46::upper_case_mask
67
68
/// Bit set for upper-case ASCII.
69
const UPPER_CASE_MASK: u128 = upper_case_mask();
70
71
/// Computes the mask for glyphless ASCII.
72
0
const fn glyphless_mask() -> u128 {
73
0
    let mut accu = 0u128;
74
0
    let mut b = 0u8;
75
0
    while b < 128 {
76
0
        if (b <= b' ') || (b == 0x7F) {
77
0
            accu |= 1u128 << b;
78
0
        }
79
0
        b += 1;
80
    }
81
0
    accu
82
0
}
Unexecuted instantiation: idna::uts46::glyphless_mask
Unexecuted instantiation: idna::uts46::glyphless_mask
Unexecuted instantiation: idna::uts46::glyphless_mask
83
84
/// Bit set for glyphless ASCII.
85
const GLYPHLESS_MASK: u128 = glyphless_mask();
86
87
/// The mask for the ASCII dot.
88
const DOT_MASK: u128 = 1 << b'.';
89
90
/// Computes the ASCII deny list for STD3 ASCII rules.
91
0
const fn ldh_mask() -> u128 {
92
0
    let mut accu = 0u128;
93
0
    let mut b = 0u8;
94
0
    while b < 128 {
95
0
        if !((b >= b'a' && b <= b'z') || (b >= b'0' && b <= b'9') || b == b'-' || b == b'.') {
96
0
            accu |= 1u128 << b;
97
0
        }
98
0
        b += 1;
99
    }
100
0
    accu
101
0
}
Unexecuted instantiation: idna::uts46::ldh_mask
Unexecuted instantiation: idna::uts46::ldh_mask
Unexecuted instantiation: idna::uts46::ldh_mask
102
103
const PUNYCODE_PREFIX: u32 =
104
    ((b'-' as u32) << 24) | ((b'-' as u32) << 16) | ((b'N' as u32) << 8) | b'X' as u32;
105
106
const PUNYCODE_PREFIX_MASK: u32 = (0xFF << 24) | (0xFF << 16) | (0xDF << 8) | 0xDF;
107
108
0
fn write_punycode_label<W: Write + ?Sized>(
109
0
    label: &[char],
110
0
    sink: &mut W,
111
0
) -> Result<(), ProcessingError> {
112
0
    sink.write_str("xn--")?;
113
0
    crate::punycode::encode_into::<_, _, InternalCaller>(label.iter().copied(), sink)?;
114
0
    Ok(())
115
0
}
Unexecuted instantiation: idna::uts46::write_punycode_label::<alloc::string::String>
Unexecuted instantiation: idna::uts46::write_punycode_label::<alloc::string::String>
Unexecuted instantiation: idna::uts46::write_punycode_label::<alloc::string::String>
116
117
#[inline(always)]
118
16.3k
fn has_punycode_prefix(slice: &[u8]) -> bool {
119
16.3k
    if slice.len() < 4 {
120
2.89k
        return false;
121
13.4k
    }
122
13.4k
    // Sadly, the optimizer doesn't figure out that more idiomatic code
123
13.4k
    // should compile to masking on a 32-bit value.
124
13.4k
    let a = slice[0];
125
13.4k
    let b = slice[1];
126
13.4k
    let c = slice[2];
127
13.4k
    let d = slice[3];
128
13.4k
    let u = (u32::from(d) << 24) | (u32::from(c) << 16) | (u32::from(b) << 8) | u32::from(a);
129
13.4k
    (u & PUNYCODE_PREFIX_MASK) == PUNYCODE_PREFIX
130
16.3k
}
idna::uts46::has_punycode_prefix
Line
Count
Source
118
16.3k
fn has_punycode_prefix(slice: &[u8]) -> bool {
119
16.3k
    if slice.len() < 4 {
120
2.89k
        return false;
121
13.4k
    }
122
13.4k
    // Sadly, the optimizer doesn't figure out that more idiomatic code
123
13.4k
    // should compile to masking on a 32-bit value.
124
13.4k
    let a = slice[0];
125
13.4k
    let b = slice[1];
126
13.4k
    let c = slice[2];
127
13.4k
    let d = slice[3];
128
13.4k
    let u = (u32::from(d) << 24) | (u32::from(c) << 16) | (u32::from(b) << 8) | u32::from(a);
129
13.4k
    (u & PUNYCODE_PREFIX_MASK) == PUNYCODE_PREFIX
130
16.3k
}
Unexecuted instantiation: idna::uts46::has_punycode_prefix
Unexecuted instantiation: idna::uts46::has_punycode_prefix
131
132
#[inline(always)]
133
1.05M
fn in_inclusive_range8(u: u8, start: u8, end: u8) -> bool {
134
1.05M
    u.wrapping_sub(start) <= (end - start)
135
1.05M
}
idna::uts46::in_inclusive_range8
Line
Count
Source
133
1.05M
fn in_inclusive_range8(u: u8, start: u8, end: u8) -> bool {
134
1.05M
    u.wrapping_sub(start) <= (end - start)
135
1.05M
}
Unexecuted instantiation: idna::uts46::in_inclusive_range8
Unexecuted instantiation: idna::uts46::in_inclusive_range8
136
137
#[inline(always)]
138
97.8k
fn in_inclusive_range_char(c: char, start: char, end: char) -> bool {
139
97.8k
    u32::from(c).wrapping_sub(u32::from(start)) <= (u32::from(end) - u32::from(start))
140
97.8k
}
idna::uts46::in_inclusive_range_char
Line
Count
Source
138
97.8k
fn in_inclusive_range_char(c: char, start: char, end: char) -> bool {
139
97.8k
    u32::from(c).wrapping_sub(u32::from(start)) <= (u32::from(end) - u32::from(start))
140
97.8k
}
Unexecuted instantiation: idna::uts46::in_inclusive_range_char
Unexecuted instantiation: idna::uts46::in_inclusive_range_char
141
142
#[inline(always)]
143
52.0k
fn is_passthrough_ascii_label(label: &[u8]) -> bool {
144
52.0k
    // XXX if we aren't performing _CheckHyphens_, this could
145
52.0k
    // check for "xn--" and pass through YouTube CDN node names.
146
52.0k
    if label.len() >= 4 && label[2] == b'-' && label[3] == b'-' {
147
9.43k
        return false;
148
42.6k
    }
149
42.6k
    if let Some((&first, tail)) = label.split_first() {
150
        // We need to check the first and last character
151
        // more strictly in case this turns out to be a
152
        // label in a bidi domain name. This has the side
153
        // effect that this function only accepts labels
154
        // that also conform to the STD3 rules.
155
        //
156
        // XXX: If we are in the fail-fast mode (i.e. we don't need
157
        // to be able to overwrite anything with U+FFFD), we could
158
        // merely record that we've seen a digit here and error out
159
        // if we later discover that the domain name is a bidi
160
        // domain name.
161
42.6k
        if !in_inclusive_range8(first, b'a', b'z') {
162
6.83k
            return false;
163
35.8k
        }
164
519k
        for &b in tail {
165
            // If we used LDH_MASK, we'd have to check
166
            // the bytes for the ASCII range anyhow.
167
483k
            if in_inclusive_range8(b, b'a', b'z') {
168
301k
                continue;
169
181k
            }
170
181k
            if in_inclusive_range8(b, b'0', b'9') {
171
141k
                continue;
172
40.8k
            }
173
40.8k
            if b == b'-' {
174
40.8k
                continue;
175
66
            }
176
66
            return false;
177
        }
178
35.7k
        label.last() != Some(&b'-')
179
    } else {
180
        // empty
181
0
        true
182
    }
183
52.0k
}
idna::uts46::is_passthrough_ascii_label
Line
Count
Source
143
52.0k
fn is_passthrough_ascii_label(label: &[u8]) -> bool {
144
52.0k
    // XXX if we aren't performing _CheckHyphens_, this could
145
52.0k
    // check for "xn--" and pass through YouTube CDN node names.
146
52.0k
    if label.len() >= 4 && label[2] == b'-' && label[3] == b'-' {
147
9.43k
        return false;
148
42.6k
    }
149
42.6k
    if let Some((&first, tail)) = label.split_first() {
150
        // We need to check the first and last character
151
        // more strictly in case this turns out to be a
152
        // label in a bidi domain name. This has the side
153
        // effect that this function only accepts labels
154
        // that also conform to the STD3 rules.
155
        //
156
        // XXX: If we are in the fail-fast mode (i.e. we don't need
157
        // to be able to overwrite anything with U+FFFD), we could
158
        // merely record that we've seen a digit here and error out
159
        // if we later discover that the domain name is a bidi
160
        // domain name.
161
42.6k
        if !in_inclusive_range8(first, b'a', b'z') {
162
6.83k
            return false;
163
35.8k
        }
164
519k
        for &b in tail {
165
            // If we used LDH_MASK, we'd have to check
166
            // the bytes for the ASCII range anyhow.
167
483k
            if in_inclusive_range8(b, b'a', b'z') {
168
301k
                continue;
169
181k
            }
170
181k
            if in_inclusive_range8(b, b'0', b'9') {
171
141k
                continue;
172
40.8k
            }
173
40.8k
            if b == b'-' {
174
40.8k
                continue;
175
66
            }
176
66
            return false;
177
        }
178
35.7k
        label.last() != Some(&b'-')
179
    } else {
180
        // empty
181
0
        true
182
    }
183
52.0k
}
Unexecuted instantiation: idna::uts46::is_passthrough_ascii_label
Unexecuted instantiation: idna::uts46::is_passthrough_ascii_label
184
185
#[inline(always)]
186
16.3k
fn split_ascii_fast_path_prefix(label: &[u8]) -> (&[u8], &[u8]) {
187
215k
    if let Some(pos) = label.iter().position(|b| !b.is_ascii()) {
idna::uts46::split_ascii_fast_path_prefix::{closure#0}
Line
Count
Source
187
215k
    if let Some(pos) = label.iter().position(|b| !b.is_ascii()) {
Unexecuted instantiation: idna::uts46::split_ascii_fast_path_prefix::{closure#0}
Unexecuted instantiation: idna::uts46::split_ascii_fast_path_prefix::{closure#0}
188
0
        if pos == 0 {
189
            // First is non-ASCII
190
0
            (&[], label)
191
        } else {
192
            // Leave one ASCII character in the suffix
193
            // in case it's a letter that a combining
194
            // character combines with.
195
0
            let (head, tail) = label.split_at(pos - 1);
196
0
            (head, tail)
197
        }
198
    } else {
199
        // All ASCII
200
16.3k
        (label, &[])
201
    }
202
16.3k
}
idna::uts46::split_ascii_fast_path_prefix
Line
Count
Source
186
16.3k
fn split_ascii_fast_path_prefix(label: &[u8]) -> (&[u8], &[u8]) {
187
16.3k
    if let Some(pos) = label.iter().position(|b| !b.is_ascii()) {
188
0
        if pos == 0 {
189
            // First is non-ASCII
190
0
            (&[], label)
191
        } else {
192
            // Leave one ASCII character in the suffix
193
            // in case it's a letter that a combining
194
            // character combines with.
195
0
            let (head, tail) = label.split_at(pos - 1);
196
0
            (head, tail)
197
        }
198
    } else {
199
        // All ASCII
200
16.3k
        (label, &[])
201
    }
202
16.3k
}
Unexecuted instantiation: idna::uts46::split_ascii_fast_path_prefix
Unexecuted instantiation: idna::uts46::split_ascii_fast_path_prefix
203
204
// Input known to be lower-case, but may contain non-ASCII.
205
#[inline(always)]
206
73.3k
fn apply_ascii_deny_list_to_lower_cased_unicode(c: char, deny_list: u128) -> char {
207
73.3k
    if let Some(shifted) = 1u128.checked_shl(u32::from(c)) {
208
6.99k
        if (deny_list & shifted) == 0 {
209
6.93k
            c
210
        } else {
211
60
            '\u{FFFD}'
212
        }
213
    } else {
214
66.3k
        c
215
    }
216
73.3k
}
idna::uts46::apply_ascii_deny_list_to_lower_cased_unicode
Line
Count
Source
206
73.3k
fn apply_ascii_deny_list_to_lower_cased_unicode(c: char, deny_list: u128) -> char {
207
73.3k
    if let Some(shifted) = 1u128.checked_shl(u32::from(c)) {
208
6.99k
        if (deny_list & shifted) == 0 {
209
6.93k
            c
210
        } else {
211
60
            '\u{FFFD}'
212
        }
213
    } else {
214
66.3k
        c
215
    }
216
73.3k
}
Unexecuted instantiation: idna::uts46::apply_ascii_deny_list_to_lower_cased_unicode
Unexecuted instantiation: idna::uts46::apply_ascii_deny_list_to_lower_cased_unicode
217
218
// Input known to be ASCII, but may contain upper case ASCII.
219
#[inline(always)]
220
52.4k
fn apply_ascii_deny_list_to_potentially_upper_case_ascii(b: u8, deny_list: u128) -> char {
221
52.4k
    if (deny_list & (1u128 << b)) == 0 {
222
52.3k
        return char::from(b);
223
84
    }
224
84
    if in_inclusive_range8(b, b'A', b'Z') {
225
0
        return char::from(b + 0x20);
226
84
    }
227
84
    '\u{FFFD}'
228
52.4k
}
idna::uts46::apply_ascii_deny_list_to_potentially_upper_case_ascii
Line
Count
Source
220
52.4k
fn apply_ascii_deny_list_to_potentially_upper_case_ascii(b: u8, deny_list: u128) -> char {
221
52.4k
    if (deny_list & (1u128 << b)) == 0 {
222
52.3k
        return char::from(b);
223
84
    }
224
84
    if in_inclusive_range8(b, b'A', b'Z') {
225
0
        return char::from(b + 0x20);
226
84
    }
227
84
    '\u{FFFD}'
228
52.4k
}
Unexecuted instantiation: idna::uts46::apply_ascii_deny_list_to_potentially_upper_case_ascii
Unexecuted instantiation: idna::uts46::apply_ascii_deny_list_to_potentially_upper_case_ascii
229
230
#[inline(always)]
231
10.2k
fn is_ascii(label: &[char]) -> bool {
232
14.8k
    for c in label.iter() {
233
14.8k
        if !c.is_ascii() {
234
10.2k
            return false;
235
4.58k
        }
236
    }
237
0
    true
238
10.2k
}
idna::uts46::is_ascii
Line
Count
Source
231
10.2k
fn is_ascii(label: &[char]) -> bool {
232
14.8k
    for c in label.iter() {
233
14.8k
        if !c.is_ascii() {
234
10.2k
            return false;
235
4.58k
        }
236
    }
237
0
    true
238
10.2k
}
Unexecuted instantiation: idna::uts46::is_ascii
Unexecuted instantiation: idna::uts46::is_ascii
239
240
#[derive(PartialEq, Eq, Copy, Clone)]
241
enum PunycodeClassification {
242
    Ascii,
243
    Unicode,
244
    Error,
245
}
246
247
#[inline(always)]
248
0
fn classify_for_punycode(label: &[char]) -> PunycodeClassification {
249
0
    let mut iter = label.iter().copied();
250
    loop {
251
0
        if let Some(c) = iter.next() {
252
0
            if c.is_ascii() {
253
0
                continue;
254
0
            }
255
0
            if c == '\u{FFFD}' {
256
0
                return PunycodeClassification::Error;
257
0
            }
258
0
            for c in iter {
259
0
                if c == '\u{FFFD}' {
260
0
                    return PunycodeClassification::Error;
261
0
                }
262
            }
263
0
            return PunycodeClassification::Unicode;
264
0
        }
265
0
        return PunycodeClassification::Ascii;
266
    }
267
0
}
Unexecuted instantiation: idna::uts46::classify_for_punycode
Unexecuted instantiation: idna::uts46::classify_for_punycode
Unexecuted instantiation: idna::uts46::classify_for_punycode
268
269
/// The ASCII deny list to be applied.
270
#[derive(PartialEq, Eq, Copy, Clone)]
271
#[repr(transparent)]
272
pub struct AsciiDenyList {
273
    bits: u128,
274
}
275
276
impl AsciiDenyList {
277
    /// Computes (preferably at compile time) an ASCII deny list.
278
    ///
279
    /// Setting `deny_glyphless` to `true` denies U+0020 SPACE and below
280
    /// as well as U+007F DELETE for convenience without having to list
281
    /// these characters in the `deny_list` string.
282
    ///
283
    /// `deny_list` is the list of ASCII characters to deny. This
284
    /// list must not contain any of:
285
    /// * Letters
286
    /// * Digits
287
    /// * Hyphen
288
    /// * Dot (period / full-stop)
289
    /// * Non-ASCII
290
    ///
291
    /// # Panics
292
    ///
293
    /// If the deny list contains characters listed as prohibited above.
294
0
    pub const fn new(deny_glyphless: bool, deny_list: &str) -> Self {
295
0
        let mut bits = UPPER_CASE_MASK;
296
0
        if deny_glyphless {
297
0
            bits |= GLYPHLESS_MASK;
298
0
        }
299
0
        let mut i = 0;
300
0
        let bytes = deny_list.as_bytes();
301
0
        while i < bytes.len() {
302
0
            let b = bytes[i];
303
0
            assert!(b < 0x80, "ASCII deny list must be ASCII.");
304
            // assert_ne not yet available in const context.
305
0
            assert!(b != b'.', "ASCII deny list must not contain the dot.");
306
0
            assert!(b != b'-', "ASCII deny list must not contain the hyphen.");
307
0
            assert!(
308
0
                !((b >= b'0') && (b <= b'9')),
309
                "ASCII deny list must not contain digits."
310
            );
311
0
            assert!(
312
0
                !((b >= b'a') && (b <= b'z')),
313
                "ASCII deny list must not contain letters."
314
            );
315
0
            assert!(
316
0
                !((b >= b'A') && (b <= b'Z')),
317
                "ASCII deny list must not contain letters."
318
            );
319
0
            bits |= 1u128 << b;
320
0
            i += 1;
321
        }
322
0
        AsciiDenyList { bits }
323
0
    }
Unexecuted instantiation: <idna::uts46::AsciiDenyList>::new
Unexecuted instantiation: <idna::uts46::AsciiDenyList>::new
Unexecuted instantiation: <idna::uts46::AsciiDenyList>::new
324
325
    /// No ASCII deny list. This corresponds to _UseSTD3ASCIIRules=false_.
326
    ///
327
    /// Equivalent to `AsciiDenyList::new(false, "")`.
328
    ///
329
    /// Note: Not denying the space and control characters can result in
330
    /// strange behavior. Without a deny list provided to the UTS 46
331
    /// operation, the caller is expected to perform filtering afterwards,
332
    /// but it's more efficient to use `AsciiDenyList` than post-processing,
333
    /// because the internals of this crate can optimize away checks in
334
    /// certain cases.
335
    pub const EMPTY: AsciiDenyList = AsciiDenyList::new(false, "");
336
337
    /// The STD3 deny list. This corresponds to _UseSTD3ASCIIRules=true_.
338
    ///
339
    /// Note that this deny list rejects the underscore, which occurs in
340
    /// pseudo-hosts used by various TXT record-based protocols, and also
341
    /// characters that may occur in non-DNS naming, such as NetBIOS.
342
    pub const STD3: AsciiDenyList = AsciiDenyList { bits: ldh_mask() };
343
344
    /// [Forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point) from the WHATWG URL Standard.
345
    ///
346
    /// Equivalent to `AsciiDenyList::new(true, "%#/:<>?@[\\]^|")`.
347
    ///
348
    /// Note that this deny list rejects IPv6 addresses, so (as in URL
349
    /// parsing) you need to check for IPv6 addresses first and not
350
    /// put them through UTS 46 processing.
351
    pub const URL: AsciiDenyList = AsciiDenyList::new(true, "%#/:<>?@[\\]^|");
352
}
353
354
/// The _CheckHyphens_ mode.
355
#[derive(PartialEq, Eq, Copy, Clone)]
356
#[non_exhaustive] // non_exhaustive in case a middle mode that prohibits only first and last position needs to be added
357
pub enum Hyphens {
358
    /// _CheckHyphens=false_: Do not place positional restrictions on hyphens.
359
    ///
360
    /// This mode is used by the WHATWG URL Standard for normal User Agent processing
361
    /// (i.e. not conformance checking).
362
    Allow,
363
364
    /// Prohibit hyphens in the first and last position in the label but allow in
365
    /// the third and fourth position.
366
    ///
367
    /// Note that this mode rejects real-world names, including some GitHub user pages.
368
    CheckFirstLast,
369
370
    /// _CheckHyphens=true_: Prohibit hyphens in the first, third, fourth,
371
    /// and last position in the label.
372
    ///
373
    /// Note that this mode rejects real-world names, including YouTube CDN nodes
374
    /// and some GitHub user pages.
375
    Check,
376
}
377
378
/// The UTS 46 _VerifyDNSLength_ flag.
379
#[derive(PartialEq, Eq, Copy, Clone)]
380
#[non_exhaustive]
381
pub enum DnsLength {
382
    /// _VerifyDNSLength=false_. (Possibly relevant for allowing non-DNS naming systems.)
383
    Ignore,
384
    /// _VerifyDNSLength=true_ with the exception that the trailing root label dot is
385
    /// allowed.
386
    VerifyAllowRootDot,
387
    /// _VerifyDNSLength=true_. (The trailing root label dot is not allowed.)
388
    Verify,
389
}
390
391
/// Policy for customizing behavior in case of an error.
392
#[derive(PartialEq, Eq, Copy, Clone)]
393
#[non_exhaustive]
394
pub enum ErrorPolicy {
395
    /// Return as early as possible without producing output in case of error.
396
    FailFast,
397
    /// In case of error, mark errors with the REPLACEMENT CHARACTER. (The output
398
    /// containing REPLACEMENT CHARACTERs may be shown to the user to illustrate
399
    /// what was wrong but must not be used for naming in a network protocol.)
400
    MarkErrors,
401
}
402
403
/// The success outcome of [`Uts46::process`]
404
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
405
pub enum ProcessingSuccess {
406
    /// There were no errors. The caller must consider the input to be the output.
407
    ///
408
    /// This asserts that the input can be safely passed to [`core::str::from_utf8_unchecked`].
409
    ///
410
    /// (Distinct from `WroteToSink` in order to allow `Cow` behavior to be implemented on top of
411
    /// [`Uts46::process`].)
412
    Passthrough,
413
414
    /// There were no errors. The caller must consider what was written to the sink to be the output.
415
    ///
416
    /// (Distinct from `Passthrough` in order to allow `Cow` behavior to be implemented on top of
417
    /// [`Uts46::process`].)
418
    WroteToSink,
419
}
420
421
/// The failure outcome of [`Uts46::process`]
422
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
423
pub enum ProcessingError {
424
    /// There was a validity error according to the chosen options.
425
    ///
426
    /// In case of `Operation::ToAscii`, there is no output. Otherwise, output was written to the
427
    /// sink and the output contains at least one U+FFFD REPLACEMENT CHARACTER to denote an error.
428
    ValidityError,
429
430
    /// The sink emitted [`core::fmt::Error`]. The partial output written to the sink must not
431
    /// be used.
432
    SinkError,
433
}
434
435
impl From<core::fmt::Error> for ProcessingError {
436
0
    fn from(_: core::fmt::Error) -> Self {
437
0
        ProcessingError::SinkError
438
0
    }
Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<core::fmt::Error>>::from
Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<core::fmt::Error>>::from
Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<core::fmt::Error>>::from
439
}
440
441
impl From<crate::punycode::PunycodeEncodeError> for ProcessingError {
442
0
    fn from(_: crate::punycode::PunycodeEncodeError) -> Self {
443
0
        unreachable!(
444
0
            "Punycode overflows should not be possible due to PUNYCODE_ENCODE_MAX_INPUT_LENGTH"
445
0
        );
Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<idna::punycode::PunycodeEncodeError>>::from
Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<idna::punycode::PunycodeEncodeError>>::from
Unexecuted instantiation: <idna::uts46::ProcessingError as core::convert::From<idna::punycode::PunycodeEncodeError>>::from
446
    }
447
}
448
449
#[derive(Debug, Clone, Copy)]
450
enum AlreadyAsciiLabel<'a> {
451
    MixedCaseAscii(&'a [u8]),
452
    MixedCasePunycode(&'a [u8]),
453
    Other,
454
}
455
456
/// Performs the _VerifyDNSLength_ check on the output of the _ToASCII_ operation.
457
///
458
/// If the second argument is `true`, the trailing root label dot is allowed; if it is `false`, a trailing dot makes the check fail.
459
///
460
/// # Panics
461
///
462
/// Panics in debug mode if `domain_name` isn't ASCII.
463
0
pub fn verify_dns_length(domain_name: &str, allow_trailing_dot: bool) -> bool {
464
0
    let bytes = domain_name.as_bytes();
465
0
    debug_assert!(bytes.is_ascii());
466
0
    let domain_name_without_trailing_dot = if let Some(without) = bytes.strip_suffix(b".") {
467
0
        if !allow_trailing_dot {
468
0
            return false;
469
0
        }
470
0
        without
471
    } else {
472
0
        bytes
473
    };
474
0
    if domain_name_without_trailing_dot.len() > 253 {
475
0
        return false;
476
0
    }
477
0
    for label in domain_name_without_trailing_dot.split(|b| *b == b'.') {
Unexecuted instantiation: idna::uts46::verify_dns_length::{closure#0}
Unexecuted instantiation: idna::uts46::verify_dns_length::{closure#0}
Unexecuted instantiation: idna::uts46::verify_dns_length::{closure#0}
478
0
        if label.is_empty() {
479
0
            return false;
480
0
        }
481
0
        if label.len() > 63 {
482
0
            return false;
483
0
        }
484
    }
485
0
    true
486
0
}
Unexecuted instantiation: idna::uts46::verify_dns_length
Unexecuted instantiation: idna::uts46::verify_dns_length
Unexecuted instantiation: idna::uts46::verify_dns_length
487
488
/// An implementation of UTS #46.
489
pub struct Uts46 {
490
    data: idna_adapter::Adapter,
491
}
492
493
#[cfg(feature = "compiled_data")]
494
impl Default for Uts46 {
495
0
    fn default() -> Self {
496
0
        Self::new()
497
0
    }
Unexecuted instantiation: <idna::uts46::Uts46 as core::default::Default>::default
Unexecuted instantiation: <idna::uts46::Uts46 as core::default::Default>::default
Unexecuted instantiation: <idna::uts46::Uts46 as core::default::Default>::default
498
}
499
500
impl Uts46 {
501
    /// Constructor using data compiled into the binary.
502
    #[cfg(feature = "compiled_data")]
503
90.0k
    pub const fn new() -> Self {
504
90.0k
        Self {
505
90.0k
            data: idna_adapter::Adapter::new(),
506
90.0k
        }
507
90.0k
    }
<idna::uts46::Uts46>::new
Line
Count
Source
503
90.0k
    pub const fn new() -> Self {
504
90.0k
        Self {
505
90.0k
            data: idna_adapter::Adapter::new(),
506
90.0k
        }
507
90.0k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::new
Unexecuted instantiation: <idna::uts46::Uts46>::new
508
509
    // XXX Should there be an `icu_provider` feature for enabling
510
    // a constructor for run-time data loading?
511
512
    /// Performs the [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) operation
513
    /// from UTS #46 with the options indicated.
514
    ///
515
    /// # Arguments
516
    ///
517
    /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
518
    ///   this method and input that is not well-formed UTF-8 is treated as an error. If you
519
    ///   already have a `&str`, call `.as_bytes()` on it.)
520
    /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
521
    ///   _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
522
    ///   processing is handled via this argument. Most callers are probably best off
523
    ///   by using [`AsciiDenyList::URL`] here.
524
    /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably best
525
    ///   off by using [`Hyphens::Allow`] here.
526
    /// * `dns_length` - The UTS 46 _VerifyDNSLength_ flag.
527
90.0k
    pub fn to_ascii<'a>(
528
90.0k
        &self,
529
90.0k
        domain_name: &'a [u8],
530
90.0k
        ascii_deny_list: AsciiDenyList,
531
90.0k
        hyphens: Hyphens,
532
90.0k
        dns_length: DnsLength,
533
90.0k
    ) -> Result<Cow<'a, str>, crate::Errors> {
534
90.0k
        let mut s = String::new();
535
90.0k
        match self.process(
536
90.0k
            domain_name,
537
90.0k
            ascii_deny_list,
538
90.0k
            hyphens,
539
90.0k
            ErrorPolicy::FailFast,
540
90.0k
            |_, _, _| false,
<idna::uts46::Uts46>::to_ascii::{closure#0}
Line
Count
Source
540
5.06k
            |_, _, _| false,
Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii::{closure#0}
541
90.0k
            &mut s,
542
90.0k
            None,
543
90.0k
        ) {
544
            // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
545
            Ok(ProcessingSuccess::Passthrough) => {
546
85.6k
                let cow = Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) });
547
85.6k
                if dns_length != DnsLength::Ignore
548
0
                    && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
549
                {
550
0
                    Err(crate::Errors::default())
551
                } else {
552
85.6k
                    Ok(cow)
553
                }
554
            }
555
            Ok(ProcessingSuccess::WroteToSink) => {
556
0
                let cow: Cow<'_, str> = Cow::Owned(s);
557
0
                if dns_length != DnsLength::Ignore
558
0
                    && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
559
                {
560
0
                    Err(crate::Errors::default())
561
                } else {
562
0
                    Ok(cow)
563
                }
564
            }
565
4.37k
            Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
566
0
            Err(ProcessingError::SinkError) => unreachable!(),
567
        }
568
90.0k
    }
<idna::uts46::Uts46>::to_ascii
Line
Count
Source
527
90.0k
    pub fn to_ascii<'a>(
528
90.0k
        &self,
529
90.0k
        domain_name: &'a [u8],
530
90.0k
        ascii_deny_list: AsciiDenyList,
531
90.0k
        hyphens: Hyphens,
532
90.0k
        dns_length: DnsLength,
533
90.0k
    ) -> Result<Cow<'a, str>, crate::Errors> {
534
90.0k
        let mut s = String::new();
535
90.0k
        match self.process(
536
90.0k
            domain_name,
537
90.0k
            ascii_deny_list,
538
90.0k
            hyphens,
539
90.0k
            ErrorPolicy::FailFast,
540
90.0k
            |_, _, _| false,
541
90.0k
            &mut s,
542
90.0k
            None,
543
90.0k
        ) {
544
            // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
545
            Ok(ProcessingSuccess::Passthrough) => {
546
85.6k
                let cow = Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) });
547
85.6k
                if dns_length != DnsLength::Ignore
548
0
                    && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
549
                {
550
0
                    Err(crate::Errors::default())
551
                } else {
552
85.6k
                    Ok(cow)
553
                }
554
            }
555
            Ok(ProcessingSuccess::WroteToSink) => {
556
0
                let cow: Cow<'_, str> = Cow::Owned(s);
557
0
                if dns_length != DnsLength::Ignore
558
0
                    && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
559
                {
560
0
                    Err(crate::Errors::default())
561
                } else {
562
0
                    Ok(cow)
563
                }
564
            }
565
4.37k
            Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
566
0
            Err(ProcessingError::SinkError) => unreachable!(),
567
        }
568
90.0k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii
Unexecuted instantiation: <idna::uts46::Uts46>::to_ascii
569
570
    /// Performs the [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode) operation
571
    /// from UTS #46 according to the options given. When there
572
    /// are errors, there is still output, which may be rendered to the user, even though
573
    /// the output must not be used in networking protocols. Errors are denoted
574
    /// by U+FFFD REPLACEMENT CHARACTERs in the output. (That is, if the second item of the
575
    /// return tuple is `Err`, the first item of the return tuple is guaranteed to contain
576
    /// at least one U+FFFD.)
577
    ///
578
    /// Most applications probably shouldn't use this method and should be using
579
    /// [`Uts46::to_user_interface`] instead.
580
    ///
581
    /// # Arguments
582
    ///
583
    /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
584
    ///   this method and input that is not well-formed UTF-8 is treated as an error. If you
585
    ///   already have a `&str`, call `.as_bytes()` on it.)
586
    /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
587
    ///   _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
588
    ///   processing is handled via this argument. Most callers are probably best off
589
    ///   by using [`AsciiDenyList::URL`] here.
590
    /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably best
591
    ///   off by using [`Hyphens::Allow`] here.
592
0
    pub fn to_unicode<'a>(
593
0
        &self,
594
0
        domain_name: &'a [u8],
595
0
        ascii_deny_list: AsciiDenyList,
596
0
        hyphens: Hyphens,
597
0
    ) -> (Cow<'a, str>, Result<(), crate::Errors>) {
598
0
        self.to_user_interface(domain_name, ascii_deny_list, hyphens, |_, _, _| true)
Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode::{closure#0}
599
0
    }
Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode
Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode
Unexecuted instantiation: <idna::uts46::Uts46>::to_unicode
600
601
    /// Performs the [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode) operation
602
    /// from UTS #46 according to options given with some
603
    /// error-free Unicode labels output according to
604
    /// [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) instead as decided by
605
    /// application policy implemented via the `output_as_unicode` closure. The purpose
606
    /// is to convert user-visible domains to the Unicode form in general but to render
607
    /// potentially misleading labels as Punycode.
608
    ///
609
    /// This is an imperfect security mechanism, because [the Punycode form itself may
610
    /// resemble a user-recognizable name](https://www.unicode.org/reports/tr36/#TablePunycodeSpoofing).
611
    /// However, since this mechanism is common practice, this API provides support for
612
    /// the mechanism.
613
    ///
614
    /// ASCII labels always pass through as ASCII and labels with errors always pass through
615
    /// as Unicode. For non-erroneous labels that contain at least one non-ASCII character
616
    /// (implies non-empty), `output_as_unicode` is called with the Unicode form of the label,
617
    /// the TLD (potentially empty), and a flag indicating whether the domain name as a whole
618
    /// is a bidi domain name. If the return value is `true`, the label passes through as
619
    /// Unicode. If the return value is `false`, the label is converted to Punycode.
620
    ///
621
    /// When there are errors, there is still output, which may be rendered to the user, even though
622
    /// the output must not be used in networking protocols. Errors are denoted by
623
    /// U+FFFD REPLACEMENT CHARACTERs in the output. (That is, if the second item
624
    /// of the return tuple is `Err`, the first item of the return tuple is guaranteed to contain
625
    /// at least one U+FFFD.) Labels that contain errors are not converted to Punycode.
626
    ///
627
    /// # Arguments
628
    ///
629
    /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
630
    ///   this method and input that is not well-formed UTF-8 is treated as an error. If you
631
    ///   already have a `&str`, call `.as_bytes()` on it.)
632
    /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
633
    ///   _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
634
    ///   processing is handled via this argument. Most callers are probably best off
635
    ///   by using [`AsciiDenyList::URL`] here.
636
    /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably best
637
    ///   off by using [`Hyphens::Allow`] here.
638
    /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode
639
    ///   (as opposed to Punycode). The first argument is the label for which a decision is
640
    ///   needed (always non-empty slice). The second argument is the TLD (potentially empty).
641
    ///   The third argument is `true` iff the domain name as a whole is a bidi domain name.
642
    ///   Only non-erroneous labels that contain at least one non-ASCII character are passed
643
    ///   to the closure as the first argument. The second and third argument values are
644
    ///   guaranteed to remain the same during a single call to `process`, and the closure
645
    ///   may cache computations derived from the second and third argument (hence the
646
    ///   `FnMut` type).
647
0
    pub fn to_user_interface<'a, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>(
648
0
        &self,
649
0
        domain_name: &'a [u8],
650
0
        ascii_deny_list: AsciiDenyList,
651
0
        hyphens: Hyphens,
652
0
        output_as_unicode: OutputUnicode,
653
0
    ) -> (Cow<'a, str>, Result<(), crate::Errors>) {
654
0
        let mut s = String::new();
655
0
        match self.process(
656
0
            domain_name,
657
0
            ascii_deny_list,
658
0
            hyphens,
659
0
            ErrorPolicy::MarkErrors,
660
0
            output_as_unicode,
661
0
            &mut s,
662
0
            None,
663
0
        ) {
664
            // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
665
0
            Ok(ProcessingSuccess::Passthrough) => (
666
0
                Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) }),
667
0
                Ok(()),
668
0
            ),
669
0
            Ok(ProcessingSuccess::WroteToSink) => (Cow::Owned(s), Ok(())),
670
0
            Err(ProcessingError::ValidityError) => (Cow::Owned(s), Err(crate::Errors::default())),
671
0
            Err(ProcessingError::SinkError) => unreachable!(),
672
        }
673
0
    }
Unexecuted instantiation: <idna::uts46::Uts46>::to_user_interface::<<idna::uts46::Uts46>::to_unicode::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::to_user_interface::<<idna::uts46::Uts46>::to_unicode::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::to_user_interface::<<idna::uts46::Uts46>::to_unicode::{closure#0}>
674
675
    /// The lower-level function that [`Uts46::to_ascii`], [`Uts46::to_unicode`], and
676
    /// [`Uts46::to_user_interface`] are built on to allow support for output types other
677
    /// than `Cow<'a, str>` (e.g. string types in a non-Rust programming language).
678
    ///
679
    /// # Arguments
680
    ///
681
    /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
682
    ///   this method and input that is not well-formed UTF-8 is treated as an error. If you
683
    ///   already have a `&str`, call `.as_bytes()` on it.)
684
    /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
685
    ///   _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
686
    ///   processing is handled via this argument. Most callers are probably best off
687
    ///   by using [`AsciiDenyList::URL`] here.
688
    /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably best
689
    ///   off by using [`Hyphens::Allow`] here.
690
    /// * `error_policy` - Whether to fail fast or to produce output that may be rendered
691
    ///   for the user to examine in case of errors.
692
    /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode
693
    ///   (as opposed to Punycode). The first argument is the label for which a decision is
694
    ///   needed (always non-empty slice). The second argument is the TLD (potentially empty).
695
    ///   The third argument is `true` iff the domain name as a whole is a bidi domain name.
696
    ///   Only non-erroneous labels that contain at least one non-ASCII character are passed
697
    ///   to the closure as the first argument. The second and third argument values are
698
    ///   guaranteed to remain the same during a single call to `process`, and the closure
699
    ///   may cache computations derived from the second and third argument (hence the
700
    ///   `FnMut` type). To perform the _ToASCII_ operation, `|_, _, _| false` must be
701
    ///   passed as the closure. To perform the _ToUnicode_ operation, `|_, _, _| true` must
702
    ///   be passed as the closure. A more complex closure may be used to prepare a domain
703
    ///   name for display in a user interface so that labels are converted to the Unicode
704
    ///   form in general but potentially misleading labels are converted to the Punycode
705
    ///   form.
706
    /// * `sink` - The object that receives the output (in the non-passthrough case).
707
    /// * `ascii_sink` - A second sink that receives the _ToASCII_ form only if there
708
    ///   were no errors and `sink` received at least one character of non-ASCII output.
709
    ///   The purpose of this argument is to enable a user interface display form of the
710
    ///   domain and the _ToASCII_ form of the domain to be computed efficiently together.
711
    ///   This argument is useless when `output_as_unicode` always returns `false`, in
712
    ///   which case the _ToASCII_ form ends up in `sink` already. If `ascii_sink` receives
713
    ///   no output and the return value is `Ok(ProcessingSuccess::WroteToSink)`, use the
714
    ///   output received by `sink` also as the _ToASCII_ result.
715
    ///
716
    /// # Return value
717
    ///
718
    /// * `Ok(ProcessingSuccess::Passthrough)` - The caller must treat
719
    ///   `unsafe { core::str::from_utf8_unchecked(domain_name) }` as the output. (This
720
    ///   return value asserts that calling `core::str::from_utf8_unchecked(domain_name)`
721
    ///   is safe.)
722
    /// * `Ok(ProcessingSuccess::WroteToSink)` - The caller must treat what was written
723
    ///   to `sink` as the output. If another sink was passed as `ascii_sink` but it did
724
    ///   not receive output, the caller must treat what was written to `sink` also as
725
    ///   the _ToASCII_ output. Otherwise, if `ascii_sink` received output, the caller
726
    ///   must treat what was written to `ascii_sink` as the _ToASCII_ output.
727
    /// * `Err(ProcessingError::ValidityError)` - The input was in error and must
728
    ///   not be used for DNS lookup or otherwise in a network protocol. If `error_policy`
729
    ///   was `ErrorPolicy::MarkErrors`, the output written to `sink` may be displayed
730
    ///   to the user as an illustration of where the error was or the errors were.
731
    /// * `Err(ProcessingError::SinkError)` - Either `sink` or `ascii_sink` returned
732
    ///   [`core::fmt::Error`]. The partial output written to `sink` or `ascii_sink` must not
733
    ///   be used. If `W` never returns [`core::fmt::Error`], this method never returns
734
    ///   `Err(ProcessingError::SinkError)`.
735
    ///
736
    /// # Safety-usable invariant
737
    ///
738
    /// If the return value is `Ok(ProcessingSuccess::Passthrough)`, `domain_name` is
739
    /// ASCII and `core::str::from_utf8_unchecked(domain_name)` is safe. (Note:
740
    /// Other return values do _not_ imply that `domain_name` wasn't ASCII!)
741
    ///
742
    /// # Security considerations
743
    ///
744
    /// Showing labels whose Unicode form might mislead the user as Punycode instead is
745
    /// an imperfect security mechanism, because [the Punycode form itself may resemble
746
    /// a user-recognizable name](https://www.unicode.org/reports/tr36/#TablePunycodeSpoofing).
747
    /// However, since this mechanism is common practice, this API provides support for
748
    /// the mechanism.
749
    ///
750
    /// Punycode processing is quadratic, so to avoid denial of service, this method imposes
751
    /// length limits on Punycode treating especially long inputs as being in error. These
752
    /// limits are well higher than the DNS length limits and are not more restrictive than
753
    /// the limits imposed by ICU4C.
754
    #[allow(clippy::too_many_arguments)]
755
90.0k
    pub fn process<W: Write + ?Sized, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>(
756
90.0k
        &self,
757
90.0k
        domain_name: &[u8],
758
90.0k
        ascii_deny_list: AsciiDenyList,
759
90.0k
        hyphens: Hyphens,
760
90.0k
        error_policy: ErrorPolicy,
761
90.0k
        mut output_as_unicode: OutputUnicode,
762
90.0k
        sink: &mut W,
763
90.0k
        ascii_sink: Option<&mut W>,
764
90.0k
    ) -> Result<ProcessingSuccess, ProcessingError> {
765
90.0k
        let fail_fast = error_policy == ErrorPolicy::FailFast;
766
90.0k
        let mut domain_buffer = SmallVec::<[char; 253]>::new();
767
90.0k
        let mut already_punycode = SmallVec::<[AlreadyAsciiLabel; 8]>::new();
768
90.0k
        // `process_inner` could be pasted inline here, but it's out of line in order
769
90.0k
        // to avoid duplicating that code when monomorphizing over `W` and `OutputUnicode`.
770
90.0k
        let (passthrough_up_to, is_bidi, had_errors) = self.process_inner(
771
90.0k
            domain_name,
772
90.0k
            ascii_deny_list,
773
90.0k
            hyphens,
774
90.0k
            fail_fast,
775
90.0k
            &mut domain_buffer,
776
90.0k
            &mut already_punycode,
777
90.0k
        );
778
90.0k
        if passthrough_up_to == domain_name.len() {
779
73.7k
            debug_assert!(!had_errors);
780
73.7k
            return Ok(ProcessingSuccess::Passthrough);
781
16.3k
        }
782
16.3k
        // Checked only after passthrough as a micro optimization.
783
16.3k
        if fail_fast && had_errors {
784
4.37k
            return Err(ProcessingError::ValidityError);
785
11.9k
        }
786
11.9k
        debug_assert_eq!(had_errors, domain_buffer.contains(&'\u{FFFD}'));
787
11.9k
        let without_dot = if let Some(without_dot) = domain_buffer.strip_suffix(&['.']) {
788
0
            without_dot
789
        } else {
790
11.9k
            &domain_buffer[..]
791
        };
792
        // unwrap is OK, because we always have at least one label
793
108k
        let tld = without_dot.rsplit(|c| *c == '.').next().unwrap();
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#0}
<idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#0}
Line
Count
Source
793
108k
        let tld = without_dot.rsplit(|c| *c == '.').next().unwrap();
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#0}
794
11.9k
        let mut had_unicode_output = false;
795
11.9k
        let mut seen_label = false;
796
11.9k
        let mut already_punycode_iter = already_punycode.iter();
797
11.9k
        let mut passthrough_up_to_extended = passthrough_up_to;
798
11.9k
        let mut flushed_prefix = false;
799
108k
        for label in domain_buffer.split(|c| *c == '.') {
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#1}
<idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#1}
Line
Count
Source
799
108k
        for label in domain_buffer.split(|c| *c == '.') {
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#1}
800
            // Unwrap is OK, because there are supposed to be as many items in
801
            // `already_punycode` as there are labels.
802
11.9k
            let input_punycode = *already_punycode_iter.next().unwrap();
803
11.9k
            if seen_label {
804
0
                if flushed_prefix {
805
0
                    sink.write_char('.')?;
806
                } else {
807
0
                    debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.');
808
0
                    passthrough_up_to_extended += 1;
809
0
                    if passthrough_up_to_extended == domain_name.len() {
810
0
                        debug_assert!(!had_errors);
811
0
                        return Ok(ProcessingSuccess::Passthrough);
812
0
                    }
813
                }
814
11.9k
            }
815
11.9k
            seen_label = true;
816
817
11.9k
            if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode {
818
0
                if let Some(first_upper_case) =
819
52.2k
                    mixed_case.iter().position(|c| c.is_ascii_uppercase())
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#2}
<idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#2}
Line
Count
Source
819
52.2k
                    mixed_case.iter().position(|c| c.is_ascii_uppercase())
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#2}
820
                {
821
0
                    let (head, tail) = mixed_case.split_at(first_upper_case);
822
0
                    let slice_to_write = if flushed_prefix {
823
0
                        head
824
                    } else {
825
0
                        flushed_prefix = true;
826
0
                        passthrough_up_to_extended += head.len();
827
0
                        debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
828
0
                        &domain_name[..passthrough_up_to_extended]
829
                    };
830
                    // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
831
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?;
832
0
                    for c in tail.iter() {
833
0
                        sink.write_char(char::from(c.to_ascii_lowercase()))?;
834
                    }
835
6.89k
                } else if flushed_prefix {
836
                    // SAFETY: `mixed_case` is known to be ASCII.
837
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
838
                } else {
839
6.89k
                    passthrough_up_to_extended += mixed_case.len();
840
6.89k
                    if passthrough_up_to_extended == domain_name.len() {
841
6.89k
                        debug_assert!(!had_errors);
842
6.89k
                        return Ok(ProcessingSuccess::Passthrough);
843
0
                    }
844
                }
845
0
                continue;
846
5.06k
            }
847
848
5.06k
            let potentially_punycode = if fail_fast {
849
5.06k
                debug_assert!(classify_for_punycode(label) != PunycodeClassification::Error);
850
5.06k
                !is_ascii(label)
851
            } else {
852
0
                classify_for_punycode(label) == PunycodeClassification::Unicode
853
            };
854
5.06k
            let passthrough = if potentially_punycode {
855
5.06k
                let unicode = output_as_unicode(label, tld, is_bidi);
856
5.06k
                had_unicode_output |= unicode;
857
5.06k
                unicode
858
            } else {
859
0
                true
860
            };
861
5.06k
            if passthrough {
862
0
                if !flushed_prefix {
863
0
                    flushed_prefix = true;
864
0
                    // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
865
0
                    sink.write_str(unsafe {
866
0
                        core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
867
0
                    })?;
868
0
                }
869
0
                for c in label.iter().copied() {
870
0
                    sink.write_char(c)?;
871
                }
872
5.06k
            } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode {
873
0
                if let Some(first_upper_case) =
874
92.5k
                    mixed_case.iter().position(|c| c.is_ascii_uppercase())
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#3}
<idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#3}
Line
Count
Source
874
92.5k
                    mixed_case.iter().position(|c| c.is_ascii_uppercase())
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#3}
875
                {
876
0
                    let (head, tail) = mixed_case.split_at(first_upper_case);
877
0
                    let slice_to_write = if flushed_prefix {
878
0
                        head
879
                    } else {
880
0
                        flushed_prefix = true;
881
0
                        passthrough_up_to_extended += head.len();
882
0
                        debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
883
0
                        &domain_name[..passthrough_up_to_extended]
884
                    };
885
                    // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
886
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?;
887
0
                    for c in tail.iter() {
888
0
                        sink.write_char(char::from(c.to_ascii_lowercase()))?;
889
                    }
890
5.06k
                } else if flushed_prefix {
891
                    // SAFETY: `mixed_case` is known to be ASCII.
892
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
893
                } else {
894
5.06k
                    passthrough_up_to_extended += mixed_case.len();
895
5.06k
                    if passthrough_up_to_extended == domain_name.len() {
896
5.06k
                        debug_assert!(!had_errors);
897
5.06k
                        return Ok(ProcessingSuccess::Passthrough);
898
0
                    }
899
                }
900
            } else {
901
0
                if !flushed_prefix {
902
0
                    flushed_prefix = true;
903
0
                    // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
904
0
                    sink.write_str(unsafe {
905
0
                        core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
906
0
                    })?;
907
0
                }
908
0
                write_punycode_label(label, sink)?;
909
            }
910
        }
911
912
0
        if had_errors {
913
0
            return Err(ProcessingError::ValidityError);
914
0
        }
915
0
916
0
        if had_unicode_output {
917
0
            if let Some(sink) = ascii_sink {
918
0
                let mut seen_label = false;
919
0
                let mut already_punycode_iter = already_punycode.iter();
920
0
                let mut passthrough_up_to_extended = passthrough_up_to;
921
0
                let mut flushed_prefix = false;
922
0
                for label in domain_buffer.split(|c| *c == '.') {
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#4}
923
                    // Unwrap is OK, because there are supposed to be as many items in
924
                    // `already_punycode` as there are labels.
925
0
                    let input_punycode = *already_punycode_iter.next().unwrap();
926
0
                    if seen_label {
927
0
                        if flushed_prefix {
928
0
                            sink.write_char('.')?;
929
                        } else {
930
0
                            debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.');
931
0
                            passthrough_up_to_extended += 1;
932
                        }
933
0
                    }
934
0
                    seen_label = true;
935
936
0
                    if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode {
937
0
                        if let Some(first_upper_case) =
938
0
                            mixed_case.iter().position(|c| c.is_ascii_uppercase())
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#5}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#5}
939
                        {
940
0
                            let (head, tail) = mixed_case.split_at(first_upper_case);
941
0
                            let slice_to_write = if flushed_prefix {
942
0
                                head
943
                            } else {
944
0
                                flushed_prefix = true;
945
0
                                passthrough_up_to_extended += head.len();
946
0
                                debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
947
0
                                &domain_name[..passthrough_up_to_extended]
948
                            };
949
                            // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
950
0
                            sink.write_str(unsafe {
951
0
                                core::str::from_utf8_unchecked(slice_to_write)
952
0
                            })?;
953
0
                            for c in tail.iter() {
954
0
                                sink.write_char(char::from(c.to_ascii_lowercase()))?;
955
                            }
956
0
                        } else if flushed_prefix {
957
                            // SAFETY: `mixed_case` is known to be ASCII.
958
0
                            sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
959
0
                        } else {
960
0
                            passthrough_up_to_extended += mixed_case.len();
961
0
                        }
962
0
                        continue;
963
0
                    }
964
0
965
0
                    if is_ascii(label) {
966
0
                        if !flushed_prefix {
967
0
                            flushed_prefix = true;
968
0
                            // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
969
0
                            sink.write_str(unsafe {
970
0
                                core::str::from_utf8_unchecked(
971
0
                                    &domain_name[..passthrough_up_to_extended],
972
0
                                )
973
0
                            })?;
974
0
                        }
975
0
                        for c in label.iter().copied() {
976
0
                            sink.write_char(c)?;
977
                        }
978
0
                    } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode
979
                    {
980
0
                        if let Some(first_upper_case) =
981
0
                            mixed_case.iter().position(|c| c.is_ascii_uppercase())
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>::{closure#6}
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>::{closure#6}
982
                        {
983
0
                            let (head, tail) = mixed_case.split_at(first_upper_case);
984
0
                            let slice_to_write = if flushed_prefix {
985
0
                                head
986
                            } else {
987
0
                                flushed_prefix = true;
988
0
                                passthrough_up_to_extended += head.len();
989
0
                                debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
990
0
                                &domain_name[..passthrough_up_to_extended]
991
                            };
992
                            // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
993
0
                            sink.write_str(unsafe {
994
0
                                core::str::from_utf8_unchecked(slice_to_write)
995
0
                            })?;
996
0
                            for c in tail.iter() {
997
0
                                sink.write_char(char::from(c.to_ascii_lowercase()))?;
998
                            }
999
0
                        } else if flushed_prefix {
1000
                            // SAFETY: `mixed_case` is known to be ASCII.
1001
0
                            sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
1002
0
                        } else {
1003
0
                            passthrough_up_to_extended += mixed_case.len();
1004
0
                        }
1005
                    } else {
1006
0
                        if !flushed_prefix {
1007
0
                            flushed_prefix = true;
1008
0
                            // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
1009
0
                            sink.write_str(unsafe {
1010
0
                                core::str::from_utf8_unchecked(
1011
0
                                    &domain_name[..passthrough_up_to_extended],
1012
0
                                )
1013
0
                            })?;
1014
0
                        }
1015
0
                        write_punycode_label(label, sink)?;
1016
                    }
1017
                }
1018
0
                if !flushed_prefix {
1019
                    // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
1020
0
                    sink.write_str(unsafe {
1021
0
                        core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
1022
0
                    })?;
1023
0
                }
1024
0
            }
1025
0
        }
1026
0
        Ok(ProcessingSuccess::WroteToSink)
1027
90.0k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>
<idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>
Line
Count
Source
755
90.0k
    pub fn process<W: Write + ?Sized, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>(
756
90.0k
        &self,
757
90.0k
        domain_name: &[u8],
758
90.0k
        ascii_deny_list: AsciiDenyList,
759
90.0k
        hyphens: Hyphens,
760
90.0k
        error_policy: ErrorPolicy,
761
90.0k
        mut output_as_unicode: OutputUnicode,
762
90.0k
        sink: &mut W,
763
90.0k
        ascii_sink: Option<&mut W>,
764
90.0k
    ) -> Result<ProcessingSuccess, ProcessingError> {
765
90.0k
        let fail_fast = error_policy == ErrorPolicy::FailFast;
766
90.0k
        let mut domain_buffer = SmallVec::<[char; 253]>::new();
767
90.0k
        let mut already_punycode = SmallVec::<[AlreadyAsciiLabel; 8]>::new();
768
90.0k
        // `process_inner` could be pasted inline here, but it's out of line in order
769
90.0k
        // to avoid duplicating that code when monomorphizing over `W` and `OutputUnicode`.
770
90.0k
        let (passthrough_up_to, is_bidi, had_errors) = self.process_inner(
771
90.0k
            domain_name,
772
90.0k
            ascii_deny_list,
773
90.0k
            hyphens,
774
90.0k
            fail_fast,
775
90.0k
            &mut domain_buffer,
776
90.0k
            &mut already_punycode,
777
90.0k
        );
778
90.0k
        if passthrough_up_to == domain_name.len() {
779
73.7k
            debug_assert!(!had_errors);
780
73.7k
            return Ok(ProcessingSuccess::Passthrough);
781
16.3k
        }
782
16.3k
        // Checked only after passthrough as a micro optimization.
783
16.3k
        if fail_fast && had_errors {
784
4.37k
            return Err(ProcessingError::ValidityError);
785
11.9k
        }
786
11.9k
        debug_assert_eq!(had_errors, domain_buffer.contains(&'\u{FFFD}'));
787
11.9k
        let without_dot = if let Some(without_dot) = domain_buffer.strip_suffix(&['.']) {
788
0
            without_dot
789
        } else {
790
11.9k
            &domain_buffer[..]
791
        };
792
        // unwrap is OK, because we always have at least one label
793
11.9k
        let tld = without_dot.rsplit(|c| *c == '.').next().unwrap();
794
11.9k
        let mut had_unicode_output = false;
795
11.9k
        let mut seen_label = false;
796
11.9k
        let mut already_punycode_iter = already_punycode.iter();
797
11.9k
        let mut passthrough_up_to_extended = passthrough_up_to;
798
11.9k
        let mut flushed_prefix = false;
799
11.9k
        for label in domain_buffer.split(|c| *c == '.') {
800
            // Unwrap is OK, because there are supposed to be as many items in
801
            // `already_punycode` as there are labels.
802
11.9k
            let input_punycode = *already_punycode_iter.next().unwrap();
803
11.9k
            if seen_label {
804
0
                if flushed_prefix {
805
0
                    sink.write_char('.')?;
806
                } else {
807
0
                    debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.');
808
0
                    passthrough_up_to_extended += 1;
809
0
                    if passthrough_up_to_extended == domain_name.len() {
810
0
                        debug_assert!(!had_errors);
811
0
                        return Ok(ProcessingSuccess::Passthrough);
812
0
                    }
813
                }
814
11.9k
            }
815
11.9k
            seen_label = true;
816
817
11.9k
            if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode {
818
0
                if let Some(first_upper_case) =
819
6.89k
                    mixed_case.iter().position(|c| c.is_ascii_uppercase())
820
                {
821
0
                    let (head, tail) = mixed_case.split_at(first_upper_case);
822
0
                    let slice_to_write = if flushed_prefix {
823
0
                        head
824
                    } else {
825
0
                        flushed_prefix = true;
826
0
                        passthrough_up_to_extended += head.len();
827
0
                        debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
828
0
                        &domain_name[..passthrough_up_to_extended]
829
                    };
830
                    // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
831
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?;
832
0
                    for c in tail.iter() {
833
0
                        sink.write_char(char::from(c.to_ascii_lowercase()))?;
834
                    }
835
6.89k
                } else if flushed_prefix {
836
                    // SAFETY: `mixed_case` is known to be ASCII.
837
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
838
                } else {
839
6.89k
                    passthrough_up_to_extended += mixed_case.len();
840
6.89k
                    if passthrough_up_to_extended == domain_name.len() {
841
6.89k
                        debug_assert!(!had_errors);
842
6.89k
                        return Ok(ProcessingSuccess::Passthrough);
843
0
                    }
844
                }
845
0
                continue;
846
5.06k
            }
847
848
5.06k
            let potentially_punycode = if fail_fast {
849
5.06k
                debug_assert!(classify_for_punycode(label) != PunycodeClassification::Error);
850
5.06k
                !is_ascii(label)
851
            } else {
852
0
                classify_for_punycode(label) == PunycodeClassification::Unicode
853
            };
854
5.06k
            let passthrough = if potentially_punycode {
855
5.06k
                let unicode = output_as_unicode(label, tld, is_bidi);
856
5.06k
                had_unicode_output |= unicode;
857
5.06k
                unicode
858
            } else {
859
0
                true
860
            };
861
5.06k
            if passthrough {
862
0
                if !flushed_prefix {
863
0
                    flushed_prefix = true;
864
0
                    // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
865
0
                    sink.write_str(unsafe {
866
0
                        core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
867
0
                    })?;
868
0
                }
869
0
                for c in label.iter().copied() {
870
0
                    sink.write_char(c)?;
871
                }
872
5.06k
            } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode {
873
0
                if let Some(first_upper_case) =
874
5.06k
                    mixed_case.iter().position(|c| c.is_ascii_uppercase())
875
                {
876
0
                    let (head, tail) = mixed_case.split_at(first_upper_case);
877
0
                    let slice_to_write = if flushed_prefix {
878
0
                        head
879
                    } else {
880
0
                        flushed_prefix = true;
881
0
                        passthrough_up_to_extended += head.len();
882
0
                        debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
883
0
                        &domain_name[..passthrough_up_to_extended]
884
                    };
885
                    // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
886
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?;
887
0
                    for c in tail.iter() {
888
0
                        sink.write_char(char::from(c.to_ascii_lowercase()))?;
889
                    }
890
5.06k
                } else if flushed_prefix {
891
                    // SAFETY: `mixed_case` is known to be ASCII.
892
0
                    sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
893
                } else {
894
5.06k
                    passthrough_up_to_extended += mixed_case.len();
895
5.06k
                    if passthrough_up_to_extended == domain_name.len() {
896
5.06k
                        debug_assert!(!had_errors);
897
5.06k
                        return Ok(ProcessingSuccess::Passthrough);
898
0
                    }
899
                }
900
            } else {
901
0
                if !flushed_prefix {
902
0
                    flushed_prefix = true;
903
0
                    // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
904
0
                    sink.write_str(unsafe {
905
0
                        core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
906
0
                    })?;
907
0
                }
908
0
                write_punycode_label(label, sink)?;
909
            }
910
        }
911
912
0
        if had_errors {
913
0
            return Err(ProcessingError::ValidityError);
914
0
        }
915
0
916
0
        if had_unicode_output {
917
0
            if let Some(sink) = ascii_sink {
918
0
                let mut seen_label = false;
919
0
                let mut already_punycode_iter = already_punycode.iter();
920
0
                let mut passthrough_up_to_extended = passthrough_up_to;
921
0
                let mut flushed_prefix = false;
922
0
                for label in domain_buffer.split(|c| *c == '.') {
923
                    // Unwrap is OK, because there are supposed to be as many items in
924
                    // `already_punycode` as there are labels.
925
0
                    let input_punycode = *already_punycode_iter.next().unwrap();
926
0
                    if seen_label {
927
0
                        if flushed_prefix {
928
0
                            sink.write_char('.')?;
929
                        } else {
930
0
                            debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.');
931
0
                            passthrough_up_to_extended += 1;
932
                        }
933
0
                    }
934
0
                    seen_label = true;
935
936
0
                    if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode {
937
0
                        if let Some(first_upper_case) =
938
0
                            mixed_case.iter().position(|c| c.is_ascii_uppercase())
939
                        {
940
0
                            let (head, tail) = mixed_case.split_at(first_upper_case);
941
0
                            let slice_to_write = if flushed_prefix {
942
0
                                head
943
                            } else {
944
0
                                flushed_prefix = true;
945
0
                                passthrough_up_to_extended += head.len();
946
0
                                debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
947
0
                                &domain_name[..passthrough_up_to_extended]
948
                            };
949
                            // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
950
0
                            sink.write_str(unsafe {
951
0
                                core::str::from_utf8_unchecked(slice_to_write)
952
0
                            })?;
953
0
                            for c in tail.iter() {
954
0
                                sink.write_char(char::from(c.to_ascii_lowercase()))?;
955
                            }
956
0
                        } else if flushed_prefix {
957
                            // SAFETY: `mixed_case` is known to be ASCII.
958
0
                            sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
959
0
                        } else {
960
0
                            passthrough_up_to_extended += mixed_case.len();
961
0
                        }
962
0
                        continue;
963
0
                    }
964
0
965
0
                    if is_ascii(label) {
966
0
                        if !flushed_prefix {
967
0
                            flushed_prefix = true;
968
0
                            // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
969
0
                            sink.write_str(unsafe {
970
0
                                core::str::from_utf8_unchecked(
971
0
                                    &domain_name[..passthrough_up_to_extended],
972
0
                                )
973
0
                            })?;
974
0
                        }
975
0
                        for c in label.iter().copied() {
976
0
                            sink.write_char(c)?;
977
                        }
978
0
                    } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode
979
                    {
980
0
                        if let Some(first_upper_case) =
981
0
                            mixed_case.iter().position(|c| c.is_ascii_uppercase())
982
                        {
983
0
                            let (head, tail) = mixed_case.split_at(first_upper_case);
984
0
                            let slice_to_write = if flushed_prefix {
985
0
                                head
986
                            } else {
987
0
                                flushed_prefix = true;
988
0
                                passthrough_up_to_extended += head.len();
989
0
                                debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
990
0
                                &domain_name[..passthrough_up_to_extended]
991
                            };
992
                            // SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
993
0
                            sink.write_str(unsafe {
994
0
                                core::str::from_utf8_unchecked(slice_to_write)
995
0
                            })?;
996
0
                            for c in tail.iter() {
997
0
                                sink.write_char(char::from(c.to_ascii_lowercase()))?;
998
                            }
999
0
                        } else if flushed_prefix {
1000
                            // SAFETY: `mixed_case` is known to be ASCII.
1001
0
                            sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
1002
0
                        } else {
1003
0
                            passthrough_up_to_extended += mixed_case.len();
1004
0
                        }
1005
                    } else {
1006
0
                        if !flushed_prefix {
1007
0
                            flushed_prefix = true;
1008
0
                            // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
1009
0
                            sink.write_str(unsafe {
1010
0
                                core::str::from_utf8_unchecked(
1011
0
                                    &domain_name[..passthrough_up_to_extended],
1012
0
                                )
1013
0
                            })?;
1014
0
                        }
1015
0
                        write_punycode_label(label, sink)?;
1016
                    }
1017
                }
1018
0
                if !flushed_prefix {
1019
                    // SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
1020
0
                    sink.write_str(unsafe {
1021
0
                        core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
1022
0
                    })?;
1023
0
                }
1024
0
            }
1025
0
        }
1026
0
        Ok(ProcessingSuccess::WroteToSink)
1027
90.0k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_unicode::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::uts46::Uts46>::to_ascii::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_unicode::{closure#0}>
Unexecuted instantiation: <idna::uts46::Uts46>::process::<alloc::string::String, <idna::deprecated::Idna>::to_ascii::{closure#0}>
1028
1029
    /// The part of `process` that doesn't need to be generic over the sink.
1030
    #[inline(always)]
1031
90.0k
    fn process_inner<'a>(
1032
90.0k
        &self,
1033
90.0k
        domain_name: &'a [u8],
1034
90.0k
        ascii_deny_list: AsciiDenyList,
1035
90.0k
        hyphens: Hyphens,
1036
90.0k
        fail_fast: bool,
1037
90.0k
        domain_buffer: &mut SmallVec<[char; 253]>,
1038
90.0k
        already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>,
1039
90.0k
    ) -> (usize, bool, bool) {
1040
90.0k
        // Sadly, this even faster-path ASCII tier is needed to avoid regressing
1041
90.0k
        // performance.
1042
90.0k
        let mut iter = domain_name.iter();
1043
90.0k
        let mut most_recent_label_start = iter.clone();
1044
        loop {
1045
380k
            if let Some(&b) = iter.next() {
1046
342k
                if in_inclusive_range8(b, b'a', b'z') {
1047
290k
                    continue;
1048
52.0k
                }
1049
52.0k
                if b == b'.' {
1050
0
                    most_recent_label_start = iter.clone();
1051
0
                    continue;
1052
52.0k
                }
1053
52.0k
                return self.process_innermost(
1054
52.0k
                    domain_name,
1055
52.0k
                    ascii_deny_list,
1056
52.0k
                    hyphens,
1057
52.0k
                    fail_fast,
1058
52.0k
                    domain_buffer,
1059
52.0k
                    already_punycode,
1060
52.0k
                    most_recent_label_start.as_slice(),
1061
52.0k
                );
1062
            } else {
1063
                // Success! The whole input passes through on the fastest path!
1064
37.9k
                return (domain_name.len(), false, false);
1065
            }
1066
        }
1067
90.0k
    }
<idna::uts46::Uts46>::process_inner
Line
Count
Source
1031
90.0k
    fn process_inner<'a>(
1032
90.0k
        &self,
1033
90.0k
        domain_name: &'a [u8],
1034
90.0k
        ascii_deny_list: AsciiDenyList,
1035
90.0k
        hyphens: Hyphens,
1036
90.0k
        fail_fast: bool,
1037
90.0k
        domain_buffer: &mut SmallVec<[char; 253]>,
1038
90.0k
        already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>,
1039
90.0k
    ) -> (usize, bool, bool) {
1040
90.0k
        // Sadly, this even faster-path ASCII tier is needed to avoid regressing
1041
90.0k
        // performance.
1042
90.0k
        let mut iter = domain_name.iter();
1043
90.0k
        let mut most_recent_label_start = iter.clone();
1044
        loop {
1045
380k
            if let Some(&b) = iter.next() {
1046
342k
                if in_inclusive_range8(b, b'a', b'z') {
1047
290k
                    continue;
1048
52.0k
                }
1049
52.0k
                if b == b'.' {
1050
0
                    most_recent_label_start = iter.clone();
1051
0
                    continue;
1052
52.0k
                }
1053
52.0k
                return self.process_innermost(
1054
52.0k
                    domain_name,
1055
52.0k
                    ascii_deny_list,
1056
52.0k
                    hyphens,
1057
52.0k
                    fail_fast,
1058
52.0k
                    domain_buffer,
1059
52.0k
                    already_punycode,
1060
52.0k
                    most_recent_label_start.as_slice(),
1061
52.0k
                );
1062
            } else {
1063
                // Success! The whole input passes through on the fastest path!
1064
37.9k
                return (domain_name.len(), false, false);
1065
            }
1066
        }
1067
90.0k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::process_inner
Unexecuted instantiation: <idna::uts46::Uts46>::process_inner
1068
1069
    /// The part of `process` that doesn't need to be generic over the sink and
1070
    /// can avoid monomorphizing in the interest of code size.
1071
    /// Separating this into a different stack frame compared to `process_inner`
1072
    /// improves performance in the ICU4X case.
1073
    #[allow(clippy::too_many_arguments)]
1074
    #[inline(never)]
1075
52.0k
    fn process_innermost<'a>(
1076
52.0k
        &self,
1077
52.0k
        domain_name: &'a [u8],
1078
52.0k
        ascii_deny_list: AsciiDenyList,
1079
52.0k
        hyphens: Hyphens,
1080
52.0k
        fail_fast: bool,
1081
52.0k
        domain_buffer: &mut SmallVec<[char; 253]>,
1082
52.0k
        already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>,
1083
52.0k
        tail: &'a [u8],
1084
52.0k
    ) -> (usize, bool, bool) {
1085
52.0k
        let deny_list = ascii_deny_list.bits;
1086
52.0k
        let deny_list_deny_dot = deny_list | DOT_MASK;
1087
52.0k
1088
52.0k
        let mut had_errors = false;
1089
52.0k
1090
52.0k
        let mut passthrough_up_to = domain_name.len() - tail.len(); // Index into `domain_name`
1091
52.0k
                                                                    // 253 ASCII characters is the max length for a valid domain name
1092
52.0k
                                                                    // (excluding the root dot).
1093
52.0k
        let mut current_label_start; // Index into `domain_buffer`
1094
52.0k
        let mut seen_label = false;
1095
52.0k
        let mut in_prefix = true;
1096
734k
        for label in tail.split(|b| *b == b'.') {
<idna::uts46::Uts46>::process_innermost::{closure#0}
Line
Count
Source
1096
734k
        for label in tail.split(|b| *b == b'.') {
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#0}
1097
            // We check for passthrough only for the prefix. That is, if we
1098
            // haven't moved on and started filling `domain_buffer`. Keeping
1099
            // this stuff in one loop where the first items keep being skipped
1100
            // once they have been skipped at least once instead of working
1101
            // this into a fancier loop structure in order to make sure that
1102
            // no item from the iterator is lost or processed twice.
1103
            // Furthermore, after the passthrough fails, restarting the
1104
            // normalization process after each pre-existing ASCII dot also
1105
            // provides an opportunity for the processing to get back onto
1106
            // an ASCII fast path that bypasses the normalizer for ASCII
1107
            // after a pre-existing ASCII dot (pre-existing in the sense
1108
            // of not coming from e.g. normalizing an ideographic dot).
1109
52.0k
            if in_prefix && is_passthrough_ascii_label(label) {
1110
35.7k
                if seen_label {
1111
0
                    debug_assert_eq!(domain_name[passthrough_up_to], b'.');
1112
0
                    passthrough_up_to += 1;
1113
35.7k
                }
1114
35.7k
                seen_label = true;
1115
35.7k
1116
35.7k
                passthrough_up_to += label.len();
1117
35.7k
                continue;
1118
16.3k
            }
1119
16.3k
            if seen_label {
1120
0
                if in_prefix {
1121
0
                    debug_assert_eq!(domain_name[passthrough_up_to], b'.');
1122
0
                    passthrough_up_to += 1;
1123
0
                } else {
1124
0
                    domain_buffer.push('.');
1125
0
                }
1126
16.3k
            }
1127
16.3k
            seen_label = true;
1128
16.3k
            in_prefix = false;
1129
16.3k
            current_label_start = domain_buffer.len();
1130
16.3k
            if !label.is_empty() {
1131
16.3k
                let (ascii, non_ascii) = split_ascii_fast_path_prefix(label);
1132
16.3k
                let non_punycode_ascii_label = if non_ascii.is_empty() {
1133
16.3k
                    if has_punycode_prefix(ascii) {
1134
9.35k
                        if (ascii.last() != Some(&b'-'))
1135
9.35k
                            && (ascii.len() - 4 <= PUNYCODE_DECODE_MAX_INPUT_LENGTH)
1136
                        {
1137
9.12k
                            if let Ok(decode) =
1138
9.35k
                                Decoder::default().decode::<u8, InternalCaller>(&ascii[4..])
1139
                            {
1140
                                // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4
1141
                                // characters.
1142
9.12k
                                let mut label_buffer = SmallVec::<[char; 59]>::new();
1143
9.12k
                                label_buffer.extend(decode);
1144
9.12k
1145
9.12k
                                if self.after_punycode_decode(
1146
9.12k
                                    domain_buffer,
1147
9.12k
                                    current_label_start,
1148
9.12k
                                    &label_buffer,
1149
9.12k
                                    deny_list_deny_dot,
1150
9.12k
                                    fail_fast,
1151
9.12k
                                    &mut had_errors,
1152
9.12k
                                ) {
1153
3.10k
                                    return (0, false, true);
1154
6.01k
                                }
1155
6.01k
1156
6.01k
                                if self.check_label(
1157
6.01k
                                    hyphens,
1158
6.01k
                                    &mut domain_buffer[current_label_start..],
1159
6.01k
                                    fail_fast,
1160
6.01k
                                    &mut had_errors,
1161
6.01k
                                    true,
1162
6.01k
                                    true,
1163
6.01k
                                ) {
1164
844
                                    return (0, false, true);
1165
5.16k
                                }
1166
                            } else {
1167
                                // Punycode failed
1168
238
                                if fail_fast {
1169
238
                                    return (0, false, true);
1170
0
                                }
1171
0
                                had_errors = true;
1172
0
                                domain_buffer.push('\u{FFFD}');
1173
0
                                let mut iter = ascii.iter();
1174
0
                                // Discard the first character that we replaced.
1175
0
                                let _ = iter.next();
1176
0
                                domain_buffer.extend(iter.map(|c| {
1177
0
                                    // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does
1178
0
                                    // not matter.
1179
0
                                    apply_ascii_deny_list_to_potentially_upper_case_ascii(
1180
0
                                        *c, deny_list,
1181
0
                                    )
1182
0
                                }));
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#1}
1183
0
                            };
1184
                            // If there were errors, we won't be trying to use this
1185
                            // anyway later, so it's fine to put it here unconditionally.
1186
5.16k
                            already_punycode.push(AlreadyAsciiLabel::MixedCasePunycode(label));
1187
5.16k
                            continue;
1188
0
                        } else if fail_fast {
1189
0
                            return (0, false, true);
1190
0
                        }
1191
0
                        // Else fall through to the complex path and rediscover error
1192
0
                        // there.
1193
0
                        false
1194
                    } else {
1195
6.97k
                        true
1196
                    }
1197
                } else {
1198
0
                    false
1199
                };
1200
52.4k
                for c in ascii.iter().map(|c| {
1201
52.4k
                    // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does
1202
52.4k
                    // not matter.
1203
52.4k
                    apply_ascii_deny_list_to_potentially_upper_case_ascii(*c, deny_list)
1204
52.4k
                }) {
<idna::uts46::Uts46>::process_innermost::{closure#2}
Line
Count
Source
1200
52.4k
                for c in ascii.iter().map(|c| {
1201
52.4k
                    // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does
1202
52.4k
                    // not matter.
1203
52.4k
                    apply_ascii_deny_list_to_potentially_upper_case_ascii(*c, deny_list)
1204
52.4k
                }) {
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#2}
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#2}
1205
52.4k
                    if c == '\u{FFFD}' {
1206
84
                        if fail_fast {
1207
84
                            return (0, false, true);
1208
0
                        }
1209
0
                        had_errors = true;
1210
52.3k
                    }
1211
52.3k
                    domain_buffer.push(c);
1212
                }
1213
6.89k
                if non_punycode_ascii_label {
1214
6.89k
                    if hyphens != Hyphens::Allow
1215
0
                        && check_hyphens(
1216
0
                            &mut domain_buffer[current_label_start..],
1217
0
                            hyphens == Hyphens::CheckFirstLast,
1218
0
                            fail_fast,
1219
0
                            &mut had_errors,
1220
0
                        )
1221
                    {
1222
0
                        return (0, false, true);
1223
6.89k
                    }
1224
6.89k
                    already_punycode.push(if had_errors {
1225
0
                        AlreadyAsciiLabel::Other
1226
                    } else {
1227
6.89k
                        AlreadyAsciiLabel::MixedCaseAscii(label)
1228
                    });
1229
6.89k
                    continue;
1230
0
                }
1231
0
                already_punycode.push(AlreadyAsciiLabel::Other);
1232
0
                let mut first_needs_combining_mark_check = ascii.is_empty();
1233
0
                let mut needs_contextj_check = !non_ascii.is_empty();
1234
0
                let mut mapping = self
1235
0
                    .data
1236
0
                    .map_normalize(non_ascii.chars())
1237
0
                    .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list));
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#3}
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#3}
1238
                loop {
1239
0
                    let n = mapping.next();
1240
0
                    match n {
1241
                        None | Some('.') => {
1242
0
                            if domain_buffer[current_label_start..]
1243
0
                                .starts_with(&['x', 'n', '-', '-'])
1244
                            {
1245
0
                                let mut punycode_precondition_failed = false;
1246
0
                                for c in domain_buffer[current_label_start + 4..].iter_mut() {
1247
0
                                    if !c.is_ascii() {
1248
0
                                        if fail_fast {
1249
0
                                            return (0, false, true);
1250
0
                                        }
1251
0
                                        had_errors = true;
1252
0
                                        *c = '\u{FFFD}';
1253
0
                                        punycode_precondition_failed = true;
1254
0
                                    }
1255
                                }
1256
1257
0
                                if let Some(last) = domain_buffer.last_mut() {
1258
0
                                    if *last == '-' {
1259
                                        // Either there's nothing after the "xn--" prefix
1260
                                        // and we got the last hyphen of "xn--", or there
1261
                                        // are no Punycode digits after the last delimiter
1262
                                        // which would result in Punycode decode outputting
1263
                                        // ASCII only.
1264
0
                                        if fail_fast {
1265
0
                                            return (0, false, true);
1266
0
                                        }
1267
0
                                        had_errors = true;
1268
0
                                        *last = '\u{FFFD}';
1269
0
                                        punycode_precondition_failed = true;
1270
0
                                    }
1271
                                } else {
1272
0
                                    unreachable!();
1273
                                }
1274
1275
                                // Reject excessively long input
1276
                                // https://github.com/whatwg/url/issues/824
1277
                                // https://unicode-org.atlassian.net/browse/ICU-13727
1278
0
                                if domain_buffer.len() - current_label_start - 4
1279
0
                                    > PUNYCODE_DECODE_MAX_INPUT_LENGTH
1280
                                {
1281
0
                                    if fail_fast {
1282
0
                                        return (0, false, true);
1283
0
                                    }
1284
0
                                    had_errors = true;
1285
0
                                    domain_buffer[current_label_start
1286
0
                                        + 4
1287
0
                                        + PUNYCODE_DECODE_MAX_INPUT_LENGTH] = '\u{FFFD}';
1288
0
                                    punycode_precondition_failed = true;
1289
0
                                }
1290
1291
0
                                if !punycode_precondition_failed {
1292
0
                                    if let Ok(decode) = Decoder::default()
1293
0
                                        .decode::<char, InternalCaller>(
1294
0
                                            &domain_buffer[current_label_start + 4..],
1295
0
                                        )
1296
                                    {
1297
0
                                        first_needs_combining_mark_check = true;
1298
0
                                        needs_contextj_check = true;
1299
0
                                        // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4
1300
0
                                        // characters.
1301
0
                                        let mut label_buffer = SmallVec::<[char; 59]>::new();
1302
0
                                        label_buffer.extend(decode);
1303
0
1304
0
                                        domain_buffer.truncate(current_label_start);
1305
0
                                        if self.after_punycode_decode(
1306
0
                                            domain_buffer,
1307
0
                                            current_label_start,
1308
0
                                            &label_buffer,
1309
0
                                            deny_list_deny_dot,
1310
0
                                            fail_fast,
1311
0
                                            &mut had_errors,
1312
0
                                        ) {
1313
0
                                            return (0, false, true);
1314
0
                                        }
1315
                                    } else {
1316
                                        // Punycode failed
1317
0
                                        if fail_fast {
1318
0
                                            return (0, false, true);
1319
0
                                        }
1320
0
                                        had_errors = true;
1321
0
                                        domain_buffer[current_label_start] = '\u{FFFD}';
1322
0
                                        needs_contextj_check = false; // ASCII label
1323
0
                                        first_needs_combining_mark_check = false;
1324
                                    };
1325
0
                                } else {
1326
0
                                    first_needs_combining_mark_check = false;
1327
0
                                    needs_contextj_check = false; // Non-ASCII already turned to U+FFFD.
1328
0
                                }
1329
0
                            }
1330
0
                            if self.check_label(
1331
0
                                hyphens,
1332
0
                                &mut domain_buffer[current_label_start..],
1333
0
                                fail_fast,
1334
0
                                &mut had_errors,
1335
0
                                first_needs_combining_mark_check,
1336
0
                                needs_contextj_check,
1337
0
                            ) {
1338
0
                                return (0, false, true);
1339
0
                            }
1340
0
1341
0
                            if n.is_none() {
1342
0
                                break;
1343
0
                            }
1344
0
                            domain_buffer.push('.');
1345
0
                            current_label_start = domain_buffer.len();
1346
0
                            first_needs_combining_mark_check = true;
1347
0
                            needs_contextj_check = true;
1348
0
                            already_punycode.push(AlreadyAsciiLabel::Other);
1349
                        }
1350
0
                        Some(c) => {
1351
0
                            if c == '\u{FFFD}' {
1352
0
                                if fail_fast {
1353
0
                                    return (0, false, true);
1354
0
                                }
1355
0
                                had_errors = true;
1356
0
                            }
1357
0
                            domain_buffer.push(c);
1358
                        }
1359
                    }
1360
                }
1361
0
            } else {
1362
0
                // Empty label
1363
0
                already_punycode.push(AlreadyAsciiLabel::MixedCaseAscii(label));
1364
0
            }
1365
        }
1366
1367
47.8k
        let is_bidi = self.is_bidi(domain_buffer);
1368
47.8k
        if is_bidi {
1369
9.16k
            for label in domain_buffer.split_mut(|c| *c == '.') {
<idna::uts46::Uts46>::process_innermost::{closure#4}
Line
Count
Source
1369
9.16k
            for label in domain_buffer.split_mut(|c| *c == '.') {
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#4}
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost::{closure#4}
1370
1.28k
                if let Some((first, tail)) = label.split_first_mut() {
1371
1.28k
                    let first_bc = self.data.bidi_class(*first);
1372
1.28k
                    if !FIRST_BC_MASK.intersects(first_bc.to_mask()) {
1373
                        // Neither RTL label nor LTR label
1374
4
                        if fail_fast {
1375
4
                            return (0, false, true);
1376
0
                        }
1377
0
                        had_errors = true;
1378
0
                        *first = '\u{FFFD}';
1379
0
                        continue;
1380
1.28k
                    }
1381
1.28k
                    let is_ltr = first_bc.is_ltr();
1382
1.28k
                    // Trim NSM
1383
1.28k
                    let mut middle = tail;
1384
                    #[allow(clippy::while_let_loop)]
1385
                    loop {
1386
3.65k
                        if let Some((last, prior)) = middle.split_last_mut() {
1387
3.28k
                            let last_bc = self.data.bidi_class(*last);
1388
3.28k
                            if last_bc.is_nonspacing_mark() {
1389
2.36k
                                middle = prior;
1390
2.36k
                                continue;
1391
912
                            }
1392
912
                            let last_mask = if is_ltr { LAST_LTR_MASK } else { LAST_RTL_MASK };
1393
912
                            if !last_mask.intersects(last_bc.to_mask()) {
1394
100
                                if fail_fast {
1395
100
                                    return (0, false, true);
1396
0
                                }
1397
0
                                had_errors = true;
1398
0
                                *last = '\u{FFFD}';
1399
812
                            }
1400
812
                            if is_ltr {
1401
0
                                for c in prior.iter_mut() {
1402
0
                                    let bc = self.data.bidi_class(*c);
1403
0
                                    if !MIDDLE_LTR_MASK.intersects(bc.to_mask()) {
1404
0
                                        if fail_fast {
1405
0
                                            return (0, false, true);
1406
0
                                        }
1407
0
                                        had_errors = true;
1408
0
                                        *c = '\u{FFFD}';
1409
0
                                    }
1410
                                }
1411
                            } else {
1412
812
                                let mut numeral_state = RtlNumeralState::Undecided;
1413
3.95k
                                for c in prior.iter_mut() {
1414
3.95k
                                    let bc = self.data.bidi_class(*c);
1415
3.95k
                                    if !MIDDLE_RTL_MASK.intersects(bc.to_mask()) {
1416
0
                                        if fail_fast {
1417
0
                                            return (0, false, true);
1418
0
                                        }
1419
0
                                        had_errors = true;
1420
0
                                        *c = '\u{FFFD}';
1421
                                    } else {
1422
3.95k
                                        match numeral_state {
1423
                                            RtlNumeralState::Undecided => {
1424
3.02k
                                                if bc.is_european_number() {
1425
94
                                                    numeral_state = RtlNumeralState::European;
1426
2.93k
                                                } else if bc.is_arabic_number() {
1427
8
                                                    numeral_state = RtlNumeralState::Arabic;
1428
2.92k
                                                }
1429
                                            }
1430
                                            RtlNumeralState::European => {
1431
696
                                                if bc.is_arabic_number() {
1432
0
                                                    if fail_fast {
1433
0
                                                        return (0, false, true);
1434
0
                                                    }
1435
0
                                                    had_errors = true;
1436
0
                                                    *c = '\u{FFFD}';
1437
696
                                                }
1438
                                            }
1439
                                            RtlNumeralState::Arabic => {
1440
234
                                                if bc.is_european_number() {
1441
0
                                                    if fail_fast {
1442
0
                                                        return (0, false, true);
1443
0
                                                    }
1444
0
                                                    had_errors = true;
1445
0
                                                    *c = '\u{FFFD}';
1446
234
                                                }
1447
                                            }
1448
                                        }
1449
                                    }
1450
                                }
1451
812
                                if (numeral_state == RtlNumeralState::European
1452
94
                                    && last_bc.is_arabic_number())
1453
812
                                    || (numeral_state == RtlNumeralState::Arabic
1454
8
                                        && last_bc.is_european_number())
1455
                                {
1456
0
                                    if fail_fast {
1457
0
                                        return (0, false, true);
1458
0
                                    }
1459
0
                                    had_errors = true;
1460
0
                                    *last = '\u{FFFD}';
1461
812
                                }
1462
                            }
1463
812
                            break;
1464
                        } else {
1465
                            // One-character label or label where
1466
                            // everything after the first character
1467
                            // is just non-spacing marks.
1468
372
                            break;
1469
                        }
1470
                    }
1471
0
                }
1472
            }
1473
46.5k
        }
1474
1475
47.7k
        (passthrough_up_to, is_bidi, had_errors)
1476
52.0k
    }
<idna::uts46::Uts46>::process_innermost
Line
Count
Source
1075
52.0k
    fn process_innermost<'a>(
1076
52.0k
        &self,
1077
52.0k
        domain_name: &'a [u8],
1078
52.0k
        ascii_deny_list: AsciiDenyList,
1079
52.0k
        hyphens: Hyphens,
1080
52.0k
        fail_fast: bool,
1081
52.0k
        domain_buffer: &mut SmallVec<[char; 253]>,
1082
52.0k
        already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>,
1083
52.0k
        tail: &'a [u8],
1084
52.0k
    ) -> (usize, bool, bool) {
1085
52.0k
        let deny_list = ascii_deny_list.bits;
1086
52.0k
        let deny_list_deny_dot = deny_list | DOT_MASK;
1087
52.0k
1088
52.0k
        let mut had_errors = false;
1089
52.0k
1090
52.0k
        let mut passthrough_up_to = domain_name.len() - tail.len(); // Index into `domain_name`
1091
52.0k
                                                                    // 253 ASCII characters is the max length for a valid domain name
1092
52.0k
                                                                    // (excluding the root dot).
1093
52.0k
        let mut current_label_start; // Index into `domain_buffer`
1094
52.0k
        let mut seen_label = false;
1095
52.0k
        let mut in_prefix = true;
1096
52.0k
        for label in tail.split(|b| *b == b'.') {
1097
            // We check for passthrough only for the prefix. That is, if we
1098
            // haven't moved on and started filling `domain_buffer`. Keeping
1099
            // this stuff in one loop where the first items keep being skipped
1100
            // once they have been skipped at least once instead of working
1101
            // this into a fancier loop structure in order to make sure that
1102
            // no item from the iterator is lost or processed twice.
1103
            // Furthermore, after the passthrough fails, restarting the
1104
            // normalization process after each pre-existing ASCII dot also
1105
            // provides an opportunity for the processing to get back onto
1106
            // an ASCII fast path that bypasses the normalizer for ASCII
1107
            // after a pre-existing ASCII dot (pre-existing in the sense
1108
            // of not coming from e.g. normalizing an ideographic dot).
1109
52.0k
            if in_prefix && is_passthrough_ascii_label(label) {
1110
35.7k
                if seen_label {
1111
0
                    debug_assert_eq!(domain_name[passthrough_up_to], b'.');
1112
0
                    passthrough_up_to += 1;
1113
35.7k
                }
1114
35.7k
                seen_label = true;
1115
35.7k
1116
35.7k
                passthrough_up_to += label.len();
1117
35.7k
                continue;
1118
16.3k
            }
1119
16.3k
            if seen_label {
1120
0
                if in_prefix {
1121
0
                    debug_assert_eq!(domain_name[passthrough_up_to], b'.');
1122
0
                    passthrough_up_to += 1;
1123
0
                } else {
1124
0
                    domain_buffer.push('.');
1125
0
                }
1126
16.3k
            }
1127
16.3k
            seen_label = true;
1128
16.3k
            in_prefix = false;
1129
16.3k
            current_label_start = domain_buffer.len();
1130
16.3k
            if !label.is_empty() {
1131
16.3k
                let (ascii, non_ascii) = split_ascii_fast_path_prefix(label);
1132
16.3k
                let non_punycode_ascii_label = if non_ascii.is_empty() {
1133
16.3k
                    if has_punycode_prefix(ascii) {
1134
9.35k
                        if (ascii.last() != Some(&b'-'))
1135
9.35k
                            && (ascii.len() - 4 <= PUNYCODE_DECODE_MAX_INPUT_LENGTH)
1136
                        {
1137
9.12k
                            if let Ok(decode) =
1138
9.35k
                                Decoder::default().decode::<u8, InternalCaller>(&ascii[4..])
1139
                            {
1140
                                // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4
1141
                                // characters.
1142
9.12k
                                let mut label_buffer = SmallVec::<[char; 59]>::new();
1143
9.12k
                                label_buffer.extend(decode);
1144
9.12k
1145
9.12k
                                if self.after_punycode_decode(
1146
9.12k
                                    domain_buffer,
1147
9.12k
                                    current_label_start,
1148
9.12k
                                    &label_buffer,
1149
9.12k
                                    deny_list_deny_dot,
1150
9.12k
                                    fail_fast,
1151
9.12k
                                    &mut had_errors,
1152
9.12k
                                ) {
1153
3.10k
                                    return (0, false, true);
1154
6.01k
                                }
1155
6.01k
1156
6.01k
                                if self.check_label(
1157
6.01k
                                    hyphens,
1158
6.01k
                                    &mut domain_buffer[current_label_start..],
1159
6.01k
                                    fail_fast,
1160
6.01k
                                    &mut had_errors,
1161
6.01k
                                    true,
1162
6.01k
                                    true,
1163
6.01k
                                ) {
1164
844
                                    return (0, false, true);
1165
5.16k
                                }
1166
                            } else {
1167
                                // Punycode failed
1168
238
                                if fail_fast {
1169
238
                                    return (0, false, true);
1170
0
                                }
1171
0
                                had_errors = true;
1172
0
                                domain_buffer.push('\u{FFFD}');
1173
0
                                let mut iter = ascii.iter();
1174
0
                                // Discard the first character that we replaced.
1175
0
                                let _ = iter.next();
1176
0
                                domain_buffer.extend(iter.map(|c| {
1177
                                    // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does
1178
                                    // not matter.
1179
                                    apply_ascii_deny_list_to_potentially_upper_case_ascii(
1180
                                        *c, deny_list,
1181
                                    )
1182
0
                                }));
1183
0
                            };
1184
                            // If there were errors, we won't be trying to use this
1185
                            // anyway later, so it's fine to put it here unconditionally.
1186
5.16k
                            already_punycode.push(AlreadyAsciiLabel::MixedCasePunycode(label));
1187
5.16k
                            continue;
1188
0
                        } else if fail_fast {
1189
0
                            return (0, false, true);
1190
0
                        }
1191
0
                        // Else fall through to the complex path and rediscover error
1192
0
                        // there.
1193
0
                        false
1194
                    } else {
1195
6.97k
                        true
1196
                    }
1197
                } else {
1198
0
                    false
1199
                };
1200
52.4k
                for c in ascii.iter().map(|c| {
1201
                    // Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does
1202
                    // not matter.
1203
                    apply_ascii_deny_list_to_potentially_upper_case_ascii(*c, deny_list)
1204
6.97k
                }) {
1205
52.4k
                    if c == '\u{FFFD}' {
1206
84
                        if fail_fast {
1207
84
                            return (0, false, true);
1208
0
                        }
1209
0
                        had_errors = true;
1210
52.3k
                    }
1211
52.3k
                    domain_buffer.push(c);
1212
                }
1213
6.89k
                if non_punycode_ascii_label {
1214
6.89k
                    if hyphens != Hyphens::Allow
1215
0
                        && check_hyphens(
1216
0
                            &mut domain_buffer[current_label_start..],
1217
0
                            hyphens == Hyphens::CheckFirstLast,
1218
0
                            fail_fast,
1219
0
                            &mut had_errors,
1220
0
                        )
1221
                    {
1222
0
                        return (0, false, true);
1223
6.89k
                    }
1224
6.89k
                    already_punycode.push(if had_errors {
1225
0
                        AlreadyAsciiLabel::Other
1226
                    } else {
1227
6.89k
                        AlreadyAsciiLabel::MixedCaseAscii(label)
1228
                    });
1229
6.89k
                    continue;
1230
0
                }
1231
0
                already_punycode.push(AlreadyAsciiLabel::Other);
1232
0
                let mut first_needs_combining_mark_check = ascii.is_empty();
1233
0
                let mut needs_contextj_check = !non_ascii.is_empty();
1234
0
                let mut mapping = self
1235
0
                    .data
1236
0
                    .map_normalize(non_ascii.chars())
1237
0
                    .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list));
1238
                loop {
1239
0
                    let n = mapping.next();
1240
0
                    match n {
1241
                        None | Some('.') => {
1242
0
                            if domain_buffer[current_label_start..]
1243
0
                                .starts_with(&['x', 'n', '-', '-'])
1244
                            {
1245
0
                                let mut punycode_precondition_failed = false;
1246
0
                                for c in domain_buffer[current_label_start + 4..].iter_mut() {
1247
0
                                    if !c.is_ascii() {
1248
0
                                        if fail_fast {
1249
0
                                            return (0, false, true);
1250
0
                                        }
1251
0
                                        had_errors = true;
1252
0
                                        *c = '\u{FFFD}';
1253
0
                                        punycode_precondition_failed = true;
1254
0
                                    }
1255
                                }
1256
1257
0
                                if let Some(last) = domain_buffer.last_mut() {
1258
0
                                    if *last == '-' {
1259
                                        // Either there's nothing after the "xn--" prefix
1260
                                        // and we got the last hyphen of "xn--", or there
1261
                                        // are no Punycode digits after the last delimiter
1262
                                        // which would result in Punycode decode outputting
1263
                                        // ASCII only.
1264
0
                                        if fail_fast {
1265
0
                                            return (0, false, true);
1266
0
                                        }
1267
0
                                        had_errors = true;
1268
0
                                        *last = '\u{FFFD}';
1269
0
                                        punycode_precondition_failed = true;
1270
0
                                    }
1271
                                } else {
1272
0
                                    unreachable!();
1273
                                }
1274
1275
                                // Reject excessively long input
1276
                                // https://github.com/whatwg/url/issues/824
1277
                                // https://unicode-org.atlassian.net/browse/ICU-13727
1278
0
                                if domain_buffer.len() - current_label_start - 4
1279
0
                                    > PUNYCODE_DECODE_MAX_INPUT_LENGTH
1280
                                {
1281
0
                                    if fail_fast {
1282
0
                                        return (0, false, true);
1283
0
                                    }
1284
0
                                    had_errors = true;
1285
0
                                    domain_buffer[current_label_start
1286
0
                                        + 4
1287
0
                                        + PUNYCODE_DECODE_MAX_INPUT_LENGTH] = '\u{FFFD}';
1288
0
                                    punycode_precondition_failed = true;
1289
0
                                }
1290
1291
0
                                if !punycode_precondition_failed {
1292
0
                                    if let Ok(decode) = Decoder::default()
1293
0
                                        .decode::<char, InternalCaller>(
1294
0
                                            &domain_buffer[current_label_start + 4..],
1295
0
                                        )
1296
                                    {
1297
0
                                        first_needs_combining_mark_check = true;
1298
0
                                        needs_contextj_check = true;
1299
0
                                        // 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4
1300
0
                                        // characters.
1301
0
                                        let mut label_buffer = SmallVec::<[char; 59]>::new();
1302
0
                                        label_buffer.extend(decode);
1303
0
1304
0
                                        domain_buffer.truncate(current_label_start);
1305
0
                                        if self.after_punycode_decode(
1306
0
                                            domain_buffer,
1307
0
                                            current_label_start,
1308
0
                                            &label_buffer,
1309
0
                                            deny_list_deny_dot,
1310
0
                                            fail_fast,
1311
0
                                            &mut had_errors,
1312
0
                                        ) {
1313
0
                                            return (0, false, true);
1314
0
                                        }
1315
                                    } else {
1316
                                        // Punycode failed
1317
0
                                        if fail_fast {
1318
0
                                            return (0, false, true);
1319
0
                                        }
1320
0
                                        had_errors = true;
1321
0
                                        domain_buffer[current_label_start] = '\u{FFFD}';
1322
0
                                        needs_contextj_check = false; // ASCII label
1323
0
                                        first_needs_combining_mark_check = false;
1324
                                    };
1325
0
                                } else {
1326
0
                                    first_needs_combining_mark_check = false;
1327
0
                                    needs_contextj_check = false; // Non-ASCII already turned to U+FFFD.
1328
0
                                }
1329
0
                            }
1330
0
                            if self.check_label(
1331
0
                                hyphens,
1332
0
                                &mut domain_buffer[current_label_start..],
1333
0
                                fail_fast,
1334
0
                                &mut had_errors,
1335
0
                                first_needs_combining_mark_check,
1336
0
                                needs_contextj_check,
1337
0
                            ) {
1338
0
                                return (0, false, true);
1339
0
                            }
1340
0
1341
0
                            if n.is_none() {
1342
0
                                break;
1343
0
                            }
1344
0
                            domain_buffer.push('.');
1345
0
                            current_label_start = domain_buffer.len();
1346
0
                            first_needs_combining_mark_check = true;
1347
0
                            needs_contextj_check = true;
1348
0
                            already_punycode.push(AlreadyAsciiLabel::Other);
1349
                        }
1350
0
                        Some(c) => {
1351
0
                            if c == '\u{FFFD}' {
1352
0
                                if fail_fast {
1353
0
                                    return (0, false, true);
1354
0
                                }
1355
0
                                had_errors = true;
1356
0
                            }
1357
0
                            domain_buffer.push(c);
1358
                        }
1359
                    }
1360
                }
1361
0
            } else {
1362
0
                // Empty label
1363
0
                already_punycode.push(AlreadyAsciiLabel::MixedCaseAscii(label));
1364
0
            }
1365
        }
1366
1367
47.8k
        let is_bidi = self.is_bidi(domain_buffer);
1368
47.8k
        if is_bidi {
1369
1.28k
            for label in domain_buffer.split_mut(|c| *c == '.') {
1370
1.28k
                if let Some((first, tail)) = label.split_first_mut() {
1371
1.28k
                    let first_bc = self.data.bidi_class(*first);
1372
1.28k
                    if !FIRST_BC_MASK.intersects(first_bc.to_mask()) {
1373
                        // Neither RTL label nor LTR label
1374
4
                        if fail_fast {
1375
4
                            return (0, false, true);
1376
0
                        }
1377
0
                        had_errors = true;
1378
0
                        *first = '\u{FFFD}';
1379
0
                        continue;
1380
1.28k
                    }
1381
1.28k
                    let is_ltr = first_bc.is_ltr();
1382
1.28k
                    // Trim NSM
1383
1.28k
                    let mut middle = tail;
1384
                    #[allow(clippy::while_let_loop)]
1385
                    loop {
1386
3.65k
                        if let Some((last, prior)) = middle.split_last_mut() {
1387
3.28k
                            let last_bc = self.data.bidi_class(*last);
1388
3.28k
                            if last_bc.is_nonspacing_mark() {
1389
2.36k
                                middle = prior;
1390
2.36k
                                continue;
1391
912
                            }
1392
912
                            let last_mask = if is_ltr { LAST_LTR_MASK } else { LAST_RTL_MASK };
1393
912
                            if !last_mask.intersects(last_bc.to_mask()) {
1394
100
                                if fail_fast {
1395
100
                                    return (0, false, true);
1396
0
                                }
1397
0
                                had_errors = true;
1398
0
                                *last = '\u{FFFD}';
1399
812
                            }
1400
812
                            if is_ltr {
1401
0
                                for c in prior.iter_mut() {
1402
0
                                    let bc = self.data.bidi_class(*c);
1403
0
                                    if !MIDDLE_LTR_MASK.intersects(bc.to_mask()) {
1404
0
                                        if fail_fast {
1405
0
                                            return (0, false, true);
1406
0
                                        }
1407
0
                                        had_errors = true;
1408
0
                                        *c = '\u{FFFD}';
1409
0
                                    }
1410
                                }
1411
                            } else {
1412
812
                                let mut numeral_state = RtlNumeralState::Undecided;
1413
3.95k
                                for c in prior.iter_mut() {
1414
3.95k
                                    let bc = self.data.bidi_class(*c);
1415
3.95k
                                    if !MIDDLE_RTL_MASK.intersects(bc.to_mask()) {
1416
0
                                        if fail_fast {
1417
0
                                            return (0, false, true);
1418
0
                                        }
1419
0
                                        had_errors = true;
1420
0
                                        *c = '\u{FFFD}';
1421
                                    } else {
1422
3.95k
                                        match numeral_state {
1423
                                            RtlNumeralState::Undecided => {
1424
3.02k
                                                if bc.is_european_number() {
1425
94
                                                    numeral_state = RtlNumeralState::European;
1426
2.93k
                                                } else if bc.is_arabic_number() {
1427
8
                                                    numeral_state = RtlNumeralState::Arabic;
1428
2.92k
                                                }
1429
                                            }
1430
                                            RtlNumeralState::European => {
1431
696
                                                if bc.is_arabic_number() {
1432
0
                                                    if fail_fast {
1433
0
                                                        return (0, false, true);
1434
0
                                                    }
1435
0
                                                    had_errors = true;
1436
0
                                                    *c = '\u{FFFD}';
1437
696
                                                }
1438
                                            }
1439
                                            RtlNumeralState::Arabic => {
1440
234
                                                if bc.is_european_number() {
1441
0
                                                    if fail_fast {
1442
0
                                                        return (0, false, true);
1443
0
                                                    }
1444
0
                                                    had_errors = true;
1445
0
                                                    *c = '\u{FFFD}';
1446
234
                                                }
1447
                                            }
1448
                                        }
1449
                                    }
1450
                                }
1451
812
                                if (numeral_state == RtlNumeralState::European
1452
94
                                    && last_bc.is_arabic_number())
1453
812
                                    || (numeral_state == RtlNumeralState::Arabic
1454
8
                                        && last_bc.is_european_number())
1455
                                {
1456
0
                                    if fail_fast {
1457
0
                                        return (0, false, true);
1458
0
                                    }
1459
0
                                    had_errors = true;
1460
0
                                    *last = '\u{FFFD}';
1461
812
                                }
1462
                            }
1463
812
                            break;
1464
                        } else {
1465
                            // One-character label or label where
1466
                            // everything after the first character
1467
                            // is just non-spacing marks.
1468
372
                            break;
1469
                        }
1470
                    }
1471
0
                }
1472
            }
1473
46.5k
        }
1474
1475
47.7k
        (passthrough_up_to, is_bidi, had_errors)
1476
52.0k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost
Unexecuted instantiation: <idna::uts46::Uts46>::process_innermost
1477
1478
    /// Appends the normalized form of a decoded label to `domain_buffer` and
    /// checks that normalization left the label unchanged.
    ///
    /// `label_buffer` is passed through `self.data.normalize_validate` and then
    /// through `apply_ascii_deny_list_to_lower_cased_unicode` with
    /// `deny_list_deny_dot`; every resulting character is pushed onto
    /// `domain_buffer`. Afterwards the characters of `domain_buffer` from
    /// `current_label_start` onwards are compared pairwise with `label_buffer`,
    /// and the first position that differs is overwritten with U+FFFD.
    ///
    /// Returns `true` only when `fail_fast` is set and an error was found; the
    /// function then returns as soon as the error is seen. With `fail_fast`
    /// unset, errors are recorded by setting `*had_errors`, processing
    /// continues, and the function returns `false`.
    ///
    /// NOTE(review): the pairwise comparison is only meaningful if
    /// `current_label_start` equals the length of `domain_buffer` on entry, so
    /// that the compared slice holds just this label. The callers visible in
    /// this file appear to satisfy that; confirm for any new caller.
    #[inline(never)]
1479
9.12k
    fn after_punycode_decode(
1480
9.12k
        &self,
1481
9.12k
        domain_buffer: &mut SmallVec<[char; 253]>,
1482
9.12k
        current_label_start: usize,
1483
9.12k
        label_buffer: &[char],
1484
9.12k
        deny_list_deny_dot: u128,
1485
9.12k
        fail_fast: bool,
1486
9.12k
        had_errors: &mut bool,
1487
9.12k
    ) -> bool {
        // Normalize/validate the decoded characters and apply the deny list.
        // A U+FFFD coming out of this pipeline is treated as an error marker.
1488
73.3k
        for c in self
1489
9.12k
            .data
1490
9.12k
            .normalize_validate(label_buffer.iter().copied())
1491
73.3k
            .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot))
<idna::uts46::Uts46>::after_punycode_decode::{closure#0}
Line
Count
Source
1491
73.3k
            .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot))
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#0}
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#0}
1492
        {
1493
73.3k
            if c == '\u{FFFD}' {
1494
2.60k
                if fail_fast {
1495
2.60k
                    return true;
1496
0
                }
1497
0
                *had_errors = true;
1498
70.7k
            }
            // Without `fail_fast`, the U+FFFD marker is kept in the output.
1499
70.7k
            domain_buffer.push(c);
1500
        }
        // Mutable view of everything at and after `current_label_start`.
1501
6.51k
        let normalized = &mut domain_buffer[current_label_start..];
        // `zip` stops at the shorter of the two sequences, so only their
        // common prefix length is compared; a pure length difference is not
        // reported by this loop.
1502
6.51k
        if let Err(()) =
1503
6.51k
            normalized
1504
6.51k
                .iter_mut()
1505
6.51k
                .zip(label_buffer.iter())
1506
64.9k
                .try_for_each(|(norm_c, decoded_c)| {
1507
64.9k
                    if *norm_c == *decoded_c {
1508
64.3k
                        Ok(())
1509
                    } else {
1510
                        // Mark the first difference
1511
506
                        *norm_c = '\u{FFFD}';
1512
506
                        Err(())
1513
                    }
1514
64.9k
                })
<idna::uts46::Uts46>::after_punycode_decode::{closure#1}
Line
Count
Source
1506
64.9k
                .try_for_each(|(norm_c, decoded_c)| {
1507
64.9k
                    if *norm_c == *decoded_c {
1508
64.3k
                        Ok(())
1509
                    } else {
1510
                        // Mark the first difference
1511
506
                        *norm_c = '\u{FFFD}';
1512
506
                        Err(())
1513
                    }
1514
64.9k
                })
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#1}
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode::{closure#1}
1515
        {
            // The mismatch has already been marked in `domain_buffer` above,
            // even when returning early here.
1516
506
            if fail_fast {
1517
506
                return true;
1518
0
            }
1519
0
            *had_errors = true;
1520
6.01k
        }
        // `false` means the caller should keep going; any errors found in
        // non-fail-fast mode are reported through `*had_errors` instead.
1521
6.01k
        false
1522
9.12k
    }
<idna::uts46::Uts46>::after_punycode_decode
Line
Count
Source
1479
9.12k
    /// Pushes the normalized, deny-list-filtered form of `label_buffer` onto
    /// `domain_buffer`, then verifies that the pushed characters match
    /// `label_buffer` position by position.
    ///
    /// Two conditions count as errors: a U+FFFD produced by the
    /// normalize/deny-list pipeline, and a position where the normalized
    /// character differs from the input character (that position is
    /// overwritten with U+FFFD).
    ///
    /// Returns `true` if an error was found while `fail_fast` is set;
    /// otherwise sets `*had_errors` on error and returns `false`.
    fn after_punycode_decode(
1480
9.12k
        &self,
1481
9.12k
        domain_buffer: &mut SmallVec<[char; 253]>,
1482
9.12k
        current_label_start: usize,
1483
9.12k
        label_buffer: &[char],
1484
9.12k
        deny_list_deny_dot: u128,
1485
9.12k
        fail_fast: bool,
1486
9.12k
        had_errors: &mut bool,
1487
9.12k
    ) -> bool {
        // First pass: normalize/validate, apply the deny list, and append.
1488
73.3k
        for c in self
1489
9.12k
            .data
1490
9.12k
            .normalize_validate(label_buffer.iter().copied())
1491
9.12k
            .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot))
1492
        {
1493
73.3k
            if c == '\u{FFFD}' {
1494
2.60k
                if fail_fast {
1495
2.60k
                    return true;
1496
0
                }
1497
0
                *had_errors = true;
1498
70.7k
            }
1499
70.7k
            domain_buffer.push(c);
1500
        }
        // Second pass: compare what was appended against the input. `zip`
        // ends with the shorter sequence, so only the common prefix length
        // is compared.
1501
6.51k
        let normalized = &mut domain_buffer[current_label_start..];
1502
6.51k
        if let Err(()) =
1503
6.51k
            normalized
1504
6.51k
                .iter_mut()
1505
6.51k
                .zip(label_buffer.iter())
1506
6.51k
                .try_for_each(|(norm_c, decoded_c)| {
1507
                    if *norm_c == *decoded_c {
1508
                        Ok(())
1509
                    } else {
1510
                        // Mark the first difference
1511
                        *norm_c = '\u{FFFD}';
1512
                        Err(())
1513
                    }
1514
6.51k
                })
1515
        {
1516
506
            if fail_fast {
1517
506
                return true;
1518
0
            }
1519
0
            *had_errors = true;
1520
6.01k
        }
1521
6.01k
        false
1522
9.12k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode
Unexecuted instantiation: <idna::uts46::Uts46>::after_punycode_decode
1523
1524
    /// Validates a single label in place.
    ///
    /// The checks run in this order:
    ///
    /// 1. Unless `hyphens` is `Hyphens::Allow`, `check_hyphens` is called on
    ///    the label; if it returns `true`, this function returns `true`.
    /// 2. If `first_needs_combining_mark_check` is set, the first character
    ///    must not satisfy `self.data.is_mark`.
    /// 3. If `needs_contextj_check` is set, every character accepted by
    ///    `in_inclusive_range_char(c, '\u{200C}', '\u{200D}')` must either
    ///    directly follow a character for which `self.data.is_virama` holds
    ///    or, for U+200C only, have `has_appropriately_joining_char` succeed
    ///    on both the preceding and the following characters.
    /// 4. A label that `is_ascii` rejects must not be longer than
    ///    `PUNYCODE_ENCODE_MAX_INPUT_LENGTH` characters.
    ///
    /// For checks 2-4, a failure with `fail_fast` set returns `true`
    /// immediately without modifying the label. With `fail_fast` unset, the
    /// offending character is replaced with U+FFFD, `*had_errors` is set, and
    /// checking continues; the function then returns `false`.
    #[inline(never)]
1525
6.01k
    fn check_label(
1526
6.01k
        &self,
1527
6.01k
        hyphens: Hyphens,
1528
6.01k
        mut_label: &mut [char],
1529
6.01k
        fail_fast: bool,
1530
6.01k
        had_errors: &mut bool,
1531
6.01k
        first_needs_combining_mark_check: bool,
1532
6.01k
        needs_contextj_check: bool,
1533
6.01k
    ) -> bool {
        // The second argument tells `check_hyphens` whether the mode is
        // `Hyphens::CheckFirstLast` as opposed to the remaining non-`Allow`
        // mode(s).
1534
6.01k
        if hyphens != Hyphens::Allow
1535
0
            && check_hyphens(
1536
0
                mut_label,
1537
0
                hyphens == Hyphens::CheckFirstLast,
1538
0
                fail_fast,
1539
0
                had_errors,
1540
0
            )
1541
        {
1542
0
            return true;
1543
6.01k
        }
        // An empty label has no first character and passes this check.
1544
6.01k
        if first_needs_combining_mark_check {
1545
6.01k
            if let Some(first) = mut_label.first_mut() {
1546
6.01k
                if self.data.is_mark(*first) {
1547
840
                    if fail_fast {
1548
840
                        return true;
1549
0
                    }
1550
0
                    *had_errors = true;
1551
0
                    *first = '\u{FFFD}';
1552
5.17k
                }
1553
0
            }
1554
0
        }
1555
5.17k
        if needs_contextj_check {
1556
            // ContextJ
            // Index-based loop because the label is re-split mutably around
            // each joiner below.
1557
56.9k
            for i in 0..mut_label.len() {
1558
56.9k
                let c = mut_label[i];
                // Presumably true exactly for U+200C and U+200D; the
                // `debug_assert_eq!` further down relies on that.
1559
56.9k
                if !in_inclusive_range_char(c, '\u{200C}', '\u{200D}') {
1560
56.9k
                    continue;
1561
4
                }
                // `head` is everything before the joiner; `joiner_and_tail`
                // starts at the joiner itself.
1562
4
                let (head, joiner_and_tail) = mut_label.split_at_mut(i);
1563
1564
4
                if let Some((joiner, tail)) = joiner_and_tail.split_first_mut() {
                    // A joiner directly after a virama is accepted without
                    // further checks.
1565
4
                    if let Some(previous) = head.last() {
1566
0
                        if self.data.is_virama(*previous) {
1567
0
                            continue;
1568
0
                        }
1569
                    } else {
1570
                        // No preceding character
1571
4
                        if fail_fast {
1572
4
                            return true;
1573
0
                        }
1574
0
                        *had_errors = true;
1575
0
                        *joiner = '\u{FFFD}';
1576
0
                        continue;
1577
                    }
1578
0
                    if c == '\u{200D}' {
1579
                        // ZWJ only has the virama rule
1580
0
                        if fail_fast {
1581
0
                            return true;
1582
0
                        }
1583
0
                        *had_errors = true;
1584
0
                        *joiner = '\u{FFFD}';
1585
0
                        continue;
1586
0
                    }
1587
0
                    debug_assert_eq!(c, '\u{200C}');
                    // `head` is scanned backwards from the joiner and `tail`
                    // forwards from it; both sides must succeed.
1588
0
                    if !self.has_appropriately_joining_char(
1589
0
                        head.iter().rev().copied(),
1590
0
                        LEFT_OR_DUAL_JOINING_MASK,
1591
0
                    ) || !self.has_appropriately_joining_char(
1592
0
                        tail.iter().copied(),
1593
0
                        RIGHT_OR_DUAL_JOINING_MASK,
1594
0
                    ) {
1595
0
                        if fail_fast {
1596
0
                            return true;
1597
0
                        }
1598
0
                        *had_errors = true;
1599
0
                        *joiner = '\u{FFFD}';
1600
0
                    }
1601
                } else {
                    // `i` is less than the label length, so the second half
                    // of the split always has at least one element.
1602
0
                    debug_assert!(false);
1603
                }
1604
            }
1605
0
        }
1606
1607
5.16k
        if !is_ascii(mut_label) && mut_label.len() > PUNYCODE_ENCODE_MAX_INPUT_LENGTH {
1608
            // Limit quadratic behavior
1609
            // https://github.com/whatwg/url/issues/824
1610
            // https://unicode-org.atlassian.net/browse/ICU-13727
1611
0
            if fail_fast {
1612
0
                return true;
1613
0
            }
1614
0
            *had_errors = true;
            // In bounds: the length was just checked to exceed the constant.
1615
0
            mut_label[PUNYCODE_ENCODE_MAX_INPUT_LENGTH] = '\u{FFFD}';
1616
5.16k
        }
1617
5.16k
        false
1618
6.01k
    }
<idna::uts46::Uts46>::check_label
Line
Count
Source
1525
6.01k
    fn check_label(
1526
6.01k
        &self,
1527
6.01k
        hyphens: Hyphens,
1528
6.01k
        mut_label: &mut [char],
1529
6.01k
        fail_fast: bool,
1530
6.01k
        had_errors: &mut bool,
1531
6.01k
        first_needs_combining_mark_check: bool,
1532
6.01k
        needs_contextj_check: bool,
1533
6.01k
    ) -> bool {
1534
6.01k
        if hyphens != Hyphens::Allow
1535
0
            && check_hyphens(
1536
0
                mut_label,
1537
0
                hyphens == Hyphens::CheckFirstLast,
1538
0
                fail_fast,
1539
0
                had_errors,
1540
0
            )
1541
        {
1542
0
            return true;
1543
6.01k
        }
1544
6.01k
        if first_needs_combining_mark_check {
1545
6.01k
            if let Some(first) = mut_label.first_mut() {
1546
6.01k
                if self.data.is_mark(*first) {
1547
840
                    if fail_fast {
1548
840
                        return true;
1549
0
                    }
1550
0
                    *had_errors = true;
1551
0
                    *first = '\u{FFFD}';
1552
5.17k
                }
1553
0
            }
1554
0
        }
1555
5.17k
        if needs_contextj_check {
1556
            // ContextJ
1557
56.9k
            for i in 0..mut_label.len() {
1558
56.9k
                let c = mut_label[i];
1559
56.9k
                if !in_inclusive_range_char(c, '\u{200C}', '\u{200D}') {
1560
56.9k
                    continue;
1561
4
                }
1562
4
                let (head, joiner_and_tail) = mut_label.split_at_mut(i);
1563
1564
4
                if let Some((joiner, tail)) = joiner_and_tail.split_first_mut() {
1565
4
                    if let Some(previous) = head.last() {
1566
0
                        if self.data.is_virama(*previous) {
1567
0
                            continue;
1568
0
                        }
1569
                    } else {
1570
                        // No preceding character
1571
4
                        if fail_fast {
1572
4
                            return true;
1573
0
                        }
1574
0
                        *had_errors = true;
1575
0
                        *joiner = '\u{FFFD}';
1576
0
                        continue;
1577
                    }
1578
0
                    if c == '\u{200D}' {
1579
                        // ZWJ only has the virama rule
1580
0
                        if fail_fast {
1581
0
                            return true;
1582
0
                        }
1583
0
                        *had_errors = true;
1584
0
                        *joiner = '\u{FFFD}';
1585
0
                        continue;
1586
0
                    }
1587
0
                    debug_assert_eq!(c, '\u{200C}');
1588
0
                    if !self.has_appropriately_joining_char(
1589
0
                        head.iter().rev().copied(),
1590
0
                        LEFT_OR_DUAL_JOINING_MASK,
1591
0
                    ) || !self.has_appropriately_joining_char(
1592
0
                        tail.iter().copied(),
1593
0
                        RIGHT_OR_DUAL_JOINING_MASK,
1594
0
                    ) {
1595
0
                        if fail_fast {
1596
0
                            return true;
1597
0
                        }
1598
0
                        *had_errors = true;
1599
0
                        *joiner = '\u{FFFD}';
1600
0
                    }
1601
                } else {
1602
0
                    debug_assert!(false);
1603
                }
1604
            }
1605
0
        }
1606
1607
5.16k
        if !is_ascii(mut_label) && mut_label.len() > PUNYCODE_ENCODE_MAX_INPUT_LENGTH {
1608
            // Limit quadratic behavior
1609
            // https://github.com/whatwg/url/issues/824
1610
            // https://unicode-org.atlassian.net/browse/ICU-13727
1611
0
            if fail_fast {
1612
0
                return true;
1613
0
            }
1614
0
            *had_errors = true;
1615
0
            mut_label[PUNYCODE_ENCODE_MAX_INPUT_LENGTH] = '\u{FFFD}';
1616
5.16k
        }
1617
5.16k
        false
1618
6.01k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::check_label
Unexecuted instantiation: <idna::uts46::Uts46>::check_label
1619
1620
    #[inline(always)]
1621
0
    fn has_appropriately_joining_char<I: Iterator<Item = char>>(
1622
0
        &self,
1623
0
        iter: I,
1624
0
        required_mask: JoiningTypeMask,
1625
0
    ) -> bool {
1626
0
        for c in iter {
1627
0
            let jt = self.data.joining_type(c);
1628
0
            if jt.to_mask().intersects(required_mask) {
1629
0
                return true;
1630
0
            }
1631
0
            if jt.is_transparent() {
1632
0
                continue;
1633
0
            }
1634
0
            return false;
1635
        }
1636
0
        false
1637
0
    }
Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::iter::adapters::rev::Rev<core::slice::iter::Iter<char>>>>
Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>>
Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::iter::adapters::rev::Rev<core::slice::iter::Iter<char>>>>
Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>>
Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::iter::adapters::rev::Rev<core::slice::iter::Iter<char>>>>
Unexecuted instantiation: <idna::uts46::Uts46>::has_appropriately_joining_char::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>>
1638
1639
    #[inline(always)]
1640
47.8k
    fn is_bidi(&self, buffer: &[char]) -> bool {
1641
148k
        for &c in buffer {
1642
101k
            if c < '\u{0590}' {
1643
                // Below Hebrew
1644
82.3k
                continue;
1645
19.2k
            }
1646
19.2k
            if in_inclusive_range_char(c, '\u{0900}', '\u{FB1C}') {
1647
11.5k
                debug_assert_ne!(c, '\u{200F}'); // disallowed
1648
11.5k
                continue;
1649
7.70k
            }
1650
7.70k
            if in_inclusive_range_char(c, '\u{1F000}', '\u{3FFFF}') {
1651
656
                continue;
1652
7.04k
            }
1653
7.04k
            if in_inclusive_range_char(c, '\u{FF00}', '\u{107FF}') {
1654
98
                continue;
1655
6.95k
            }
1656
6.95k
            if in_inclusive_range_char(c, '\u{11000}', '\u{1E7FF}') {
1657
3.74k
                continue;
1658
3.21k
            }
1659
3.21k
            if RTL_MASK.intersects(self.data.bidi_class(c).to_mask()) {
1660
1.28k
                return true;
1661
1.92k
            }
1662
        }
1663
46.5k
        false
1664
47.8k
    }
<idna::uts46::Uts46>::is_bidi
Line
Count
Source
1640
47.8k
    fn is_bidi(&self, buffer: &[char]) -> bool {
1641
148k
        for &c in buffer {
1642
101k
            if c < '\u{0590}' {
1643
                // Below Hebrew
1644
82.3k
                continue;
1645
19.2k
            }
1646
19.2k
            if in_inclusive_range_char(c, '\u{0900}', '\u{FB1C}') {
1647
11.5k
                debug_assert_ne!(c, '\u{200F}'); // disallowed
1648
11.5k
                continue;
1649
7.70k
            }
1650
7.70k
            if in_inclusive_range_char(c, '\u{1F000}', '\u{3FFFF}') {
1651
656
                continue;
1652
7.04k
            }
1653
7.04k
            if in_inclusive_range_char(c, '\u{FF00}', '\u{107FF}') {
1654
98
                continue;
1655
6.95k
            }
1656
6.95k
            if in_inclusive_range_char(c, '\u{11000}', '\u{1E7FF}') {
1657
3.74k
                continue;
1658
3.21k
            }
1659
3.21k
            if RTL_MASK.intersects(self.data.bidi_class(c).to_mask()) {
1660
1.28k
                return true;
1661
1.92k
            }
1662
        }
1663
46.5k
        false
1664
47.8k
    }
Unexecuted instantiation: <idna::uts46::Uts46>::is_bidi
Unexecuted instantiation: <idna::uts46::Uts46>::is_bidi
1665
}
1666
1667
0
fn check_hyphens(
1668
0
    mut_label: &mut [char],
1669
0
    allow_third_fourth: bool,
1670
0
    fail_fast: bool,
1671
0
    had_errors: &mut bool,
1672
0
) -> bool {
1673
0
    if let Some(first) = mut_label.first_mut() {
1674
0
        if *first == '-' {
1675
0
            if fail_fast {
1676
0
                return true;
1677
0
            }
1678
0
            *had_errors = true;
1679
0
            *first = '\u{FFFD}';
1680
0
        }
1681
0
    }
1682
0
    if let Some(last) = mut_label.last_mut() {
1683
0
        if *last == '-' {
1684
0
            if fail_fast {
1685
0
                return true;
1686
0
            }
1687
0
            *had_errors = true;
1688
0
            *last = '\u{FFFD}';
1689
0
        }
1690
0
    }
1691
0
    if allow_third_fourth {
1692
0
        return false;
1693
0
    }
1694
0
    if mut_label.len() >= 4 && mut_label[2] == '-' && mut_label[3] == '-' {
1695
0
        if fail_fast {
1696
0
            return true;
1697
0
        }
1698
0
        *had_errors = true;
1699
0
        mut_label[2] = '\u{FFFD}';
1700
0
        mut_label[3] = '\u{FFFD}';
1701
0
    }
1702
0
    false
1703
0
}
Unexecuted instantiation: idna::uts46::check_hyphens
Unexecuted instantiation: idna::uts46::check_hyphens
Unexecuted instantiation: idna::uts46::check_hyphens