Coverage Report

Created: 2025-12-31 06:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/idna-1.1.0/src/punycode.rs
Line
Count
Source
1
// Copyright 2013 The rust-url developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
10
//!
11
//! Since Punycode fundamentally works on unicode code points,
12
//! `encode` and `decode` take and return slices and vectors of `char`.
13
//! `encode_str` and `decode_to_string` provide convenience wrappers
14
//! that convert from and to Rust’s UTF-8 based `str` and `String` types.
15
16
use alloc::{string::String, vec::Vec};
17
use core::char;
18
use core::fmt::Write;
19
use core::marker::PhantomData;
20
// Bootstring parameters for Punycode (RFC 3492, section 5).

/// Number of distinct digit values in a variable-length integer.
const BASE: u32 = 36;
/// Lower clamp for the per-digit threshold `t`.
const T_MIN: u32 = 1;
/// Upper clamp for the per-digit threshold `t`.
const T_MAX: u32 = 26;
/// Skew term in the final step of bias adaptation.
const SKEW: u32 = 38;
/// Divisor applied to the first delta during bias adaptation.
const DAMP: u32 = 700;
/// Bias in effect before any delta has been processed.
const INITIAL_BIAS: u32 = 72;
/// First code point that is not "basic"; encoding and decoding start from here.
const INITIAL_N: u32 = 0x80;
29
30
#[inline]
31
0
fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
32
0
    delta /= if first_time { DAMP } else { 2 };
33
0
    delta += delta / num_points;
34
0
    let mut k = 0;
35
0
    while delta > ((BASE - T_MIN) * T_MAX) / 2 {
36
0
        delta /= BASE - T_MIN;
37
0
        k += BASE;
38
0
    }
39
0
    k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW))
40
0
}
41
42
/// Convert Punycode to an Unicode `String`.
43
///
44
/// Return None on malformed input or overflow.
45
/// Overflow can only happen on inputs that take more than
46
/// 63 encoded bytes, the DNS limit on domain name labels.
47
#[inline]
48
0
pub fn decode_to_string(input: &str) -> Option<String> {
49
    Some(
50
0
        Decoder::default()
51
0
            .decode::<u8, ExternalCaller>(input.as_bytes())
52
0
            .ok()?
53
0
            .collect(),
54
    )
55
0
}
56
57
/// Convert Punycode to Unicode.
58
///
59
/// Return None on malformed input or overflow.
60
/// Overflow can only happen on inputs that take more than
61
/// 63 encoded bytes, the DNS limit on domain name labels.
62
0
pub fn decode(input: &str) -> Option<Vec<char>> {
63
    Some(
64
0
        Decoder::default()
65
0
            .decode::<u8, ExternalCaller>(input.as_bytes())
66
0
            .ok()?
67
0
            .collect(),
68
    )
69
0
}
70
/// Compile-time marker distinguishing internal from external callers, so the
/// old public API behavior is retained while internal callers get tweaked
/// behavior.
///
/// Encoding: external callers need overflow checks, internal callers do not,
/// because `PUNYCODE_ENCODE_MAX_INPUT_LENGTH` is set to 1000 and, per
/// RFC 3492 section 6.4, the integer variable never has to represent values
/// larger than
/// (char::MAX - INITIAL_N) * (PUNYCODE_ENCODE_MAX_INPUT_LENGTH + 1),
/// which is less than u32::MAX.
///
/// Decoding: external callers need to handle upper-case ASCII, internal
/// callers do not, because the internal code calls the decoder only with
/// lower-case inputs.
pub(crate) trait PunycodeCaller {
    /// `true` for the public entry points, `false` for crate-internal use.
    const EXTERNAL_CALLER: bool;
}

/// Marker used by the public functions in this file.
struct ExternalCaller;

impl PunycodeCaller for ExternalCaller {
    const EXTERNAL_CALLER: bool = true;
}

/// Marker for crate-internal callers.
pub(crate) struct InternalCaller;

impl PunycodeCaller for InternalCaller {
    const EXTERNAL_CALLER: bool = false;
}
99
/// A single unit of Punycode input; implemented in this file for `u8` and `char`.
pub(crate) trait PunycodeCodeUnit {
    /// Whether this unit is the `-` delimiter.
    fn is_delimiter(&self) -> bool;
    /// Whether this unit is an ASCII value.
    fn is_ascii(&self) -> bool;
    /// The Punycode digit value of this unit, or `None` if it is not a digit.
    fn digit(&self) -> Option<u32>;
    /// This unit converted to `char` as-is.
    fn char(&self) -> char;
    /// This unit converted to `char` for callers that expect lower-case
    /// output; the `u8` implementation folds ASCII upper case, the `char`
    /// implementation returns the value unchanged.
    fn char_ascii_lower_case(&self) -> char;
}
107
108
impl PunycodeCodeUnit for u8 {
109
0
    fn is_delimiter(&self) -> bool {
110
0
        *self == b'-'
111
0
    }
112
0
    fn is_ascii(&self) -> bool {
113
0
        *self < 0x80
114
0
    }
115
0
    fn digit(&self) -> Option<u32> {
116
0
        let byte = *self;
117
0
        Some(match byte {
118
0
            byte @ b'0'..=b'9' => byte - b'0' + 26,
119
0
            byte @ b'A'..=b'Z' => byte - b'A',
120
0
            byte @ b'a'..=b'z' => byte - b'a',
121
0
            _ => return None,
122
        } as u32)
123
0
    }
124
0
    fn char(&self) -> char {
125
0
        char::from(*self)
126
0
    }
127
0
    fn char_ascii_lower_case(&self) -> char {
128
0
        char::from(self.to_ascii_lowercase())
129
0
    }
130
}
131
132
impl PunycodeCodeUnit for char {
133
0
    fn is_delimiter(&self) -> bool {
134
0
        *self == '-'
135
0
    }
136
0
    fn is_ascii(&self) -> bool {
137
0
        debug_assert!(false); // Unused
138
0
        true
139
0
    }
140
0
    fn digit(&self) -> Option<u32> {
141
0
        let byte = *self;
142
0
        Some(match byte {
143
0
            byte @ '0'..='9' => u32::from(byte) - u32::from('0') + 26,
144
            // byte @ 'A'..='Z' => u32::from(byte) - u32::from('A'), // XXX not needed if no public input
145
0
            byte @ 'a'..='z' => u32::from(byte) - u32::from('a'),
146
0
            _ => return None,
147
        })
148
0
    }
149
0
    fn char(&self) -> char {
150
0
        debug_assert!(false); // Unused
151
0
        *self
152
0
    }
153
0
    fn char_ascii_lower_case(&self) -> char {
154
        // No need to actually lower-case!
155
0
        *self
156
0
    }
157
}
158
159
#[derive(Default)]
160
pub(crate) struct Decoder {
161
    insertions: smallvec::SmallVec<[(usize, char); 59]>,
162
}
163
164
impl Decoder {
165
    /// Split the input iterator and return a Vec with insertions of encoded characters
166
0
    pub(crate) fn decode<'a, T: PunycodeCodeUnit + Copy, C: PunycodeCaller>(
167
0
        &'a mut self,
168
0
        input: &'a [T],
169
0
    ) -> Result<Decode<'a, T, C>, ()> {
170
0
        self.insertions.clear();
171
        // Handle "basic" (ASCII) code points.
172
        // They are encoded as-is before the last delimiter, if any.
173
0
        let (base, input) = if let Some(position) = input.iter().rposition(|c| c.is_delimiter()) {
Unexecuted instantiation: <idna::punycode::Decoder>::decode::<char, idna::punycode::InternalCaller>::{closure#0}
Unexecuted instantiation: <idna::punycode::Decoder>::decode::<u8, idna::punycode::ExternalCaller>::{closure#0}
Unexecuted instantiation: <idna::punycode::Decoder>::decode::<u8, idna::punycode::InternalCaller>::{closure#0}
174
            (
175
0
                &input[..position],
176
0
                if position > 0 {
177
0
                    &input[position + 1..]
178
                } else {
179
0
                    input
180
                },
181
            )
182
        } else {
183
0
            (&input[..0], input)
184
        };
185
186
0
        if C::EXTERNAL_CALLER && !base.iter().all(|c| c.is_ascii()) {
187
0
            return Err(());
188
0
        }
189
190
0
        let base_len = base.len();
191
0
        let mut length = base_len as u32;
192
0
        let mut code_point = INITIAL_N;
193
0
        let mut bias = INITIAL_BIAS;
194
0
        let mut i = 0u32;
195
0
        let mut iter = input.iter();
196
        loop {
197
0
            let previous_i = i;
198
0
            let mut weight = 1;
199
0
            let mut k = BASE;
200
0
            let mut byte = match iter.next() {
201
0
                None => break,
202
0
                Some(byte) => byte,
203
            };
204
205
            // Decode a generalized variable-length integer into delta,
206
            // which gets added to i.
207
            loop {
208
0
                let digit = if let Some(digit) = byte.digit() {
209
0
                    digit
210
                } else {
211
0
                    return Err(());
212
                };
213
0
                let product = digit.checked_mul(weight).ok_or(())?;
214
0
                i = i.checked_add(product).ok_or(())?;
215
0
                let t = if k <= bias {
216
0
                    T_MIN
217
0
                } else if k >= bias + T_MAX {
218
0
                    T_MAX
219
                } else {
220
0
                    k - bias
221
                };
222
0
                if digit < t {
223
0
                    break;
224
0
                }
225
0
                weight = weight.checked_mul(BASE - t).ok_or(())?;
226
0
                k += BASE;
227
0
                byte = match iter.next() {
228
0
                    None => return Err(()), // End of input before the end of this delta
229
0
                    Some(byte) => byte,
230
                };
231
            }
232
233
0
            bias = adapt(i - previous_i, length + 1, previous_i == 0);
234
235
            // i was supposed to wrap around from length+1 to 0,
236
            // incrementing code_point each time.
237
0
            code_point = code_point.checked_add(i / (length + 1)).ok_or(())?;
238
0
            i %= length + 1;
239
0
            let c = match char::from_u32(code_point) {
240
0
                Some(c) => c,
241
0
                None => return Err(()),
242
            };
243
244
            // Move earlier insertions farther out in the string
245
0
            for (idx, _) in &mut self.insertions {
246
0
                if *idx >= i as usize {
247
0
                    *idx += 1;
248
0
                }
249
            }
250
0
            self.insertions.push((i as usize, c));
251
0
            length += 1;
252
0
            i += 1;
253
        }
254
255
0
        self.insertions.sort_by_key(|(i, _)| *i);
256
0
        Ok(Decode {
257
0
            base: base.iter(),
258
0
            insertions: &self.insertions,
259
0
            inserted: 0,
260
0
            position: 0,
261
0
            len: base_len + self.insertions.len(),
262
0
            phantom: PhantomData::<C>,
263
0
        })
264
0
    }
Unexecuted instantiation: <idna::punycode::Decoder>::decode::<char, idna::punycode::InternalCaller>
Unexecuted instantiation: <idna::punycode::Decoder>::decode::<u8, idna::punycode::ExternalCaller>
Unexecuted instantiation: <idna::punycode::Decoder>::decode::<u8, idna::punycode::InternalCaller>
265
}
266
267
pub(crate) struct Decode<'a, T, C>
268
where
269
    T: PunycodeCodeUnit + Copy,
270
    C: PunycodeCaller,
271
{
272
    base: core::slice::Iter<'a, T>,
273
    pub(crate) insertions: &'a [(usize, char)],
274
    inserted: usize,
275
    position: usize,
276
    len: usize,
277
    phantom: PhantomData<C>,
278
}
279
280
impl<T: PunycodeCodeUnit + Copy, C: PunycodeCaller> Iterator for Decode<'_, T, C> {
281
    type Item = char;
282
283
0
    fn next(&mut self) -> Option<Self::Item> {
284
        loop {
285
0
            match self.insertions.get(self.inserted) {
286
0
                Some((pos, c)) if *pos == self.position => {
287
0
                    self.inserted += 1;
288
0
                    self.position += 1;
289
0
                    return Some(*c);
290
                }
291
0
                _ => {}
292
            }
293
0
            if let Some(c) = self.base.next() {
294
0
                self.position += 1;
295
0
                return Some(if C::EXTERNAL_CALLER {
296
0
                    c.char()
297
                } else {
298
0
                    c.char_ascii_lower_case()
299
                });
300
0
            } else if self.inserted >= self.insertions.len() {
301
0
                return None;
302
0
            }
303
        }
304
0
    }
Unexecuted instantiation: <idna::punycode::Decode<char, idna::punycode::InternalCaller> as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <idna::punycode::Decode<u8, idna::punycode::ExternalCaller> as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <idna::punycode::Decode<u8, idna::punycode::InternalCaller> as core::iter::traits::iterator::Iterator>::next
305
306
0
    fn size_hint(&self) -> (usize, Option<usize>) {
307
0
        let len = self.len - self.position;
308
0
        (len, Some(len))
309
0
    }
Unexecuted instantiation: <idna::punycode::Decode<char, idna::punycode::InternalCaller> as core::iter::traits::iterator::Iterator>::size_hint
Unexecuted instantiation: <idna::punycode::Decode<u8, idna::punycode::ExternalCaller> as core::iter::traits::iterator::Iterator>::size_hint
Unexecuted instantiation: <idna::punycode::Decode<u8, idna::punycode::InternalCaller> as core::iter::traits::iterator::Iterator>::size_hint
310
}
311
312
impl<T: PunycodeCodeUnit + Copy, C: PunycodeCaller> ExactSizeIterator for Decode<'_, T, C> {
313
0
    fn len(&self) -> usize {
314
0
        self.len - self.position
315
0
    }
316
}
317
318
/// Convert an Unicode `str` to Punycode.
319
///
320
/// This is a convenience wrapper around `encode`.
321
#[inline]
322
0
pub fn encode_str(input: &str) -> Option<String> {
323
0
    if input.len() > u32::MAX as usize {
324
0
        return None;
325
0
    }
326
0
    let mut buf = String::with_capacity(input.len());
327
0
    encode_into::<_, _, ExternalCaller>(input.chars(), &mut buf)
328
0
        .ok()
329
0
        .map(|()| buf)
330
0
}
331
332
/// Convert Unicode to Punycode.
333
///
334
/// Return None on overflow, which can only happen on inputs that would take more than
335
/// 63 encoded bytes, the DNS limit on domain name labels.
336
0
pub fn encode(input: &[char]) -> Option<String> {
337
0
    if input.len() > u32::MAX as usize {
338
0
        return None;
339
0
    }
340
0
    let mut buf = String::with_capacity(input.len());
341
0
    encode_into::<_, _, ExternalCaller>(input.iter().copied(), &mut buf)
342
0
        .ok()
343
0
        .map(|()| buf)
344
0
}
345
/// Ways in which `encode_into` can fail.
pub(crate) enum PunycodeEncodeError {
    /// A length or delta computation did not fit in `u32`.
    Overflow,
    /// Writing to the output sink returned an error.
    Sink,
}
350
351
impl From<core::fmt::Error> for PunycodeEncodeError {
352
0
    fn from(_: core::fmt::Error) -> Self {
353
0
        Self::Sink
354
0
    }
355
}
356
357
0
pub(crate) fn encode_into<I, W, C>(input: I, output: &mut W) -> Result<(), PunycodeEncodeError>
358
0
where
359
0
    I: Iterator<Item = char> + Clone,
360
0
    W: Write + ?Sized,
361
0
    C: PunycodeCaller,
362
{
363
    // Handle "basic" (ASCII) code points. They are encoded as-is.
364
0
    let (mut input_length, mut basic_length) = (0u32, 0);
365
0
    for c in input.clone() {
366
0
        input_length = input_length
367
0
            .checked_add(1)
368
0
            .ok_or(PunycodeEncodeError::Overflow)?;
369
0
        if c.is_ascii() {
370
0
            output.write_char(c)?;
371
0
            basic_length += 1;
372
0
        }
373
    }
374
375
0
    if !C::EXTERNAL_CALLER {
376
        // We should never get an overflow here with the internal caller being
377
        // length-limited, but let's check anyway once here trusting the math
378
        // from  RFC 3492 section 6.4 and then omit the overflow checks in the
379
        // loop below.
380
0
        let len_plus_one = input_length
381
0
            .checked_add(1)
382
0
            .ok_or(PunycodeEncodeError::Overflow)?;
383
0
        len_plus_one
384
0
            .checked_mul(u32::from(char::MAX) - INITIAL_N)
385
0
            .ok_or(PunycodeEncodeError::Overflow)?;
386
0
    }
387
388
0
    if basic_length > 0 {
389
0
        output.write_char('-')?;
390
0
    }
391
0
    let mut code_point = INITIAL_N;
392
0
    let mut delta = 0u32;
393
0
    let mut bias = INITIAL_BIAS;
394
0
    let mut processed = basic_length;
395
0
    while processed < input_length {
396
        // All code points < code_point have been handled already.
397
        // Find the next larger one.
398
0
        let min_code_point = input
399
0
            .clone()
400
0
            .map(|c| c as u32)
Unexecuted instantiation: idna::punycode::encode_into::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>, alloc::string::String, idna::punycode::ExternalCaller>::{closure#0}
Unexecuted instantiation: idna::punycode::encode_into::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>, alloc::string::String, idna::punycode::InternalCaller>::{closure#0}
401
0
            .filter(|&c| c >= code_point)
Unexecuted instantiation: idna::punycode::encode_into::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>, alloc::string::String, idna::punycode::ExternalCaller>::{closure#1}
Unexecuted instantiation: idna::punycode::encode_into::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>, alloc::string::String, idna::punycode::InternalCaller>::{closure#1}
402
0
            .min()
403
0
            .unwrap();
404
        // Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
405
0
        if C::EXTERNAL_CALLER {
406
0
            let product = (min_code_point - code_point)
407
0
                .checked_mul(processed + 1)
408
0
                .ok_or(PunycodeEncodeError::Overflow)?;
409
0
            delta = delta
410
0
                .checked_add(product)
411
0
                .ok_or(PunycodeEncodeError::Overflow)?;
412
0
        } else {
413
0
            delta += (min_code_point - code_point) * (processed + 1);
414
0
        }
415
0
        code_point = min_code_point;
416
0
        for c in input.clone() {
417
0
            let c = c as u32;
418
0
            if c < code_point {
419
0
                if C::EXTERNAL_CALLER {
420
0
                    delta = delta.checked_add(1).ok_or(PunycodeEncodeError::Overflow)?;
421
0
                } else {
422
0
                    delta += 1;
423
0
                }
424
0
            }
425
0
            if c == code_point {
426
                // Represent delta as a generalized variable-length integer:
427
0
                let mut q = delta;
428
0
                let mut k = BASE;
429
                loop {
430
0
                    let t = if k <= bias {
431
0
                        T_MIN
432
0
                    } else if k >= bias + T_MAX {
433
0
                        T_MAX
434
                    } else {
435
0
                        k - bias
436
                    };
437
0
                    if q < t {
438
0
                        break;
439
0
                    }
440
0
                    let value = t + ((q - t) % (BASE - t));
441
0
                    output.write_char(value_to_digit(value))?;
442
0
                    q = (q - t) / (BASE - t);
443
0
                    k += BASE;
444
                }
445
0
                output.write_char(value_to_digit(q))?;
446
0
                bias = adapt(delta, processed + 1, processed == basic_length);
447
0
                delta = 0;
448
0
                processed += 1;
449
0
            }
450
        }
451
0
        delta += 1;
452
0
        code_point += 1;
453
    }
454
0
    Ok(())
455
0
}
Unexecuted instantiation: idna::punycode::encode_into::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>, alloc::string::String, idna::punycode::ExternalCaller>
Unexecuted instantiation: idna::punycode::encode_into::<core::iter::adapters::copied::Copied<core::slice::iter::Iter<char>>, alloc::string::String, idna::punycode::InternalCaller>
456
/// Map a Punycode digit value to its ASCII character.
///
/// Values 0..=25 become `'a'..='z'` and values 26..=35 become `'0'..='9'`.
///
/// # Panics
///
/// Panics if `value` is 36 or larger. `encode_into` only passes values
/// below `BASE`, so reaching that arm indicates a bug in the caller.
#[inline]
fn value_to_digit(value: u32) -> char {
    match value {
        0..=25 => char::from(b'a' + value as u8),        // a..z
        26..=35 => char::from(b'0' + (value as u8 - 26)), // 0..9
        _ => unreachable!("Punycode digit value out of range: {}", value),
    }
}
465
/// Feeding one more code point than fits in `u32` must fail, and because
/// none of them is ASCII nothing may have been written before the failure.
#[test]
#[ignore = "slow"]
#[cfg(target_pointer_width = "64")]
fn huge_encode() {
    let too_many = u32::MAX as usize + 1;
    let input = core::iter::repeat('ß').take(too_many);
    let mut buf = String::new();
    let outcome = encode_into::<_, _, ExternalCaller>(input, &mut buf);
    assert!(outcome.is_err());
    assert_eq!(buf.len(), 0);
}