Coverage Report

Created: 2026-02-14 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/idna-1.1.0/src/lib.rs
Line
Count
Source
1
// Copyright 2016 The rust-url developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
//! This Rust crate implements IDNA
10
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
11
//!
12
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
13
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
14
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
15
//!
16
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
17
//!
18
//! > Initially, domain names were restricted to ASCII characters.
19
//! > A system was introduced in 2003 for internationalized domain names (IDN).
20
//! > This system is called Internationalizing Domain Names for Applications,
21
//! > or IDNA2003 for short.
22
//! > This mechanism supports IDNs by means of a client software transformation
23
//! > into a format known as Punycode.
24
//! > A revision of IDNA was approved in 2010 (IDNA2008).
25
//! > This revision has a number of incompatibilities with IDNA2003.
26
//! >
27
//! > The incompatibilities force implementers of client software,
28
//! > such as browsers and emailers,
29
//! > to face difficult choices during the transition period
30
//! > as registries shift from IDNA2003 to IDNA2008.
31
//! > This document specifies a mechanism
32
//! > that minimizes the impact of this transition for client software,
33
//! > allowing client software to access domains that are valid under either system.
34
#![no_std]
35
36
// For forwards compatibility
37
#[cfg(feature = "std")]
38
extern crate std;
39
40
extern crate alloc;
41
42
#[cfg(not(feature = "alloc"))]
43
compile_error!("the `alloc` feature must be enabled");
44
45
// Avoid a breaking change if in the future there's a use case for
46
// having a Bring-Your-Own-ICU4X-Data constructor for `Uts46` and
47
// not also having compiled data in the binary.
48
#[cfg(not(feature = "compiled_data"))]
49
compile_error!("the `compiled_data` feature must be enabled");
50
51
use alloc::borrow::Cow;
52
use alloc::string::String;
53
pub use uts46::AsciiDenyList;
54
use uts46::Uts46;
55
56
mod deprecated;
57
pub mod punycode;
58
pub mod uts46;
59
60
#[allow(deprecated)]
61
pub use crate::deprecated::{Config, Idna};
62
63
/// Type indicating that there were errors during UTS #46 processing.
64
#[derive(Default, Debug)]
65
#[non_exhaustive]
66
pub struct Errors {}
67
68
impl From<Errors> for Result<(), Errors> {
69
0
    fn from(e: Errors) -> Self {
70
0
        Err(e)
71
0
    }
72
}
73
74
#[cfg(feature = "std")]
75
impl std::error::Error for Errors {}
76
77
#[cfg(not(feature = "std"))]
78
impl core::error::Error for Errors {}
79
80
impl core::fmt::Display for Errors {
81
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
82
0
        core::fmt::Debug::fmt(self, f)
83
0
    }
84
}
85
86
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
87
/// version returning a `Cow`.
88
///
89
/// Most applications should be using this function or `domain_to_ascii_from_cow` rather
90
/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as
91
/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also
92
/// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point)
93
/// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii)
94
/// algorithm.
95
///
96
/// Returns the ASCII representation of a domain name,
97
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
98
/// and using Punycode as necessary.
99
///
100
/// This process may fail.
101
///
102
/// If you have a `&str` instead of `&[u8]`, just call `.as_bytes()` on it before
103
/// passing it to this function. It's still preferable to use this function over
104
/// the sibling functions that take `&str`.
105
0
pub fn domain_to_ascii_cow(
106
0
    domain: &[u8],
107
0
    ascii_deny_list: AsciiDenyList,
108
0
) -> Result<Cow<'_, str>, Errors> {
109
0
    Uts46::new().to_ascii(
110
0
        domain,
111
0
        ascii_deny_list,
112
0
        uts46::Hyphens::Allow,
113
0
        uts46::DnsLength::Ignore,
114
    )
115
0
}
116
117
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
118
/// version accepting and returning a `Cow`.
119
///
120
/// Most applications should be using this function or `domain_to_ascii_cow` rather
121
/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as
122
/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also
123
/// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point)
124
/// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii)
125
/// algorithm.
126
///
127
/// Returns the ASCII representation of a domain name,
128
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
129
/// and using Punycode as necessary.
130
///
131
/// This process may fail.
132
0
pub fn domain_to_ascii_from_cow(
133
0
    domain: Cow<'_, [u8]>,
134
0
    ascii_deny_list: AsciiDenyList,
135
0
) -> Result<Cow<'_, str>, Errors> {
136
0
    Uts46::new().to_ascii_from_cow(
137
0
        domain,
138
0
        ascii_deny_list,
139
0
        uts46::Hyphens::Allow,
140
0
        uts46::DnsLength::Ignore,
141
    )
142
0
}
143
144
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
145
/// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_).
146
///
147
/// This function exists for backward-compatibility. Consider using [`domain_to_ascii_cow`]
148
/// instead.
149
///
150
/// Returns the ASCII representation of a domain name,
151
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
152
/// and using Punycode as necessary.
153
///
154
/// This process may fail.
155
0
pub fn domain_to_ascii(domain: &str) -> Result<String, Errors> {
156
0
    domain_to_ascii_cow(domain.as_bytes(), AsciiDenyList::EMPTY).map(|cow| cow.into_owned())
157
0
}
158
159
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm,
160
/// with the `beStrict` flag set.
161
///
162
/// Note that this rejects various real-world names including:
163
/// * YouTube CDN nodes
164
/// * Some GitHub user pages
165
/// * Pseudo-hosts used by various TXT record-based protocols.
166
0
pub fn domain_to_ascii_strict(domain: &str) -> Result<String, Errors> {
167
0
    Uts46::new()
168
0
        .to_ascii(
169
0
            domain.as_bytes(),
170
            uts46::AsciiDenyList::STD3,
171
0
            uts46::Hyphens::Check,
172
0
            uts46::DnsLength::Verify,
173
        )
174
0
        .map(|cow| cow.into_owned())
175
0
}
176
177
/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm;
178
/// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_).
179
///
180
/// This function exists for backward-compatibility. Consider using [`Uts46::to_user_interface`]
181
/// or [`Uts46::to_unicode`].
182
///
183
/// Return the Unicode representation of a domain name,
184
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
185
/// and decoding Punycode as necessary.
186
///
187
/// If the second item of the tuple indicates an error, the first item of the tuple
188
/// denotes errors using the REPLACEMENT CHARACTERs in order to be able to illustrate
189
/// errors to the user. When the second item of the return tuple signals an error,
190
/// the first item of the tuple must not be used in a network protocol.
191
0
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), Errors>) {
192
0
    let (cow, result) = Uts46::new().to_unicode(
193
0
        domain.as_bytes(),
194
0
        uts46::AsciiDenyList::EMPTY,
195
0
        uts46::Hyphens::Allow,
196
0
    );
197
0
    (cow.into_owned(), result)
198
0
}