Coverage Report

Created: 2026-01-10 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/icu_capi-1.5.1/src/utf.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use alloc::borrow::Cow;
6
7
use core::fmt::Write;
8
use writeable::{LengthHint, Part, TryWriteable, Writeable};
9
10
#[allow(dead_code)]
11
pub(crate) struct LossyWrap<T>(pub T);
12
13
impl<T: TryWriteable> Writeable for LossyWrap<T> {
14
0
    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
15
0
        let _ = self.0.try_write_to(sink)?;
16
0
        Ok(())
17
0
    }
Unexecuted instantiation: <icu_capi::utf::LossyWrap<icu_capi::utf::PotentiallyInvalidUtf8> as writeable::Writeable>::write_to::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut diplomat_runtime::writeable::DiplomatWriteable>>
Unexecuted instantiation: <icu_capi::utf::LossyWrap<icu_capi::utf::PotentiallyInvalidUtf8> as writeable::Writeable>::write_to::<<regex_automata::dfa::sparse::DFA<_> as icu_list::lazy_automaton::LazyAutomaton>::matches_earliest_fwd_lazy::DFAStepper>
Unexecuted instantiation: <icu_capi::utf::LossyWrap<icu_capi::utf::PotentiallyInvalidUtf16> as writeable::Writeable>::write_to::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut diplomat_runtime::writeable::DiplomatWriteable>>
Unexecuted instantiation: <icu_capi::utf::LossyWrap<icu_capi::utf::PotentiallyInvalidUtf16> as writeable::Writeable>::write_to::<<regex_automata::dfa::sparse::DFA<_> as icu_list::lazy_automaton::LazyAutomaton>::matches_earliest_fwd_lazy::DFAStepper>
18
19
0
    fn writeable_length_hint(&self) -> LengthHint {
20
0
        self.0.writeable_length_hint()
21
0
    }
22
}
23
24
use core::{char::DecodeUtf16Error, fmt, str::Utf8Error};
25
26
/// Implements [`TryWriteable`] for [`&[u8]`] according to the [WHATWG Encoding Standard](
27
/// https://encoding.spec.whatwg.org/#utf-8-decoder).
28
#[derive(Debug)]
29
#[allow(clippy::exhaustive_structs)] // newtype
30
pub struct PotentiallyInvalidUtf8<'a>(pub &'a [u8]);
31
32
impl TryWriteable for PotentiallyInvalidUtf8<'_> {
33
    type Error = Utf8Error;
34
35
0
    fn try_write_to_parts<S: writeable::PartsWrite + ?Sized>(
36
0
        &self,
37
0
        sink: &mut S,
38
0
    ) -> Result<Result<(), Self::Error>, fmt::Error> {
39
0
        let mut remaining = self.0;
40
0
        let mut r = Ok(());
41
        loop {
42
0
            match core::str::from_utf8(remaining) {
43
0
                Ok(valid) => {
44
0
                    sink.write_str(valid)?;
45
0
                    return Ok(r);
46
                }
47
0
                Err(e) => {
48
                    // SAFETY: `Utf8Error::valid_up_to` is an in-bounds index, and `remaining` is valid UTF-8 up to it
49
0
                    let valid = unsafe {
50
0
                        core::str::from_utf8_unchecked(remaining.get_unchecked(..e.valid_up_to()))
51
                    };
52
0
                    sink.write_str(valid)?;
53
0
                    sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?;
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf8 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut diplomat_runtime::writeable::DiplomatWriteable>>>::{closure#0}
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf8 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut alloc::string::String>>::{closure#0}
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf8 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut <regex_automata::dfa::sparse::DFA<_> as icu_list::lazy_automaton::LazyAutomaton>::matches_earliest_fwd_lazy::DFAStepper>>::{closure#0}
54
0
                    if r.is_ok() {
55
0
                        r = Err(e);
56
0
                    }
57
0
                    let Some(error_len) = e.error_len() else {
58
0
                        return Ok(r); // end of string
59
                    };
60
                    // SAFETY: `error_len` is the length of the invalid sequence starting at `valid_up_to`, so their sum is in bounds
61
0
                    remaining = unsafe { remaining.get_unchecked(e.valid_up_to() + error_len..) }
62
                }
63
            }
64
        }
65
0
    }
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf8 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut diplomat_runtime::writeable::DiplomatWriteable>>>
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf8 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut alloc::string::String>>
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf8 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut <regex_automata::dfa::sparse::DFA<_> as icu_list::lazy_automaton::LazyAutomaton>::matches_earliest_fwd_lazy::DFAStepper>>
66
67
0
    fn writeable_length_hint(&self) -> writeable::LengthHint {
68
        // Lower bound: the input is entirely valid UTF-8 and is written through unchanged. Upper bound: every byte is invalid on its own and is replaced by a 3-byte U+FFFD replacement character.
69
0
        LengthHint::between(self.0.len(), self.0.len() * 3)
70
0
    }
71
72
0
    fn try_write_to_string(&self) -> Result<Cow<str>, (Self::Error, Cow<str>)> {
73
0
        match core::str::from_utf8(self.0) {
74
0
            Ok(valid) => Ok(Cow::Borrowed(valid)),
75
0
            Err(e) => {
76
                // SAFETY: `Utf8Error::valid_up_to` is an in-bounds index, and `self.0` is valid UTF-8 up to it
77
0
                let valid = unsafe {
78
0
                    core::str::from_utf8_unchecked(self.0.get_unchecked(..e.valid_up_to()))
79
                };
80
81
                // Let's assume this is the only error
82
0
                let mut out = alloc::string::String::with_capacity(
83
0
                    self.0.len() + char::REPLACEMENT_CHARACTER.len_utf8()
84
0
                        - e.error_len().unwrap_or(0),
85
                );
86
87
0
                out.push_str(valid);
88
0
                out.push(char::REPLACEMENT_CHARACTER);
89
90
                // If there's more, we can use `try_write_to`
91
0
                if let Some(error_len) = e.error_len() {
92
0
                    // SAFETY: `error_len` is the length of the invalid sequence starting at `valid_up_to`, so their sum is in bounds
93
0
                    let remaining = unsafe { self.0.get_unchecked(e.valid_up_to() + error_len..) };
94
0
                    let _discard = Self(remaining).try_write_to(&mut out);
95
0
                }
96
97
0
                Err((e, Cow::Owned(out)))
98
            }
99
        }
100
0
    }
101
}
102
103
/// Implements [`TryWriteable`] for [`&[u16]`] according to the [WHATWG Encoding Standard](
104
/// https://encoding.spec.whatwg.org/#shared-utf-16-decoder).
105
#[derive(Debug)]
106
#[allow(clippy::exhaustive_structs)] // newtype
107
pub struct PotentiallyInvalidUtf16<'a>(pub &'a [u16]);
108
109
impl TryWriteable for PotentiallyInvalidUtf16<'_> {
110
    type Error = DecodeUtf16Error;
111
112
0
    fn try_write_to_parts<S: writeable::PartsWrite + ?Sized>(
113
0
        &self,
114
0
        sink: &mut S,
115
0
    ) -> Result<Result<(), Self::Error>, fmt::Error> {
116
0
        let mut r = Ok(());
117
0
        for c in core::char::decode_utf16(self.0.iter().copied()) {
118
0
            match c {
119
0
                Ok(c) => sink.write_char(c)?,
120
0
                Err(e) => {
121
0
                    if r.is_ok() {
122
0
                        r = Err(e);
123
0
                    }
124
0
                    sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?;
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf16 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut diplomat_runtime::writeable::DiplomatWriteable>>>::{closure#0}
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf16 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut <regex_automata::dfa::sparse::DFA<_> as icu_list::lazy_automaton::LazyAutomaton>::matches_earliest_fwd_lazy::DFAStepper>>::{closure#0}
125
                }
126
            }
127
        }
128
0
        Ok(r)
129
0
    }
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf16 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut diplomat_runtime::writeable::DiplomatWriteable>>>
Unexecuted instantiation: <icu_capi::utf::PotentiallyInvalidUtf16 as writeable::try_writeable::TryWriteable>::try_write_to_parts::<writeable::parts_write_adapter::CoreWriteAsPartsWrite<&mut <regex_automata::dfa::sparse::DFA<_> as icu_list::lazy_automaton::LazyAutomaton>::matches_earliest_fwd_lazy::DFAStepper>>
130
131
0
    fn writeable_length_hint(&self) -> LengthHint {
132
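        // Lower bound: every code unit is ASCII (1 byte each). Upper bound: every code unit becomes a 3-byte code point, which includes the replacement character; surrogate pairs use 4 bytes for 2 units and so stay below this bound.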
133
0
        LengthHint::between(self.0.len(), self.0.len() * 3)
134
0
    }
135
}
136
137
#[cfg(test)]
138
mod test {
139
    #![allow(invalid_from_utf8)] // only way to construct the error
140
    use super::*;
141
    use writeable::assert_try_writeable_parts_eq;
142
143
    #[test]
144
    fn test_utf8() {
145
        assert_try_writeable_parts_eq!(PotentiallyInvalidUtf8(b"Foo Bar"), "Foo Bar", Ok(()), []);
146
        assert_try_writeable_parts_eq!(
147
            PotentiallyInvalidUtf8(b"Foo\xFDBar"),
148
            "Foo�Bar",
149
            Err(core::str::from_utf8(b"Foo\xFDBar").unwrap_err()),
150
            [(3, 6, Part::ERROR)]
151
        );
152
        assert_try_writeable_parts_eq!(
153
            PotentiallyInvalidUtf8(b"Foo\xFDBar\xff"),
154
            "Foo�Bar�",
155
            Err(core::str::from_utf8(b"Foo\xFDBar\xff").unwrap_err()),
156
            [(3, 6, Part::ERROR), (9, 12, Part::ERROR)],
157
        );
158
    }
159
160
    #[test]
161
    fn test_utf16() {
162
        assert_try_writeable_parts_eq!(
163
            PotentiallyInvalidUtf16(&[0xD83E, 0xDD73]),
164
            "🥳",
165
            Ok(()),
166
            []
167
        );
168
        assert_try_writeable_parts_eq!(
169
            PotentiallyInvalidUtf16(&[0xD83E, 0x20, 0xDD73]),
170
            "� �",
171
            Err(core::char::decode_utf16([0xD83E].into_iter())
172
                .next()
173
                .unwrap()
174
                .unwrap_err()),
175
            [(0, 3, Part::ERROR), (4, 7, Part::ERROR)]
176
        );
177
    }
178
}