Coverage Report

Created: 2025-11-01 06:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/utf-8-0.7.6/src/lib.rs
Line
Count
Source
1
mod lossy;
2
mod read;
3
4
pub use lossy::LossyDecoder;
5
pub use read::{BufReadDecoder, BufReadDecoderError};
6
7
use std::cmp;
8
use std::error::Error;
9
use std::fmt;
10
use std::str;
11
12
/// The Unicode replacement character, U+FFFD, as a string slice.
///
/// In lossy decoding, insert it once for every decoding error.
pub const REPLACEMENT_CHARACTER: &str = "\u{FFFD}";
14
15
#[derive(Debug, Copy, Clone)]
16
pub enum DecodeError<'a> {
17
    /// In lossy decoding insert `valid_prefix`, then `"\u{FFFD}"`,
18
    /// then call `decode()` again with `remaining_input`.
19
    Invalid {
20
        valid_prefix: &'a str,
21
        invalid_sequence: &'a [u8],
22
        remaining_input: &'a [u8],
23
    },
24
25
    /// Call the `incomplete_suffix.try_complete` method with more input when available.
26
    /// If no more input is available, this is an invalid byte sequence.
27
    Incomplete {
28
        valid_prefix: &'a str,
29
        incomplete_suffix: Incomplete,
30
    },
31
}
32
33
impl<'a> fmt::Display for DecodeError<'a> {
34
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
35
0
        match *self {
36
            DecodeError::Invalid {
37
0
                valid_prefix,
38
0
                invalid_sequence,
39
0
                remaining_input,
40
0
            } => write!(
41
0
                f,
42
0
                "found invalid byte sequence {invalid_sequence:02x?} after \
43
0
                 {valid_byte_count} valid bytes, followed by {unprocessed_byte_count} more \
44
0
                 unprocessed bytes",
45
                invalid_sequence = invalid_sequence,
46
0
                valid_byte_count = valid_prefix.len(),
47
0
                unprocessed_byte_count = remaining_input.len()
48
            ),
49
            DecodeError::Incomplete {
50
0
                valid_prefix,
51
0
                incomplete_suffix,
52
0
            } => write!(
53
0
                f,
54
0
                "found incomplete byte sequence {incomplete_suffix:02x?} after \
55
0
                 {valid_byte_count} bytes",
56
                incomplete_suffix = incomplete_suffix,
57
0
                valid_byte_count = valid_prefix.len()
58
            ),
59
        }
60
0
    }
61
}
62
63
impl<'a> Error for DecodeError<'a> {}
64
65
/// A small fixed-size buffer holding the bytes of a not-yet-complete sequence.
#[derive(Clone, Copy, Debug)]
pub struct Incomplete {
    /// Backing storage; only the first `buffer_len` bytes are in use.
    pub buffer: [u8; 4],
    /// Number of bytes of `buffer` currently in use.
    pub buffer_len: u8,
}
70
71
386k
pub fn decode(input: &[u8]) -> Result<&str, DecodeError> {
72
386k
    let error = match str::from_utf8(input) {
73
184k
        Ok(valid) => return Ok(valid),
74
201k
        Err(error) => error,
75
    };
76
77
    // FIXME: separate function from here to guide inlining?
78
201k
    let (valid, after_valid) = input.split_at(error.valid_up_to());
79
201k
    let valid = unsafe {
80
201k
        str::from_utf8_unchecked(valid)
81
    };
82
83
201k
    match error.error_len() {
84
313
        Some(invalid_sequence_length) => {
85
313
            let (invalid, rest) = after_valid.split_at(invalid_sequence_length);
86
313
            Err(DecodeError::Invalid {
87
313
                valid_prefix: valid,
88
313
                invalid_sequence: invalid,
89
313
                remaining_input: rest
90
313
            })
91
        }
92
        None => {
93
201k
            Err(DecodeError::Incomplete {
94
201k
                valid_prefix: valid,
95
201k
                incomplete_suffix: Incomplete::new(after_valid),
96
201k
            })
97
        }
98
    }
99
386k
}
100
101
impl Incomplete {
102
0
    pub fn empty() -> Self {
103
0
        Incomplete {
104
0
            buffer: [0, 0, 0, 0],
105
0
            buffer_len: 0,
106
0
        }
107
0
    }
108
109
0
    pub fn is_empty(&self) -> bool {
110
0
        self.buffer_len == 0
111
0
    }
112
113
201k
    pub fn new(bytes: &[u8]) -> Self {
114
201k
        let mut buffer = [0, 0, 0, 0];
115
201k
        let len = bytes.len();
116
201k
        buffer[..len].copy_from_slice(bytes);
117
201k
        Incomplete {
118
201k
            buffer: buffer,
119
201k
            buffer_len: len as u8,
120
201k
        }
121
201k
    }
122
123
    /// * `None`: still incomplete, call `try_complete` again with more input.
124
    ///   If no more input is available, this is invalid byte sequence.
125
    /// * `Some((result, remaining_input))`: We’re done with this `Incomplete`.
126
    ///   To keep decoding, pass `remaining_input` to `decode()`.
127
201k
    pub fn try_complete<'input>(&mut self, input: &'input [u8])
128
201k
                                -> Option<(Result<&str, &[u8]>, &'input [u8])> {
129
201k
        let (consumed, opt_result) = self.try_complete_offsets(input);
130
201k
        let result = opt_result?;
131
200k
        let remaining_input = &input[consumed..];
132
200k
        let result_bytes = self.take_buffer();
133
200k
        let result = match result {
134
200k
            Ok(()) => Ok(unsafe { str::from_utf8_unchecked(result_bytes) }),
135
41
            Err(()) => Err(result_bytes),
136
        };
137
200k
        Some((result, remaining_input))
138
201k
    }
139
140
200k
    fn take_buffer(&mut self) -> &[u8] {
141
200k
        let len = self.buffer_len as usize;
142
200k
        self.buffer_len = 0;
143
200k
        &self.buffer[..len as usize]
144
200k
    }
145
146
    /// (consumed_from_input, None): not enough input
147
    /// (consumed_from_input, Some(Err(()))): error bytes in buffer
148
    /// (consumed_from_input, Some(Ok(()))): UTF-8 string in buffer
149
201k
    fn try_complete_offsets(&mut self, input: &[u8]) -> (usize, Option<Result<(), ()>>) {
150
201k
        let initial_buffer_len = self.buffer_len as usize;
151
        let copied_from_input;
152
201k
        {
153
201k
            let unwritten = &mut self.buffer[initial_buffer_len..];
154
201k
            copied_from_input = cmp::min(unwritten.len(), input.len());
155
201k
            unwritten[..copied_from_input].copy_from_slice(&input[..copied_from_input]);
156
201k
        }
157
201k
        let spliced = &self.buffer[..initial_buffer_len + copied_from_input];
158
201k
        match str::from_utf8(spliced) {
159
            Ok(_) => {
160
16.8k
                self.buffer_len = spliced.len() as u8;
161
16.8k
                (copied_from_input, Some(Ok(())))
162
            }
163
184k
            Err(error) => {
164
184k
                let valid_up_to = error.valid_up_to();
165
184k
                if valid_up_to > 0 {
166
183k
                    let consumed = valid_up_to.checked_sub(initial_buffer_len).unwrap();
167
183k
                    self.buffer_len = valid_up_to as u8;
168
183k
                    (consumed, Some(Ok(())))
169
                } else {
170
622
                    match error.error_len() {
171
41
                        Some(invalid_sequence_length) => {
172
41
                            let consumed = invalid_sequence_length
173
41
                                .checked_sub(initial_buffer_len).unwrap();
174
41
                            self.buffer_len = invalid_sequence_length as u8;
175
41
                            (consumed, Some(Err(())))
176
                        }
177
                        None => {
178
581
                            self.buffer_len = spliced.len() as u8;
179
581
                            (copied_from_input, None)
180
                        }
181
                    }
182
                }
183
            }
184
        }
185
201k
    }
186
}