Coverage Report

Created: 2025-08-29 07:06

/rust/registry/src/index.crates.io-6f17d22bba15001f/utf-8-0.7.6/src/read.rs
Line
Count
Source (jump to first uncovered line)
1
use std::io::{self, BufRead};
2
use std::error::Error;
3
use std::fmt;
4
use std::str;
5
use super::*;
6
7
/// Wraps a `std::io::BufRead` buffered byte stream and decode it as UTF-8.
8
pub struct BufReadDecoder<B: BufRead> {
9
    buf_read: B,
10
    bytes_consumed: usize,
11
    incomplete: Incomplete,
12
}
13
14
/// The ways in which decoding a chunk from a `BufReadDecoder` can fail.
#[derive(Debug)]
pub enum BufReadDecoderError<'a> {
    /// Exactly one UTF-8 error in the byte stream; the payload is the offending bytes.
    ///
    /// When decoding lossily, every error of this kind should be replaced with U+FFFD.
    /// (See `BufReadDecoder::next_lossy` and `BufReadDecoderError::lossy`.)
    InvalidByteSequence(&'a [u8]),

    /// The underlying byte stream reported an I/O error.
    Io(io::Error),
}
25
26
impl<'a> BufReadDecoderError<'a> {
27
    /// Replace UTF-8 errors with U+FFFD
28
0
    pub fn lossy(self) -> Result<&'static str, io::Error> {
29
0
        match self {
30
0
            BufReadDecoderError::Io(error) => Err(error),
31
0
            BufReadDecoderError::InvalidByteSequence(_) => Ok(REPLACEMENT_CHARACTER),
32
        }
33
0
    }
34
}
35
36
impl<'a> fmt::Display for BufReadDecoderError<'a> {
37
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
38
0
        match *self {
39
0
            BufReadDecoderError::InvalidByteSequence(bytes) => {
40
0
                write!(f, "invalid byte sequence: {:02x?}", bytes)
41
            }
42
0
            BufReadDecoderError::Io(ref err) => write!(f, "underlying bytestream error: {}", err),
43
        }
44
0
    }
45
}
46
47
impl<'a> Error for BufReadDecoderError<'a> {
48
0
    fn source(&self) -> Option<&(dyn Error + 'static)> {
49
0
        match *self {
50
0
            BufReadDecoderError::InvalidByteSequence(_) => None,
51
0
            BufReadDecoderError::Io(ref err) => Some(err),
52
        }
53
0
    }
54
}
55
56
impl<B: BufRead> BufReadDecoder<B> {
57
    /// This is to `Read::read_to_string` what `String::from_utf8_lossy` is to `String::from_utf8`.
58
0
    pub fn read_to_string_lossy(buf_read: B) -> io::Result<String> {
59
0
        let mut decoder = Self::new(buf_read);
60
0
        let mut string = String::new();
61
0
        while let Some(result) = decoder.next_lossy() {
62
0
            string.push_str(result?)
63
        }
64
0
        Ok(string)
65
0
    }
66
67
0
    pub fn new(buf_read: B) -> Self {
68
0
        Self {
69
0
            buf_read,
70
0
            bytes_consumed: 0,
71
0
            incomplete: Incomplete::empty(),
72
0
        }
73
0
    }
74
75
    /// Same as `BufReadDecoder::next_strict`, but replace UTF-8 errors with U+FFFD.
76
0
    pub fn next_lossy(&mut self) -> Option<io::Result<&str>> {
77
0
        self.next_strict().map(|result| result.or_else(|e| e.lossy()))
78
0
    }
79
80
    /// Decode and consume the next chunk of UTF-8 input.
81
    ///
82
    /// This method is intended to be called repeatedly until it returns `None`,
83
    /// which represents EOF from the underlying byte stream.
84
    /// This is similar to `Iterator::next`,
85
    /// except that decoded chunks borrow the decoder (~iterator)
86
    /// so they need to be handled or copied before the next chunk can start decoding.
87
0
    pub fn next_strict(&mut self) -> Option<Result<&str, BufReadDecoderError>> {
88
        enum BytesSource {
89
            BufRead(usize),
90
            Incomplete,
91
        }
92
        macro_rules! try_io {
93
            ($io_result: expr) => {
94
                match $io_result {
95
                    Ok(value) => value,
96
                    Err(error) => return Some(Err(BufReadDecoderError::Io(error)))
97
                }
98
            }
99
        }
100
0
        let (source, result) = loop {
101
0
            if self.bytes_consumed > 0 {
102
0
                self.buf_read.consume(self.bytes_consumed);
103
0
                self.bytes_consumed = 0;
104
0
            }
105
0
            let buf = try_io!(self.buf_read.fill_buf());
106
107
            // Force loop iteration to go through an explicit `continue`
108
            enum Unreachable {}
109
0
            let _: Unreachable = if self.incomplete.is_empty() {
110
0
                if buf.is_empty() {
111
0
                    return None  // EOF
112
0
                }
113
0
                match str::from_utf8(buf) {
114
                    Ok(_) => {
115
0
                        break (BytesSource::BufRead(buf.len()), Ok(()))
116
                    }
117
0
                    Err(error) => {
118
0
                        let valid_up_to = error.valid_up_to();
119
0
                        if valid_up_to > 0 {
120
0
                            break (BytesSource::BufRead(valid_up_to), Ok(()))
121
0
                        }
122
0
                        match error.error_len() {
123
0
                            Some(invalid_sequence_length) => {
124
0
                                break (BytesSource::BufRead(invalid_sequence_length), Err(()))
125
                            }
126
                            None => {
127
0
                                self.bytes_consumed = buf.len();
128
0
                                self.incomplete = Incomplete::new(buf);
129
0
                                // need more input bytes
130
0
                                continue
131
                            }
132
                        }
133
                    }
134
                }
135
            } else {
136
0
                if buf.is_empty() {
137
0
                    break (BytesSource::Incomplete, Err(()))  // EOF with incomplete code point
138
0
                }
139
0
                let (consumed, opt_result) = self.incomplete.try_complete_offsets(buf);
140
0
                self.bytes_consumed = consumed;
141
0
                match opt_result {
142
                    None => {
143
                        // need more input bytes
144
0
                        continue
145
                    }
146
0
                    Some(result) => {
147
0
                        break (BytesSource::Incomplete, result)
148
                    }
149
                }
150
            };
151
        };
152
0
        let bytes = match source {
153
0
            BytesSource::BufRead(byte_count) => {
154
0
                self.bytes_consumed = byte_count;
155
0
                let buf = try_io!(self.buf_read.fill_buf());
156
0
                &buf[..byte_count]
157
            }
158
            BytesSource::Incomplete => {
159
0
                self.incomplete.take_buffer()
160
            }
161
        };
162
0
        match result {
163
0
            Ok(()) => Some(Ok(unsafe { str::from_utf8_unchecked(bytes) })),
164
0
            Err(()) => Some(Err(BufReadDecoderError::InvalidByteSequence(bytes))),
165
        }
166
0
    }
167
}