/rust/registry/src/index.crates.io-6f17d22bba15001f/utf-8-0.7.6/src/read.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use std::io::{self, BufRead}; |
2 | | use std::error::Error; |
3 | | use std::fmt; |
4 | | use std::str; |
5 | | use super::*; |
6 | | |
7 | | /// Wraps a `std::io::BufRead` buffered byte stream and decode it as UTF-8. |
8 | | pub struct BufReadDecoder<B: BufRead> { |
9 | | buf_read: B, |
10 | | bytes_consumed: usize, |
11 | | incomplete: Incomplete, |
12 | | } |
13 | | |
/// Failure reported by `BufReadDecoder::next_strict`.
#[derive(Debug)]
pub enum BufReadDecoderError<'a> {
    /// A single UTF-8 error in the byte stream; the payload is the offending bytes.
    ///
    /// Lossy decoding substitutes U+FFFD for each such error
    /// (see `BufReadDecoder::next_lossy` and `BufReadDecoderError::lossy`).
    InvalidByteSequence(&'a [u8]),

    /// The underlying byte stream reported an I/O error.
    Io(io::Error),
}
25 | | |
26 | | impl<'a> BufReadDecoderError<'a> { |
27 | | /// Replace UTF-8 errors with U+FFFD |
28 | 0 | pub fn lossy(self) -> Result<&'static str, io::Error> { |
29 | 0 | match self { |
30 | 0 | BufReadDecoderError::Io(error) => Err(error), |
31 | 0 | BufReadDecoderError::InvalidByteSequence(_) => Ok(REPLACEMENT_CHARACTER), |
32 | | } |
33 | 0 | } |
34 | | } |
35 | | |
36 | | impl<'a> fmt::Display for BufReadDecoderError<'a> { |
37 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
38 | 0 | match *self { |
39 | 0 | BufReadDecoderError::InvalidByteSequence(bytes) => { |
40 | 0 | write!(f, "invalid byte sequence: {:02x?}", bytes) |
41 | | } |
42 | 0 | BufReadDecoderError::Io(ref err) => write!(f, "underlying bytestream error: {}", err), |
43 | | } |
44 | 0 | } |
45 | | } |
46 | | |
47 | | impl<'a> Error for BufReadDecoderError<'a> { |
48 | 0 | fn source(&self) -> Option<&(dyn Error + 'static)> { |
49 | 0 | match *self { |
50 | 0 | BufReadDecoderError::InvalidByteSequence(_) => None, |
51 | 0 | BufReadDecoderError::Io(ref err) => Some(err), |
52 | | } |
53 | 0 | } |
54 | | } |
55 | | |
56 | | impl<B: BufRead> BufReadDecoder<B> { |
57 | | /// This is to `Read::read_to_string` what `String::from_utf8_lossy` is to `String::from_utf8`. |
58 | 0 | pub fn read_to_string_lossy(buf_read: B) -> io::Result<String> { |
59 | 0 | let mut decoder = Self::new(buf_read); |
60 | 0 | let mut string = String::new(); |
61 | 0 | while let Some(result) = decoder.next_lossy() { |
62 | 0 | string.push_str(result?) |
63 | | } |
64 | 0 | Ok(string) |
65 | 0 | } |
66 | | |
67 | 0 | pub fn new(buf_read: B) -> Self { |
68 | 0 | Self { |
69 | 0 | buf_read, |
70 | 0 | bytes_consumed: 0, |
71 | 0 | incomplete: Incomplete::empty(), |
72 | 0 | } |
73 | 0 | } |
74 | | |
75 | | /// Same as `BufReadDecoder::next_strict`, but replace UTF-8 errors with U+FFFD. |
76 | 0 | pub fn next_lossy(&mut self) -> Option<io::Result<&str>> { |
77 | 0 | self.next_strict().map(|result| result.or_else(|e| e.lossy())) |
78 | 0 | } |
79 | | |
80 | | /// Decode and consume the next chunk of UTF-8 input. |
81 | | /// |
82 | | /// This method is intended to be called repeatedly until it returns `None`, |
83 | | /// which represents EOF from the underlying byte stream. |
84 | | /// This is similar to `Iterator::next`, |
85 | | /// except that decoded chunks borrow the decoder (~iterator) |
86 | | /// so they need to be handled or copied before the next chunk can start decoding. |
87 | 0 | pub fn next_strict(&mut self) -> Option<Result<&str, BufReadDecoderError>> { |
88 | | enum BytesSource { |
89 | | BufRead(usize), |
90 | | Incomplete, |
91 | | } |
92 | | macro_rules! try_io { |
93 | | ($io_result: expr) => { |
94 | | match $io_result { |
95 | | Ok(value) => value, |
96 | | Err(error) => return Some(Err(BufReadDecoderError::Io(error))) |
97 | | } |
98 | | } |
99 | | } |
100 | 0 | let (source, result) = loop { |
101 | 0 | if self.bytes_consumed > 0 { |
102 | 0 | self.buf_read.consume(self.bytes_consumed); |
103 | 0 | self.bytes_consumed = 0; |
104 | 0 | } |
105 | 0 | let buf = try_io!(self.buf_read.fill_buf()); |
106 | | |
107 | | // Force loop iteration to go through an explicit `continue` |
108 | | enum Unreachable {} |
109 | 0 | let _: Unreachable = if self.incomplete.is_empty() { |
110 | 0 | if buf.is_empty() { |
111 | 0 | return None // EOF |
112 | 0 | } |
113 | 0 | match str::from_utf8(buf) { |
114 | | Ok(_) => { |
115 | 0 | break (BytesSource::BufRead(buf.len()), Ok(())) |
116 | | } |
117 | 0 | Err(error) => { |
118 | 0 | let valid_up_to = error.valid_up_to(); |
119 | 0 | if valid_up_to > 0 { |
120 | 0 | break (BytesSource::BufRead(valid_up_to), Ok(())) |
121 | 0 | } |
122 | 0 | match error.error_len() { |
123 | 0 | Some(invalid_sequence_length) => { |
124 | 0 | break (BytesSource::BufRead(invalid_sequence_length), Err(())) |
125 | | } |
126 | | None => { |
127 | 0 | self.bytes_consumed = buf.len(); |
128 | 0 | self.incomplete = Incomplete::new(buf); |
129 | 0 | // need more input bytes |
130 | 0 | continue |
131 | | } |
132 | | } |
133 | | } |
134 | | } |
135 | | } else { |
136 | 0 | if buf.is_empty() { |
137 | 0 | break (BytesSource::Incomplete, Err(())) // EOF with incomplete code point |
138 | 0 | } |
139 | 0 | let (consumed, opt_result) = self.incomplete.try_complete_offsets(buf); |
140 | 0 | self.bytes_consumed = consumed; |
141 | 0 | match opt_result { |
142 | | None => { |
143 | | // need more input bytes |
144 | 0 | continue |
145 | | } |
146 | 0 | Some(result) => { |
147 | 0 | break (BytesSource::Incomplete, result) |
148 | | } |
149 | | } |
150 | | }; |
151 | | }; |
152 | 0 | let bytes = match source { |
153 | 0 | BytesSource::BufRead(byte_count) => { |
154 | 0 | self.bytes_consumed = byte_count; |
155 | 0 | let buf = try_io!(self.buf_read.fill_buf()); |
156 | 0 | &buf[..byte_count] |
157 | | } |
158 | | BytesSource::Incomplete => { |
159 | 0 | self.incomplete.take_buffer() |
160 | | } |
161 | | }; |
162 | 0 | match result { |
163 | 0 | Ok(()) => Some(Ok(unsafe { str::from_utf8_unchecked(bytes) })), |
164 | 0 | Err(()) => Some(Err(BufReadDecoderError::InvalidByteSequence(bytes))), |
165 | | } |
166 | 0 | } |
167 | | } |