/rust/registry/src/index.crates.io-1949cf8c6b5b557f/utf-8-0.7.6/src/lib.rs

Source
mod lossy;
mod read;

pub use lossy::LossyDecoder;
pub use read::{BufReadDecoder, BufReadDecoderError};

use std::cmp;
use std::error::Error;
use std::fmt;
use std::str;

/// The replacement character, U+FFFD. In lossy decoding, insert it for every decoding error.
pub const REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";

/// Error returned by `decode()` when the input is not entirely valid UTF-8.
///
/// Both variants carry `valid_prefix`: the part of the input that precedes the
/// problem, already decoded as a `&str`.
#[derive(Debug, Copy, Clone)]
pub enum DecodeError<'a> {
    /// In lossy decoding insert `valid_prefix`, then `"\u{FFFD}"`,
    /// then call `decode()` again with `remaining_input`.
    Invalid {
        /// Input bytes before the invalid sequence, as a string slice.
        valid_prefix: &'a str,
        /// The bytes that `decode()` reported as invalid.
        invalid_sequence: &'a [u8],
        /// Input bytes after `invalid_sequence`, not yet examined.
        remaining_input: &'a [u8],
    },

    /// Call the `incomplete_suffix.try_complete` method with more input when available.
    /// If no more input is available, this is an invalid byte sequence.
    Incomplete {
        /// Input bytes before the incomplete sequence, as a string slice.
        valid_prefix: &'a str,
        /// Copy of the trailing bytes that ended before a full sequence was seen.
        incomplete_suffix: Incomplete,
    },
}

impl<'a> fmt::Display for DecodeError<'a> {
    /// Writes a one-line, human-readable description of the decoding error.
    ///
    /// Byte sequences are rendered as lists of two-digit lowercase hex values,
    /// e.g. `[e2, 82]`. For the `Incomplete` variant only the bytes that are
    /// actually buffered (`buffer[..buffer_len]`) are shown, not the whole
    /// `Incomplete` struct with its zero padding.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            DecodeError::Invalid {
                valid_prefix,
                invalid_sequence,
                remaining_input,
            } => write!(
                f,
                "found invalid byte sequence {invalid_sequence:02x?} after \
                 {valid_byte_count} valid bytes, followed by {unprocessed_byte_count} more \
                 unprocessed bytes",
                invalid_sequence = invalid_sequence,
                valid_byte_count = valid_prefix.len(),
                unprocessed_byte_count = remaining_input.len()
            ),
            DecodeError::Incomplete {
                valid_prefix,
                incomplete_suffix,
            } => {
                // `buffer_len` is a public field, so it may have been set out of
                // range by hand; fall back to the full buffer rather than panic
                // while formatting an error.
                let buffered_len = usize::from(incomplete_suffix.buffer_len);
                let buffered_bytes: &[u8] = incomplete_suffix
                    .buffer
                    .get(..buffered_len)
                    .unwrap_or(&incomplete_suffix.buffer[..]);
                write!(
                    f,
                    "found incomplete byte sequence {incomplete_suffix:02x?} after \
                     {valid_byte_count} bytes",
                    incomplete_suffix = buffered_bytes,
                    valid_byte_count = valid_prefix.len()
                )
            }
        }
    }
}

// No `Error` methods are overridden here; the trait only needs the `Debug`
// (derived) and `Display` implementations that `DecodeError` already has.
impl<'a> Error for DecodeError<'a> {}

/// A small fixed-size buffer holding the bytes of a byte sequence that was cut
/// off at the end of an input chunk.
///
/// Created by `decode()` (via `Incomplete::new`) and resolved by feeding it
/// more input through `try_complete`.
#[derive(Debug, Copy, Clone)]
pub struct Incomplete {
    /// Backing storage; only the first `buffer_len` bytes are meaningful.
    pub buffer: [u8; 4],
    /// Number of bytes of `buffer` currently in use (0 means empty).
    pub buffer_len: u8,
}

/// Decodes `input` as UTF-8, reporting exactly where and how decoding stopped.
///
/// * `Ok(text)`: all of `input` is valid UTF-8.
/// * `Err(DecodeError::Invalid { .. })`: an invalid byte sequence was found;
///   the error carries the valid text before it, the invalid bytes themselves,
///   and the input that follows them.
/// * `Err(DecodeError::Incomplete { .. })`: the input ended in the middle of a
///   byte sequence; the error carries the valid text before it and a copy of
///   the trailing bytes as an `Incomplete`.
pub fn decode(input: &[u8]) -> Result<&str, DecodeError> {
    match str::from_utf8(input) {
        Ok(text) => Ok(text),
        // FIXME: separate function from here to guide inlining?
        Err(utf8_error) => {
            let prefix_len = utf8_error.valid_up_to();
            // SAFETY: `Utf8Error::valid_up_to` is the length of the longest
            // prefix of `input` that `str::from_utf8` verified as valid UTF-8.
            let valid_prefix = unsafe { str::from_utf8_unchecked(&input[..prefix_len]) };
            let tail = &input[prefix_len..];

            Err(match utf8_error.error_len() {
                // The input ended before the sequence at `tail` was complete.
                None => DecodeError::Incomplete {
                    valid_prefix,
                    incomplete_suffix: Incomplete::new(tail),
                },
                // `tail` starts with `bad_len` bytes that can never be valid.
                Some(bad_len) => DecodeError::Invalid {
                    valid_prefix,
                    invalid_sequence: &tail[..bad_len],
                    remaining_input: &tail[bad_len..],
                },
            })
        }
    }
}

impl Incomplete {
    pub fn empty() -> Self {
        Incomplete {
            buffer: [0, 0, 0, 0],
            buffer_len: 0,
        }
    }

    pub fn is_empty(&self) -> bool {
        self.buffer_len == 0
    }

    pub fn new(bytes: &[u8]) -> Self {
        let mut buffer = [0, 0, 0, 0];
        let len = bytes.len();
        buffer[..len].copy_from_slice(bytes);
        Incomplete {
            buffer: buffer,
            buffer_len: len as u8,
        }
    }

    /// * `None`: still incomplete, call `try_complete` again with more input.
    ///   If no more input is available, this is invalid byte sequence.
    /// * `Some((result, remaining_input))`: We’re done with this `Incomplete`.
    ///   To keep decoding, pass `remaining_input` to `decode()`.
    pub fn try_complete<'input>(&mut self, input: &'input [u8])
                                -> Option<(Result<&str, &[u8]>, &'input [u8])> {
        let (consumed, opt_result) = self.try_complete_offsets(input);
        let result = opt_result?;
        let remaining_input = &input[consumed..];
        let result_bytes = self.take_buffer();
        let result = match result {
            Ok(()) => Ok(unsafe { str::from_utf8_unchecked(result_bytes) }),
            Err(()) => Err(result_bytes),
        };
        Some((result, remaining_input))
    }

    fn take_buffer(&mut self) -> &[u8] {
        let len = self.buffer_len as usize;
        self.buffer_len = 0;
        &self.buffer[..len as usize]
    }

    /// (consumed_from_input, None): not enough input
    /// (consumed_from_input, Some(Err(()))): error bytes in buffer
    /// (consumed_from_input, Some(Ok(()))): UTF-8 string in buffer
    fn try_complete_offsets(&mut self, input: &[u8]) -> (usize, Option<Result<(), ()>>) {
        let initial_buffer_len = self.buffer_len as usize;
        let copied_from_input;
        {
            let unwritten = &mut self.buffer[initial_buffer_len..];
            copied_from_input = cmp::min(unwritten.len(), input.len());
            unwritten[..copied_from_input].copy_from_slice(&input[..copied_from_input]);
        }
        let spliced = &self.buffer[..initial_buffer_len + copied_from_input];
        match str::from_utf8(spliced) {
            Ok(_) => {
                self.buffer_len = spliced.len() as u8;
                (copied_from_input, Some(Ok(())))
            }
            Err(error) => {
                let valid_up_to = error.valid_up_to();
                if valid_up_to > 0 {
                    let consumed = valid_up_to.checked_sub(initial_buffer_len).unwrap();
                    self.buffer_len = valid_up_to as u8;
                    (consumed, Some(Ok(())))
                } else {
                    match error.error_len() {
                        Some(invalid_sequence_length) => {
                            let consumed = invalid_sequence_length
                                .checked_sub(initial_buffer_len).unwrap();
                            self.buffer_len = invalid_sequence_length as u8;
                            (consumed, Some(Err(())))
                        }
                        None => {
                            self.buffer_len = spliced.len() as u8;
                            (copied_from_input, None)
                        }
                    }
                }
            }
        }
    }
}

Line	Count	Source
1		mod lossy;
2		mod read;
3
4		pub use lossy::LossyDecoder;
5		pub use read::{BufReadDecoder, BufReadDecoderError};
6
7		use std::cmp;
8		use std::error::Error;
9		use std::fmt;
10		use std::str;
11
12		/// The replacement character, U+FFFD. In lossy decoding, insert it for every decoding error.
13		pub const REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";
14
15		#[derive(Debug, Copy, Clone)]
16		pub enum DecodeError<'a> {
17		/// In lossy decoding insert `valid_prefix`, then `"\u{FFFD}"`,
18		/// then call `decode()` again with `remaining_input`.
19		Invalid {
20		valid_prefix: &'a str,
21		invalid_sequence: &'a [u8],
22		remaining_input: &'a [u8],
23		},
24
25		/// Call the `incomplete_suffix.try_complete` method with more input when available.
26		/// If no more input is available, this is an invalid byte sequence.
27		Incomplete {
28		valid_prefix: &'a str,
29		incomplete_suffix: Incomplete,
30		},
31		}
32
33		impl<'a> fmt::Display for DecodeError<'a> {
34	0	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
35	0	match *self {
36		DecodeError::Invalid {
37	0	valid_prefix,
38	0	invalid_sequence,
39	0	remaining_input,
40	0	} => write!(
41	0	f,
42	0	"found invalid byte sequence {invalid_sequence:02x?} after \
43	0	{valid_byte_count} valid bytes, followed by {unprocessed_byte_count} more \
44	0	unprocessed bytes",
45		invalid_sequence = invalid_sequence,
46	0	valid_byte_count = valid_prefix.len(),
47	0	unprocessed_byte_count = remaining_input.len()
48		),
49		DecodeError::Incomplete {
50	0	valid_prefix,
51	0	incomplete_suffix,
52	0	} => write!(
53	0	f,
54	0	"found incomplete byte sequence {incomplete_suffix:02x?} after \
55	0	{valid_byte_count} bytes",
56		incomplete_suffix = incomplete_suffix,
57	0	valid_byte_count = valid_prefix.len()
58		),
59		}
60	0	}
61		}
62
63		impl<'a> Error for DecodeError<'a> {}
64
65		#[derive(Debug, Copy, Clone)]
66		pub struct Incomplete {
67		pub buffer: [u8; 4],
68		pub buffer_len: u8,
69		}
70
71	386k	pub fn decode(input: &[u8]) -> Result<&str, DecodeError> {
72	386k	let error = match str::from_utf8(input) {
73	184k	Ok(valid) => return Ok(valid),
74	201k	Err(error) => error,
75		};
76
77		// FIXME: separate function from here to guide inlining?
78	201k	let (valid, after_valid) = input.split_at(error.valid_up_to());
79	201k	let valid = unsafe {
80	201k	str::from_utf8_unchecked(valid)
81		};
82
83	201k	match error.error_len() {
84	313	Some(invalid_sequence_length) => {
85	313	let (invalid, rest) = after_valid.split_at(invalid_sequence_length);
86	313	Err(DecodeError::Invalid {
87	313	valid_prefix: valid,
88	313	invalid_sequence: invalid,
89	313	remaining_input: rest
90	313	})
91		}
92		None => {
93	201k	Err(DecodeError::Incomplete {
94	201k	valid_prefix: valid,
95	201k	incomplete_suffix: Incomplete::new(after_valid),
96	201k	})
97		}
98		}
99	386k	}
100
101		impl Incomplete {
102	0	pub fn empty() -> Self {
103	0	Incomplete {
104	0	buffer: [0, 0, 0, 0],
105	0	buffer_len: 0,
106	0	}
107	0	}
108
109	0	pub fn is_empty(&self) -> bool {
110	0	self.buffer_len == 0
111	0	}
112
113	201k	pub fn new(bytes: &[u8]) -> Self {
114	201k	let mut buffer = [0, 0, 0, 0];
115	201k	let len = bytes.len();
116	201k	buffer[..len].copy_from_slice(bytes);
117	201k	Incomplete {
118	201k	buffer: buffer,
119	201k	buffer_len: len as u8,
120	201k	}
121	201k	}
122
123		/// * `None`: still incomplete, call `try_complete` again with more input.
124		/// If no more input is available, this is invalid byte sequence.
125		/// * `Some((result, remaining_input))`: We’re done with this `Incomplete`.
126		/// To keep decoding, pass `remaining_input` to `decode()`.
127	201k	pub fn try_complete<'input>(&mut self, input: &'input [u8])
128	201k	-> Option<(Result<&str, &[u8]>, &'input [u8])> {
129	201k	let (consumed, opt_result) = self.try_complete_offsets(input);
130	201k	let result = opt_result?;
131	200k	let remaining_input = &input[consumed..];
132	200k	let result_bytes = self.take_buffer();
133	200k	let result = match result {
134	200k	Ok(()) => Ok(unsafe { str::from_utf8_unchecked(result_bytes) }),
135	41	Err(()) => Err(result_bytes),
136		};
137	200k	Some((result, remaining_input))
138	201k	}
139
140	200k	fn take_buffer(&mut self) -> &[u8] {
141	200k	let len = self.buffer_len as usize;
142	200k	self.buffer_len = 0;
143	200k	&self.buffer[..len as usize]
144	200k	}
145
146		/// (consumed_from_input, None): not enough input
147		/// (consumed_from_input, Some(Err(()))): error bytes in buffer
148		/// (consumed_from_input, Some(Ok(()))): UTF-8 string in buffer
149	201k	fn try_complete_offsets(&mut self, input: &[u8]) -> (usize, Option<Result<(), ()>>) {
150	201k	let initial_buffer_len = self.buffer_len as usize;
151		let copied_from_input;
152	201k	{
153	201k	let unwritten = &mut self.buffer[initial_buffer_len..];
154	201k	copied_from_input = cmp::min(unwritten.len(), input.len());
155	201k	unwritten[..copied_from_input].copy_from_slice(&input[..copied_from_input]);
156	201k	}
157	201k	let spliced = &self.buffer[..initial_buffer_len + copied_from_input];
158	201k	match str::from_utf8(spliced) {
159		Ok(_) => {
160	16.8k	self.buffer_len = spliced.len() as u8;
161	16.8k	(copied_from_input, Some(Ok(())))
162		}
163	184k	Err(error) => {
164	184k	let valid_up_to = error.valid_up_to();
165	184k	if valid_up_to > 0 {
166	183k	let consumed = valid_up_to.checked_sub(initial_buffer_len).unwrap();
167	183k	self.buffer_len = valid_up_to as u8;
168	183k	(consumed, Some(Ok(())))
169		} else {
170	622	match error.error_len() {
171	41	Some(invalid_sequence_length) => {
172	41	let consumed = invalid_sequence_length
173	41	.checked_sub(initial_buffer_len).unwrap();
174	41	self.buffer_len = invalid_sequence_length as u8;
175	41	(consumed, Some(Err(())))
176		}
177		None => {
178	581	self.buffer_len = spliced.len() as u8;
179	581	(copied_from_input, None)
180		}
181		}
182		}
183		}
184		}
185	201k	}
186		}

Coverage Report

Created: 2025-11-01 06:05