/rust/registry/src/index.crates.io-1949cf8c6b5b557f/tendril-0.4.3/src/utf8_decode.rs
Line | Count | Source |
1 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
2 | | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
3 | | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
4 | | // option. This file may not be copied, modified, or distributed |
5 | | // except according to those terms. |
6 | | |
7 | | use fmt; |
8 | | use tendril::{Atomicity, Tendril}; |
9 | | use utf8; |
10 | | |
11 | | pub struct IncompleteUtf8(utf8::Incomplete); |
12 | | |
13 | | impl<A> Tendril<fmt::Bytes, A> |
14 | | where |
15 | | A: Atomicity, |
16 | | { |
17 | 0 | pub fn decode_utf8_lossy<F>(mut self, mut push_utf8: F) -> Option<IncompleteUtf8> |
18 | 0 | where |
19 | 0 | F: FnMut(Tendril<fmt::UTF8, A>), |
20 | | { |
21 | | loop { |
22 | 0 | if self.is_empty() { |
23 | 0 | return None; |
24 | 0 | } |
25 | 0 | let unborrowed_result = match utf8::decode(&self) { |
26 | 0 | Ok(s) => { |
27 | 0 | debug_assert!(s.as_ptr() == self.as_ptr()); |
28 | 0 | debug_assert!(s.len() == self.len()); |
29 | 0 | Ok(()) |
30 | | } |
31 | | Err(utf8::DecodeError::Invalid { |
32 | 0 | valid_prefix, |
33 | 0 | invalid_sequence, |
34 | | .. |
35 | | }) => { |
36 | 0 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
37 | 0 | debug_assert!(valid_prefix.len() <= self.len()); |
38 | 0 | Err(( |
39 | 0 | valid_prefix.len(), |
40 | 0 | Err(valid_prefix.len() + invalid_sequence.len()), |
41 | 0 | )) |
42 | | } |
43 | | Err(utf8::DecodeError::Incomplete { |
44 | 0 | valid_prefix, |
45 | 0 | incomplete_suffix, |
46 | | }) => { |
47 | 0 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
48 | 0 | debug_assert!(valid_prefix.len() <= self.len()); |
49 | 0 | Err((valid_prefix.len(), Ok(incomplete_suffix))) |
50 | | } |
51 | | }; |
52 | 0 | match unborrowed_result { |
53 | | Ok(()) => { |
54 | 0 | unsafe { push_utf8(self.reinterpret_without_validating()) } |
55 | 0 | return None; |
56 | | } |
57 | 0 | Err((valid_len, and_then)) => { |
58 | 0 | if valid_len > 0 { |
59 | 0 | let subtendril = self.subtendril(0, valid_len as u32); |
60 | 0 | unsafe { push_utf8(subtendril.reinterpret_without_validating()) } |
61 | 0 | } |
62 | 0 | match and_then { |
63 | 0 | Ok(incomplete) => return Some(IncompleteUtf8(incomplete)), |
64 | 0 | Err(offset) => { |
65 | 0 | push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); |
66 | 0 | self.pop_front(offset as u32) |
67 | | } |
68 | | } |
69 | | } |
70 | | } |
71 | | } |
72 | 0 | } |
73 | | } |
74 | | |
75 | | impl IncompleteUtf8 { |
76 | 0 | pub fn try_complete<A, F>( |
77 | 0 | &mut self, |
78 | 0 | mut input: Tendril<fmt::Bytes, A>, |
79 | 0 | mut push_utf8: F, |
80 | 0 | ) -> Result<Tendril<fmt::Bytes, A>, ()> |
81 | 0 | where |
82 | 0 | A: Atomicity, |
83 | 0 | F: FnMut(Tendril<fmt::UTF8, A>), |
84 | | { |
85 | | let resume_at; |
86 | 0 | match self.0.try_complete(&input) { |
87 | 0 | None => return Err(()), |
88 | 0 | Some((result, rest)) => { |
89 | 0 | push_utf8(Tendril::from_slice( |
90 | 0 | result.unwrap_or(utf8::REPLACEMENT_CHARACTER), |
91 | 0 | )); |
92 | 0 | resume_at = input.len() - rest.len(); |
93 | 0 | } |
94 | | } |
95 | 0 | input.pop_front(resume_at as u32); |
96 | 0 | Ok(input) |
97 | 0 | } |
98 | | } |