/src/html5ever/tendril/src/utf8_decode.rs
Line | Count | Source |
1 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
2 | | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
3 | | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
4 | | // option. This file may not be copied, modified, or distributed |
5 | | // except according to those terms. |
6 | | |
7 | | use crate::fmt; |
8 | | use crate::{Atomicity, Tendril}; |
9 | | |
/// Carry-over state for a UTF-8 sequence that was cut off at the end of a
/// chunk of input.
///
/// Wraps a `utf8::Incomplete`. A value of this type is returned by
/// `Tendril::decode_utf8_lossy` when the decoder reports an incomplete
/// suffix, and is fed further bytes through `IncompleteUtf8::try_complete`.
pub struct IncompleteUtf8(utf8::Incomplete);
11 | | |
12 | | impl<A> Tendril<fmt::Bytes, A> |
13 | | where |
14 | | A: Atomicity, |
15 | | { |
16 | 0 | pub fn decode_utf8_lossy<F>(mut self, mut push_utf8: F) -> Option<IncompleteUtf8> |
17 | 0 | where |
18 | 0 | F: FnMut(Tendril<fmt::UTF8, A>), |
19 | | { |
20 | | loop { |
21 | 0 | if self.is_empty() { |
22 | 0 | return None; |
23 | 0 | } |
24 | 0 | let unborrowed_result = match utf8::decode(&self) { |
25 | 0 | Ok(s) => { |
26 | 0 | debug_assert!(s.as_ptr() == self.as_ptr()); |
27 | 0 | debug_assert!(s.len() == self.len()); |
28 | 0 | Ok(()) |
29 | | }, |
30 | | Err(utf8::DecodeError::Invalid { |
31 | 0 | valid_prefix, |
32 | 0 | invalid_sequence, |
33 | | .. |
34 | | }) => { |
35 | 0 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
36 | 0 | debug_assert!(valid_prefix.len() <= self.len()); |
37 | 0 | Err(( |
38 | 0 | valid_prefix.len(), |
39 | 0 | Err(valid_prefix.len() + invalid_sequence.len()), |
40 | 0 | )) |
41 | | }, |
42 | | Err(utf8::DecodeError::Incomplete { |
43 | 0 | valid_prefix, |
44 | 0 | incomplete_suffix, |
45 | | }) => { |
46 | 0 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
47 | 0 | debug_assert!(valid_prefix.len() <= self.len()); |
48 | 0 | Err((valid_prefix.len(), Ok(incomplete_suffix))) |
49 | | }, |
50 | | }; |
51 | 0 | match unborrowed_result { |
52 | | Ok(()) => { |
53 | 0 | unsafe { push_utf8(self.reinterpret_without_validating()) } |
54 | 0 | return None; |
55 | | }, |
56 | 0 | Err((valid_len, and_then)) => { |
57 | 0 | if valid_len > 0 { |
58 | 0 | let subtendril = self.subtendril(0, valid_len as u32); |
59 | 0 | unsafe { push_utf8(subtendril.reinterpret_without_validating()) } |
60 | 0 | } |
61 | 0 | match and_then { |
62 | 0 | Ok(incomplete) => return Some(IncompleteUtf8(incomplete)), |
63 | 0 | Err(offset) => { |
64 | 0 | push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); |
65 | 0 | self.pop_front(offset as u32) |
66 | | }, |
67 | | } |
68 | | }, |
69 | | } |
70 | | } |
71 | 0 | } |
72 | | } |
73 | | |
74 | | impl IncompleteUtf8 { |
75 | 0 | pub fn try_complete<A, F>( |
76 | 0 | &mut self, |
77 | 0 | mut input: Tendril<fmt::Bytes, A>, |
78 | 0 | mut push_utf8: F, |
79 | 0 | ) -> Result<Tendril<fmt::Bytes, A>, ()> |
80 | 0 | where |
81 | 0 | A: Atomicity, |
82 | 0 | F: FnMut(Tendril<fmt::UTF8, A>), |
83 | | { |
84 | 0 | let resume_at = match self.0.try_complete(&input) { |
85 | 0 | None => return Err(()), |
86 | 0 | Some((result, rest)) => { |
87 | 0 | push_utf8(Tendril::from_slice( |
88 | 0 | result.unwrap_or(utf8::REPLACEMENT_CHARACTER), |
89 | 0 | )); |
90 | 0 | input.len() - rest.len() |
91 | | }, |
92 | | }; |
93 | 0 | input.pop_front(resume_at as u32); |
94 | 0 | Ok(input) |
95 | 0 | } |
96 | | } |