/rust/registry/src/index.crates.io-1949cf8c6b5b557f/encoding_rs-0.8.35/src/macros.rs
Line | Count | Source |
1 | | // Copyright Mozilla Foundation. See the COPYRIGHT |
2 | | // file at the top-level directory of this distribution. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
7 | | // option. This file may not be copied, modified, or distributed |
8 | | // except according to those terms. |
9 | | |
// decoder_function!: expands to a single method
//   pub fn $name(&mut $slf, src: &[u8], dst: &mut [$code_unit], last: bool)
//       -> (DecoderResult, usize, usize)
// built from fixed boilerplate plus four caller-supplied blocks.
//
// Block arguments (spliced in between the "non-boilerplate" markers):
//   $preamble     - runs at the top of every outer-loop iteration.
//   $loop_preable - (spelled this way in the source) runs at the top of every
//                   inner-loop iteration, before the source is checked.
//   $eof          - runs when the source reports `Space::Full` and `last` is
//                   true, just before the boilerplate `InputEmpty` return.
//   $body         - runs after one unit has been read from the source, with
//                   `$b`, `$unread_handle` and `$destination_handle` bound.
//
// Identifier arguments:
//   $slf, $src_consumed, $dest, $source, $b, $destination_handle,
//   $unread_handle - names of the bindings the expansion introduces;
//                    presumably passed in so the caller-written blocks can
//                    refer to them by the same names.
//   $destination_check - method invoked on `$dest` to test for output space.
//   $name / $code_unit / $dest_struct - generated fn name, element type of
//                    `dst`, and the type whose `new(dst)` wraps `dst`.
//
// Returns produced by the boilerplate itself:
//   (InputEmpty, $src_consumed, $dest.written())      when the source is Full.
//   (OutputFull, source_handle.consumed(), dst_written) when `$dest` is Full.
// The boilerplate contains no `break`; it leaves the loops only via `return`
// (anything else depends on what the supplied blocks do).
//
// The text following `});` on the closing row is coverage-tool output that
// lists the macro's instantiations; it is not part of the macro source.
10 | | macro_rules! decoder_function { |
11 | | ($preamble:block, |
12 | | $loop_preable:block, |
13 | | $eof:block, |
14 | | $body:block, |
15 | | $slf:ident, |
16 | | $src_consumed:ident, |
17 | | $dest:ident, |
18 | | $source:ident, |
19 | | $b:ident, |
20 | | $destination_handle:ident, |
21 | | $unread_handle:ident, |
22 | | $destination_check:ident, |
23 | | $name:ident, |
24 | | $code_unit:ty, |
25 | | $dest_struct:ident) => ( |
26 | 0 | pub fn $name(&mut $slf, |
27 | 0 | src: &[u8], |
28 | 0 | dst: &mut [$code_unit], |
29 | 0 | last: bool) |
30 | 0 | -> (DecoderResult, usize, usize) { |
31 | 0 | let mut $source = ByteSource::new(src); |
32 | 0 | let mut $dest = $dest_struct::new(dst); |
33 | | loop { // TODO: remove this loop |
34 | | { |
35 | | // Start non-boilerplate |
36 | | $preamble |
37 | | // End non-boilerplate |
38 | | } |
39 | | loop { |
40 | | { |
41 | | $loop_preable |
42 | | } |
// Input is checked before output: an exhausted source is reported as
// InputEmpty even if the destination would also be full.
43 | 0 | match $source.check_available() { |
44 | 0 | Space::Full($src_consumed) => { |
45 | 0 | if last { |
46 | | // Start non-boilerplate |
47 | | $eof |
48 | | // End non-boilerplate |
49 | 0 | } |
50 | 0 | return (DecoderResult::InputEmpty, $src_consumed, $dest.written()); |
51 | | } |
52 | 0 | Space::Available(source_handle) => { |
53 | 0 | match $dest.$destination_check() { |
54 | 0 | Space::Full(dst_written) => { |
55 | 0 | return (DecoderResult::OutputFull, |
56 | 0 | source_handle.consumed(), |
57 | 0 | dst_written); |
58 | | } |
59 | 0 | Space::Available($destination_handle) => { |
60 | 0 | let ($b, $unread_handle) = source_handle.read(); |
61 | | // Start non-boilerplate |
62 | | $body |
63 | | // End non-boilerplate |
64 | | } |
65 | | } |
66 | | } |
67 | | } |
68 | | } |
69 | | } |
70 | 0 | }); Unexecuted instantiation: <encoding_rs::utf_8::Utf8Decoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::utf_8::Utf8Decoder>::decode_to_utf16_raw Unexecuted instantiation: <encoding_rs::x_user_defined::UserDefinedDecoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::utf_16::Utf16Decoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::utf_16::Utf16Decoder>::decode_to_utf16_raw Unexecuted instantiation: <encoding_rs::iso_2022_jp::Iso2022JpDecoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::iso_2022_jp::Iso2022JpDecoder>::decode_to_utf16_raw |
71 | | } |
72 | | |
// decoder_functions!: convenience wrapper that invokes `decoder_function!`
// twice with the same twelve leading arguments, once per output flavour:
//   decode_to_utf8_raw  with code unit `u8`  and `Utf8Destination`
//   decode_to_utf16_raw with code unit `u16` and `Utf16Destination`
// The caller-supplied blocks are therefore expanded into both generated
// functions unchanged.
73 | | macro_rules! decoder_functions { |
74 | | ( |
75 | | $preamble:block, |
76 | | $loop_preable:block, |
77 | | $eof:block, |
78 | | $body:block, |
79 | | $slf:ident, |
80 | | $src_consumed:ident, |
81 | | $dest:ident, |
82 | | $source:ident, |
83 | | $b:ident, |
84 | | $destination_handle:ident, |
85 | | $unread_handle:ident, |
86 | | $destination_check:ident |
87 | | ) => { |
// UTF-8 output variant.
88 | | decoder_function!( |
89 | | $preamble, |
90 | | $loop_preable, |
91 | | $eof, |
92 | | $body, |
93 | | $slf, |
94 | | $src_consumed, |
95 | | $dest, |
96 | | $source, |
97 | | $b, |
98 | | $destination_handle, |
99 | | $unread_handle, |
100 | | $destination_check, |
101 | | decode_to_utf8_raw, |
102 | | u8, |
103 | | Utf8Destination |
104 | | ); |
// UTF-16 output variant.
105 | | decoder_function!( |
106 | | $preamble, |
107 | | $loop_preable, |
108 | | $eof, |
109 | | $body, |
110 | | $slf, |
111 | | $src_consumed, |
112 | | $dest, |
113 | | $source, |
114 | | $b, |
115 | | $destination_handle, |
116 | | $unread_handle, |
117 | | $destination_check, |
118 | | decode_to_utf16_raw, |
119 | | u16, |
120 | | Utf16Destination |
121 | | ); |
122 | | }; |
123 | | } |
124 | | |
// ascii_compatible_two_byte_decoder_function!: expands to
//   pub fn $name(&mut $slf, src: &[u8], dst: &mut [$code_unit], last: bool)
//       -> (DecoderResult, usize, usize)
// for a decoder that keeps at most one pending lead byte in `$slf.lead`.
//
// Block arguments:
//   $lead  - evaluated with `$non_ascii` bound; its value is bound to
//            `$lead_minus_offset`.
//   $trail - evaluated with `$byte`, `$unread_handle_trail`, `$handle` and
//            `$lead_minus_offset` bound. Its value becomes the destination
//            used afterwards (`dest` in the prolog, `dest_again` in the main
//            loop), so it has to produce the same kind of value as the
//            prolog's other arm, `&mut dest_prolog`.
// Other arguments:
//   $outermost         - the loop label token used for the outer loop and for
//                        `continue $outermost`.
//   $copy_ascii        - method called on the destination to start each outer
//                        iteration; it yields `CopyAsciiResult::Stop(ret)`
//                        (returned as-is) or `GoOn((non_ascii, handle))`.
//   $destination_check - method used to test for output space.
//   $ascii_punctuation - boolean expression; when true, an ASCII byte below
//                        60 keeps the expansion in the byte-at-a-time
//                        'innermost loop instead of going back to
//                        `$copy_ascii`.
//
// Flow:
//   1. Prolog: if `$slf.lead` is `Some`, it is taken and the trail handling
//      is repeated once outside the main loop (see the `goto` comment).
//   2. Main loop: `$copy_ascii`, then 'middle handles lead + trail, then an
//      ASCII tail check decides between 'middle (next byte > 127),
//      'innermost (punctuation case) and `$outermost`.
//
// NOTE(review): in the prolog `$slf.lead` is set to `None` before the
// availability checks, and the early `InputEmpty` (when `last` is false) and
// `OutputFull` returns do not restore it, whereas the main loop stores
// `Some($lead_minus_offset)` before its `InputEmpty` return. Confirm whether
// a pending lead is meant to survive a call that returns from the prolog.
//
// The text following `});` on the closing row is coverage-tool output that
// lists the macro's instantiations; it is not part of the macro source.
125 | | macro_rules! ascii_compatible_two_byte_decoder_function { |
126 | | ($lead:block, |
127 | | $trail:block, |
128 | | $slf:ident, |
129 | | $non_ascii:ident, |
130 | | $byte:ident, |
131 | | $lead_minus_offset:ident, |
132 | | $unread_handle_trail:ident, |
133 | | $source:ident, |
134 | | $handle:ident, |
135 | | $outermost:tt, |
136 | | $copy_ascii:ident, |
137 | | $destination_check:ident, |
138 | | $name:ident, |
139 | | $code_unit:ty, |
140 | | $dest_struct:ident, |
141 | | $ascii_punctuation:expr) => ( |
142 | 0 | pub fn $name(&mut $slf, |
143 | 0 | src: &[u8], |
144 | 0 | dst: &mut [$code_unit], |
145 | 0 | last: bool) |
146 | 0 | -> (DecoderResult, usize, usize) { |
147 | 0 | let mut $source = ByteSource::new(src); |
148 | 0 | let mut dest_prolog = $dest_struct::new(dst); |
149 | 0 | let dest = match $slf.lead { |
150 | 0 | Some(lead) => { |
151 | 0 | let $lead_minus_offset = lead; |
152 | 0 | $slf.lead = None; |
153 | | // Since we don't have `goto` we could use to jump into the trail |
154 | | // handling part of the main loop, we need to repeat trail handling |
155 | | // here. |
156 | 0 | match $source.check_available() { |
157 | 0 | Space::Full(src_consumed_prolog) => { |
158 | 0 | if last { |
159 | 0 | return (DecoderResult::Malformed(1, 0), |
160 | 0 | src_consumed_prolog, |
161 | 0 | dest_prolog.written()); |
162 | 0 | } |
163 | 0 | return (DecoderResult::InputEmpty, src_consumed_prolog, dest_prolog.written()); |
164 | | } |
165 | 0 | Space::Available(source_handle_prolog) => { |
166 | 0 | match dest_prolog.$destination_check() { |
167 | 0 | Space::Full(dst_written_prolog) => { |
168 | 0 | return (DecoderResult::OutputFull, |
169 | 0 | source_handle_prolog.consumed(), |
170 | 0 | dst_written_prolog); |
171 | | } |
172 | 0 | Space::Available($handle) => { |
173 | 0 | let ($byte, $unread_handle_trail) = source_handle_prolog.read(); |
174 | | // Start non-boilerplate |
175 | | $trail |
176 | | // End non-boilerplate |
177 | | } |
178 | | } |
179 | | } |
180 | | } |
181 | | }, |
182 | | None => { |
183 | 0 | &mut dest_prolog |
184 | | } |
185 | | }; |
186 | | $outermost: loop { |
187 | 0 | match dest.$copy_ascii(&mut $source) { |
188 | 0 | CopyAsciiResult::Stop(ret) => return ret, |
189 | 0 | CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => { |
190 | | 'middle: loop { |
191 | 0 | let dest_again = { |
192 | 0 | let $lead_minus_offset = { |
193 | | // Start non-boilerplate |
194 | | $lead |
195 | | // End non-boilerplate |
196 | | }; |
197 | 0 | match $source.check_available() { |
198 | 0 | Space::Full(src_consumed_trail) => { |
199 | 0 | if last { |
200 | 0 | return (DecoderResult::Malformed(1, 0), |
201 | 0 | src_consumed_trail, |
202 | 0 | $handle.written()); |
203 | 0 | } |
// Input ended between lead and trail: remember the lead for the next call.
204 | 0 | $slf.lead = Some($lead_minus_offset); |
205 | 0 | return (DecoderResult::InputEmpty, |
206 | 0 | src_consumed_trail, |
207 | 0 | $handle.written()); |
208 | | } |
209 | 0 | Space::Available(source_handle_trail) => { |
210 | 0 | let ($byte, $unread_handle_trail) = source_handle_trail.read(); |
211 | | // Start non-boilerplate |
212 | | $trail |
213 | | // End non-boilerplate |
214 | | } |
215 | | } |
216 | | }; |
217 | 0 | match $source.check_available() { |
218 | 0 | Space::Full(src_consumed) => { |
219 | 0 | return (DecoderResult::InputEmpty, |
220 | 0 | src_consumed, |
221 | 0 | dest_again.written()); |
222 | | } |
223 | 0 | Space::Available(source_handle) => { |
224 | 0 | match dest_again.$destination_check() { |
225 | 0 | Space::Full(dst_written) => { |
226 | 0 | return (DecoderResult::OutputFull, |
227 | 0 | source_handle.consumed(), |
228 | 0 | dst_written); |
229 | | } |
230 | 0 | Space::Available(mut destination_handle) => { |
231 | 0 | let (mut b, unread_handle) = source_handle.read(); |
232 | 0 | let source_again = unread_handle.commit(); |
233 | | 'innermost: loop { |
// Non-ASCII byte: hand it to 'middle as the next `$non_ascii`, together
// with the destination handle already obtained for it.
234 | 0 | if b > 127 { |
235 | 0 | $non_ascii = b; |
236 | 0 | $handle = destination_handle; |
237 | 0 | continue 'middle; |
238 | 0 | } |
239 | | // Testing on Haswell says that we should write the |
240 | | // byte unconditionally instead of trying to unread it |
241 | | // to make it part of the next SIMD stride. |
242 | 0 | let dest_again_again = |
243 | 0 | destination_handle.write_ascii(b); |
244 | 0 | if $ascii_punctuation && b < 60 { |
245 | | // We've got punctuation |
246 | 0 | match source_again.check_available() { |
247 | 0 | Space::Full(src_consumed_again) => { |
248 | 0 | return (DecoderResult::InputEmpty, |
249 | 0 | src_consumed_again, |
250 | 0 | dest_again_again.written()); |
251 | | } |
252 | 0 | Space::Available(source_handle_again) => { |
253 | 0 | match dest_again_again.$destination_check() { |
254 | 0 | Space::Full(dst_written_again) => { |
255 | 0 | return (DecoderResult::OutputFull, |
256 | 0 | source_handle_again.consumed(), |
257 | 0 | dst_written_again); |
258 | | } |
259 | 0 | Space::Available(destination_handle_again) => { |
260 | | { |
261 | 0 | let (b_again, _unread_handle_again) = |
262 | 0 | source_handle_again.read(); |
263 | 0 | b = b_again; |
264 | 0 | destination_handle = destination_handle_again; |
265 | 0 | continue 'innermost; |
266 | | } |
267 | | } |
268 | | } |
269 | | } |
270 | | } |
271 | 0 | } |
272 | | // We've got markup or ASCII text |
273 | 0 | continue $outermost; |
274 | | } |
275 | | } |
276 | | } |
277 | | } |
278 | | } |
279 | | } |
280 | | } |
281 | | } |
282 | 0 | } |
283 | 0 | }); Unexecuted instantiation: <encoding_rs::euc_kr::EucKrDecoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::euc_kr::EucKrDecoder>::decode_to_utf16_raw Unexecuted instantiation: <encoding_rs::shift_jis::ShiftJisDecoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::shift_jis::ShiftJisDecoder>::decode_to_utf16_raw Unexecuted instantiation: <encoding_rs::big5::Big5Decoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::big5::Big5Decoder>::decode_to_utf16_raw |
284 | | } |
285 | | |
// ascii_compatible_two_byte_decoder_functions!: convenience wrapper that
// invokes `ascii_compatible_two_byte_decoder_function!` twice with the same
// leading arguments, once per output flavour:
//   decode_to_utf8_raw  with code unit `u8`  and `Utf8Destination`
//   decode_to_utf16_raw with code unit `u16` and `Utf16Destination`
// `$ascii_punctuation` is the last argument here but is forwarded after the
// three per-flavour arguments, matching the inner macro's parameter order.
286 | | macro_rules! ascii_compatible_two_byte_decoder_functions { |
287 | | ( |
288 | | $lead:block, |
289 | | $trail:block, |
290 | | $slf:ident, |
291 | | $non_ascii:ident, |
292 | | $byte:ident, |
293 | | $lead_minus_offset:ident, |
294 | | $unread_handle_trail:ident, |
295 | | $source:ident, |
296 | | $handle:ident, |
297 | | $outermost:tt, |
298 | | $copy_ascii:ident, |
299 | | $destination_check:ident, |
300 | | $ascii_punctuation:expr |
301 | | ) => { |
// UTF-8 output variant.
302 | | ascii_compatible_two_byte_decoder_function!( |
303 | | $lead, |
304 | | $trail, |
305 | | $slf, |
306 | | $non_ascii, |
307 | | $byte, |
308 | | $lead_minus_offset, |
309 | | $unread_handle_trail, |
310 | | $source, |
311 | | $handle, |
312 | | $outermost, |
313 | | $copy_ascii, |
314 | | $destination_check, |
315 | | decode_to_utf8_raw, |
316 | | u8, |
317 | | Utf8Destination, |
318 | | $ascii_punctuation |
319 | | ); |
// UTF-16 output variant.
320 | | ascii_compatible_two_byte_decoder_function!( |
321 | | $lead, |
322 | | $trail, |
323 | | $slf, |
324 | | $non_ascii, |
325 | | $byte, |
326 | | $lead_minus_offset, |
327 | | $unread_handle_trail, |
328 | | $source, |
329 | | $handle, |
330 | | $outermost, |
331 | | $copy_ascii, |
332 | | $destination_check, |
333 | | decode_to_utf16_raw, |
334 | | u16, |
335 | | Utf16Destination, |
336 | | $ascii_punctuation |
337 | | ); |
338 | | }; |
339 | | } |
340 | | |
// gb18030_decoder_function!: expands to
//   pub fn $name(&mut $slf, src: &[u8], dst: &mut [$code_unit], last: bool)
//       -> (DecoderResult, usize, usize)
// for a decoder whose state lives in `$slf.pending_ascii` (an optional ASCII
// byte still to be written) and `$slf.pending` (a `Gb18030Pending` holding
// zero to three already-read bytes of a sequence, as offsets).
//
// Block arguments:
//   $first_body  - evaluated with `$non_ascii` bound; its value is bound to
//                  `$first_minus_offset`.
//   $second_body - runs when `$second.wrapping_sub(0x30)` is greater than 9,
//                  i.e. the second byte is not in 0x30..=0x39 (two-byte path).
//   $third_body  - its value is bound to `$third_minus_offset`.
//   $fourth_body - runs once the fourth byte has been read.
// `$second_body` and `$fourth_body` supply the value of the branch they sit
// in; in the main loop that value becomes `dest_again`.
// `$fourth_minus_offset` is accepted by the matcher but is not referenced
// anywhere in this expansion.
// `$outermost` is the loop label token for the outer loop.
//
// Flow:
//   1. Flush `$slf.pending_ascii` if set (needs `check_space_bmp`).
//   2. While `$slf.pending` is not `None`, consume one byte per iteration and
//      advance One -> Two -> Three or finish the sequence.
//   3. Main loop: bulk ASCII copy, then 'middle decodes a two- or four-byte
//      sequence, saving partial state in `$slf.pending` when input runs out
//      and `last` is false, or returning `Malformed(n, 0)` with n = number of
//      bytes already read (1, 2 or 3) when `last` is true.
//
// The `never_loop` allowance is for the final unlabeled `loop`, whose body
// always leaves via `continue 'middle` or `continue $outermost`.
//
// The text following `});` on the closing row is coverage-tool output that
// lists the macro's instantiations; it is not part of the macro source.
341 | | macro_rules! gb18030_decoder_function { |
342 | | ($first_body:block, |
343 | | $second_body:block, |
344 | | $third_body:block, |
345 | | $fourth_body:block, |
346 | | $slf:ident, |
347 | | $non_ascii:ident, |
348 | | $first_minus_offset:ident, |
349 | | $second:ident, |
350 | | $second_minus_offset:ident, |
351 | | $unread_handle_second:ident, |
352 | | $third:ident, |
353 | | $third_minus_offset:ident, |
354 | | $unread_handle_third:ident, |
355 | | $fourth:ident, |
356 | | $fourth_minus_offset:ident, |
357 | | $unread_handle_fourth:ident, |
358 | | $source:ident, |
359 | | $handle:ident, |
360 | | $outermost:tt, |
361 | | $name:ident, |
362 | | $code_unit:ty, |
363 | | $dest_struct:ident) => ( |
364 | | #[cfg_attr(feature = "cargo-clippy", allow(never_loop))] |
365 | 0 | pub fn $name(&mut $slf, |
366 | 0 | src: &[u8], |
367 | 0 | dst: &mut [$code_unit], |
368 | 0 | last: bool) |
369 | 0 | -> (DecoderResult, usize, usize) { |
370 | 0 | let mut $source = ByteSource::new(src); |
371 | 0 | let mut dest = $dest_struct::new(dst); |
372 | | { |
373 | 0 | if let Some(ascii) = $slf.pending_ascii { |
374 | 0 | match dest.check_space_bmp() { |
375 | | Space::Full(_) => { |
// Nothing has been read or written yet in this call, hence the literal zeros;
// `pending_ascii` stays set for the next call.
376 | 0 | return (DecoderResult::OutputFull, 0, 0); |
377 | | } |
378 | 0 | Space::Available(pending_ascii_handle) => { |
379 | 0 | $slf.pending_ascii = None; |
380 | 0 | pending_ascii_handle.write_ascii(ascii); |
381 | 0 | } |
382 | | } |
383 | 0 | } |
384 | | } |
385 | 0 | while !$slf.pending.is_none() { |
386 | 0 | match $source.check_available() { |
387 | 0 | Space::Full(src_consumed) => { |
388 | 0 | if last { |
389 | | // Start non-boilerplate |
390 | 0 | let count = $slf.pending.count(); |
391 | 0 | $slf.pending = Gb18030Pending::None; |
392 | 0 | return (DecoderResult::Malformed(count as u8, 0), |
393 | 0 | src_consumed, |
394 | 0 | dest.written()); |
395 | | // End non-boilerplate |
396 | 0 | } |
397 | 0 | return (DecoderResult::InputEmpty, src_consumed, dest.written()); |
398 | | } |
399 | 0 | Space::Available(source_handle) => { |
400 | 0 | match dest.check_space_astral() { |
401 | 0 | Space::Full(dst_written) => { |
402 | 0 | return (DecoderResult::OutputFull, |
403 | 0 | source_handle.consumed(), |
404 | 0 | dst_written); |
405 | | } |
406 | 0 | Space::Available($handle) => { |
407 | 0 | let (byte, unread_handle) = source_handle.read(); |
// The value of this match is discarded (note the trailing `;`); the arms act
// through `$slf.pending` and through the handles they consume.
408 | 0 | match $slf.pending { |
409 | 0 | Gb18030Pending::One($first_minus_offset) => { |
410 | 0 | $slf.pending = Gb18030Pending::None; |
411 | 0 | let $second = byte; |
412 | 0 | let $unread_handle_second = unread_handle; |
413 | | // If second is between 0x40 and 0x7E, |
414 | | // inclusive, subtract offset 0x40. Else if |
415 | | // second is between 0x80 and 0xFE, inclusive, |
416 | | // subtract offset 0x41. In both cases, |
417 | | // handle as a two-byte sequence. |
418 | | // Else if second is between 0x30 and 0x39, |
419 | | // inclusive, subtract offset 0x30 and |
420 | | // handle as a four-byte sequence. |
421 | 0 | let $second_minus_offset = $second.wrapping_sub(0x30); |
422 | | // It's not optimal to do this check first, |
423 | | // but this results in more readable code. |
424 | 0 | if $second_minus_offset > (0x39 - 0x30) { |
425 | | // Start non-boilerplate |
426 | | $second_body |
427 | | // End non-boilerplate |
428 | | } else { |
429 | | // Four-byte! |
430 | 0 | $slf.pending = Gb18030Pending::Two($first_minus_offset, |
431 | 0 | $second_minus_offset); |
432 | 0 | $handle.commit() |
433 | | } |
434 | | } |
435 | 0 | Gb18030Pending::Two($first_minus_offset, $second_minus_offset) => { |
436 | 0 | $slf.pending = Gb18030Pending::None; |
437 | 0 | let $third = byte; |
438 | 0 | let $unread_handle_third = unread_handle; |
439 | 0 | let $third_minus_offset = { |
440 | | // Start non-boilerplate |
441 | | $third_body |
442 | | // End non-boilerplate |
443 | | }; |
444 | 0 | $slf.pending = Gb18030Pending::Three($first_minus_offset, |
445 | 0 | $second_minus_offset, |
446 | 0 | $third_minus_offset); |
447 | 0 | $handle.commit() |
448 | | } |
449 | 0 | Gb18030Pending::Three($first_minus_offset, |
450 | 0 | $second_minus_offset, |
451 | 0 | $third_minus_offset) => { |
452 | 0 | $slf.pending = Gb18030Pending::None; |
453 | 0 | let $fourth = byte; |
454 | 0 | let $unread_handle_fourth = unread_handle; |
455 | | // Start non-boilerplate |
456 | 0 | $fourth_body |
457 | | // End non-boilerplate |
458 | | } |
459 | 0 | Gb18030Pending::None => unreachable!("Checked in loop condition"), |
460 | | }; |
461 | | } |
462 | | } |
463 | | } |
464 | | } |
465 | | } |
466 | | $outermost: loop { |
467 | 0 | match dest.copy_ascii_from_check_space_astral(&mut $source) { |
468 | 0 | CopyAsciiResult::Stop(ret) => return ret, |
469 | 0 | CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => { |
470 | | 'middle: loop { |
471 | 0 | let dest_again = { |
472 | 0 | let $first_minus_offset = { |
473 | | // Start non-boilerplate |
474 | | $first_body |
475 | | // End non-boilerplate |
476 | | }; |
477 | 0 | match $source.check_available() { |
478 | 0 | Space::Full(src_consumed_trail) => { |
479 | 0 | if last { |
480 | 0 | return (DecoderResult::Malformed(1, 0), |
481 | 0 | src_consumed_trail, |
482 | 0 | $handle.written()); |
483 | 0 | } |
484 | 0 | $slf.pending = Gb18030Pending::One($first_minus_offset); |
485 | 0 | return (DecoderResult::InputEmpty, |
486 | 0 | src_consumed_trail, |
487 | 0 | $handle.written()); |
488 | | } |
489 | 0 | Space::Available(source_handle_trail) => { |
490 | 0 | let ($second, $unread_handle_second) = source_handle_trail.read(); |
491 | | // Start non-boilerplate |
492 | | // If second is between 0x40 and 0x7E, |
493 | | // inclusive, subtract offset 0x40. Else if |
494 | | // second is between 0x80 and 0xFE, inclusive, |
495 | | // subtract offset 0x41. In both cases, |
496 | | // handle as a two-byte sequence. |
497 | | // Else if second is between 0x30 and 0x39, |
498 | | // inclusive, subtract offset 0x30 and |
499 | | // handle as a four-byte sequence. |
500 | 0 | let $second_minus_offset = $second.wrapping_sub(0x30); |
501 | | // It's not optimal to do this check first, |
502 | | // but this results in more readable code. |
503 | 0 | if $second_minus_offset > (0x39 - 0x30) { |
504 | | // Start non-boilerplate |
505 | | $second_body |
506 | | // End non-boilerplate |
507 | | } else { |
508 | | // Four-byte! |
509 | 0 | match $unread_handle_second.commit().check_available() { |
510 | 0 | Space::Full(src_consumed_third) => { |
511 | 0 | if last { |
512 | 0 | return (DecoderResult::Malformed(2, 0), |
513 | 0 | src_consumed_third, |
514 | 0 | $handle.written()); |
515 | 0 | } |
516 | 0 | $slf.pending = |
517 | 0 | Gb18030Pending::Two($first_minus_offset, |
518 | 0 | $second_minus_offset); |
519 | 0 | return (DecoderResult::InputEmpty, |
520 | 0 | src_consumed_third, |
521 | 0 | $handle.written()); |
522 | | } |
523 | 0 | Space::Available(source_handle_third) => { |
524 | 0 | let ($third, $unread_handle_third) = |
525 | 0 | source_handle_third.read(); |
526 | 0 | let $third_minus_offset = { |
527 | | // Start non-boilerplate |
528 | | $third_body |
529 | | // End non-boilerplate |
530 | | }; |
531 | 0 | match $unread_handle_third.commit() |
532 | 0 | .check_available() { |
533 | 0 | Space::Full(src_consumed_fourth) => { |
534 | 0 | if last { |
535 | 0 | return (DecoderResult::Malformed(3, 0), |
536 | 0 | src_consumed_fourth, |
537 | 0 | $handle.written()); |
538 | 0 | } |
539 | 0 | $slf.pending = Gb18030Pending::Three($first_minus_offset, $second_minus_offset, $third_minus_offset); |
540 | 0 | return (DecoderResult::InputEmpty, |
541 | 0 | src_consumed_fourth, |
542 | 0 | $handle.written()); |
543 | | } |
544 | 0 | Space::Available(source_handle_fourth) => { |
545 | 0 | let ($fourth, $unread_handle_fourth) = |
546 | 0 | source_handle_fourth.read(); |
547 | | // Start non-boilerplate |
548 | | $fourth_body |
549 | | // End non-boilerplate |
550 | | } |
551 | | } |
552 | | } |
553 | | } |
554 | | } |
555 | | // End non-boilerplate |
556 | | } |
557 | | } |
558 | | }; |
559 | 0 | match $source.check_available() { |
560 | 0 | Space::Full(src_consumed) => { |
561 | 0 | return (DecoderResult::InputEmpty, |
562 | 0 | src_consumed, |
563 | 0 | dest_again.written()); |
564 | | } |
565 | 0 | Space::Available(source_handle) => { |
566 | 0 | match dest_again.check_space_astral() { |
567 | 0 | Space::Full(dst_written) => { |
568 | 0 | return (DecoderResult::OutputFull, |
569 | 0 | source_handle.consumed(), |
570 | 0 | dst_written); |
571 | | } |
572 | 0 | Space::Available(destination_handle) => { |
573 | 0 | let (b, _) = source_handle.read(); |
// This `loop` runs its body at most once: both paths below `continue` an
// outer labelled loop.
574 | | loop { |
575 | 0 | if b > 127 { |
576 | 0 | $non_ascii = b; |
577 | 0 | $handle = destination_handle; |
578 | 0 | continue 'middle; |
579 | 0 | } |
580 | | // Testing on Haswell says that we should write the |
581 | | // byte unconditionally instead of trying to unread it |
582 | | // to make it part of the next SIMD stride. |
583 | 0 | destination_handle.write_ascii(b); |
584 | | // We've got markup or ASCII text |
585 | 0 | continue $outermost; |
586 | | } |
587 | | } |
588 | | } |
589 | | } |
590 | | } |
591 | | } |
592 | | } |
593 | | } |
594 | 0 | } |
595 | 0 | }); Unexecuted instantiation: <encoding_rs::gb18030::Gb18030Decoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::gb18030::Gb18030Decoder>::decode_to_utf16_raw |
596 | | } |
597 | | |
// gb18030_decoder_functions!: convenience wrapper that invokes
// `gb18030_decoder_function!` twice with the same nineteen leading
// arguments, once per output flavour:
//   decode_to_utf8_raw  with code unit `u8`  and `Utf8Destination`
//   decode_to_utf16_raw with code unit `u16` and `Utf16Destination`
598 | | macro_rules! gb18030_decoder_functions { |
599 | | ( |
600 | | $first_body:block, |
601 | | $second_body:block, |
602 | | $third_body:block, |
603 | | $fourth_body:block, |
604 | | $slf:ident, |
605 | | $non_ascii:ident, |
606 | | $first_minus_offset:ident, |
607 | | $second:ident, |
608 | | $second_minus_offset:ident, |
609 | | $unread_handle_second:ident, |
610 | | $third:ident, |
611 | | $third_minus_offset:ident, |
612 | | $unread_handle_third:ident, |
613 | | $fourth:ident, |
614 | | $fourth_minus_offset:ident, |
615 | | $unread_handle_fourth:ident, |
616 | | $source:ident, |
617 | | $handle:ident, |
618 | | $outermost:tt |
619 | | ) => { |
// UTF-8 output variant.
620 | | gb18030_decoder_function!( |
621 | | $first_body, |
622 | | $second_body, |
623 | | $third_body, |
624 | | $fourth_body, |
625 | | $slf, |
626 | | $non_ascii, |
627 | | $first_minus_offset, |
628 | | $second, |
629 | | $second_minus_offset, |
630 | | $unread_handle_second, |
631 | | $third, |
632 | | $third_minus_offset, |
633 | | $unread_handle_third, |
634 | | $fourth, |
635 | | $fourth_minus_offset, |
636 | | $unread_handle_fourth, |
637 | | $source, |
638 | | $handle, |
639 | | $outermost, |
640 | | decode_to_utf8_raw, |
641 | | u8, |
642 | | Utf8Destination |
643 | | ); |
// UTF-16 output variant.
644 | | gb18030_decoder_function!( |
645 | | $first_body, |
646 | | $second_body, |
647 | | $third_body, |
648 | | $fourth_body, |
649 | | $slf, |
650 | | $non_ascii, |
651 | | $first_minus_offset, |
652 | | $second, |
653 | | $second_minus_offset, |
654 | | $unread_handle_second, |
655 | | $third, |
656 | | $third_minus_offset, |
657 | | $unread_handle_third, |
658 | | $fourth, |
659 | | $fourth_minus_offset, |
660 | | $unread_handle_fourth, |
661 | | $source, |
662 | | $handle, |
663 | | $outermost, |
664 | | decode_to_utf16_raw, |
665 | | u16, |
666 | | Utf16Destination |
667 | | ); |
668 | | }; |
669 | | } |
670 | | |
// euc_jp_decoder_function!: expands to
//   pub fn $name(&mut $slf, src: &[u8], dst: &mut [$code_unit], last: bool)
//       -> (DecoderResult, usize, usize)
// for a decoder whose partial-sequence state lives in `$slf.pending`
// (an `EucJpPending`).
//
// Block arguments (the metavariables are spelled `jis0802` / `jis0812` in the
// source, while the matching enum variants are `Jis0208*` / `Jis0212*`):
//   $jis0802_trail_body       - handles the trail byte after a JIS 0208 lead;
//                               `$jis0208_lead_minus_offset` and `$byte` are
//                               bound.
//   $jis0812_lead_body        - evaluated with `$lead` bound; its value is
//                               bound to `$jis0212_lead_minus_offset`.
//   $jis0812_trail_body       - handles the trail byte after a JIS 0212 lead.
//   $half_width_katakana_body - handles the byte following 0x8E.
// The three trail/katakana blocks supply the value of the branch they sit in;
// in the main loop that value becomes `dest_again`.
//
// Flow:
//   1. While `$slf.pending` is not `None`, consume one byte per iteration and
//      finish (or advance Jis0212Shift -> Jis0212Lead) the pending sequence.
//   2. Main loop ('outermost is hard-coded here rather than passed in): bulk
//      ASCII copy, then 'middle dispatches on `$non_ascii`:
//        0xA1..=0xFE -> JIS 0208, 0x8F -> JIS 0212, 0x8E -> half-width
//        katakana, anything else -> `Malformed(1, 0)`.
//      When input runs out mid-sequence the state is saved in `$slf.pending`
//      and `InputEmpty` is returned, unless `last` is true, in which case
//      `Malformed(n, 0)` is returned with n = bytes already read (1 or 2).
//
// The `never_loop` allowance is for the final unlabeled `loop`, whose body
// always leaves via `continue 'middle` or `continue 'outermost`.
//
// The text following `});` on the closing row is coverage-tool output that
// lists the macro's instantiations; it is not part of the macro source.
671 | | macro_rules! euc_jp_decoder_function { |
672 | | ($jis0802_trail_body:block, |
673 | | $jis0812_lead_body:block, |
674 | | $jis0812_trail_body:block, |
675 | | $half_width_katakana_body:block, |
676 | | $slf:ident, |
677 | | $non_ascii:ident, |
678 | | $jis0208_lead_minus_offset:ident, |
679 | | $byte:ident, |
680 | | $unread_handle_trail:ident, |
681 | | $jis0212_lead_minus_offset:ident, |
682 | | $lead:ident, |
683 | | $unread_handle_jis0212:ident, |
684 | | $source:ident, |
685 | | $handle:ident, |
686 | | $name:ident, |
687 | | $code_unit:ty, |
688 | | $dest_struct:ident) => ( |
689 | | #[cfg_attr(feature = "cargo-clippy", allow(never_loop))] |
690 | 0 | pub fn $name(&mut $slf, |
691 | 0 | src: &[u8], |
692 | 0 | dst: &mut [$code_unit], |
693 | 0 | last: bool) |
694 | 0 | -> (DecoderResult, usize, usize) { |
695 | 0 | let mut $source = ByteSource::new(src); |
696 | 0 | let mut dest = $dest_struct::new(dst); |
697 | 0 | while !$slf.pending.is_none() { |
698 | 0 | match $source.check_available() { |
699 | 0 | Space::Full(src_consumed) => { |
700 | 0 | if last { |
701 | | // Start non-boilerplate |
702 | 0 | let count = $slf.pending.count(); |
703 | 0 | $slf.pending = EucJpPending::None; |
704 | 0 | return (DecoderResult::Malformed(count as u8, 0), |
705 | 0 | src_consumed, |
706 | 0 | dest.written()); |
707 | | // End non-boilerplate |
708 | 0 | } |
709 | 0 | return (DecoderResult::InputEmpty, src_consumed, dest.written()); |
710 | | } |
711 | 0 | Space::Available(source_handle) => { |
712 | 0 | match dest.check_space_bmp() { |
713 | 0 | Space::Full(dst_written) => { |
714 | 0 | return (DecoderResult::OutputFull, |
715 | 0 | source_handle.consumed(), |
716 | 0 | dst_written); |
717 | | } |
718 | 0 | Space::Available($handle) => { |
719 | 0 | let ($byte, $unread_handle_trail) = source_handle.read(); |
// The value of this match is discarded (note the trailing `;`); the arms act
// through `$slf.pending` and through the handles they consume.
720 | 0 | match $slf.pending { |
721 | 0 | EucJpPending::Jis0208Lead($jis0208_lead_minus_offset) => { |
722 | 0 | $slf.pending = EucJpPending::None; |
723 | | // Start non-boilerplate |
724 | | $jis0802_trail_body |
725 | | // End non-boilerplate |
726 | | } |
727 | | EucJpPending::Jis0212Shift => { |
728 | 0 | $slf.pending = EucJpPending::None; |
729 | 0 | let $lead = $byte; |
730 | 0 | let $unread_handle_jis0212 = $unread_handle_trail; |
731 | 0 | let $jis0212_lead_minus_offset = { |
732 | | // Start non-boilerplate |
733 | | $jis0812_lead_body |
734 | | // End non-boilerplate |
735 | | }; |
736 | 0 | $slf.pending = |
737 | 0 | EucJpPending::Jis0212Lead($jis0212_lead_minus_offset); |
738 | 0 | $handle.commit() |
739 | | } |
740 | 0 | EucJpPending::Jis0212Lead($jis0212_lead_minus_offset) => { |
741 | 0 | $slf.pending = EucJpPending::None; |
742 | | // Start non-boilerplate |
743 | 0 | $jis0812_trail_body |
744 | | // End non-boilerplate |
745 | | } |
746 | | EucJpPending::HalfWidthKatakana => { |
747 | 0 | $slf.pending = EucJpPending::None; |
748 | | // Start non-boilerplate |
749 | 0 | $half_width_katakana_body |
750 | | // End non-boilerplate |
751 | | } |
752 | 0 | EucJpPending::None => unreachable!("Checked in loop condition"), |
753 | | }; |
754 | | } |
755 | | } |
756 | | } |
757 | | } |
758 | | } |
759 | | 'outermost: loop { |
760 | 0 | match dest.copy_ascii_from_check_space_bmp(&mut $source) { |
761 | 0 | CopyAsciiResult::Stop(ret) => return ret, |
762 | 0 | CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => { |
763 | | 'middle: loop { |
764 | 0 | let dest_again = { |
765 | | // If lead is between 0xA1 and 0xFE, inclusive, |
766 | | // subtract 0xA1. Else if lead is 0x8E, handle the |
767 | | // next byte as half-width Katakana. Else if lead is |
768 | | // 0x8F, expect JIS 0212. |
769 | 0 | let $jis0208_lead_minus_offset = $non_ascii.wrapping_sub(0xA1); |
770 | 0 | if $jis0208_lead_minus_offset <= (0xFE - 0xA1) { |
771 | | // JIS 0208 |
772 | 0 | match $source.check_available() { |
773 | 0 | Space::Full(src_consumed_trail) => { |
774 | 0 | if last { |
775 | 0 | return (DecoderResult::Malformed(1, 0), |
776 | 0 | src_consumed_trail, |
777 | 0 | $handle.written()); |
778 | 0 | } |
779 | 0 | $slf.pending = |
780 | 0 | EucJpPending::Jis0208Lead($jis0208_lead_minus_offset); |
781 | 0 | return (DecoderResult::InputEmpty, |
782 | 0 | src_consumed_trail, |
783 | 0 | $handle.written()); |
784 | | } |
785 | 0 | Space::Available(source_handle_trail) => { |
786 | 0 | let ($byte, $unread_handle_trail) = |
787 | 0 | source_handle_trail.read(); |
788 | | // Start non-boilerplate |
789 | | $jis0802_trail_body |
790 | | // End non-boilerplate |
791 | | } |
792 | | } |
793 | 0 | } else if $non_ascii == 0x8F { |
794 | 0 | match $source.check_available() { |
795 | 0 | Space::Full(src_consumed_jis0212) => { |
796 | 0 | if last { |
797 | 0 | return (DecoderResult::Malformed(1, 0), |
798 | 0 | src_consumed_jis0212, |
799 | 0 | $handle.written()); |
800 | 0 | } |
801 | 0 | $slf.pending = EucJpPending::Jis0212Shift; |
802 | 0 | return (DecoderResult::InputEmpty, |
803 | 0 | src_consumed_jis0212, |
804 | 0 | $handle.written()); |
805 | | } |
806 | 0 | Space::Available(source_handle_jis0212) => { |
807 | 0 | let ($lead, $unread_handle_jis0212) = |
808 | 0 | source_handle_jis0212.read(); |
809 | 0 | let $jis0212_lead_minus_offset = { |
810 | | // Start non-boilerplate |
811 | | $jis0812_lead_body |
812 | | // End non-boilerplate |
813 | | }; |
814 | 0 | match $unread_handle_jis0212.commit().check_available() { |
815 | 0 | Space::Full(src_consumed_trail) => { |
816 | 0 | if last { |
817 | 0 | return (DecoderResult::Malformed(2, 0), |
818 | 0 | src_consumed_trail, |
819 | 0 | $handle.written()); |
820 | 0 | } |
821 | 0 | $slf.pending = EucJpPending::Jis0212Lead($jis0212_lead_minus_offset); |
822 | 0 | return (DecoderResult::InputEmpty, |
823 | 0 | src_consumed_trail, |
824 | 0 | $handle.written()); |
825 | | } |
826 | 0 | Space::Available(source_handle_trail) => { |
827 | 0 | let ($byte, $unread_handle_trail) = |
828 | 0 | source_handle_trail.read(); |
829 | | // Start non-boilerplate |
830 | | $jis0812_trail_body |
831 | | // End non-boilerplate |
832 | | } |
833 | | } |
834 | | } |
835 | | } |
836 | 0 | } else if $non_ascii == 0x8E { |
837 | 0 | match $source.check_available() { |
838 | 0 | Space::Full(src_consumed_trail) => { |
839 | 0 | if last { |
840 | 0 | return (DecoderResult::Malformed(1, 0), |
841 | 0 | src_consumed_trail, |
842 | 0 | $handle.written()); |
843 | 0 | } |
844 | 0 | $slf.pending = EucJpPending::HalfWidthKatakana; |
845 | 0 | return (DecoderResult::InputEmpty, |
846 | 0 | src_consumed_trail, |
847 | 0 | $handle.written()); |
848 | | } |
849 | 0 | Space::Available(source_handle_trail) => { |
850 | 0 | let ($byte, $unread_handle_trail) = |
851 | 0 | source_handle_trail.read(); |
852 | | // Start non-boilerplate |
853 | | $half_width_katakana_body |
854 | | // End non-boilerplate |
855 | | } |
856 | | } |
857 | | } else { |
// Non-ASCII byte that is neither a JIS 0208 lead, 0x8F nor 0x8E.
858 | 0 | return (DecoderResult::Malformed(1, 0), |
859 | 0 | $source.consumed(), |
860 | 0 | $handle.written()); |
861 | | } |
862 | | }; |
863 | 0 | match $source.check_available() { |
864 | 0 | Space::Full(src_consumed) => { |
865 | 0 | return (DecoderResult::InputEmpty, |
866 | 0 | src_consumed, |
867 | 0 | dest_again.written()); |
868 | | } |
869 | 0 | Space::Available(source_handle) => { |
870 | 0 | match dest_again.check_space_bmp() { |
871 | 0 | Space::Full(dst_written) => { |
872 | 0 | return (DecoderResult::OutputFull, |
873 | 0 | source_handle.consumed(), |
874 | 0 | dst_written); |
875 | | } |
876 | 0 | Space::Available(destination_handle) => { |
877 | 0 | let (b, _) = source_handle.read(); |
// This `loop` runs its body at most once: both paths below `continue` an
// outer labelled loop.
878 | | loop { |
879 | 0 | if b > 127 { |
880 | 0 | $non_ascii = b; |
881 | 0 | $handle = destination_handle; |
882 | 0 | continue 'middle; |
883 | 0 | } |
884 | | // Testing on Haswell says that we should write the |
885 | | // byte unconditionally instead of trying to unread it |
886 | | // to make it part of the next SIMD stride. |
887 | 0 | destination_handle.write_ascii(b); |
888 | | // We've got markup or ASCII text |
889 | 0 | continue 'outermost; |
890 | | } |
891 | | } |
892 | | } |
893 | | } |
894 | | } |
895 | | } |
896 | | } |
897 | | } |
898 | | } |
899 | 0 | }); Unexecuted instantiation: <encoding_rs::euc_jp::EucJpDecoder>::decode_to_utf8_raw Unexecuted instantiation: <encoding_rs::euc_jp::EucJpDecoder>::decode_to_utf16_raw |
900 | | } |
901 | | |
// euc_jp_decoder_functions!: convenience wrapper that invokes
// `euc_jp_decoder_function!` twice with the same fourteen leading arguments,
// once per output flavour:
//   decode_to_utf8_raw  with code unit `u8`  and `Utf8Destination`
//   decode_to_utf16_raw with code unit `u16` and `Utf16Destination`
902 | | macro_rules! euc_jp_decoder_functions { |
903 | | ( |
904 | | $jis0802_trail_body:block, |
905 | | $jis0812_lead_body:block, |
906 | | $jis0812_trail_body:block, |
907 | | $half_width_katakana_body:block, |
908 | | $slf:ident, |
909 | | $non_ascii:ident, |
910 | | $jis0208_lead_minus_offset:ident, |
911 | | $byte:ident, |
912 | | $unread_handle_trail:ident, |
913 | | $jis0212_lead_minus_offset:ident, |
914 | | $lead:ident, |
915 | | $unread_handle_jis0212:ident, |
916 | | $source:ident, |
917 | | $handle:ident |
918 | | ) => { |
// UTF-8 output variant.
919 | | euc_jp_decoder_function!( |
920 | | $jis0802_trail_body, |
921 | | $jis0812_lead_body, |
922 | | $jis0812_trail_body, |
923 | | $half_width_katakana_body, |
924 | | $slf, |
925 | | $non_ascii, |
926 | | $jis0208_lead_minus_offset, |
927 | | $byte, |
928 | | $unread_handle_trail, |
929 | | $jis0212_lead_minus_offset, |
930 | | $lead, |
931 | | $unread_handle_jis0212, |
932 | | $source, |
933 | | $handle, |
934 | | decode_to_utf8_raw, |
935 | | u8, |
936 | | Utf8Destination |
937 | | ); |
// UTF-16 output variant.
938 | | euc_jp_decoder_function!( |
939 | | $jis0802_trail_body, |
940 | | $jis0812_lead_body, |
941 | | $jis0812_trail_body, |
942 | | $half_width_katakana_body, |
943 | | $slf, |
944 | | $non_ascii, |
945 | | $jis0208_lead_minus_offset, |
946 | | $byte, |
947 | | $unread_handle_trail, |
948 | | $jis0212_lead_minus_offset, |
949 | | $lead, |
950 | | $unread_handle_jis0212, |
951 | | $source, |
952 | | $handle, |
953 | | decode_to_utf16_raw, |
954 | | u16, |
955 | | Utf16Destination |
956 | | ); |
957 | | }; |
958 | | } |
959 | | |
// Generates one raw encoder method
// `pub fn $name(&mut $slf, src: &$input, dst: &mut [u8], last: bool)
//      -> (EncoderResult, usize, usize)`.
//
// The generated method wraps `src` in `$source_struct` (bound to `$source`)
// and `dst` in `ByteDestination` (bound to `$dest`), then loops:
// * When `$source.check_available()` yields `Space::Full`, the `$eof` block
//   runs if `last` is true and the method returns `EncoderResult::InputEmpty`
//   with the consumed count bound to `$src_consumed` and `$dest.written()`.
// * Otherwise `$dest.$destination_check()` is queried; `Space::Full` returns
//   `EncoderResult::OutputFull` with `source_handle.consumed()` and the
//   written count reported by the check.
// * With both input and output available, one unit is read, binding the
//   value to `$c` and the unread handle to `$unread_handle`, the destination
//   handle is bound to `$destination_handle`, and the `$body` block runs.
//
// The loop has no break; it ends only through a `return`, either from the
// boilerplate above or from inside `$eof` / `$body`.
960 | | macro_rules! encoder_function { |
961 | | ($eof:block, |
962 | | $body:block, |
963 | | $slf:ident, |
964 | | $src_consumed:ident, |
965 | | $source:ident, |
966 | | $dest:ident, |
967 | | $c:ident, |
968 | | $destination_handle:ident, |
969 | | $unread_handle:ident, |
970 | | $destination_check:ident, |
971 | | $name:ident, |
972 | | $input:ty, |
973 | | $source_struct:ident) => ( |
974 | 0 | pub fn $name(&mut $slf, |
975 | 0 | src: &$input, |
976 | 0 | dst: &mut [u8], |
977 | 0 | last: bool) |
978 | 0 | -> (EncoderResult, usize, usize) { |
979 | 0 | let mut $source = $source_struct::new(src); |
980 | 0 | let mut $dest = ByteDestination::new(dst); |
981 | | loop { |
982 | 0 | match $source.check_available() { |
// No more input in this buffer.
983 | 0 | Space::Full($src_consumed) => { |
984 | 0 | if last { |
985 | | // Start non-boilerplate |
986 | | $eof |
987 | | // End non-boilerplate |
988 | 0 | } |
989 | 0 | return (EncoderResult::InputEmpty, $src_consumed, $dest.written()); |
990 | | } |
991 | 0 | Space::Available(source_handle) => { |
// Check output space before reading, so nothing is consumed on OutputFull.
992 | 0 | match $dest.$destination_check() { |
993 | 0 | Space::Full(dst_written) => { |
994 | 0 | return (EncoderResult::OutputFull, |
995 | 0 | source_handle.consumed(), |
996 | 0 | dst_written); |
997 | | } |
998 | 0 | Space::Available($destination_handle) => { |
999 | 0 | let ($c, $unread_handle) = source_handle.read(); |
1000 | | // Start non-boilerplate |
1001 | | $body |
1002 | | // End non-boilerplate |
1003 | | } |
1004 | | } |
1005 | | } |
1006 | | } |
1007 | | } |
1008 | 0 | }); Unexecuted instantiation: <encoding_rs::x_user_defined::UserDefinedEncoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::x_user_defined::UserDefinedEncoder>::encode_from_utf16_raw Unexecuted instantiation: <encoding_rs::iso_2022_jp::Iso2022JpEncoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::iso_2022_jp::Iso2022JpEncoder>::encode_from_utf16_raw |
1009 | | } |
1010 | | |
// Generates both raw encode methods from one `$eof` / `$body` pair by
// expanding `encoder_function!` twice with the same ten leading arguments:
// once as `encode_from_utf8_raw` (input `str`, source `Utf8Source`) and once
// as `encode_from_utf16_raw` (input `[u16]`, source `Utf16Source`).
1011 | | macro_rules! encoder_functions { |
1012 | | ( |
1013 | | $eof:block, |
1014 | | $body:block, |
1015 | | $slf:ident, |
1016 | | $src_consumed:ident, |
1017 | | $source:ident, |
1018 | | $dest:ident, |
1019 | | $c:ident, |
1020 | | $destination_handle:ident, |
1021 | | $unread_handle:ident, |
1022 | | $destination_check:ident |
1023 | | ) => { |
// UTF-8 input variant.
1024 | | encoder_function!( |
1025 | | $eof, |
1026 | | $body, |
1027 | | $slf, |
1028 | | $src_consumed, |
1029 | | $source, |
1030 | | $dest, |
1031 | | $c, |
1032 | | $destination_handle, |
1033 | | $unread_handle, |
1034 | | $destination_check, |
1035 | | encode_from_utf8_raw, |
1036 | | str, |
1037 | | Utf8Source |
1038 | | ); |
// UTF-16 input variant.
1039 | | encoder_function!( |
1040 | | $eof, |
1041 | | $body, |
1042 | | $slf, |
1043 | | $src_consumed, |
1044 | | $source, |
1045 | | $dest, |
1046 | | $c, |
1047 | | $destination_handle, |
1048 | | $unread_handle, |
1049 | | $destination_check, |
1050 | | encode_from_utf16_raw, |
1051 | | [u16], |
1052 | | Utf16Source |
1053 | | ); |
1054 | | }; |
1055 | | } |
1056 | | |
// Generates one raw encoder method for an ASCII-compatible encoding:
// `pub fn $name(&mut $slf, src: &$input, dst: &mut [u8], _last: bool)
//      -> (EncoderResult, usize, usize)`.
// The `_last` argument is not read by the generated code.
//
// Structure of the generated method (three nested labelled loops):
// * outermost: calls `$source.$copy_ascii(&mut dest)`. `CopyAsciiResult::Stop`
//   returns its payload directly. `CopyAsciiResult::GoOn` yields the first
//   non-ASCII unit and a destination handle bound to `$handle`.
// * middle: dispatches the pending non-ASCII unit to `$bmp_body` (value bound
//   to `$bmp`) or `$astral_body` (value bound to `$astral`). The value of the
//   chosen block is bound to `dest_again`, so a body that does not return
//   must evaluate to the destination. Then input and output availability are
//   checked (output via `$destination_check`), returning `InputEmpty` or
//   `OutputFull` as needed, and the next unit is read with `read_enum()`.
// * innermost: a non-ASCII unit goes back to the middle loop. An ASCII unit
//   is written with `write_one`. If `$ascii_punctuation` is true and the
//   ASCII value is below 60, the next unit is read right away (after the
//   same availability checks) and the innermost loop repeats; otherwise
//   control returns to the outermost loop and its `$copy_ascii` call.
1057 | | macro_rules! ascii_compatible_encoder_function { |
1058 | | ($bmp_body:block, |
1059 | | $astral_body:block, |
1060 | | $bmp:ident, |
1061 | | $astral:ident, |
1062 | | $slf:ident, |
1063 | | $source:ident, |
1064 | | $handle:ident, |
1065 | | $copy_ascii:ident, |
1066 | | $destination_check:ident, |
1067 | | $name:ident, |
1068 | | $input:ty, |
1069 | | $source_struct:ident, |
1070 | | $ascii_punctuation:expr) => ( |
1071 | 0 | pub fn $name(&mut $slf, |
1072 | 0 | src: &$input, |
1073 | 0 | dst: &mut [u8], |
1074 | 0 | _last: bool) |
1075 | 0 | -> (EncoderResult, usize, usize) { |
1076 | 0 | let mut $source = $source_struct::new(src); |
1077 | 0 | let mut dest = ByteDestination::new(dst); |
1078 | | 'outermost: loop { |
1079 | 0 | match $source.$copy_ascii(&mut dest) { |
1080 | 0 | CopyAsciiResult::Stop(ret) => return ret, |
1081 | 0 | CopyAsciiResult::GoOn((mut non_ascii, mut $handle)) => { |
1082 | | 'middle: loop { |
1083 | 0 | let dest_again = match non_ascii { |
1084 | 0 | NonAscii::BmpExclAscii($bmp) => { |
1085 | | // Start non-boilerplate |
1086 | | $bmp_body |
1087 | | // End non-boilerplate |
1088 | | } |
1089 | 0 | NonAscii::Astral($astral) => { |
1090 | | // Start non-boilerplate |
1091 | 0 | $astral_body |
1092 | | // End non-boilerplate |
1093 | | } |
1094 | | }; |
1095 | 0 | match $source.check_available() { |
1096 | 0 | Space::Full(src_consumed) => { |
1097 | 0 | return (EncoderResult::InputEmpty, |
1098 | 0 | src_consumed, |
1099 | 0 | dest_again.written()); |
1100 | | } |
1101 | 0 | Space::Available(source_handle) => { |
1102 | 0 | match dest_again.$destination_check() { |
1103 | 0 | Space::Full(dst_written) => { |
1104 | 0 | return (EncoderResult::OutputFull, |
1105 | 0 | source_handle.consumed(), |
1106 | 0 | dst_written); |
1107 | | } |
1108 | 0 | Space::Available(mut destination_handle) => { |
1109 | 0 | let (mut c, unread_handle) = source_handle.read_enum(); |
1110 | 0 | let source_again = unread_handle.commit(); |
1111 | | 'innermost: loop { |
1112 | 0 | let ascii = match c { |
1113 | 0 | Unicode::NonAscii(non_ascii_again) => { |
1114 | 0 | non_ascii = non_ascii_again; |
1115 | 0 | $handle = destination_handle; |
1116 | 0 | continue 'middle; |
1117 | | } |
1118 | 0 | Unicode::Ascii(a) => a, |
1119 | | }; |
1120 | | // Testing on Haswell says that we should write the |
1121 | | // byte unconditionally instead of trying to unread it |
1122 | | // to make it part of the next SIMD stride. |
1123 | 0 | let dest_again_again = |
1124 | 0 | destination_handle.write_one(ascii); |
1125 | 0 | if $ascii_punctuation && ascii < 60 { |
1126 | | // We've got punctuation |
1127 | 0 | match source_again.check_available() { |
1128 | 0 | Space::Full(src_consumed_again) => { |
1129 | 0 | return (EncoderResult::InputEmpty, |
1130 | 0 | src_consumed_again, |
1131 | 0 | dest_again_again.written()); |
1132 | | } |
1133 | 0 | Space::Available(source_handle_again) => { |
1134 | 0 | match dest_again_again.$destination_check() { |
1135 | 0 | Space::Full(dst_written_again) => { |
1136 | 0 | return (EncoderResult::OutputFull, |
1137 | 0 | source_handle_again.consumed(), |
1138 | 0 | dst_written_again); |
1139 | | } |
1140 | 0 | Space::Available(destination_handle_again) => { |
1141 | | { |
1142 | 0 | let (c_again, _unread_handle_again) = |
1143 | 0 | source_handle_again.read_enum(); |
1144 | 0 | c = c_again; |
1145 | 0 | destination_handle = destination_handle_again; |
1146 | 0 | continue 'innermost; |
1147 | | } |
1148 | | } |
1149 | | } |
1150 | | } |
1151 | | } |
1152 | 0 | } |
1153 | | // We've got markup or ASCII text |
1154 | 0 | continue 'outermost; |
1155 | | } |
1156 | | } |
1157 | | } |
1158 | | } |
1159 | | } |
1160 | | } |
1161 | | } |
1162 | | } |
1163 | | } |
1164 | 0 | }); Unexecuted instantiation: <encoding_rs::single_byte::SingleByteEncoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::gb18030::Gb18030Encoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::gb18030::Gb18030Encoder>::encode_from_utf16_raw Unexecuted instantiation: <encoding_rs::euc_kr::EucKrEncoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::euc_kr::EucKrEncoder>::encode_from_utf16_raw Unexecuted instantiation: <encoding_rs::euc_jp::EucJpEncoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::euc_jp::EucJpEncoder>::encode_from_utf16_raw Unexecuted instantiation: <encoding_rs::shift_jis::ShiftJisEncoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::shift_jis::ShiftJisEncoder>::encode_from_utf16_raw Unexecuted instantiation: <encoding_rs::big5::Big5Encoder>::encode_from_utf8_raw Unexecuted instantiation: <encoding_rs::big5::Big5Encoder>::encode_from_utf16_raw |
1165 | | } |
1166 | | |
// Generates both raw encode methods for an ASCII-compatible encoding by
// expanding `ascii_compatible_encoder_function!` twice with the same bodies
// and identifiers: once as `encode_from_utf8_raw` (input `str`, source
// `Utf8Source`) and once as `encode_from_utf16_raw` (input `[u16]`, source
// `Utf16Source`). `$ascii_punctuation` is forwarded unchanged to both.
1167 | | macro_rules! ascii_compatible_encoder_functions { |
1168 | | ( |
1169 | | $bmp_body:block, |
1170 | | $astral_body:block, |
1171 | | $bmp:ident, |
1172 | | $astral:ident, |
1173 | | $slf:ident, |
1174 | | $source:ident, |
1175 | | $handle:ident, |
1176 | | $copy_ascii:ident, |
1177 | | $destination_check:ident, |
1178 | | $ascii_punctuation:expr |
1179 | | ) => { |
// UTF-8 input variant.
1180 | | ascii_compatible_encoder_function!( |
1181 | | $bmp_body, |
1182 | | $astral_body, |
1183 | | $bmp, |
1184 | | $astral, |
1185 | | $slf, |
1186 | | $source, |
1187 | | $handle, |
1188 | | $copy_ascii, |
1189 | | $destination_check, |
1190 | | encode_from_utf8_raw, |
1191 | | str, |
1192 | | Utf8Source, |
1193 | | $ascii_punctuation |
1194 | | ); |
// UTF-16 input variant.
1195 | | ascii_compatible_encoder_function!( |
1196 | | $bmp_body, |
1197 | | $astral_body, |
1198 | | $bmp, |
1199 | | $astral, |
1200 | | $slf, |
1201 | | $source, |
1202 | | $handle, |
1203 | | $copy_ascii, |
1204 | | $destination_check, |
1205 | | encode_from_utf16_raw, |
1206 | | [u16], |
1207 | | Utf16Source, |
1208 | | $ascii_punctuation |
1209 | | ); |
1210 | | }; |
1211 | | } |
1212 | | |
// Generates a single raw encoder method for an ASCII-compatible encoding
// that has no mapping for astral code points.
//
// This is a thin wrapper over `ascii_compatible_encoder_function!`: the
// caller supplies only the BMP body, and this macro fills in a fixed astral
// body that returns `EncoderResult::Unmappable(astral)` together with
// `$source.consumed()` and `$handle.written()`. The identifier `astral` is
// introduced here and passed as the astral binding name, so the fixed body
// and the binding refer to the same name.
1213 | | macro_rules! ascii_compatible_bmp_encoder_function { |
1214 | | ( |
1215 | | $bmp_body:block, |
1216 | | $bmp:ident, |
1217 | | $slf:ident, |
1218 | | $source:ident, |
1219 | | $handle:ident, |
1220 | | $copy_ascii:ident, |
1221 | | $destination_check:ident, |
1222 | | $name:ident, |
1223 | | $input:ty, |
1224 | | $source_struct:ident, |
1225 | | $ascii_punctuation:expr |
1226 | | ) => { |
1227 | | ascii_compatible_encoder_function!( |
1228 | | $bmp_body, |
// Fixed astral body: every astral code point is reported as unmappable.
1229 | | { |
1230 | | return ( |
1231 | | EncoderResult::Unmappable(astral), |
1232 | | $source.consumed(), |
1233 | | $handle.written(), |
1234 | | ); |
1235 | | }, |
1236 | | $bmp, |
1237 | | astral, |
1238 | | $slf, |
1239 | | $source, |
1240 | | $handle, |
1241 | | $copy_ascii, |
1242 | | $destination_check, |
1243 | | $name, |
1244 | | $input, |
1245 | | $source_struct, |
1246 | | $ascii_punctuation |
1247 | | ); |
1248 | | }; |
1249 | | } |
1250 | | |
// Generates both raw encode methods (UTF-8 and UTF-16 input) for an
// ASCII-compatible encoding that has no mapping for astral code points.
//
// Delegates to `ascii_compatible_encoder_functions!`, supplying a fixed
// astral body that returns `EncoderResult::Unmappable(astral)` together with
// `$source.consumed()` and `$handle.written()`, and passing the locally
// introduced identifier `astral` as the astral binding name.
1251 | | macro_rules! ascii_compatible_bmp_encoder_functions { |
1252 | | ( |
1253 | | $bmp_body:block, |
1254 | | $bmp:ident, |
1255 | | $slf:ident, |
1256 | | $source:ident, |
1257 | | $handle:ident, |
1258 | | $copy_ascii:ident, |
1259 | | $destination_check:ident, |
1260 | | $ascii_punctuation:expr |
1261 | | ) => { |
1262 | | ascii_compatible_encoder_functions!( |
1263 | | $bmp_body, |
// Fixed astral body: every astral code point is reported as unmappable.
1264 | | { |
1265 | | return ( |
1266 | | EncoderResult::Unmappable(astral), |
1267 | | $source.consumed(), |
1268 | | $handle.written(), |
1269 | | ); |
1270 | | }, |
1271 | | $bmp, |
1272 | | astral, |
1273 | | $slf, |
1274 | | $source, |
1275 | | $handle, |
1276 | | $copy_ascii, |
1277 | | $destination_check, |
1278 | | $ascii_punctuation |
1279 | | ); |
1280 | | }; |
1281 | | } |
1282 | | |
// Generates the public streaming decode entry point `$decode_to_utf` plus
// four private helpers, for one output code unit type (`$code_unit`).
// Attributes captured by `$meta` are applied to the public method only.
//
// `$decode_to_utf` is a state machine over `self.life_cycle`:
// * `Converting` forwards straight to `$decode_to_utf_checking_end`.
// * `AtStart`, `AtUtf8Start`, `AtUtf16BeStart` and `AtUtf16LeStart` inspect
//   `src[0]` for a potential first BOM byte. `AtStart` accepts 0xEF, 0xFE or
//   0xFF; the other three accept only 0xEF, 0xFE and 0xFF respectively. Any
//   other byte moves to `Converting`. An empty `src` returns `InputEmpty`
//   with nothing consumed and leaves the state unchanged.
// * `SeenUtf8First`, `SeenUtf8Second`, `SeenUtf16BeFirst` and
//   `SeenUtf16LeFirst` check the following byte. A completed BOM
//   (EF BB BF, FE FF or FF FE) switches `self.encoding` and `self.variant`
//   to UTF_8, UTF_16BE or UTF_16LE when the decoder is not already using it,
//   and decoding continues after the BOM. Otherwise the bytes seen so far
//   are decoded as ordinary input by the `after_..._potential_bom_byte(s)`
//   helpers.
// * `ConvertingWithPendingBB` replays a 0xBB byte that is not in `src`.
// * `Finished` panics.
//
// `offset` counts how many potential BOM bytes have been taken from the
// current `src`. It can be smaller than the number of bytes seen so far
// because the life cycle state persists across calls.
//
// Each generated method returns a `(DecoderResult, usize, usize)` tuple; the
// local names used for the two counts are `read` and `written`.
1283 | | macro_rules! public_decode_function{ |
1284 | | ($(#[$meta:meta])*, |
1285 | | $decode_to_utf:ident, |
1286 | | $decode_to_utf_raw:ident, |
1287 | | $decode_to_utf_checking_end:ident, |
1288 | | $decode_to_utf_after_one_potential_bom_byte:ident, |
1289 | | $decode_to_utf_after_two_potential_bom_bytes:ident, |
1290 | | $decode_to_utf_checking_end_with_offset:ident, |
1291 | | $code_unit:ty) => ( |
1292 | | $(#[$meta])* |
1293 | 0 | pub fn $decode_to_utf(&mut self, |
1294 | 0 | src: &[u8], |
1295 | 0 | dst: &mut [$code_unit], |
1296 | 0 | last: bool) |
1297 | 0 | -> (DecoderResult, usize, usize) { |
1298 | 0 | let mut offset = 0usize; |
1299 | | loop { |
1300 | 0 | match self.life_cycle { |
1301 | | // The common case. (Post-sniffing.) |
1302 | | DecoderLifeCycle::Converting => { |
1303 | 0 | return self.$decode_to_utf_checking_end(src, dst, last); |
1304 | | } |
1305 | | // The rest is all BOM sniffing! |
1306 | | DecoderLifeCycle::AtStart => { |
1307 | 0 | debug_assert_eq!(offset, 0usize); |
1308 | 0 | if src.is_empty() { |
1309 | 0 | return (DecoderResult::InputEmpty, 0, 0); |
1310 | 0 | } |
1311 | 0 | match src[0] { |
1312 | | 0xEFu8 => { |
1313 | 0 | self.life_cycle = DecoderLifeCycle::SeenUtf8First; |
1314 | 0 | offset += 1; |
1315 | 0 | continue; |
1316 | | } |
1317 | | 0xFEu8 => { |
1318 | 0 | self.life_cycle = DecoderLifeCycle::SeenUtf16BeFirst; |
1319 | 0 | offset += 1; |
1320 | 0 | continue; |
1321 | | } |
1322 | | 0xFFu8 => { |
1323 | 0 | self.life_cycle = DecoderLifeCycle::SeenUtf16LeFirst; |
1324 | 0 | offset += 1; |
1325 | 0 | continue; |
1326 | | } |
1327 | | _ => { |
1328 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1329 | 0 | continue; |
1330 | | } |
1331 | | } |
1332 | | } |
1333 | | DecoderLifeCycle::AtUtf8Start => { |
1334 | 0 | debug_assert_eq!(offset, 0usize); |
1335 | 0 | if src.is_empty() { |
1336 | 0 | return (DecoderResult::InputEmpty, 0, 0); |
1337 | 0 | } |
1338 | 0 | match src[0] { |
1339 | | 0xEFu8 => { |
1340 | 0 | self.life_cycle = DecoderLifeCycle::SeenUtf8First; |
1341 | 0 | offset += 1; |
1342 | 0 | continue; |
1343 | | } |
1344 | | _ => { |
1345 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1346 | 0 | continue; |
1347 | | } |
1348 | | } |
1349 | | } |
1350 | | DecoderLifeCycle::AtUtf16BeStart => { |
1351 | 0 | debug_assert_eq!(offset, 0usize); |
1352 | 0 | if src.is_empty() { |
1353 | 0 | return (DecoderResult::InputEmpty, 0, 0); |
1354 | 0 | } |
1355 | 0 | match src[0] { |
1356 | | 0xFEu8 => { |
1357 | 0 | self.life_cycle = DecoderLifeCycle::SeenUtf16BeFirst; |
1358 | 0 | offset += 1; |
1359 | 0 | continue; |
1360 | | } |
1361 | | _ => { |
1362 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1363 | 0 | continue; |
1364 | | } |
1365 | | } |
1366 | | } |
1367 | | DecoderLifeCycle::AtUtf16LeStart => { |
1368 | 0 | debug_assert_eq!(offset, 0usize); |
1369 | 0 | if src.is_empty() { |
1370 | 0 | return (DecoderResult::InputEmpty, 0, 0); |
1371 | 0 | } |
1372 | 0 | match src[0] { |
1373 | | 0xFFu8 => { |
1374 | 0 | self.life_cycle = DecoderLifeCycle::SeenUtf16LeFirst; |
1375 | 0 | offset += 1; |
1376 | 0 | continue; |
1377 | | } |
1378 | | _ => { |
1379 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1380 | 0 | continue; |
1381 | | } |
1382 | | } |
1383 | | } |
// 0xEF has been seen. 0xBB next keeps the UTF-8 BOM candidate alive;
// anything else (or end of stream) means 0xEF was ordinary input.
1384 | | DecoderLifeCycle::SeenUtf8First => { |
1385 | 0 | if offset >= src.len() { |
1386 | 0 | if last { |
1387 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1388 | 0 | dst, |
1389 | 0 | last, |
1390 | 0 | offset, |
1391 | 0 | 0xEFu8); |
1392 | 0 | } |
1393 | 0 | return (DecoderResult::InputEmpty, offset, 0); |
1394 | 0 | } |
1395 | 0 | if src[offset] == 0xBBu8 { |
1396 | 0 | self.life_cycle = DecoderLifeCycle::SeenUtf8Second; |
1397 | 0 | offset += 1; |
1398 | 0 | continue; |
1399 | 0 | } |
1400 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1401 | 0 | dst, |
1402 | 0 | last, |
1403 | 0 | offset, |
1404 | 0 | 0xEFu8); |
1405 | | } |
// 0xEF 0xBB has been seen. 0xBF completes the UTF-8 BOM.
1406 | | DecoderLifeCycle::SeenUtf8Second => { |
1407 | 0 | if offset >= src.len() { |
1408 | 0 | if last { |
1409 | 0 | return self.$decode_to_utf_after_two_potential_bom_bytes(src, |
1410 | 0 | dst, |
1411 | 0 | last, |
1412 | 0 | offset); |
1413 | 0 | } |
1414 | 0 | return (DecoderResult::InputEmpty, offset, 0); |
1415 | 0 | } |
1416 | 0 | if src[offset] == 0xBFu8 { |
1417 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1418 | 0 | offset += 1; |
1419 | 0 | if self.encoding != UTF_8 { |
1420 | 0 | self.encoding = UTF_8; |
1421 | 0 | self.variant = UTF_8.new_variant_decoder(); |
1422 | 0 | } |
1423 | 0 | return self.$decode_to_utf_checking_end_with_offset(src, |
1424 | 0 | dst, |
1425 | 0 | last, |
1426 | 0 | offset); |
1427 | 0 | } |
1428 | 0 | return self.$decode_to_utf_after_two_potential_bom_bytes(src, |
1429 | 0 | dst, |
1430 | 0 | last, |
1431 | 0 | offset); |
1432 | | } |
// 0xFE has been seen. 0xFF completes the UTF-16BE BOM.
1433 | | DecoderLifeCycle::SeenUtf16BeFirst => { |
1434 | 0 | if offset >= src.len() { |
1435 | 0 | if last { |
1436 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1437 | 0 | dst, |
1438 | 0 | last, |
1439 | 0 | offset, |
1440 | 0 | 0xFEu8); |
1441 | 0 | } |
1442 | 0 | return (DecoderResult::InputEmpty, offset, 0); |
1443 | 0 | } |
1444 | 0 | if src[offset] == 0xFFu8 { |
1445 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1446 | 0 | offset += 1; |
1447 | 0 | if self.encoding != UTF_16BE { |
1448 | 0 | self.encoding = UTF_16BE; |
1449 | 0 | self.variant = UTF_16BE.new_variant_decoder(); |
1450 | 0 | } |
1451 | 0 | return self.$decode_to_utf_checking_end_with_offset(src, |
1452 | 0 | dst, |
1453 | 0 | last, |
1454 | 0 | offset); |
1455 | 0 | } |
1456 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1457 | 0 | dst, |
1458 | 0 | last, |
1459 | 0 | offset, |
1460 | 0 | 0xFEu8); |
1461 | | } |
// 0xFF has been seen. 0xFE completes the UTF-16LE BOM.
1462 | | DecoderLifeCycle::SeenUtf16LeFirst => { |
1463 | 0 | if offset >= src.len() { |
1464 | 0 | if last { |
1465 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1466 | 0 | dst, |
1467 | 0 | last, |
1468 | 0 | offset, |
1469 | 0 | 0xFFu8); |
1470 | 0 | } |
1471 | 0 | return (DecoderResult::InputEmpty, offset, 0); |
1472 | 0 | } |
1473 | 0 | if src[offset] == 0xFEu8 { |
1474 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1475 | 0 | offset += 1; |
1476 | 0 | if self.encoding != UTF_16LE { |
1477 | 0 | self.encoding = UTF_16LE; |
1478 | 0 | self.variant = UTF_16LE.new_variant_decoder(); |
1479 | 0 | } |
1480 | 0 | return self.$decode_to_utf_checking_end_with_offset(src, |
1481 | 0 | dst, |
1482 | 0 | last, |
1483 | 0 | offset); |
1484 | 0 | } |
1485 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1486 | 0 | dst, |
1487 | 0 | last, |
1488 | 0 | offset, |
1489 | 0 | 0xFFu8); |
1490 | | } |
// Set by the two-byte helper when 0xEF was malformed on its own and the
// following 0xBB, which is not in `src`, still has to be decoded.
1491 | | DecoderLifeCycle::ConvertingWithPendingBB => { |
1492 | 0 | debug_assert_eq!(offset, 0usize); |
1493 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1494 | 0 | dst, |
1495 | 0 | last, |
1496 | 0 | 0usize, |
1497 | 0 | 0xBBu8); |
1498 | | } |
1499 | 0 | DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."), |
1500 | | } |
1501 | | } |
1502 | 0 | } Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf8_without_replacement Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf16_without_replacement |
1503 | | |
// Resumes normal decoding after one byte (`first_byte`) turned out not to
// start a BOM. `offset` is 0 when that byte is not part of `src` and 1 when
// it is `src[0]`. In the first case the byte is decoded from a one-byte
// buffer (with `last` false) before `src`, and the returned read count covers
// `src` only. Panics if the variant decoder reports `OutputFull` for that
// single byte.
1504 | 0 | fn $decode_to_utf_after_one_potential_bom_byte(&mut self, |
1505 | 0 | src: &[u8], |
1506 | 0 | dst: &mut [$code_unit], |
1507 | 0 | last: bool, |
1508 | 0 | offset: usize, |
1509 | 0 | first_byte: u8) |
1510 | 0 | -> (DecoderResult, usize, usize) { |
1511 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1512 | 0 | if offset == 0usize { |
1513 | | // First byte was seen previously. |
1514 | 0 | let first = [first_byte]; |
1515 | 0 | let mut out_read = 0usize; |
1516 | 0 | let (mut first_result, _, mut first_written) = |
1517 | 0 | self.variant |
1518 | 0 | .$decode_to_utf_raw(&first[..], dst, false); |
1519 | 0 | match first_result { |
1520 | 0 | DecoderResult::InputEmpty => { |
1521 | 0 | let (result, read, written) = |
1522 | 0 | self.$decode_to_utf_checking_end(src, &mut dst[first_written..], last); |
1523 | 0 | first_result = result; |
1524 | 0 | out_read = read; // Overwrite, don't add! |
1525 | 0 | first_written += written; |
1526 | 0 | } |
1527 | 0 | DecoderResult::Malformed(_, _) => { |
1528 | 0 | // Wasn't read from `src`!, leave out_read to 0 |
1529 | 0 | } |
1530 | | DecoderResult::OutputFull => { |
1531 | 0 | panic!("Output buffer must have been too small."); |
1532 | | } |
1533 | | } |
1534 | 0 | return (first_result, out_read, first_written); |
1535 | 0 | } |
1536 | 0 | debug_assert_eq!(offset, 1usize); |
1537 | | // The first byte is in `src`, so no need to push it separately. |
1538 | 0 | self.$decode_to_utf_checking_end(src, dst, last) |
1539 | 0 | } Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf8_after_one_potential_bom_byte Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf16_after_one_potential_bom_byte |
1540 | | |
// Resumes normal decoding after 0xEF 0xBB was not followed by 0xBF.
// `offset` says how many of those two bytes are in `src`: 0 (neither),
// 1 (only 0xBB) or 2 (both). With offset 0 the pair is decoded from a
// two-byte buffer (with `last` false) before `src`; if that reports a
// malformed sequence after reading one byte, the 0xBB is deferred via
// `ConvertingWithPendingBB`. Panics if the variant decoder reports
// `OutputFull` for the two-byte buffer.
1541 | 0 | fn $decode_to_utf_after_two_potential_bom_bytes(&mut self, |
1542 | 0 | src: &[u8], |
1543 | 0 | dst: &mut [$code_unit], |
1544 | 0 | last: bool, |
1545 | 0 | offset: usize) |
1546 | 0 | -> (DecoderResult, usize, usize) { |
1547 | 0 | self.life_cycle = DecoderLifeCycle::Converting; |
1548 | 0 | if offset == 0usize { |
1549 | | // The first two bytes are not in the current buffer. |
1550 | 0 | let ef_bb = [0xEFu8, 0xBBu8]; |
1551 | 0 | let (mut first_result, mut first_read, mut first_written) = |
1552 | 0 | self.variant |
1553 | 0 | .$decode_to_utf_raw(&ef_bb[..], dst, false); |
1554 | 0 | match first_result { |
1555 | 0 | DecoderResult::InputEmpty => { |
1556 | 0 | let (result, read, written) = |
1557 | 0 | self.$decode_to_utf_checking_end(src, &mut dst[first_written..], last); |
1558 | 0 | first_result = result; |
1559 | 0 | first_read = read; // Overwrite, don't add! |
1560 | 0 | first_written += written; |
1561 | 0 | } |
1562 | | DecoderResult::Malformed(_, _) => { |
1563 | 0 | if first_read == 1usize { |
1564 | 0 | // The first byte was malformed. We need to handle |
1565 | 0 | // the second one, which isn't in `src`, later. |
1566 | 0 | self.life_cycle = DecoderLifeCycle::ConvertingWithPendingBB; |
1567 | 0 | } |
1568 | 0 | first_read = 0usize; // Wasn't read from `src`! |
1569 | | } |
1570 | | DecoderResult::OutputFull => { |
1571 | 0 | panic!("Output buffer must have been too small."); |
1572 | | } |
1573 | | } |
1574 | 0 | return (first_result, first_read, first_written); |
1575 | 0 | } |
1576 | 0 | if offset == 1usize { |
1577 | | // The first byte isn't in the current buffer but the second one |
1578 | | // is. |
1579 | 0 | return self.$decode_to_utf_after_one_potential_bom_byte(src, |
1580 | 0 | dst, |
1581 | 0 | last, |
1582 | 0 | 0usize, |
1583 | 0 | 0xEFu8); |
1584 | | |
1585 | 0 | } |
1586 | 0 | debug_assert_eq!(offset, 2usize); |
1587 | | // The first two bytes are in `src`, so no need to push them separately. |
1588 | 0 | self.$decode_to_utf_checking_end(src, dst, last) |
1589 | 0 | } Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf8_after_two_potential_bom_bytes Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf16_after_two_potential_bom_bytes |
1590 | | |
1591 | | /// Calls `$decode_to_utf_checking_end` with `offset` bytes omitted from |
1592 | | /// the start of `src` but adjusting the return values to show those bytes |
1593 | | /// as having been consumed. |
1594 | 0 | fn $decode_to_utf_checking_end_with_offset(&mut self, |
1595 | 0 | src: &[u8], |
1596 | 0 | dst: &mut [$code_unit], |
1597 | 0 | last: bool, |
1598 | 0 | offset: usize) |
1599 | 0 | -> (DecoderResult, usize, usize) { |
1600 | 0 | debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting); |
1601 | 0 | let (result, read, written) = self.$decode_to_utf_checking_end(&src[offset..], dst, last); |
1602 | 0 | (result, read + offset, written) |
1603 | 0 | } Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf8_checking_end_with_offset Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf16_checking_end_with_offset |
1604 | | |
1605 | | /// Calls through to the delegate and adjusts life cycle iff `last` is |
1606 | | /// `true` and result is `DecoderResult::InputEmpty`. |
1607 | 0 | fn $decode_to_utf_checking_end(&mut self, |
1608 | 0 | src: &[u8], |
1609 | 0 | dst: &mut [$code_unit], |
1610 | 0 | last: bool) |
1611 | 0 | -> (DecoderResult, usize, usize) { |
1612 | 0 | debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting); |
1613 | 0 | let (result, read, written) = self.variant |
1614 | 0 | .$decode_to_utf_raw(src, dst, last); |
1615 | 0 | if last { |
1616 | 0 | if let DecoderResult::InputEmpty = result { |
1617 | 0 | self.life_cycle = DecoderLifeCycle::Finished; |
1618 | 0 | } |
1619 | 0 | } |
1620 | 0 | (result, read, written) |
1621 | 0 | }); Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf8_checking_end Unexecuted instantiation: <encoding_rs::Decoder>::decode_to_utf16_checking_end |
1622 | | } |