Coverage Report

Created: 2025-07-18 06:22

/rust/registry/src/index.crates.io-6f17d22bba15001f/encoding_rs-0.8.35/src/handles.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright Mozilla Foundation. See the COPYRIGHT
2
// file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! This module provides structs that use lifetimes to couple bounds checking
11
//! and space availability checking and detaching those from actual slice
12
//! reading/writing.
13
//!
14
//! At present, the internals of the implementation are safe code, so the
15
//! bound checks currently also happen on read/write. Once this code works,
16
//! the plan is to replace the internals with unsafe code that omits the
17
//! bound check at the read/write time.
18
19
#[cfg(all(
20
    feature = "simd-accel",
21
    any(
22
        target_feature = "sse2",
23
        all(target_endian = "little", target_arch = "aarch64"),
24
        all(target_endian = "little", target_feature = "neon")
25
    )
26
))]
27
use crate::simd_funcs::*;
28
29
#[cfg(all(
30
    feature = "simd-accel",
31
    any(
32
        target_feature = "sse2",
33
        all(target_endian = "little", target_arch = "aarch64"),
34
        all(target_endian = "little", target_feature = "neon")
35
    )
36
))]
37
use core::simd::u16x8;
38
39
use super::DecoderResult;
40
use super::EncoderResult;
41
use crate::ascii::*;
42
use crate::utf_8::convert_utf8_to_utf16_up_to_invalid;
43
use crate::utf_8::utf8_valid_up_to;
44
45
/// Outcome of asking a source or destination whether one more read/write
/// is possible.
pub enum Space<T> {
46
    /// The check passed; carries the handle that permits the operation.
    Available(T),
47
    /// The check failed; carries the number of units consumed/written so far
    /// (the `check_*` methods in this file pass `consumed()` / `written()`).
    Full(usize),
48
}
49
50
/// Outcome of an ASCII fast-path copy.
pub enum CopyAsciiResult<T, U> {
51
    /// Copying ended without a non-ASCII unit to hand over (input or output
    /// was exhausted); the payload describes how far the copy got.
    Stop(T),
52
    /// A non-ASCII unit was encountered; the payload carries that unit
    /// together with whatever the caller needs to continue.
    GoOn(U),
53
}
54
55
/// A character outside the ASCII range.
// NOTE(review): not used in the visible part of this file; the variant
// meanings below are inferred from the names and payload types -- confirm.
pub enum NonAscii {
56
    /// Presumably a Basic Multilingual Plane scalar value above ASCII.
    BmpExclAscii(u16),
57
    /// Presumably a scalar value above U+FFFF.
    Astral(char),
58
}
59
60
/// A character split into the ASCII / non-ASCII cases.
// NOTE(review): not used in the visible part of this file -- confirm usage.
pub enum Unicode {
61
    /// An ASCII character held as a single byte.
    Ascii(u8),
62
    /// Any other character; see `NonAscii`.
    NonAscii(NonAscii),
63
}
64
65
// Start UTF-16LE/BE fast path
66
67
/// Compile-time description of a UTF-16 byte order relative to the target.
pub trait Endian {
68
    /// `true` when this byte order differs from the target's native byte
    /// order, i.e. when code units must be byte-swapped after loading.
    const OPPOSITE_ENDIAN: bool;
69
}
70
71
/// Marker type selecting big-endian UTF-16 code units.
pub struct BigEndian;
72
73
impl Endian for BigEndian {
74
    // On a little-endian target, big-endian data needs swapping.
    #[cfg(target_endian = "little")]
75
    const OPPOSITE_ENDIAN: bool = true;
76
77
    // On a big-endian target, big-endian data is already native.
    #[cfg(target_endian = "big")]
78
    const OPPOSITE_ENDIAN: bool = false;
79
}
80
81
/// Marker type selecting little-endian UTF-16 code units.
pub struct LittleEndian;
82
83
impl Endian for LittleEndian {
84
    // On a little-endian target, little-endian data is already native.
    #[cfg(target_endian = "little")]
85
    const OPPOSITE_ENDIAN: bool = false;
86
87
    // On a big-endian target, little-endian data needs swapping.
    #[cfg(target_endian = "big")]
88
    const OPPOSITE_ENDIAN: bool = true;
89
}
90
91
#[derive(Debug, Copy, Clone)]
92
struct UnalignedU16Slice {
93
    // Safety invariant: ptr must be valid for reading 2*len bytes
94
    ptr: *const u8,
95
    len: usize,
96
}
97
98
impl UnalignedU16Slice {
99
    /// Safety: ptr must be valid for reading 2*len bytes
100
    #[inline(always)]
101
0
    pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
102
0
        // Safety: field invariant passed up to caller here
103
0
        UnalignedU16Slice { ptr, len }
104
0
    }
105
106
    #[inline(always)]
107
0
    pub fn trim_last(&mut self) {
108
0
        assert!(self.len > 0);
109
        // Safety: invariant upheld here: a slice is still valid with a shorter len
110
0
        self.len -= 1;
111
0
    }
112
113
    #[inline(always)]
114
0
    pub fn at(&self, i: usize) -> u16 {
115
        use core::mem::MaybeUninit;
116
117
0
        assert!(i < self.len);
118
        unsafe {
119
0
            let mut u: MaybeUninit<u16> = MaybeUninit::uninit();
120
0
            // Safety: i is at most len - 1, which works here
121
0
            ::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2);
122
0
            // Safety: valid read above lets us do this
123
0
            u.assume_init()
124
0
        }
125
0
    }
126
127
    #[cfg(feature = "simd-accel")]
128
    #[inline(always)]
129
    pub fn simd_at(&self, i: usize) -> u16x8 {
130
        // Safety: i/len are on the scale of u16s, each one corresponds to 2 u8s
131
        assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
132
        let byte_index = i * 2;
133
        // Safety: load16_unaligned needs SIMD_STRIDE_SIZE=16 u8 elements to read,
134
        // or 16/2 = 8 u16 elements to read.
135
        // We have checked that we have at least that many above.
136
137
        unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
138
    }
139
140
    #[inline(always)]
141
0
    pub fn len(&self) -> usize {
142
0
        self.len
143
0
    }
144
145
    #[inline(always)]
146
0
    pub fn tail(&self, from: usize) -> UnalignedU16Slice {
147
0
        // XXX the return value should be restricted not to
148
0
        // outlive self.
149
0
        assert!(from <= self.len);
150
        // Safety: This upholds the same invariant: `from` is in bounds and we're returning a shorter slice
151
0
        unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
152
0
    }
153
154
    #[cfg(feature = "simd-accel")]
155
    #[inline(always)]
156
    pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
157
        assert!(self.len <= other.len());
158
        let mut offset = 0;
159
        // Safety: SIMD_STRIDE_SIZE is measured in bytes, whereas len is in u16s. We check we can
160
        // munch SIMD_STRIDE_SIZE / 2 u16s which means we can write SIMD_STRIDE_SIZE u8s
161
        if SIMD_STRIDE_SIZE / 2 <= self.len {
162
            let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
163
            loop {
164
                let mut simd = self.simd_at(offset);
165
                if E::OPPOSITE_ENDIAN {
166
                    simd = simd_byte_swap(simd);
167
                }
168
                // Safety: we have enough space on the other side to write this
169
                unsafe {
170
                    store8_unaligned(other.as_mut_ptr().add(offset), simd);
171
                }
172
                if contains_surrogates(simd) {
173
                    break;
174
                }
175
                offset += SIMD_STRIDE_SIZE / 2;
176
                // Safety: This ensures we still have space for writing SIMD_STRIDE_SIZE u8s
177
                if offset > len_minus_stride {
178
                    break;
179
                }
180
            }
181
        }
182
        while offset < self.len {
183
            let unit = swap_if_opposite_endian::<E>(self.at(offset));
184
            other[offset] = unit;
185
            if super::in_range16(unit, 0xD800, 0xE000) {
186
                return Some((unit, offset));
187
            }
188
            offset += 1;
189
        }
190
        None
191
    }
192
193
    #[cfg(not(feature = "simd-accel"))]
194
    #[inline(always)]
195
0
    fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
196
0
        assert!(self.len <= other.len());
197
0
        for (i, target) in other.iter_mut().enumerate().take(self.len) {
198
0
            let unit = swap_if_opposite_endian::<E>(self.at(i));
199
0
            *target = unit;
200
0
            if super::in_range16(unit, 0xD800, 0xE000) {
201
0
                return Some((unit, i));
202
0
            }
203
        }
204
0
        None
205
0
    }
Unexecuted instantiation: <encoding_rs::handles::UnalignedU16Slice>::copy_bmp_to::<encoding_rs::handles::LittleEndian>
Unexecuted instantiation: <encoding_rs::handles::UnalignedU16Slice>::copy_bmp_to::<encoding_rs::handles::BigEndian>
206
}
207
208
#[inline(always)]
209
0
fn copy_unaligned_basic_latin_to_ascii_alu<E: Endian>(
210
0
    src: UnalignedU16Slice,
211
0
    dst: &mut [u8],
212
0
    offset: usize,
213
0
) -> CopyAsciiResult<usize, (u16, usize)> {
214
0
    let len = ::core::cmp::min(src.len(), dst.len());
215
0
    let mut i = 0usize;
216
    loop {
217
0
        if i == len {
218
0
            return CopyAsciiResult::Stop(i + offset);
219
0
        }
220
0
        let unit = swap_if_opposite_endian::<E>(src.at(i));
221
0
        if unit > 0x7F {
222
0
            return CopyAsciiResult::GoOn((unit, i + offset));
223
0
        }
224
0
        dst[i] = unit as u8;
225
0
        i += 1;
226
    }
227
0
}
Unexecuted instantiation: encoding_rs::handles::copy_unaligned_basic_latin_to_ascii_alu::<encoding_rs::handles::LittleEndian>
Unexecuted instantiation: encoding_rs::handles::copy_unaligned_basic_latin_to_ascii_alu::<encoding_rs::handles::BigEndian>
228
229
#[inline(always)]
230
0
fn swap_if_opposite_endian<E: Endian>(unit: u16) -> u16 {
231
0
    if E::OPPOSITE_ENDIAN {
232
0
        unit.swap_bytes()
233
    } else {
234
0
        unit
235
    }
236
0
}
Unexecuted instantiation: encoding_rs::handles::swap_if_opposite_endian::<encoding_rs::handles::LittleEndian>
Unexecuted instantiation: encoding_rs::handles::swap_if_opposite_endian::<encoding_rs::handles::BigEndian>
237
238
#[cfg(not(feature = "simd-accel"))]
239
#[inline(always)]
240
0
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
241
0
    src: UnalignedU16Slice,
242
0
    dst: &mut [u8],
243
0
) -> CopyAsciiResult<usize, (u16, usize)> {
244
0
    copy_unaligned_basic_latin_to_ascii_alu::<E>(src, dst, 0)
245
0
}
Unexecuted instantiation: encoding_rs::handles::copy_unaligned_basic_latin_to_ascii::<encoding_rs::handles::LittleEndian>
Unexecuted instantiation: encoding_rs::handles::copy_unaligned_basic_latin_to_ascii::<encoding_rs::handles::BigEndian>
246
247
/// SIMD build: copies Basic Latin code units from `src` to `dst`, one
/// SIMD_STRIDE_SIZE-unit stride at a time while full strides are available and
/// entirely Basic Latin, then hands the remainder to the scalar routine.
#[cfg(feature = "simd-accel")]
#[inline(always)]
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> CopyAsciiResult<usize, (u16, usize)> {
    let len = ::core::cmp::min(src.len(), dst.len());
    let mut offset = 0;
    // Safety: this check ensures at least SIMD_STRIDE_SIZE elements can be
    // read and written.
    if SIMD_STRIDE_SIZE <= len {
        let last_stride_start = len - SIMD_STRIDE_SIZE;
        // Safety: `offset <= len - SIMD_STRIDE_SIZE` on every iteration, so a
        // full stride is always available on both sides.
        while offset <= last_stride_start {
            let head = src.simd_at(offset);
            let next = src.simd_at(offset + (SIMD_STRIDE_SIZE / 2));
            let (first, second) = if E::OPPOSITE_ENDIAN {
                (simd_byte_swap(head), simd_byte_swap(next))
            } else {
                (head, next)
            };
            if !simd_is_basic_latin(first | second) {
                break;
            }
            let packed = simd_pack(first, second);
            // Safety: SIMD_STRIDE_SIZE elements are writable at `offset`.
            unsafe {
                store16_unaligned(dst.as_mut_ptr().add(offset), packed);
            }
            offset += SIMD_STRIDE_SIZE;
        }
    }
    copy_unaligned_basic_latin_to_ascii_alu::<E>(src.tail(offset), &mut dst[offset..], offset)
}
283
284
#[inline(always)]
285
0
fn convert_unaligned_utf16_to_utf8<E: Endian>(
286
0
    src: UnalignedU16Slice,
287
0
    dst: &mut [u8],
288
0
) -> (usize, usize, bool) {
289
0
    if dst.len() < 4 {
290
0
        return (0, 0, false);
291
0
    }
292
0
    let mut src_pos = 0usize;
293
0
    let mut dst_pos = 0usize;
294
0
    let src_len = src.len();
295
0
    let dst_len_minus_three = dst.len() - 3;
296
    'outer: loop {
297
0
        let mut non_ascii = match copy_unaligned_basic_latin_to_ascii::<E>(
298
0
            src.tail(src_pos),
299
0
            &mut dst[dst_pos..],
300
0
        ) {
301
0
            CopyAsciiResult::GoOn((unit, read_written)) => {
302
0
                src_pos += read_written;
303
0
                dst_pos += read_written;
304
0
                unit
305
            }
306
0
            CopyAsciiResult::Stop(read_written) => {
307
0
                return (src_pos + read_written, dst_pos + read_written, false);
308
            }
309
        };
310
0
        if dst_pos >= dst_len_minus_three {
311
0
            break 'outer;
312
0
        }
313
0
        // We have enough destination space to commit to
314
0
        // having read `non_ascii`.
315
0
        src_pos += 1;
316
        'inner: loop {
317
0
            let non_ascii_minus_surrogate_start = non_ascii.wrapping_sub(0xD800);
318
0
            if non_ascii_minus_surrogate_start > (0xDFFF - 0xD800) {
319
0
                if non_ascii < 0x800 {
320
0
                    dst[dst_pos] = ((non_ascii >> 6) | 0xC0) as u8;
321
0
                    dst_pos += 1;
322
0
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
323
0
                    dst_pos += 1;
324
0
                } else {
325
0
                    dst[dst_pos] = ((non_ascii >> 12) | 0xE0) as u8;
326
0
                    dst_pos += 1;
327
0
                    dst[dst_pos] = (((non_ascii & 0xFC0) >> 6) | 0x80) as u8;
328
0
                    dst_pos += 1;
329
0
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
330
0
                    dst_pos += 1;
331
0
                }
332
0
            } else if non_ascii_minus_surrogate_start <= (0xDBFF - 0xD800) {
333
                // high surrogate
334
0
                if src_pos < src_len {
335
0
                    let second = swap_if_opposite_endian::<E>(src.at(src_pos));
336
0
                    let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
337
0
                    if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
338
0
                        // The next code unit is a low surrogate. Advance position.
339
0
                        src_pos += 1;
340
0
                        let point = (u32::from(non_ascii) << 10) + u32::from(second)
341
0
                            - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
342
0
343
0
                        dst[dst_pos] = ((point >> 18) | 0xF0u32) as u8;
344
0
                        dst_pos += 1;
345
0
                        dst[dst_pos] = (((point & 0x3F000u32) >> 12) | 0x80u32) as u8;
346
0
                        dst_pos += 1;
347
0
                        dst[dst_pos] = (((point & 0xFC0u32) >> 6) | 0x80u32) as u8;
348
0
                        dst_pos += 1;
349
0
                        dst[dst_pos] = ((point & 0x3Fu32) | 0x80u32) as u8;
350
0
                        dst_pos += 1;
351
0
                    } else {
352
                        // The next code unit is not a low surrogate. Don't advance
353
                        // position and treat the high surrogate as unpaired.
354
0
                        return (src_pos, dst_pos, true);
355
                    }
356
                } else {
357
                    // Unpaired surrogate at the end of buffer
358
0
                    return (src_pos, dst_pos, true);
359
                }
360
            } else {
361
                // Unpaired low surrogate
362
0
                return (src_pos, dst_pos, true);
363
            }
364
0
            if dst_pos >= dst_len_minus_three || src_pos == src_len {
365
0
                break 'outer;
366
0
            }
367
0
            let unit = swap_if_opposite_endian::<E>(src.at(src_pos));
368
0
            src_pos += 1;
369
0
            if unit > 0x7F {
370
0
                non_ascii = unit;
371
0
                continue 'inner;
372
0
            }
373
0
            dst[dst_pos] = unit as u8;
374
0
            dst_pos += 1;
375
0
            continue 'outer;
376
        }
377
    }
378
0
    (src_pos, dst_pos, false)
379
0
}
Unexecuted instantiation: encoding_rs::handles::convert_unaligned_utf16_to_utf8::<encoding_rs::handles::LittleEndian>
Unexecuted instantiation: encoding_rs::handles::convert_unaligned_utf16_to_utf8::<encoding_rs::handles::BigEndian>
380
381
// Byte source
382
383
pub struct ByteSource<'a> {
384
    slice: &'a [u8],
385
    pos: usize,
386
}
387
388
impl<'a> ByteSource<'a> {
389
    #[inline(always)]
390
0
    pub fn new(src: &[u8]) -> ByteSource {
391
0
        ByteSource { slice: src, pos: 0 }
392
0
    }
393
    #[inline(always)]
394
0
    pub fn check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>> {
395
0
        if self.pos < self.slice.len() {
396
0
            Space::Available(ByteReadHandle::new(self))
397
        } else {
398
0
            Space::Full(self.consumed())
399
        }
400
0
    }
401
    #[inline(always)]
402
0
    fn read(&mut self) -> u8 {
403
0
        let ret = self.slice[self.pos];
404
0
        self.pos += 1;
405
0
        ret
406
0
    }
407
    #[inline(always)]
408
0
    fn unread(&mut self) -> usize {
409
0
        self.pos -= 1;
410
0
        self.pos
411
0
    }
412
    #[inline(always)]
413
0
    pub fn consumed(&self) -> usize {
414
0
        self.pos
415
0
    }
416
}
417
418
pub struct ByteReadHandle<'a, 'b>
419
where
420
    'b: 'a,
421
{
422
    source: &'a mut ByteSource<'b>,
423
}
424
425
impl<'a, 'b> ByteReadHandle<'a, 'b>
426
where
427
    'b: 'a,
428
{
429
    #[inline(always)]
430
0
    fn new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b> {
431
0
        ByteReadHandle { source: src }
432
0
    }
433
    #[inline(always)]
434
0
    pub fn read(self) -> (u8, ByteUnreadHandle<'a, 'b>) {
435
0
        let byte = self.source.read();
436
0
        let handle = ByteUnreadHandle::new(self.source);
437
0
        (byte, handle)
438
0
    }
439
    #[inline(always)]
440
0
    pub fn consumed(&self) -> usize {
441
0
        self.source.consumed()
442
0
    }
443
}
444
445
pub struct ByteUnreadHandle<'a, 'b>
446
where
447
    'b: 'a,
448
{
449
    source: &'a mut ByteSource<'b>,
450
}
451
452
impl<'a, 'b> ByteUnreadHandle<'a, 'b>
453
where
454
    'b: 'a,
455
{
456
    #[inline(always)]
457
0
    fn new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b> {
458
0
        ByteUnreadHandle { source: src }
459
0
    }
460
    #[inline(always)]
461
0
    pub fn unread(self) -> usize {
462
0
        self.source.unread()
463
0
    }
464
    #[inline(always)]
465
0
    pub fn consumed(&self) -> usize {
466
0
        self.source.consumed()
467
0
    }
468
    #[inline(always)]
469
0
    pub fn commit(self) -> &'a mut ByteSource<'b> {
470
0
        self.source
471
0
    }
472
}
473
474
// UTF-16 destination
475
476
pub struct Utf16BmpHandle<'a, 'b>
477
where
478
    'b: 'a,
479
{
480
    dest: &'a mut Utf16Destination<'b>,
481
}
482
483
impl<'a, 'b> Utf16BmpHandle<'a, 'b>
484
where
485
    'b: 'a,
486
{
487
    #[inline(always)]
488
0
    fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b> {
489
0
        Utf16BmpHandle { dest: dst }
490
0
    }
491
    #[inline(always)]
492
0
    pub fn written(&self) -> usize {
493
0
        self.dest.written()
494
0
    }
495
    #[inline(always)]
496
0
    pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
497
0
        self.dest.write_ascii(ascii);
498
0
        self.dest
499
0
    }
500
    #[inline(always)]
501
0
    pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
502
0
        self.dest.write_bmp(bmp);
503
0
        self.dest
504
0
    }
505
    #[inline(always)]
506
0
    pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
507
0
        self.dest.write_bmp_excl_ascii(bmp);
508
0
        self.dest
509
0
    }
510
    #[inline(always)]
511
0
    pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
512
0
        self.dest.write_mid_bmp(bmp);
513
0
        self.dest
514
0
    }
515
    #[inline(always)]
516
0
    pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
517
0
        self.dest.write_upper_bmp(bmp);
518
0
        self.dest
519
0
    }
520
    #[inline(always)]
521
0
    pub fn commit(self) -> &'a mut Utf16Destination<'b> {
522
0
        self.dest
523
0
    }
524
}
525
526
pub struct Utf16AstralHandle<'a, 'b>
527
where
528
    'b: 'a,
529
{
530
    dest: &'a mut Utf16Destination<'b>,
531
}
532
533
impl<'a, 'b> Utf16AstralHandle<'a, 'b>
534
where
535
    'b: 'a,
536
{
537
    #[inline(always)]
538
0
    fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b> {
539
0
        Utf16AstralHandle { dest: dst }
540
0
    }
541
    #[inline(always)]
542
0
    pub fn written(&self) -> usize {
543
0
        self.dest.written()
544
0
    }
545
    #[inline(always)]
546
0
    pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
547
0
        self.dest.write_ascii(ascii);
548
0
        self.dest
549
0
    }
550
    #[inline(always)]
551
0
    pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
552
0
        self.dest.write_bmp(bmp);
553
0
        self.dest
554
0
    }
555
    #[inline(always)]
556
0
    pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
557
0
        self.dest.write_bmp_excl_ascii(bmp);
558
0
        self.dest
559
0
    }
560
    #[inline(always)]
561
0
    pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
562
0
        self.dest.write_upper_bmp(bmp);
563
0
        self.dest
564
0
    }
565
    #[inline(always)]
566
0
    pub fn write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b> {
567
0
        self.dest.write_astral(astral);
568
0
        self.dest
569
0
    }
570
    #[inline(always)]
571
0
    pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b> {
572
0
        self.dest.write_surrogate_pair(high, low);
573
0
        self.dest
574
0
    }
575
    #[inline(always)]
576
0
    pub fn write_big5_combination(
577
0
        self,
578
0
        combined: u16,
579
0
        combining: u16,
580
0
    ) -> &'a mut Utf16Destination<'b> {
581
0
        self.dest.write_big5_combination(combined, combining);
582
0
        self.dest
583
0
    }
584
    #[inline(always)]
585
0
    pub fn commit(self) -> &'a mut Utf16Destination<'b> {
586
0
        self.dest
587
0
    }
588
}
589
590
pub struct Utf16Destination<'a> {
591
    slice: &'a mut [u16],
592
    pos: usize,
593
}
594
595
impl<'a> Utf16Destination<'a> {
596
    #[inline(always)]
597
0
    pub fn new(dst: &mut [u16]) -> Utf16Destination {
598
0
        Utf16Destination { slice: dst, pos: 0 }
599
0
    }
600
    #[inline(always)]
601
0
    pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>> {
602
0
        if self.pos < self.slice.len() {
603
0
            Space::Available(Utf16BmpHandle::new(self))
604
        } else {
605
0
            Space::Full(self.written())
606
        }
607
0
    }
608
    #[inline(always)]
609
0
    pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>> {
610
0
        if self.pos + 1 < self.slice.len() {
611
0
            Space::Available(Utf16AstralHandle::new(self))
612
        } else {
613
0
            Space::Full(self.written())
614
        }
615
0
    }
616
    #[inline(always)]
617
0
    pub fn written(&self) -> usize {
618
0
        self.pos
619
0
    }
620
    #[inline(always)]
621
0
    fn write_code_unit(&mut self, u: u16) {
622
0
        unsafe {
623
0
            // OK, because we checked before handing out a handle.
624
0
            *(self.slice.get_unchecked_mut(self.pos)) = u;
625
0
        }
626
0
        self.pos += 1;
627
0
    }
628
    #[inline(always)]
629
0
    fn write_ascii(&mut self, ascii: u8) {
630
0
        debug_assert!(ascii < 0x80);
631
0
        self.write_code_unit(u16::from(ascii));
632
0
    }
633
    #[inline(always)]
634
0
    fn write_bmp(&mut self, bmp: u16) {
635
0
        self.write_code_unit(bmp);
636
0
    }
637
    #[inline(always)]
638
0
    fn write_bmp_excl_ascii(&mut self, bmp: u16) {
639
0
        debug_assert!(bmp >= 0x80);
640
0
        self.write_code_unit(bmp);
641
0
    }
642
    #[inline(always)]
643
0
    fn write_mid_bmp(&mut self, bmp: u16) {
644
0
        debug_assert!(bmp >= 0x80); // XXX
645
0
        self.write_code_unit(bmp);
646
0
    }
647
    #[inline(always)]
648
0
    fn write_upper_bmp(&mut self, bmp: u16) {
649
0
        debug_assert!(bmp >= 0x80);
650
0
        self.write_code_unit(bmp);
651
0
    }
652
    #[inline(always)]
653
0
    fn write_astral(&mut self, astral: u32) {
654
0
        debug_assert!(astral > 0xFFFF);
655
0
        debug_assert!(astral <= 0x10_FFFF);
656
0
        self.write_code_unit((0xD7C0 + (astral >> 10)) as u16);
657
0
        self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
658
0
    }
659
    #[inline(always)]
660
0
    fn write_surrogate_pair(&mut self, high: u16, low: u16) {
661
0
        self.write_code_unit(high);
662
0
        self.write_code_unit(low);
663
0
    }
664
    #[inline(always)]
665
0
    fn write_big5_combination(&mut self, combined: u16, combining: u16) {
666
0
        self.write_bmp_excl_ascii(combined);
667
0
        self.write_bmp_excl_ascii(combining);
668
0
    }
669
    // Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
670
    #[inline(always)]
671
0
    pub fn copy_ascii_from_check_space_bmp<'b>(
672
0
        &'b mut self,
673
0
        source: &mut ByteSource,
674
0
    ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)> {
675
0
        let non_ascii_ret = {
676
0
            let src_remaining = &source.slice[source.pos..];
677
0
            let dst_remaining = &mut self.slice[self.pos..];
678
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
679
0
                (DecoderResult::OutputFull, dst_remaining.len())
680
            } else {
681
0
                (DecoderResult::InputEmpty, src_remaining.len())
682
            };
683
            // Safety: This function is documented as needing valid pointers for src/dest and len, which
684
            // is true since we've passed the minumum length of the two
685
0
            match unsafe {
686
0
                ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
687
0
            } {
688
                None => {
689
0
                    source.pos += length;
690
0
                    self.pos += length;
691
0
                    return CopyAsciiResult::Stop((pending, source.pos, self.pos));
692
                }
693
                // Safety: the function is documented as returning bytes >=0x80 in the Some
694
0
                Some((non_ascii, consumed)) => {
695
0
                    source.pos += consumed;
696
0
                    self.pos += consumed;
697
0
                    source.pos += 1; // +1 for non_ascii
698
0
                                     // Safety: non-ascii bubbled out here
699
0
                    non_ascii
700
0
                }
701
0
            }
702
0
        };
703
0
        // Safety: non-ascii returned here
704
0
        CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
705
0
    }
706
    // Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
707
    #[inline(always)]
708
0
    pub fn copy_ascii_from_check_space_astral<'b>(
709
0
        &'b mut self,
710
0
        source: &mut ByteSource,
711
0
    ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)> {
712
0
        let non_ascii_ret = {
713
0
            let dst_len = self.slice.len();
714
0
            let src_remaining = &source.slice[source.pos..];
715
0
            let dst_remaining = &mut self.slice[self.pos..];
716
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
717
0
                (DecoderResult::OutputFull, dst_remaining.len())
718
            } else {
719
0
                (DecoderResult::InputEmpty, src_remaining.len())
720
            };
721
            // Safety: This function is documented as needing valid pointers for src/dest and len, which
722
            // is true since we've passed the minumum length of the two
723
0
            match unsafe {
724
0
                ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
725
0
            } {
726
                None => {
727
0
                    source.pos += length;
728
0
                    self.pos += length;
729
0
                    return CopyAsciiResult::Stop((pending, source.pos, self.pos));
730
                }
731
                // Safety: the function is documented as returning bytes >=0x80 in the Some
732
0
                Some((non_ascii, consumed)) => {
733
0
                    source.pos += consumed;
734
0
                    self.pos += consumed;
735
0
                    if self.pos + 1 < dst_len {
736
0
                        source.pos += 1; // +1 for non_ascii
737
0
                                         // Safety: non-ascii bubbled out here
738
0
                        non_ascii
739
                    } else {
740
0
                        return CopyAsciiResult::Stop((
741
0
                            DecoderResult::OutputFull,
742
0
                            source.pos,
743
0
                            self.pos,
744
0
                        ));
745
                    }
746
                }
747
            }
748
        };
749
        // Safety: non-ascii returned here
750
0
        CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
751
0
    }
752
    #[inline(always)]
753
0
    pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
754
0
        let src_remaining = &source.slice[source.pos..];
755
0
        let dst_remaining = &mut self.slice[self.pos..];
756
0
        let (read, written) = convert_utf8_to_utf16_up_to_invalid(src_remaining, dst_remaining);
757
0
        source.pos += read;
758
0
        self.pos += written;
759
0
    }
760
    #[inline(always)]
761
0
    pub fn copy_utf16_from<E: Endian>(
762
0
        &mut self,
763
0
        source: &mut ByteSource,
764
0
    ) -> Option<(usize, usize)> {
765
0
        let src_remaining = &source.slice[source.pos..];
766
0
        let dst_remaining = &mut self.slice[self.pos..];
767
0
768
0
        let mut src_unaligned = unsafe {
769
0
            UnalignedU16Slice::new(
770
0
                src_remaining.as_ptr(),
771
0
                ::core::cmp::min(src_remaining.len() / 2, dst_remaining.len()),
772
0
            )
773
0
        };
774
0
        if src_unaligned.len() == 0 {
775
0
            return None;
776
0
        }
777
0
        let last_unit = swap_if_opposite_endian::<E>(src_unaligned.at(src_unaligned.len() - 1));
778
0
        if super::in_range16(last_unit, 0xD800, 0xDC00) {
779
0
            // Last code unit is a high surrogate. It might
780
0
            // legitimately form a pair later, so let's not
781
0
            // include it.
782
0
            src_unaligned.trim_last();
783
0
        }
784
0
        let mut offset = 0usize;
785
        loop {
786
0
            if let Some((surrogate, bmp_len)) = {
787
0
                let src_left = src_unaligned.tail(offset);
788
0
                let dst_left = &mut dst_remaining[offset..src_unaligned.len()];
789
0
                src_left.copy_bmp_to::<E>(dst_left)
790
0
            } {
791
0
                offset += bmp_len; // surrogate has not been consumed yet
792
0
                let second_pos = offset + 1;
793
0
                if surrogate > 0xDBFF || second_pos == src_unaligned.len() {
794
                    // Unpaired surrogate
795
0
                    source.pos += second_pos * 2;
796
0
                    self.pos += offset;
797
0
                    return Some((source.pos, self.pos));
798
0
                }
799
0
                let second = swap_if_opposite_endian::<E>(src_unaligned.at(second_pos));
800
0
                if !super::in_range16(second, 0xDC00, 0xE000) {
801
                    // Unpaired surrogate
802
0
                    source.pos += second_pos * 2;
803
0
                    self.pos += offset;
804
0
                    return Some((source.pos, self.pos));
805
0
                }
806
0
                // `surrogate` was already speculatively written
807
0
                dst_remaining[second_pos] = second;
808
0
                offset += 2;
809
0
                continue;
810
            } else {
811
0
                source.pos += src_unaligned.len() * 2;
812
0
                self.pos += src_unaligned.len();
813
0
                return None;
814
            }
815
        }
816
0
    }
Unexecuted instantiation: <encoding_rs::handles::Utf16Destination>::copy_utf16_from::<encoding_rs::handles::LittleEndian>
Unexecuted instantiation: <encoding_rs::handles::Utf16Destination>::copy_utf16_from::<encoding_rs::handles::BigEndian>
817
}
818
819
// UTF-8 destination
820
821
pub struct Utf8BmpHandle<'a, 'b>
822
where
823
    'b: 'a,
824
{
825
    dest: &'a mut Utf8Destination<'b>,
826
}
827
828
impl<'a, 'b> Utf8BmpHandle<'a, 'b>
829
where
830
    'b: 'a,
831
{
832
    #[inline(always)]
833
0
    fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b> {
834
0
        Utf8BmpHandle { dest: dst }
835
0
    }
836
    #[inline(always)]
837
0
    pub fn written(&self) -> usize {
838
0
        self.dest.written()
839
0
    }
840
    #[inline(always)]
841
0
    pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
842
0
        self.dest.write_ascii(ascii);
843
0
        self.dest
844
0
    }
845
    #[inline(always)]
846
0
    pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
847
0
        self.dest.write_bmp(bmp);
848
0
        self.dest
849
0
    }
850
    #[inline(always)]
851
0
    pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
852
0
        self.dest.write_bmp_excl_ascii(bmp);
853
0
        self.dest
854
0
    }
855
    #[inline(always)]
856
0
    pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
857
0
        self.dest.write_mid_bmp(bmp);
858
0
        self.dest
859
0
    }
860
    #[inline(always)]
861
0
    pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
862
0
        self.dest.write_upper_bmp(bmp);
863
0
        self.dest
864
0
    }
865
    #[inline(always)]
866
0
    pub fn commit(self) -> &'a mut Utf8Destination<'b> {
867
0
        self.dest
868
0
    }
869
}
870
871
pub struct Utf8AstralHandle<'a, 'b>
872
where
873
    'b: 'a,
874
{
875
    dest: &'a mut Utf8Destination<'b>,
876
}
877
878
impl<'a, 'b> Utf8AstralHandle<'a, 'b>
879
where
880
    'b: 'a,
881
{
882
    #[inline(always)]
883
0
    fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b> {
884
0
        Utf8AstralHandle { dest: dst }
885
0
    }
886
    #[inline(always)]
887
0
    pub fn written(&self) -> usize {
888
0
        self.dest.written()
889
0
    }
890
    #[inline(always)]
891
0
    pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
892
0
        self.dest.write_ascii(ascii);
893
0
        self.dest
894
0
    }
895
    #[inline(always)]
896
0
    pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
897
0
        self.dest.write_bmp(bmp);
898
0
        self.dest
899
0
    }
900
    #[inline(always)]
901
0
    pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
902
0
        self.dest.write_bmp_excl_ascii(bmp);
903
0
        self.dest
904
0
    }
905
    #[inline(always)]
906
0
    pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
907
0
        self.dest.write_upper_bmp(bmp);
908
0
        self.dest
909
0
    }
910
    #[inline(always)]
911
0
    pub fn write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b> {
912
0
        self.dest.write_astral(astral);
913
0
        self.dest
914
0
    }
915
    #[inline(always)]
916
0
    pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b> {
917
0
        self.dest.write_surrogate_pair(high, low);
918
0
        self.dest
919
0
    }
920
    #[inline(always)]
921
0
    pub fn write_big5_combination(
922
0
        self,
923
0
        combined: u16,
924
0
        combining: u16,
925
0
    ) -> &'a mut Utf8Destination<'b> {
926
0
        self.dest.write_big5_combination(combined, combining);
927
0
        self.dest
928
0
    }
929
    #[inline(always)]
930
0
    pub fn commit(self) -> &'a mut Utf8Destination<'b> {
931
0
        self.dest
932
0
    }
933
}
934
935
pub struct Utf8Destination<'a> {
936
    slice: &'a mut [u8],
937
    pos: usize,
938
}
939
940
impl<'a> Utf8Destination<'a> {
941
    #[inline(always)]
942
0
    pub fn new(dst: &mut [u8]) -> Utf8Destination {
943
0
        Utf8Destination { slice: dst, pos: 0 }
944
0
    }
945
    #[inline(always)]
946
0
    pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>> {
947
0
        if self.pos + 2 < self.slice.len() {
948
0
            Space::Available(Utf8BmpHandle::new(self))
949
        } else {
950
0
            Space::Full(self.written())
951
        }
952
0
    }
953
    #[inline(always)]
954
0
    pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>> {
955
0
        if self.pos + 3 < self.slice.len() {
956
0
            Space::Available(Utf8AstralHandle::new(self))
957
        } else {
958
0
            Space::Full(self.written())
959
        }
960
0
    }
961
    #[inline(always)]
962
0
    pub fn written(&self) -> usize {
963
0
        self.pos
964
0
    }
965
    #[inline(always)]
966
0
    fn write_code_unit(&mut self, u: u8) {
967
0
        unsafe {
968
0
            // OK, because we checked before handing out a handle.
969
0
            *(self.slice.get_unchecked_mut(self.pos)) = u;
970
0
        }
971
0
        self.pos += 1;
972
0
    }
973
    #[inline(always)]
974
0
    fn write_ascii(&mut self, ascii: u8) {
975
0
        debug_assert!(ascii < 0x80);
976
0
        self.write_code_unit(ascii);
977
0
    }
978
    #[inline(always)]
979
0
    fn write_bmp(&mut self, bmp: u16) {
980
0
        if bmp < 0x80u16 {
981
0
            self.write_ascii(bmp as u8);
982
0
        } else if bmp < 0x800u16 {
983
0
            self.write_mid_bmp(bmp);
984
0
        } else {
985
0
            self.write_upper_bmp(bmp);
986
0
        }
987
0
    }
988
    #[inline(always)]
989
0
    fn write_mid_bmp(&mut self, mid_bmp: u16) {
990
0
        debug_assert!(mid_bmp >= 0x80);
991
0
        debug_assert!(mid_bmp < 0x800);
992
0
        self.write_code_unit(((mid_bmp >> 6) | 0xC0) as u8);
993
0
        self.write_code_unit(((mid_bmp & 0x3F) | 0x80) as u8);
994
0
    }
995
    #[inline(always)]
996
0
    fn write_upper_bmp(&mut self, upper_bmp: u16) {
997
0
        debug_assert!(upper_bmp >= 0x800);
998
0
        self.write_code_unit(((upper_bmp >> 12) | 0xE0) as u8);
999
0
        self.write_code_unit((((upper_bmp & 0xFC0) >> 6) | 0x80) as u8);
1000
0
        self.write_code_unit(((upper_bmp & 0x3F) | 0x80) as u8);
1001
0
    }
1002
    #[inline(always)]
1003
0
    fn write_bmp_excl_ascii(&mut self, bmp: u16) {
1004
0
        if bmp < 0x800u16 {
1005
0
            self.write_mid_bmp(bmp);
1006
0
        } else {
1007
0
            self.write_upper_bmp(bmp);
1008
0
        }
1009
0
    }
1010
    #[inline(always)]
1011
0
    fn write_astral(&mut self, astral: u32) {
1012
0
        debug_assert!(astral > 0xFFFF);
1013
0
        debug_assert!(astral <= 0x10_FFFF);
1014
0
        self.write_code_unit(((astral >> 18) | 0xF0) as u8);
1015
0
        self.write_code_unit((((astral & 0x3F000) >> 12) | 0x80) as u8);
1016
0
        self.write_code_unit((((astral & 0xFC0) >> 6) | 0x80) as u8);
1017
0
        self.write_code_unit(((astral & 0x3F) | 0x80) as u8);
1018
0
    }
1019
    #[inline(always)]
1020
0
    pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
1021
0
        self.write_astral(
1022
0
            (u32::from(high) << 10) + u32::from(low)
1023
0
                - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1024
0
        );
1025
0
    }
1026
    #[inline(always)]
1027
0
    fn write_big5_combination(&mut self, combined: u16, combining: u16) {
1028
0
        self.write_mid_bmp(combined);
1029
0
        self.write_mid_bmp(combining);
1030
0
    }
1031
    #[inline(always)]
1032
0
    pub fn copy_ascii_from_check_space_bmp<'b>(
1033
0
        &'b mut self,
1034
0
        source: &mut ByteSource,
1035
0
    ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8BmpHandle<'b, 'a>)> {
1036
0
        let non_ascii_ret = {
1037
0
            let dst_len = self.slice.len();
1038
0
            let src_remaining = &source.slice[source.pos..];
1039
0
            let dst_remaining = &mut self.slice[self.pos..];
1040
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1041
0
                (DecoderResult::OutputFull, dst_remaining.len())
1042
            } else {
1043
0
                (DecoderResult::InputEmpty, src_remaining.len())
1044
            };
1045
0
            match unsafe {
1046
0
                ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1047
0
            } {
1048
                None => {
1049
0
                    source.pos += length;
1050
0
                    self.pos += length;
1051
0
                    return CopyAsciiResult::Stop((pending, source.pos, self.pos));
1052
                }
1053
0
                Some((non_ascii, consumed)) => {
1054
0
                    source.pos += consumed;
1055
0
                    self.pos += consumed;
1056
0
                    if self.pos + 2 < dst_len {
1057
0
                        source.pos += 1; // +1 for non_ascii
1058
0
                        non_ascii
1059
                    } else {
1060
0
                        return CopyAsciiResult::Stop((
1061
0
                            DecoderResult::OutputFull,
1062
0
                            source.pos,
1063
0
                            self.pos,
1064
0
                        ));
1065
                    }
1066
                }
1067
            }
1068
        };
1069
0
        CopyAsciiResult::GoOn((non_ascii_ret, Utf8BmpHandle::new(self)))
1070
0
    }
1071
    #[inline(always)]
1072
0
    pub fn copy_ascii_from_check_space_astral<'b>(
1073
0
        &'b mut self,
1074
0
        source: &mut ByteSource,
1075
0
    ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8AstralHandle<'b, 'a>)> {
1076
0
        let non_ascii_ret = {
1077
0
            let dst_len = self.slice.len();
1078
0
            let src_remaining = &source.slice[source.pos..];
1079
0
            let dst_remaining = &mut self.slice[self.pos..];
1080
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1081
0
                (DecoderResult::OutputFull, dst_remaining.len())
1082
            } else {
1083
0
                (DecoderResult::InputEmpty, src_remaining.len())
1084
            };
1085
0
            match unsafe {
1086
0
                ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1087
0
            } {
1088
                None => {
1089
0
                    source.pos += length;
1090
0
                    self.pos += length;
1091
0
                    return CopyAsciiResult::Stop((pending, source.pos, self.pos));
1092
                }
1093
0
                Some((non_ascii, consumed)) => {
1094
0
                    source.pos += consumed;
1095
0
                    self.pos += consumed;
1096
0
                    if self.pos + 3 < dst_len {
1097
0
                        source.pos += 1; // +1 for non_ascii
1098
0
                        non_ascii
1099
                    } else {
1100
0
                        return CopyAsciiResult::Stop((
1101
0
                            DecoderResult::OutputFull,
1102
0
                            source.pos,
1103
0
                            self.pos,
1104
0
                        ));
1105
                    }
1106
                }
1107
            }
1108
        };
1109
0
        CopyAsciiResult::GoOn((non_ascii_ret, Utf8AstralHandle::new(self)))
1110
0
    }
1111
    #[inline(always)]
1112
0
    pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
1113
0
        let src_remaining = &source.slice[source.pos..];
1114
0
        let dst_remaining = &mut self.slice[self.pos..];
1115
0
        let min_len = ::core::cmp::min(src_remaining.len(), dst_remaining.len());
1116
0
        // Validate first, then memcpy to let memcpy do its thing even for
1117
0
        // non-ASCII. (And potentially do something better than SSE2 for ASCII.)
1118
0
        let valid_len = utf8_valid_up_to(&src_remaining[..min_len]);
1119
0
        (&mut dst_remaining[..valid_len]).copy_from_slice(&src_remaining[..valid_len]);
1120
0
        source.pos += valid_len;
1121
0
        self.pos += valid_len;
1122
0
    }
1123
    #[inline(always)]
1124
0
    pub fn copy_utf16_from<E: Endian>(
1125
0
        &mut self,
1126
0
        source: &mut ByteSource,
1127
0
    ) -> Option<(usize, usize)> {
1128
0
        let src_remaining = &source.slice[source.pos..];
1129
0
        let dst_remaining = &mut self.slice[self.pos..];
1130
0
1131
0
        let mut src_unaligned =
1132
0
            unsafe { UnalignedU16Slice::new(src_remaining.as_ptr(), src_remaining.len() / 2) };
1133
0
        if src_unaligned.len() == 0 {
1134
0
            return None;
1135
0
        }
1136
0
        let mut last_unit = src_unaligned.at(src_unaligned.len() - 1);
1137
0
        if E::OPPOSITE_ENDIAN {
1138
0
            last_unit = last_unit.swap_bytes();
1139
0
        }
1140
0
        if super::in_range16(last_unit, 0xD800, 0xDC00) {
1141
0
            // Last code unit is a high surrogate. It might
1142
0
            // legitimately form a pair later, so let's not
1143
0
            // include it.
1144
0
            src_unaligned.trim_last();
1145
0
        }
1146
0
        let (read, written, had_error) =
1147
0
            convert_unaligned_utf16_to_utf8::<E>(src_unaligned, dst_remaining);
1148
0
        source.pos += read * 2;
1149
0
        self.pos += written;
1150
0
        if had_error {
1151
0
            Some((source.pos, self.pos))
1152
        } else {
1153
0
            None
1154
        }
1155
0
    }
Unexecuted instantiation: <encoding_rs::handles::Utf8Destination>::copy_utf16_from::<encoding_rs::handles::LittleEndian>
Unexecuted instantiation: <encoding_rs::handles::Utf8Destination>::copy_utf16_from::<encoding_rs::handles::BigEndian>
1156
}
1157
1158
// UTF-16 source
1159
1160
pub struct Utf16Source<'a> {
1161
    slice: &'a [u16],
1162
    pos: usize,
1163
    old_pos: usize,
1164
}
1165
1166
impl<'a> Utf16Source<'a> {
1167
    #[inline(always)]
1168
0
    pub fn new(src: &[u16]) -> Utf16Source {
1169
0
        Utf16Source {
1170
0
            slice: src,
1171
0
            pos: 0,
1172
0
            old_pos: 0,
1173
0
        }
1174
0
    }
1175
    #[inline(always)]
1176
0
    pub fn check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>> {
1177
0
        if self.pos < self.slice.len() {
1178
0
            Space::Available(Utf16ReadHandle::new(self))
1179
        } else {
1180
0
            Space::Full(self.consumed())
1181
        }
1182
0
    }
1183
    #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1184
    #[inline(always)]
1185
0
    fn read(&mut self) -> char {
1186
0
        self.old_pos = self.pos;
1187
0
        let unit = self.slice[self.pos];
1188
0
        self.pos += 1;
1189
0
        let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1190
0
        if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1191
0
            return unsafe { ::core::char::from_u32_unchecked(u32::from(unit)) };
1192
0
        }
1193
0
        if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1194
            // high surrogate
1195
0
            if self.pos < self.slice.len() {
1196
0
                let second = self.slice[self.pos];
1197
0
                let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1198
0
                if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1199
                    // The next code unit is a low surrogate. Advance position.
1200
0
                    self.pos += 1;
1201
0
                    return unsafe {
1202
0
                        ::core::char::from_u32_unchecked(
1203
0
                            (u32::from(unit) << 10) + u32::from(second)
1204
0
                                - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1205
0
                        )
1206
                    };
1207
0
                }
1208
                // The next code unit is not a low surrogate. Don't advance
1209
                // position and treat the high surrogate as unpaired.
1210
                // fall through
1211
0
            }
1212
            // Unpaired surrogate at the end of buffer, fall through
1213
0
        }
1214
        // Unpaired low surrogate
1215
0
        '\u{FFFD}'
1216
0
    }
1217
    #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1218
    #[inline(always)]
1219
0
    fn read_enum(&mut self) -> Unicode {
1220
0
        self.old_pos = self.pos;
1221
0
        let unit = self.slice[self.pos];
1222
0
        self.pos += 1;
1223
0
        if unit < 0x80 {
1224
0
            return Unicode::Ascii(unit as u8);
1225
0
        }
1226
0
        let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1227
0
        if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1228
0
            return Unicode::NonAscii(NonAscii::BmpExclAscii(unit));
1229
0
        }
1230
0
        if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1231
            // high surrogate
1232
0
            if self.pos < self.slice.len() {
1233
0
                let second = self.slice[self.pos];
1234
0
                let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1235
0
                if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1236
                    // The next code unit is a low surrogate. Advance position.
1237
0
                    self.pos += 1;
1238
0
                    return Unicode::NonAscii(NonAscii::Astral(unsafe {
1239
0
                        ::core::char::from_u32_unchecked(
1240
0
                            (u32::from(unit) << 10) + u32::from(second)
1241
0
                                - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1242
0
                        )
1243
0
                    }));
1244
0
                }
1245
                // The next code unit is not a low surrogate. Don't advance
1246
                // position and treat the high surrogate as unpaired.
1247
                // fall through
1248
0
            }
1249
            // Unpaired surrogate at the end of buffer, fall through
1250
0
        }
1251
        // Unpaired low surrogate
1252
0
        Unicode::NonAscii(NonAscii::BmpExclAscii(0xFFFDu16))
1253
0
    }
1254
    #[inline(always)]
1255
0
    fn unread(&mut self) -> usize {
1256
0
        self.pos = self.old_pos;
1257
0
        self.pos
1258
0
    }
1259
    #[inline(always)]
1260
0
    pub fn consumed(&self) -> usize {
1261
0
        self.pos
1262
0
    }
1263
    #[inline(always)]
1264
0
    pub fn copy_ascii_to_check_space_two<'b>(
1265
0
        &mut self,
1266
0
        dest: &'b mut ByteDestination<'a>,
1267
0
    ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
1268
0
        let non_ascii_ret = {
1269
0
            let dst_len = dest.slice.len();
1270
0
            let src_remaining = &self.slice[self.pos..];
1271
0
            let dst_remaining = &mut dest.slice[dest.pos..];
1272
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1273
0
                (EncoderResult::OutputFull, dst_remaining.len())
1274
            } else {
1275
0
                (EncoderResult::InputEmpty, src_remaining.len())
1276
            };
1277
0
            match unsafe {
1278
0
                basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1279
0
            } {
1280
                None => {
1281
0
                    self.pos += length;
1282
0
                    dest.pos += length;
1283
0
                    return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1284
                }
1285
0
                Some((non_ascii, consumed)) => {
1286
0
                    self.pos += consumed;
1287
0
                    dest.pos += consumed;
1288
0
                    if dest.pos + 1 < dst_len {
1289
0
                        self.pos += 1; // commit to reading `non_ascii`
1290
0
                        let unit = non_ascii;
1291
0
                        let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1292
0
                        if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1293
0
                            NonAscii::BmpExclAscii(unit)
1294
0
                        } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1295
                            // high surrogate
1296
0
                            if self.pos < self.slice.len() {
1297
0
                                let second = self.slice[self.pos];
1298
0
                                let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1299
0
                                if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1300
                                    // The next code unit is a low surrogate. Advance position.
1301
0
                                    self.pos += 1;
1302
0
                                    NonAscii::Astral(unsafe {
1303
0
                                        ::core::char::from_u32_unchecked(
1304
0
                                            (u32::from(unit) << 10) + u32::from(second)
1305
0
                                                - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1306
0
                                        )
1307
0
                                    })
1308
                                } else {
1309
                                    // The next code unit is not a low surrogate. Don't advance
1310
                                    // position and treat the high surrogate as unpaired.
1311
0
                                    NonAscii::BmpExclAscii(0xFFFDu16)
1312
                                }
1313
                            } else {
1314
                                // Unpaired surrogate at the end of the buffer.
1315
0
                                NonAscii::BmpExclAscii(0xFFFDu16)
1316
                            }
1317
                        } else {
1318
                            // Unpaired low surrogate
1319
0
                            NonAscii::BmpExclAscii(0xFFFDu16)
1320
                        }
1321
                    } else {
1322
0
                        return CopyAsciiResult::Stop((
1323
0
                            EncoderResult::OutputFull,
1324
0
                            self.pos,
1325
0
                            dest.pos,
1326
0
                        ));
1327
                    }
1328
                }
1329
            }
1330
        };
1331
0
        CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
1332
0
    }
1333
    #[inline(always)]
1334
0
    pub fn copy_ascii_to_check_space_four<'b>(
1335
0
        &mut self,
1336
0
        dest: &'b mut ByteDestination<'a>,
1337
0
    ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
1338
0
        let non_ascii_ret = {
1339
0
            let dst_len = dest.slice.len();
1340
0
            let src_remaining = &self.slice[self.pos..];
1341
0
            let dst_remaining = &mut dest.slice[dest.pos..];
1342
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1343
0
                (EncoderResult::OutputFull, dst_remaining.len())
1344
            } else {
1345
0
                (EncoderResult::InputEmpty, src_remaining.len())
1346
            };
1347
0
            match unsafe {
1348
0
                basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1349
0
            } {
1350
                None => {
1351
0
                    self.pos += length;
1352
0
                    dest.pos += length;
1353
0
                    return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1354
                }
1355
0
                Some((non_ascii, consumed)) => {
1356
0
                    self.pos += consumed;
1357
0
                    dest.pos += consumed;
1358
0
                    if dest.pos + 3 < dst_len {
1359
0
                        self.pos += 1; // commit to reading `non_ascii`
1360
0
                        let unit = non_ascii;
1361
0
                        let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1362
0
                        if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1363
0
                            NonAscii::BmpExclAscii(unit)
1364
0
                        } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1365
                            // high surrogate
1366
0
                            if self.pos == self.slice.len() {
1367
                                // Unpaired surrogate at the end of the buffer.
1368
0
                                NonAscii::BmpExclAscii(0xFFFDu16)
1369
                            } else {
1370
0
                                let second = self.slice[self.pos];
1371
0
                                let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1372
0
                                if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1373
                                    // The next code unit is a low surrogate. Advance position.
1374
0
                                    self.pos += 1;
1375
0
                                    NonAscii::Astral(unsafe {
1376
0
                                        ::core::char::from_u32_unchecked(
1377
0
                                            (u32::from(unit) << 10) + u32::from(second)
1378
0
                                                - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
1379
0
                                        )
1380
0
                                    })
1381
                                } else {
1382
                                    // The next code unit is not a low surrogate. Don't advance
1383
                                    // position and treat the high surrogate as unpaired.
1384
0
                                    NonAscii::BmpExclAscii(0xFFFDu16)
1385
                                }
1386
                            }
1387
                        } else {
1388
                            // Unpaired low surrogate
1389
0
                            NonAscii::BmpExclAscii(0xFFFDu16)
1390
                        }
1391
                    } else {
1392
0
                        return CopyAsciiResult::Stop((
1393
0
                            EncoderResult::OutputFull,
1394
0
                            self.pos,
1395
0
                            dest.pos,
1396
0
                        ));
1397
                    }
1398
                }
1399
            }
1400
        };
1401
0
        CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
1402
0
    }
1403
}
1404
1405
pub struct Utf16ReadHandle<'a, 'b>
1406
where
1407
    'b: 'a,
1408
{
1409
    source: &'a mut Utf16Source<'b>,
1410
}
1411
1412
impl<'a, 'b> Utf16ReadHandle<'a, 'b>
1413
where
1414
    'b: 'a,
1415
{
1416
    #[inline(always)]
1417
0
    fn new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b> {
1418
0
        Utf16ReadHandle { source: src }
1419
0
    }
1420
    #[inline(always)]
1421
0
    pub fn read(self) -> (char, Utf16UnreadHandle<'a, 'b>) {
1422
0
        let character = self.source.read();
1423
0
        let handle = Utf16UnreadHandle::new(self.source);
1424
0
        (character, handle)
1425
0
    }
1426
    #[inline(always)]
1427
0
    pub fn read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>) {
1428
0
        let character = self.source.read_enum();
1429
0
        let handle = Utf16UnreadHandle::new(self.source);
1430
0
        (character, handle)
1431
0
    }
1432
    #[inline(always)]
1433
0
    pub fn consumed(&self) -> usize {
1434
0
        self.source.consumed()
1435
0
    }
1436
}
1437
1438
pub struct Utf16UnreadHandle<'a, 'b>
1439
where
1440
    'b: 'a,
1441
{
1442
    source: &'a mut Utf16Source<'b>,
1443
}
1444
1445
impl<'a, 'b> Utf16UnreadHandle<'a, 'b>
1446
where
1447
    'b: 'a,
1448
{
1449
    #[inline(always)]
1450
0
    fn new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b> {
1451
0
        Utf16UnreadHandle { source: src }
1452
0
    }
1453
    #[inline(always)]
1454
0
    pub fn unread(self) -> usize {
1455
0
        self.source.unread()
1456
0
    }
1457
    #[inline(always)]
1458
0
    pub fn consumed(&self) -> usize {
1459
0
        self.source.consumed()
1460
0
    }
1461
    #[inline(always)]
1462
0
    pub fn commit(self) -> &'a mut Utf16Source<'b> {
1463
0
        self.source
1464
0
    }
1465
}
1466
1467
// UTF-8 source
1468
1469
pub struct Utf8Source<'a> {
1470
    slice: &'a [u8],
1471
    pos: usize,
1472
    old_pos: usize,
1473
}
1474
1475
impl<'a> Utf8Source<'a> {
1476
    #[inline(always)]
1477
0
    pub fn new(src: &str) -> Utf8Source {
1478
0
        Utf8Source {
1479
0
            slice: src.as_bytes(),
1480
0
            pos: 0,
1481
0
            old_pos: 0,
1482
0
        }
1483
0
    }
1484
    #[inline(always)]
1485
0
    pub fn check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>> {
1486
0
        if self.pos < self.slice.len() {
1487
0
            Space::Available(Utf8ReadHandle::new(self))
1488
        } else {
1489
0
            Space::Full(self.consumed())
1490
        }
1491
0
    }
1492
    #[inline(always)]
1493
0
    fn read(&mut self) -> char {
1494
0
        self.old_pos = self.pos;
1495
0
        let unit = self.slice[self.pos];
1496
0
        if unit < 0x80 {
1497
0
            self.pos += 1;
1498
0
            return char::from(unit);
1499
0
        }
1500
0
        if unit < 0xE0 {
1501
0
            let point =
1502
0
                ((u32::from(unit) & 0x1F) << 6) | (u32::from(self.slice[self.pos + 1]) & 0x3F);
1503
0
            self.pos += 2;
1504
0
            return unsafe { ::core::char::from_u32_unchecked(point) };
1505
0
        }
1506
0
        if unit < 0xF0 {
1507
0
            let point = ((u32::from(unit) & 0xF) << 12)
1508
0
                | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1509
0
                | (u32::from(self.slice[self.pos + 2]) & 0x3F);
1510
0
            self.pos += 3;
1511
0
            return unsafe { ::core::char::from_u32_unchecked(point) };
1512
0
        }
1513
0
        let point = ((u32::from(unit) & 0x7) << 18)
1514
0
            | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1515
0
            | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1516
0
            | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1517
0
        self.pos += 4;
1518
0
        unsafe { ::core::char::from_u32_unchecked(point) }
1519
0
    }
1520
    #[inline(always)]
1521
0
    fn read_enum(&mut self) -> Unicode {
1522
0
        self.old_pos = self.pos;
1523
0
        let unit = self.slice[self.pos];
1524
0
        if unit < 0x80 {
1525
0
            self.pos += 1;
1526
0
            return Unicode::Ascii(unit);
1527
0
        }
1528
0
        if unit < 0xE0 {
1529
0
            let point =
1530
0
                ((u16::from(unit) & 0x1F) << 6) | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1531
0
            self.pos += 2;
1532
0
            return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1533
0
        }
1534
0
        if unit < 0xF0 {
1535
0
            let point = ((u16::from(unit) & 0xF) << 12)
1536
0
                | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1537
0
                | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1538
0
            self.pos += 3;
1539
0
            return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1540
0
        }
1541
0
        let point = ((u32::from(unit) & 0x7) << 18)
1542
0
            | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1543
0
            | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1544
0
            | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1545
0
        self.pos += 4;
1546
0
        Unicode::NonAscii(NonAscii::Astral(unsafe {
1547
0
            ::core::char::from_u32_unchecked(point)
1548
0
        }))
1549
0
    }
1550
    #[inline(always)]
1551
0
    fn unread(&mut self) -> usize {
1552
0
        self.pos = self.old_pos;
1553
0
        self.pos
1554
0
    }
1555
    #[inline(always)]
1556
0
    pub fn consumed(&self) -> usize {
1557
0
        self.pos
1558
0
    }
1559
    #[inline(always)]
1560
0
    pub fn copy_ascii_to_check_space_one<'b>(
1561
0
        &mut self,
1562
0
        dest: &'b mut ByteDestination<'a>,
1563
0
    ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteOneHandle<'b, 'a>)> {
1564
0
        let non_ascii_ret = {
1565
0
            let src_remaining = &self.slice[self.pos..];
1566
0
            let dst_remaining = &mut dest.slice[dest.pos..];
1567
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1568
0
                (EncoderResult::OutputFull, dst_remaining.len())
1569
            } else {
1570
0
                (EncoderResult::InputEmpty, src_remaining.len())
1571
            };
1572
0
            match unsafe {
1573
0
                ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1574
0
            } {
1575
                None => {
1576
0
                    self.pos += length;
1577
0
                    dest.pos += length;
1578
0
                    return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1579
                }
1580
0
                Some((non_ascii, consumed)) => {
1581
0
                    self.pos += consumed;
1582
0
                    dest.pos += consumed;
1583
0
                    // We don't need to check space in destination, because
1584
0
                    // `ascii_to_ascii()` already did.
1585
0
                    if non_ascii < 0xE0 {
1586
0
                        let point = ((u16::from(non_ascii) & 0x1F) << 6)
1587
0
                            | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1588
0
                        self.pos += 2;
1589
0
                        NonAscii::BmpExclAscii(point)
1590
0
                    } else if non_ascii < 0xF0 {
1591
0
                        let point = ((u16::from(non_ascii) & 0xF) << 12)
1592
0
                            | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1593
0
                            | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1594
0
                        self.pos += 3;
1595
0
                        NonAscii::BmpExclAscii(point)
1596
                    } else {
1597
0
                        let point = ((u32::from(non_ascii) & 0x7) << 18)
1598
0
                            | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1599
0
                            | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1600
0
                            | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1601
0
                        self.pos += 4;
1602
0
                        NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
1603
                    }
1604
                }
1605
            }
1606
        };
1607
0
        CopyAsciiResult::GoOn((non_ascii_ret, ByteOneHandle::new(dest)))
1608
0
    }
1609
    #[inline(always)]
1610
0
    pub fn copy_ascii_to_check_space_two<'b>(
1611
0
        &mut self,
1612
0
        dest: &'b mut ByteDestination<'a>,
1613
0
    ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
1614
0
        let non_ascii_ret = {
1615
0
            let dst_len = dest.slice.len();
1616
0
            let src_remaining = &self.slice[self.pos..];
1617
0
            let dst_remaining = &mut dest.slice[dest.pos..];
1618
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1619
0
                (EncoderResult::OutputFull, dst_remaining.len())
1620
            } else {
1621
0
                (EncoderResult::InputEmpty, src_remaining.len())
1622
            };
1623
0
            match unsafe {
1624
0
                ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1625
0
            } {
1626
                None => {
1627
0
                    self.pos += length;
1628
0
                    dest.pos += length;
1629
0
                    return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1630
                }
1631
0
                Some((non_ascii, consumed)) => {
1632
0
                    self.pos += consumed;
1633
0
                    dest.pos += consumed;
1634
0
                    if dest.pos + 1 < dst_len {
1635
0
                        if non_ascii < 0xE0 {
1636
0
                            let point = ((u16::from(non_ascii) & 0x1F) << 6)
1637
0
                                | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1638
0
                            self.pos += 2;
1639
0
                            NonAscii::BmpExclAscii(point)
1640
0
                        } else if non_ascii < 0xF0 {
1641
0
                            let point = ((u16::from(non_ascii) & 0xF) << 12)
1642
0
                                | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1643
0
                                | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1644
0
                            self.pos += 3;
1645
0
                            NonAscii::BmpExclAscii(point)
1646
                        } else {
1647
0
                            let point = ((u32::from(non_ascii) & 0x7) << 18)
1648
0
                                | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1649
0
                                | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1650
0
                                | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1651
0
                            self.pos += 4;
1652
0
                            NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
1653
                        }
1654
                    } else {
1655
0
                        return CopyAsciiResult::Stop((
1656
0
                            EncoderResult::OutputFull,
1657
0
                            self.pos,
1658
0
                            dest.pos,
1659
0
                        ));
1660
                    }
1661
                }
1662
            }
1663
        };
1664
0
        CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
1665
0
    }
1666
    #[inline(always)]
1667
0
    pub fn copy_ascii_to_check_space_four<'b>(
1668
0
        &mut self,
1669
0
        dest: &'b mut ByteDestination<'a>,
1670
0
    ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
1671
0
        let non_ascii_ret = {
1672
0
            let dst_len = dest.slice.len();
1673
0
            let src_remaining = &self.slice[self.pos..];
1674
0
            let dst_remaining = &mut dest.slice[dest.pos..];
1675
0
            let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1676
0
                (EncoderResult::OutputFull, dst_remaining.len())
1677
            } else {
1678
0
                (EncoderResult::InputEmpty, src_remaining.len())
1679
            };
1680
0
            match unsafe {
1681
0
                ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1682
0
            } {
1683
                None => {
1684
0
                    self.pos += length;
1685
0
                    dest.pos += length;
1686
0
                    return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1687
                }
1688
0
                Some((non_ascii, consumed)) => {
1689
0
                    self.pos += consumed;
1690
0
                    dest.pos += consumed;
1691
0
                    if dest.pos + 3 < dst_len {
1692
0
                        if non_ascii < 0xE0 {
1693
0
                            let point = ((u16::from(non_ascii) & 0x1F) << 6)
1694
0
                                | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1695
0
                            self.pos += 2;
1696
0
                            NonAscii::BmpExclAscii(point)
1697
0
                        } else if non_ascii < 0xF0 {
1698
0
                            let point = ((u16::from(non_ascii) & 0xF) << 12)
1699
0
                                | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1700
0
                                | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1701
0
                            self.pos += 3;
1702
0
                            NonAscii::BmpExclAscii(point)
1703
                        } else {
1704
0
                            let point = ((u32::from(non_ascii) & 0x7) << 18)
1705
0
                                | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1706
0
                                | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1707
0
                                | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1708
0
                            self.pos += 4;
1709
0
                            NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
1710
                        }
1711
                    } else {
1712
0
                        return CopyAsciiResult::Stop((
1713
0
                            EncoderResult::OutputFull,
1714
0
                            self.pos,
1715
0
                            dest.pos,
1716
0
                        ));
1717
                    }
1718
                }
1719
            }
1720
        };
1721
0
        CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
1722
0
    }
1723
}
1724
1725
pub struct Utf8ReadHandle<'a, 'b>
1726
where
1727
    'b: 'a,
1728
{
1729
    source: &'a mut Utf8Source<'b>,
1730
}
1731
1732
impl<'a, 'b> Utf8ReadHandle<'a, 'b>
1733
where
1734
    'b: 'a,
1735
{
1736
    #[inline(always)]
1737
0
    fn new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> {
1738
0
        Utf8ReadHandle { source: src }
1739
0
    }
1740
    #[inline(always)]
1741
0
    pub fn read(self) -> (char, Utf8UnreadHandle<'a, 'b>) {
1742
0
        let character = self.source.read();
1743
0
        let handle = Utf8UnreadHandle::new(self.source);
1744
0
        (character, handle)
1745
0
    }
1746
    #[inline(always)]
1747
0
    pub fn read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>) {
1748
0
        let character = self.source.read_enum();
1749
0
        let handle = Utf8UnreadHandle::new(self.source);
1750
0
        (character, handle)
1751
0
    }
1752
    #[inline(always)]
1753
0
    pub fn consumed(&self) -> usize {
1754
0
        self.source.consumed()
1755
0
    }
1756
}
1757
1758
pub struct Utf8UnreadHandle<'a, 'b>
1759
where
1760
    'b: 'a,
1761
{
1762
    source: &'a mut Utf8Source<'b>,
1763
}
1764
1765
impl<'a, 'b> Utf8UnreadHandle<'a, 'b>
1766
where
1767
    'b: 'a,
1768
{
1769
    #[inline(always)]
1770
0
    fn new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b> {
1771
0
        Utf8UnreadHandle { source: src }
1772
0
    }
1773
    #[inline(always)]
1774
0
    pub fn unread(self) -> usize {
1775
0
        self.source.unread()
1776
0
    }
1777
    #[inline(always)]
1778
0
    pub fn consumed(&self) -> usize {
1779
0
        self.source.consumed()
1780
0
    }
1781
    #[inline(always)]
1782
0
    pub fn commit(self) -> &'a mut Utf8Source<'b> {
1783
0
        self.source
1784
0
    }
1785
}
1786
1787
// Byte destination
1788
1789
pub struct ByteOneHandle<'a, 'b>
1790
where
1791
    'b: 'a,
1792
{
1793
    dest: &'a mut ByteDestination<'b>,
1794
}
1795
1796
impl<'a, 'b> ByteOneHandle<'a, 'b>
1797
where
1798
    'b: 'a,
1799
{
1800
    #[inline(always)]
1801
0
    fn new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b> {
1802
0
        ByteOneHandle { dest: dst }
1803
0
    }
1804
    #[inline(always)]
1805
0
    pub fn written(&self) -> usize {
1806
0
        self.dest.written()
1807
0
    }
1808
    #[inline(always)]
1809
0
    pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1810
0
        self.dest.write_one(first);
1811
0
        self.dest
1812
0
    }
1813
}
1814
1815
pub struct ByteTwoHandle<'a, 'b>
1816
where
1817
    'b: 'a,
1818
{
1819
    dest: &'a mut ByteDestination<'b>,
1820
}
1821
1822
impl<'a, 'b> ByteTwoHandle<'a, 'b>
1823
where
1824
    'b: 'a,
1825
{
1826
    #[inline(always)]
1827
0
    fn new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b> {
1828
0
        ByteTwoHandle { dest: dst }
1829
0
    }
1830
    #[inline(always)]
1831
0
    pub fn written(&self) -> usize {
1832
0
        self.dest.written()
1833
0
    }
1834
    #[inline(always)]
1835
0
    pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1836
0
        self.dest.write_one(first);
1837
0
        self.dest
1838
0
    }
1839
    #[inline(always)]
1840
0
    pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1841
0
        self.dest.write_two(first, second);
1842
0
        self.dest
1843
0
    }
1844
}
1845
1846
pub struct ByteThreeHandle<'a, 'b>
1847
where
1848
    'b: 'a,
1849
{
1850
    dest: &'a mut ByteDestination<'b>,
1851
}
1852
1853
impl<'a, 'b> ByteThreeHandle<'a, 'b>
1854
where
1855
    'b: 'a,
1856
{
1857
    #[inline(always)]
1858
0
    fn new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b> {
1859
0
        ByteThreeHandle { dest: dst }
1860
0
    }
1861
    #[inline(always)]
1862
0
    pub fn written(&self) -> usize {
1863
0
        self.dest.written()
1864
0
    }
1865
    #[inline(always)]
1866
0
    pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1867
0
        self.dest.write_one(first);
1868
0
        self.dest
1869
0
    }
1870
    #[inline(always)]
1871
0
    pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1872
0
        self.dest.write_two(first, second);
1873
0
        self.dest
1874
0
    }
1875
    #[inline(always)]
1876
0
    pub fn write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b> {
1877
0
        self.dest.write_three(first, second, third);
1878
0
        self.dest
1879
0
    }
1880
    #[inline(always)]
1881
0
    pub fn write_three_return_written(self, first: u8, second: u8, third: u8) -> usize {
1882
0
        self.dest.write_three(first, second, third);
1883
0
        self.dest.written()
1884
0
    }
1885
}
1886
1887
pub struct ByteFourHandle<'a, 'b>
1888
where
1889
    'b: 'a,
1890
{
1891
    dest: &'a mut ByteDestination<'b>,
1892
}
1893
1894
impl<'a, 'b> ByteFourHandle<'a, 'b>
1895
where
1896
    'b: 'a,
1897
{
1898
    #[inline(always)]
1899
0
    fn new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b> {
1900
0
        ByteFourHandle { dest: dst }
1901
0
    }
1902
    #[inline(always)]
1903
0
    pub fn written(&self) -> usize {
1904
0
        self.dest.written()
1905
0
    }
1906
    #[inline(always)]
1907
0
    pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1908
0
        self.dest.write_one(first);
1909
0
        self.dest
1910
0
    }
1911
    #[inline(always)]
1912
0
    pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1913
0
        self.dest.write_two(first, second);
1914
0
        self.dest
1915
0
    }
1916
    #[inline(always)]
1917
0
    pub fn write_four(
1918
0
        self,
1919
0
        first: u8,
1920
0
        second: u8,
1921
0
        third: u8,
1922
0
        fourth: u8,
1923
0
    ) -> &'a mut ByteDestination<'b> {
1924
0
        self.dest.write_four(first, second, third, fourth);
1925
0
        self.dest
1926
0
    }
1927
}
1928
1929
pub struct ByteDestination<'a> {
1930
    slice: &'a mut [u8],
1931
    pos: usize,
1932
}
1933
1934
impl<'a> ByteDestination<'a> {
1935
    #[inline(always)]
1936
0
    pub fn new(dst: &mut [u8]) -> ByteDestination {
1937
0
        ByteDestination { slice: dst, pos: 0 }
1938
0
    }
1939
    #[inline(always)]
1940
0
    pub fn check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>> {
1941
0
        if self.pos < self.slice.len() {
1942
0
            Space::Available(ByteOneHandle::new(self))
1943
        } else {
1944
0
            Space::Full(self.written())
1945
        }
1946
0
    }
1947
    #[inline(always)]
1948
0
    pub fn check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>> {
1949
0
        if self.pos + 1 < self.slice.len() {
1950
0
            Space::Available(ByteTwoHandle::new(self))
1951
        } else {
1952
0
            Space::Full(self.written())
1953
        }
1954
0
    }
1955
    #[inline(always)]
1956
0
    pub fn check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>> {
1957
0
        if self.pos + 2 < self.slice.len() {
1958
0
            Space::Available(ByteThreeHandle::new(self))
1959
        } else {
1960
0
            Space::Full(self.written())
1961
        }
1962
0
    }
1963
    #[inline(always)]
1964
0
    pub fn check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>> {
1965
0
        if self.pos + 3 < self.slice.len() {
1966
0
            Space::Available(ByteFourHandle::new(self))
1967
        } else {
1968
0
            Space::Full(self.written())
1969
        }
1970
0
    }
1971
    #[inline(always)]
1972
0
    pub fn written(&self) -> usize {
1973
0
        self.pos
1974
0
    }
1975
    #[inline(always)]
1976
0
    fn write_one(&mut self, first: u8) {
1977
0
        self.slice[self.pos] = first;
1978
0
        self.pos += 1;
1979
0
    }
1980
    #[inline(always)]
1981
0
    fn write_two(&mut self, first: u8, second: u8) {
1982
0
        self.slice[self.pos] = first;
1983
0
        self.slice[self.pos + 1] = second;
1984
0
        self.pos += 2;
1985
0
    }
1986
    #[inline(always)]
1987
0
    fn write_three(&mut self, first: u8, second: u8, third: u8) {
1988
0
        self.slice[self.pos] = first;
1989
0
        self.slice[self.pos + 1] = second;
1990
0
        self.slice[self.pos + 2] = third;
1991
0
        self.pos += 3;
1992
0
    }
1993
    #[inline(always)]
1994
0
    fn write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8) {
1995
0
        self.slice[self.pos] = first;
1996
0
        self.slice[self.pos + 1] = second;
1997
0
        self.slice[self.pos + 2] = third;
1998
0
        self.slice[self.pos + 3] = fourth;
1999
0
        self.pos += 4;
2000
0
    }
2001
}