/rust/registry/src/index.crates.io-6f17d22bba15001f/encoding_rs-0.8.35/src/big5.rs

Source (jump to first uncovered line)
// Copyright Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use super::*;
use crate::data::*;
use crate::handles::*;
use crate::variant::*;
// Rust 1.14.0 requires the following despite the asterisk above.
use super::in_inclusive_range32;

/// Streaming decoder state for Big5 input.
///
/// The only state is an optional pending lead byte; the decoder is in its
/// neutral state exactly when `lead` is `None` (see `in_neutral_state`).
pub struct Big5Decoder {
    // Lead-byte state carried over between calls while a two-byte sequence
    // is incomplete. NOTE(review): whether this holds the raw lead byte or
    // the offset-adjusted value is decided by the decoder macro — confirm
    // there before relying on it.
    lead: Option<u8>,
}

impl Big5Decoder {
    /// Builds a Big5 decoder with no pending lead byte and wraps it in the
    /// `VariantDecoder::Big5` variant.
    pub fn new() -> VariantDecoder {
        let fresh = Big5Decoder { lead: None };
        VariantDecoder::Big5(fresh)
    }

    /// Returns `true` when no lead byte is pending, i.e. the decoder is not
    /// in the middle of a two-byte sequence.
    pub fn in_neutral_state(&self) -> bool {
        match self.lead {
            None => true,
            Some(_) => false,
        }
    }

    /// Adds one to `byte_length` when a lead byte is pending, so that the
    /// pending byte is counted as input. Returns `None` if the addition
    /// overflows `usize`.
    fn plus_one_if_lead(&self, byte_length: usize) -> Option<usize> {
        let pending = if self.lead.is_some() { 1 } else { 0 };
        byte_length.checked_add(pending)
    }

    /// Worst-case number of UTF-16 code units needed to decode
    /// `byte_length` more input bytes, taking a pending lead into account.
    ///
    /// Returns `None` when the pending-lead adjustment overflows `usize`
    /// (the `checked_add` helper is presumably overflow-checked as well).
    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
        // A pending lead followed by a byte that is not a valid trail yields
        // an error for the lead alone (+1, via `plus_one_if_lead`). The next
        // iteration then checks for space again and needs one extra unit
        // because the output may be an astral character or a combining pair.
        let with_pending_lead = self.plus_one_if_lead(byte_length);
        checked_add(1, with_pending_lead)
    }

    /// Worst-case number of UTF-8 bytes needed to decode `byte_length` more
    /// input bytes when malformed sequences are not replaced.
    ///
    /// Returns `None` when the pending-lead adjustment overflows `usize`
    /// (the `checked_add`/`checked_mul` helpers are presumably
    /// overflow-checked as well).
    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
        // REPLACEMENT CHARACTERs do not have to be budgeted for here.
        //
        // Expansion by input shape:
        //   * ASCII: 1 byte in, 1 byte out.
        //   * Valid pair: 2 bytes in, 2, 3 or 4 bytes out (worst case 2 to 4).
        //   * Lead already pending and the first input byte is its trail:
        //     1 byte in, up to 4 bytes out.
        //
        // Space check for the very last input byte:
        //   * No lead pending: the byte has to be ASCII (anything else is a
        //     fatal error), so 1 to 1.
        //   * Lead pending: room for 4 bytes was already verified when the
        //     lead was read, so the final lead and trail together are still
        //     worst case 2 to 4.
        //
        // Because a pending lead plus a single trail byte can emit 4 bytes,
        // the pending lead is counted as an input byte before doubling.
        //
        // The trailing +2 makes sure that any non-empty input gets at least
        // 4 bytes of output space.
        let effective_input = self.plus_one_if_lead(byte_length);
        let doubled = checked_mul(2, effective_input);
        checked_add(2, doubled)
    }

    /// Worst-case number of UTF-8 bytes needed to decode `byte_length` more
    /// input bytes when malformed sequences are replaced.
    ///
    /// Returns `None` when the pending-lead adjustment overflows `usize`
    /// (the `checked_add`/`checked_mul` helpers are presumably
    /// overflow-checked as well).
    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
        // A pending lead followed by a byte that is not a valid trail yields
        // an error for the lead (+(1*3), via `plus_one_if_lead`). The next
        // iteration checks for space again and needs +3 to cover a possible
        // astral character or combining pair. Between start and end, the
        // worst case is that every single byte is bad: *3.
        let effective_input = self.plus_one_if_lead(byte_length);
        let tripled = checked_mul(3, effective_input);
        checked_add(3, tripled)
    }

    // Generates the decoder's byte-processing code through the shared
    // `ascii_compatible_two_byte_decoder_functions!` macro (defined outside
    // this file). The first braced block validates a non-ASCII byte as a
    // lead; the second braced block handles the byte that follows a lead.
    // The remaining arguments are the identifiers, the loop label and the
    // handle helpers that those blocks refer to.
    // NOTE(review): the meaning of the trailing `false` flag and the exact
    // shape of the generated functions are defined by the macro — confirm
    // there.
    ascii_compatible_two_byte_decoder_functions!(
        {
            // If lead is between 0x81 and 0xFE, inclusive,
            // subtract offset 0x81.
            let non_ascii_minus_offset =
                non_ascii.wrapping_sub(0x81);
            // After the wrapping subtraction, anything outside 0x81..=0xFE
            // compares greater than the range width, so a single unsigned
            // comparison rejects bytes on both sides of the range.
            if non_ascii_minus_offset > (0xFE - 0x81) {
                return (DecoderResult::Malformed(1, 0),
                        source.consumed(),
                        handle.written());
            }
            non_ascii_minus_offset
        },
        {
            // If trail is between 0x40 and 0x7E, inclusive,
            // subtract offset 0x40. Else if trail is
            // between 0xA1 and 0xFE, inclusive, subtract
            // offset 0x62.
            // TODO: Find out which range is more probable.
            let mut trail_minus_offset =
                byte.wrapping_sub(0x40);
            if trail_minus_offset > (0x7E - 0x40) {
                let trail_minus_range_start =
                    byte.wrapping_sub(0xA1);
                if trail_minus_range_start >
                   (0xFE - 0xA1) {
                    // The byte is in neither trail range. A byte below 0x80
                    // is handed back with `unread()` and only the lead is
                    // reported (`Malformed(1, 0)`); otherwise both bytes are
                    // reported together (`Malformed(2, 0)`).
                    if byte < 0x80 {
                        return (DecoderResult::Malformed(1, 0),
                                unread_handle_trail.unread(),
                                handle.written());
                    }
                    return (DecoderResult::Malformed(2, 0),
                            unread_handle_trail.consumed(),
                            handle.written());
                }
                // Plain subtraction cannot underflow: the byte is known to
                // be in 0xA1..=0xFE here. 0xA1 - 0x62 == 0x3F, so the second
                // range continues directly after the first one (0x00..=0x3E).
                trail_minus_offset = byte - 0x62;
            }
            // 157 == 63 + 94 trail values per lead (0x40..=0x7E and
            // 0xA1..=0xFE).
            let pointer = lead_minus_offset as usize *
                          157usize +
                          trail_minus_offset as usize;
            // 942 == 6 * 157, i.e. the pointer of lead 0x87 / trail 0x40.
            // Pointers below that wrap around to a very large value;
            // presumably `big5_low_bits` returns 0 for such out-of-range
            // indices — verify against its definition.
            let rebased_pointer = pointer.wrapping_sub(942);
            let low_bits = big5_low_bits(rebased_pointer);
            if low_bits == 0 {
                // No single-code-point mapping. Four pointers are special
                // and decode to a base letter followed by a combining mark;
                // everything else is malformed.
                match pointer {
                    1133 => {
                        handle.write_big5_combination(0x00CAu16,
                                                      0x0304u16)
                    }
                    1135 => {
                        handle.write_big5_combination(0x00CAu16,
                                                      0x030Cu16)
                    }
                    1164 => {
                        handle.write_big5_combination(0x00EAu16,
                                                      0x0304u16)
                    }
                    1166 => {
                        handle.write_big5_combination(0x00EAu16,
                                                      0x030Cu16)
                    }
                    _ => {
                        // Same reporting rule as for an out-of-range trail:
                        // a byte below 0x80 is unread so that only the lead
                        // is flagged.
                        if byte < 0x80 {
                            return (DecoderResult::Malformed(1, 0),
                                    unread_handle_trail.unread(),
                                    handle.written());
                        }
                        return (DecoderResult::Malformed(2, 0),
                                unread_handle_trail.consumed(),
                                handle.written());
                    }
                }
            } else if big5_is_astral(rebased_pointer) {
                // `low_bits` carries only the low 16 bits of the code point;
                // OR-ing in 0x20000 places it in plane 2.
                handle.write_astral(u32::from(low_bits) |
                                    0x20000u32)
            } else {
                handle.write_bmp_excl_ascii(low_bits)
            }
        },
        self,
        non_ascii,
        byte,
        lead_minus_offset,
        unread_handle_trail,
        source,
        handle,
        'outermost,
        copy_ascii_from_check_space_astral,
        check_space_astral,
        false);
}

/// Big5 encoder. This is a unit struct, so it carries no state of its own
/// between calls.
pub struct Big5Encoder;

impl Big5Encoder {
    /// Builds an `Encoder` for `encoding` that is backed by the Big5
    /// encoder variant.
    pub fn new(encoding: &'static Encoding) -> Encoder {
        let variant = VariantEncoder::Big5(Big5Encoder);
        Encoder::new(encoding, variant)
    }

    /// Worst-case number of output bytes when encoding `u16_length` UTF-16
    /// code units without replacement. Returns `None` if doubling the
    /// length overflows `usize`.
    pub fn max_buffer_length_from_utf16_without_replacement(
        &self,
        u16_length: usize,
    ) -> Option<usize> {
        // Output per input shape:
        //   * Astral (surrogate pair): 2 code units to 2 bytes.
        //   * ASCII: 1 code unit to 1 byte.
        //   * Any other BMP character: 1 code unit to 2 bytes (worst case).
        usize::checked_mul(u16_length, 2)
    }

    /// Worst-case number of output bytes when encoding `byte_length` bytes
    /// of UTF-8 without replacement. Returns `None` if adding one to the
    /// length overflows `usize`.
    pub fn max_buffer_length_from_utf8_without_replacement(
        &self,
        byte_length: usize,
    ) -> Option<usize> {
        // Output per input shape (Big5 output never exceeds the UTF-8 input):
        //   * Astral: 4 bytes to 2 bytes.
        //   * Upper BMP: 3 bytes to 2 bytes.
        //   * Lower BMP: 2 bytes to 2 bytes.
        //   * ASCII: 1 byte to 1 byte.
        usize::checked_add(byte_length, 1)
    }

    // Generates the encoder's code through the shared
    // `ascii_compatible_encoder_functions!` macro (defined outside this
    // file). The first braced block encodes a non-ASCII BMP value bound to
    // `bmp`; the second braced block encodes an astral value bound to
    // `astral`. The remaining arguments are the identifiers and handle
    // helpers that those blocks refer to.
    // NOTE(review): the meaning of the trailing `false` flag is defined by
    // the macro — confirm there.
    ascii_compatible_encoder_functions!(
        {
            // For simplicity, unified ideographs
            // in the pointer range 11206...11212 are handled
            // as Level 1 Hanzi.
            if let Some((lead, trail)) = big5_level1_hanzi_encode(bmp) {
                handle.write_two(lead, trail)
            } else {
                // Lookup order: box-encode table first, then the "other"
                // table; a miss in both means the character is unmappable.
                let pointer = if let Some(pointer) = big5_box_encode(bmp) {
                    pointer
                } else if let Some(pointer) = big5_other_encode(bmp) {
                    pointer
                } else {
                    return (
                        EncoderResult::unmappable_from_bmp(bmp),
                        source.consumed(),
                        handle.written(),
                    );
                };
                // Inverse of the decoder's pointer computation: 157 trail
                // values per lead, leads start at 0x81.
                let lead = pointer / 157 + 0x81;
                let remainder = pointer % 157;
                // Offsets 0x00..=0x3E map to trail bytes 0x40..=0x7E; larger
                // offsets map to 0xA1 and up (0x3F + 0x62 == 0xA1).
                let trail = if remainder < 0x3F {
                    remainder + 0x40
                } else {
                    remainder + 0x62
                };
                handle.write_two(lead as u8, trail as u8)
            }
        },
        {
            // Only code points in 0x2008A..=0x2F8A6 are looked up; anything
            // else is reported as unmappable straight away.
            if in_inclusive_range32(astral as u32, 0x2008A, 0x2F8A6) {
                // The `as u16` cast deliberately keeps only the low 16 bits
                // of the code point, mirroring the decoder's
                // `low_bits | 0x20000`.
                if let Some(rebased_pointer) = big5_astral_encode(astral as u16) {
                    // big5_astral_encode returns rebased pointer,
                    // so adding 0x87 instead of 0x81.
                    let lead = rebased_pointer / 157 + 0x87;
                    let remainder = rebased_pointer % 157;
                    let trail = if remainder < 0x3F {
                        remainder + 0x40
                    } else {
                        remainder + 0x62
                    };
                    handle.write_two(lead as u8, trail as u8)
                } else {
                    return (
                        EncoderResult::Unmappable(astral),
                        source.consumed(),
                        handle.written(),
                    );
                }
            } else {
                return (
                    EncoderResult::Unmappable(astral),
                    source.consumed(),
                    handle.written(),
                );
            }
        },
        bmp,
        astral,
        self,
        source,
        handle,
        copy_ascii_to_check_space_two,
        check_space_two,
        false
    );
}

// Any copyright to the test code below this comment is dedicated to the
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/

#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Runs the shared `decode` test helper (from `testing`) with the
    /// `BIG5` encoding, passing `bytes` as input and `expect` as the
    /// expected decoded text.
    fn decode_big5(bytes: &[u8], expect: &str) {
        decode(BIG5, bytes, expect);
    }

    /// Runs the shared `encode` test helper (from `testing`) with the
    /// `BIG5` encoding, passing `string` as input and `expect` as the
    /// expected encoded bytes.
    fn encode_big5(string: &str, expect: &[u8]) {
        encode(BIG5, string, expect);
    }

    /// Table-driven decode checks: every `(input bytes, expected text)` pair
    /// is passed to `decode_big5` in the listed order.
    #[test]
    fn test_big5_decode() {
        let cases: &[(&[u8], &str)] = &[
            // Empty
            (b"", ""),
            // ASCII
            (&[0x61, 0x62], "\u{0061}\u{0062}"),
            // Edge cases
            (&[0x87, 0x40], "\u{43F0}"),
            (&[0xFE, 0xFE], "\u{79D4}"),
            (&[0xFE, 0xFD], "\u{2910D}"),
            (&[0x88, 0x62], "\u{00CA}\u{0304}"),
            (&[0x88, 0x64], "\u{00CA}\u{030C}"),
            (&[0x88, 0x66], "\u{00CA}"),
            (&[0x88, 0xA3], "\u{00EA}\u{0304}"),
            (&[0x88, 0xA5], "\u{00EA}\u{030C}"),
            (&[0x88, 0xA7], "\u{00EA}"),
            (&[0x99, 0xD4], "\u{8991}"),
            (&[0x99, 0xD5], "\u{27967}"),
            (&[0x99, 0xD6], "\u{8A29}"),
            // Edge cases surrounded with ASCII
            (&[0x61, 0x87, 0x40, 0x62], "\u{0061}\u{43F0}\u{0062}"),
            (&[0x61, 0xFE, 0xFE, 0x62], "\u{0061}\u{79D4}\u{0062}"),
            (&[0x61, 0xFE, 0xFD, 0x62], "\u{0061}\u{2910D}\u{0062}"),
            (&[0x61, 0x88, 0x62, 0x62], "\u{0061}\u{00CA}\u{0304}\u{0062}"),
            (&[0x61, 0x88, 0x64, 0x62], "\u{0061}\u{00CA}\u{030C}\u{0062}"),
            (&[0x61, 0x88, 0x66, 0x62], "\u{0061}\u{00CA}\u{0062}"),
            (&[0x61, 0x88, 0xA3, 0x62], "\u{0061}\u{00EA}\u{0304}\u{0062}"),
            (&[0x61, 0x88, 0xA5, 0x62], "\u{0061}\u{00EA}\u{030C}\u{0062}"),
            (&[0x61, 0x88, 0xA7, 0x62], "\u{0061}\u{00EA}\u{0062}"),
            (&[0x61, 0x99, 0xD4, 0x62], "\u{0061}\u{8991}\u{0062}"),
            (&[0x61, 0x99, 0xD5, 0x62], "\u{0061}\u{27967}\u{0062}"),
            (&[0x61, 0x99, 0xD6, 0x62], "\u{0061}\u{8A29}\u{0062}"),
            // Bad sequences
            (&[0x80, 0x61], "\u{FFFD}\u{0061}"),
            (&[0xFF, 0x61], "\u{FFFD}\u{0061}"),
            (&[0xFE, 0x39], "\u{FFFD}\u{0039}"),
            (&[0x87, 0x66], "\u{FFFD}\u{0066}"),
            (&[0x81, 0x40], "\u{FFFD}\u{0040}"),
            (&[0x61, 0x81], "\u{0061}\u{FFFD}"),
        ];

        for &(bytes, expected) in cases {
            decode_big5(bytes, expected);
        }
    }

    /// Table-driven encode checks: every `(input text, expected bytes)` pair
    /// is passed to `encode_big5` in the listed order. The first table is
    /// skipped under Miri.
    #[test]
    fn test_big5_encode() {
        // Empty
        encode_big5("", b"");

        // ASCII
        encode_big5("\u{0061}\u{0062}", b"\x61\x62");

        if !cfg!(miri) {
            // Miri is too slow
            let slow_cases: &[(&str, &[u8])] = &[
                // Edge cases
                ("\u{9EA6}\u{0061}", b"&#40614;\x61"),
                ("\u{2626B}\u{0061}", b"&#156267;\x61"),
                ("\u{3000}", b"\xA1\x40"),
                ("\u{20AC}", b"\xA3\xE1"),
                ("\u{4E00}", b"\xA4\x40"),
                ("\u{27607}", b"\xC8\xA4"),
                ("\u{FFE2}", b"\xC8\xCD"),
                ("\u{79D4}", b"\xFE\xFE"),
                // Not in index
                ("\u{2603}\u{0061}", b"&#9731;\x61"),
            ];
            for &(text, expected) in slow_cases {
                encode_big5(text, expected);
            }
        }

        let always_cases: &[(&str, &[u8])] = &[
            // duplicate low bits
            ("\u{203B5}", b"\xFD\x6A"),
            ("\u{25605}", b"\xFE\x46"),
            // prefer last
            ("\u{2550}", b"\xF9\xF9"),
        ];
        for &(text, expected) in always_cases {
            encode_big5(text, expected);
        }
    }

    /// Decodes the bundled `big5_in.txt` fixture and compares the result
    /// with `big5_in_ref.txt`; the fixture is expected to trigger errors.
    #[test]
    #[cfg_attr(miri, ignore)] // Miri is too slow
    fn test_big5_decode_all() {
        let reference = include_str!("test_data/big5_in_ref.txt");
        let raw = include_bytes!("test_data/big5_in.txt");
        let (decoded, saw_errors) = BIG5.decode_without_bom_handling(raw);
        assert!(saw_errors, "Should have had errors.");
        assert_eq!(&decoded[..], reference);
    }

    /// Encodes the bundled `big5_out.txt` fixture and compares the result
    /// with `big5_out_ref.txt`; no errors are expected and the reported
    /// output encoding must stay `BIG5`.
    #[test]
    #[cfg_attr(miri, ignore)] // Miri is too slow
    fn test_big5_encode_all() {
        let reference = include_bytes!("test_data/big5_out_ref.txt");
        let text = include_str!("test_data/big5_out.txt");
        let (encoded, used_encoding, saw_errors) = BIG5.encode(text);
        assert!(!saw_errors, "Should not have had errors.");
        assert_eq!(used_encoding, BIG5);
        assert_eq!(&encoded[..], &reference[..]);
    }

    /// Feeds two low surrogates as UTF-16 input and expects each to come
    /// out as the numeric character reference `&#65533;`, with the error
    /// flag set.
    #[test]
    #[cfg_attr(miri, ignore)] // Miri is too slow
    fn test_big5_encode_from_two_low_surrogates() {
        let low_surrogates = [0xDC00u16, 0xDEDEu16];
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = BIG5.new_encoder();

        let outcome = encoder.encode_from_utf16(&low_surrogates, &mut output[..], true);
        let (result, read, written, had_errors) = outcome;

        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}

Coverage Report

Created: 2025-02-21 07:11

Line	Count	Source (jump to first uncovered line)
1		// Copyright Mozilla Foundation. See the COPYRIGHT
2		// file at the top-level directory of this distribution.
3		//
4		// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5		// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6		// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7		// option. This file may not be copied, modified, or distributed
8		// except according to those terms.
9
10		use super::*;
11		use crate::data::*;
12		use crate::handles::*;
13		use crate::variant::*;
14		// Rust 1.14.0 requires the following despite the asterisk above.
15		use super::in_inclusive_range32;
16
17		pub struct Big5Decoder {
18		lead: Option<u8>,
19		}
20
21		impl Big5Decoder {
22	0	pub fn new() -> VariantDecoder {
23	0	VariantDecoder::Big5(Big5Decoder { lead: None })
24	0	}
25
26	0	pub fn in_neutral_state(&self) -> bool {
27	0	self.lead.is_none()
28	0	}
29
30	0	fn plus_one_if_lead(&self, byte_length: usize) -> Option<usize> {
31	0	byte_length.checked_add(match self.lead {
32	0	None => 0,
33	0	Some(_) => 1,
34		})
35	0	}
36
37	0	pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
38	0	// If there is a lead but the next byte isn't a valid trail, an
39	0	// error is generated for the lead (+1). Then another iteration checks
40	0	// space, which needs +1 to account for the possibility of astral
41	0	// output or combining pair.
42	0	checked_add(1, self.plus_one_if_lead(byte_length))
43	0	}
44
45	0	pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
46	0	// No need to account for REPLACEMENT CHARACTERS.
47	0	// Cases:
48	0	// ASCII: 1 to 1
49	0	// Valid pair: 2 to 2, 2 to 3 or 2 to 4, i.e. worst case 2 to 4
50	0	// lead set and first byte is trail: 1 to 4 worst case
51	0	//
52	0	// When checking for space for the last byte:
53	0	// no lead: the last byte must be ASCII (or fatal error): 1 to 1
54	0	// lead set: space for 4 bytes was already checked when reading the
55	0	// lead, hence the last lead and the last trail together are worst
56	0	// case 2 to 4.
57	0	//
58	0	// If lead set and the input is a single trail byte, the worst-case
59	0	// output is 4, so we need to add one before multiplying if lead is
60	0	// set.
61	0	//
62	0	// Finally, add two so that if input is non-zero, the output is at
63	0	// least 4.
64	0	checked_add(2, checked_mul(2, self.plus_one_if_lead(byte_length)))
65	0	}
66
67	0	pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
68	0	// If there is a lead but the next byte isn't a valid trail, an
69	0	// error is generated for the lead (+(1*3)). Then another iteration
70	0	// checks space, which needs +3 to account for the possibility of astral
71	0	// output or combining pair. In between start and end, the worst case
72	0	// is that every byte is bad: *3.
73	0	checked_add(3, checked_mul(3, self.plus_one_if_lead(byte_length)))
74	0	}
75
76		ascii_compatible_two_byte_decoder_functions!(
77		{
78		// If lead is between 0x81 and 0xFE, inclusive,
79		// subtract offset 0x81.
80		let non_ascii_minus_offset =
81		non_ascii.wrapping_sub(0x81);
82		if non_ascii_minus_offset > (0xFE - 0x81) {
83		return (DecoderResult::Malformed(1, 0),
84		source.consumed(),
85		handle.written());
86		}
87		non_ascii_minus_offset
88		},
89		{
90		// If trail is between 0x40 and 0x7E, inclusive,
91		// subtract offset 0x40. Else if trail is
92		// between 0xA1 and 0xFE, inclusive, subtract
93		// offset 0x62.
94		// TODO: Find out which range is more probable.
95		let mut trail_minus_offset =
96		byte.wrapping_sub(0x40);
97		if trail_minus_offset > (0x7E - 0x40) {
98		let trail_minus_range_start =
99		byte.wrapping_sub(0xA1);
100		if trail_minus_range_start >
101		(0xFE - 0xA1) {
102		if byte < 0x80 {
103		return (DecoderResult::Malformed(1, 0),
104		unread_handle_trail.unread(),
105		handle.written());
106		}
107		return (DecoderResult::Malformed(2, 0),
108		unread_handle_trail.consumed(),
109		handle.written());
110		}
111		trail_minus_offset = byte - 0x62;
112		}
113		let pointer = lead_minus_offset as usize *
114		157usize +
115		trail_minus_offset as usize;
116		let rebased_pointer = pointer.wrapping_sub(942);
117		let low_bits = big5_low_bits(rebased_pointer);
118		if low_bits == 0 {
119		match pointer {
120		1133 => {
121		handle.write_big5_combination(0x00CAu16,
122		0x0304u16)
123		}
124		1135 => {
125		handle.write_big5_combination(0x00CAu16,
126		0x030Cu16)
127		}
128		1164 => {
129		handle.write_big5_combination(0x00EAu16,
130		0x0304u16)
131		}
132		1166 => {
133		handle.write_big5_combination(0x00EAu16,
134		0x030Cu16)
135		}
136		_ => {
137		if byte < 0x80 {
138		return (DecoderResult::Malformed(1, 0),
139		unread_handle_trail.unread(),
140		handle.written());
141		}
142		return (DecoderResult::Malformed(2, 0),
143		unread_handle_trail.consumed(),
144		handle.written());
145		}
146		}
147		} else if big5_is_astral(rebased_pointer) {
148		handle.write_astral(u32::from(low_bits) |
149		0x20000u32)
150		} else {
151		handle.write_bmp_excl_ascii(low_bits)
152		}
153		},
154		self,
155		non_ascii,
156		byte,
157		lead_minus_offset,
158		unread_handle_trail,
159		source,
160		handle,
161		'outermost,
162		copy_ascii_from_check_space_astral,
163		check_space_astral,
164		false);
165		}
166
167		pub struct Big5Encoder;
168
169		impl Big5Encoder {
170	0	pub fn new(encoding: &'static Encoding) -> Encoder {
171	0	Encoder::new(encoding, VariantEncoder::Big5(Big5Encoder))
172	0	}
173
174	0	pub fn max_buffer_length_from_utf16_without_replacement(
175	0	&self,
176	0	u16_length: usize,
177	0	) -> Option<usize> {
178	0	// Astral: 2 to 2
179	0	// ASCII: 1 to 1
180	0	// Other: 1 to 2
181	0	u16_length.checked_mul(2)
182	0	}
183
184	0	pub fn max_buffer_length_from_utf8_without_replacement(
185	0	&self,
186	0	byte_length: usize,
187	0	) -> Option<usize> {
188	0	// Astral: 4 to 2
189	0	// Upper BMP: 3 to 2
190	0	// Lower BMP: 2 to 2
191	0	// ASCII: 1 to 1
192	0	byte_length.checked_add(1)
193	0	}
194
195		ascii_compatible_encoder_functions!(
196		{
197		// For simplicity, unified ideographs
198		// in the pointer range 11206...11212 are handled
199		// as Level 1 Hanzi.
200		if let Some((lead, trail)) = big5_level1_hanzi_encode(bmp) {
201		handle.write_two(lead, trail)
202		} else {
203		let pointer = if let Some(pointer) = big5_box_encode(bmp) {
204		pointer
205		} else if let Some(pointer) = big5_other_encode(bmp) {
206		pointer
207		} else {
208		return (
209		EncoderResult::unmappable_from_bmp(bmp),
210		source.consumed(),
211		handle.written(),
212		);
213		};
214		let lead = pointer / 157 + 0x81;
215		let remainder = pointer % 157;
216		let trail = if remainder < 0x3F {
217		remainder + 0x40
218		} else {
219		remainder + 0x62
220		};
221		handle.write_two(lead as u8, trail as u8)
222		}
223		},
224		{
225		if in_inclusive_range32(astral as u32, 0x2008A, 0x2F8A6) {
226		if let Some(rebased_pointer) = big5_astral_encode(astral as u16) {
227		// big5_astral_encode returns rebased pointer,
228		// so adding 0x87 instead of 0x81.
229		let lead = rebased_pointer / 157 + 0x87;
230		let remainder = rebased_pointer % 157;
231		let trail = if remainder < 0x3F {
232		remainder + 0x40
233		} else {
234		remainder + 0x62
235		};
236		handle.write_two(lead as u8, trail as u8)
237		} else {
238		return (
239		EncoderResult::Unmappable(astral),
240		source.consumed(),
241		handle.written(),
242		);
243		}
244		} else {
245		return (
246		EncoderResult::Unmappable(astral),
247		source.consumed(),
248		handle.written(),
249		);
250		}
251		},
252		bmp,
253		astral,
254		self,
255		source,
256		handle,
257		copy_ascii_to_check_space_two,
258		check_space_two,
259		false
260		);
261		}
262
263		// Any copyright to the test code below this comment is dedicated to the
264		// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
265
266		#[cfg(all(test, feature = "alloc"))]
267		mod tests {
268		use super::super::testing::*;
269		use super::super::*;
270
271		fn decode_big5(bytes: &[u8], expect: &str) {
272		decode(BIG5, bytes, expect);
273		}
274
275		fn encode_big5(string: &str, expect: &[u8]) {
276		encode(BIG5, string, expect);
277		}
278
279		#[test]
280		fn test_big5_decode() {
281		// Empty
282		decode_big5(b"", &"");
283
284		// ASCII
285		decode_big5(&[0x61u8, 0x62u8], &"\u{0061}\u{0062}");
286
287		// Edge cases
288		decode_big5(&[0x87u8, 0x40u8], &"\u{43F0}");
289		decode_big5(&[0xFEu8, 0xFEu8], &"\u{79D4}");
290		decode_big5(&[0xFEu8, 0xFDu8], &"\u{2910D}");
291		decode_big5(&[0x88u8, 0x62u8], &"\u{00CA}\u{0304}");
292		decode_big5(&[0x88u8, 0x64u8], &"\u{00CA}\u{030C}");
293		decode_big5(&[0x88u8, 0x66u8], &"\u{00CA}");
294		decode_big5(&[0x88u8, 0xA3u8], &"\u{00EA}\u{0304}");
295		decode_big5(&[0x88u8, 0xA5u8], &"\u{00EA}\u{030C}");
296		decode_big5(&[0x88u8, 0xA7u8], &"\u{00EA}");
297		decode_big5(&[0x99u8, 0xD4u8], &"\u{8991}");
298		decode_big5(&[0x99u8, 0xD5u8], &"\u{27967}");
299		decode_big5(&[0x99u8, 0xD6u8], &"\u{8A29}");
300
301		// Edge cases surrounded with ASCII
302		decode_big5(
303		&[0x61u8, 0x87u8, 0x40u8, 0x62u8],
304		&"\u{0061}\u{43F0}\u{0062}",
305		);
306		decode_big5(
307		&[0x61u8, 0xFEu8, 0xFEu8, 0x62u8],
308		&"\u{0061}\u{79D4}\u{0062}",
309		);
310		decode_big5(
311		&[0x61u8, 0xFEu8, 0xFDu8, 0x62u8],
312		&"\u{0061}\u{2910D}\u{0062}",
313		);
314		decode_big5(
315		&[0x61u8, 0x88u8, 0x62u8, 0x62u8],
316		&"\u{0061}\u{00CA}\u{0304}\u{0062}",
317		);
318		decode_big5(
319		&[0x61u8, 0x88u8, 0x64u8, 0x62u8],
320		&"\u{0061}\u{00CA}\u{030C}\u{0062}",
321		);
322		decode_big5(
323		&[0x61u8, 0x88u8, 0x66u8, 0x62u8],
324		&"\u{0061}\u{00CA}\u{0062}",
325		);
326		decode_big5(
327		&[0x61u8, 0x88u8, 0xA3u8, 0x62u8],
328		&"\u{0061}\u{00EA}\u{0304}\u{0062}",
329		);
330		decode_big5(
331		&[0x61u8, 0x88u8, 0xA5u8, 0x62u8],
332		&"\u{0061}\u{00EA}\u{030C}\u{0062}",
333		);
334		decode_big5(
335		&[0x61u8, 0x88u8, 0xA7u8, 0x62u8],
336		&"\u{0061}\u{00EA}\u{0062}",
337		);
338		decode_big5(
339		&[0x61u8, 0x99u8, 0xD4u8, 0x62u8],
340		&"\u{0061}\u{8991}\u{0062}",
341		);
342		decode_big5(
343		&[0x61u8, 0x99u8, 0xD5u8, 0x62u8],
344		&"\u{0061}\u{27967}\u{0062}",
345		);
346		decode_big5(
347		&[0x61u8, 0x99u8, 0xD6u8, 0x62u8],
348		&"\u{0061}\u{8A29}\u{0062}",
349		);
350
351		// Bad sequences
352		decode_big5(&[0x80u8, 0x61u8], &"\u{FFFD}\u{0061}");
353		decode_big5(&[0xFFu8, 0x61u8], &"\u{FFFD}\u{0061}");
354		decode_big5(&[0xFEu8, 0x39u8], &"\u{FFFD}\u{0039}");
355		decode_big5(&[0x87u8, 0x66u8], &"\u{FFFD}\u{0066}");
356		decode_big5(&[0x81u8, 0x40u8], &"\u{FFFD}\u{0040}");
357		decode_big5(&[0x61u8, 0x81u8], &"\u{0061}\u{FFFD}");
358		}
359
360		#[test]
361		fn test_big5_encode() {
362		// Empty
363		encode_big5("", b"");
364
365		// ASCII
366		encode_big5("\u{0061}\u{0062}", b"\x61\x62");
367
368		if !cfg!(miri) {
369		// Miri is too slow
370		// Edge cases
371		encode_big5("\u{9EA6}\u{0061}", b"&#40614;\x61");
372		encode_big5("\u{2626B}\u{0061}", b"&#156267;\x61");
373		encode_big5("\u{3000}", b"\xA1\x40");
374		encode_big5("\u{20AC}", b"\xA3\xE1");
375		encode_big5("\u{4E00}", b"\xA4\x40");
376		encode_big5("\u{27607}", b"\xC8\xA4");
377		encode_big5("\u{FFE2}", b"\xC8\xCD");
378		encode_big5("\u{79D4}", b"\xFE\xFE");
379
380		// Not in index
381		encode_big5("\u{2603}\u{0061}", b"&#9731;\x61");
382		}
383
384		// duplicate low bits
385		encode_big5("\u{203B5}", b"\xFD\x6A");
386		encode_big5("\u{25605}", b"\xFE\x46");
387
388		// prefer last
389		encode_big5("\u{2550}", b"\xF9\xF9");
390		}
391
392		#[test]
393		#[cfg_attr(miri, ignore)] // Miri is too slow
394		fn test_big5_decode_all() {
395		let input = include_bytes!("test_data/big5_in.txt");
396		let expectation = include_str!("test_data/big5_in_ref.txt");
397		let (cow, had_errors) = BIG5.decode_without_bom_handling(input);
398		assert!(had_errors, "Should have had errors.");
399		assert_eq!(&cow[..], expectation);
400		}
401
402		#[test]
403		#[cfg_attr(miri, ignore)] // Miri is too slow
404		fn test_big5_encode_all() {
405		let input = include_str!("test_data/big5_out.txt");
406		let expectation = include_bytes!("test_data/big5_out_ref.txt");
407		let (cow, encoding, had_errors) = BIG5.encode(input);
408		assert!(!had_errors, "Should not have had errors.");
409		assert_eq!(encoding, BIG5);
410		assert_eq!(&cow[..], &expectation[..]);
411		}
412
413		#[test]
414		#[cfg_attr(miri, ignore)] // Miri is too slow
415		fn test_big5_encode_from_two_low_surrogates() {
416		let expectation = b"&#65533;&#65533;";
417		let mut output = [0u8; 40];
418		let mut encoder = BIG5.new_encoder();
419		let (result, read, written, had_errors) =
420		encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
421		assert_eq!(result, CoderResult::InputEmpty);
422		assert_eq!(read, 2);
423		assert_eq!(written, expectation.len());
424		assert!(had_errors);
425		assert_eq!(&output[..written], expectation);
426		}
427		}