/src/rust-brotli/src/enc/utf8_util.rs
Line | Count | Source |
1 | | use crate::enc::floatX; |
2 | | |
/// Decodes a single UTF-8 encoded code point from the start of `input`.
///
/// `size` is the maximum number of bytes the caller allows this call to
/// consume. It is clamped to `input.len()`, so a sequence that is cut off by
/// the end of the slice is reported as invalid instead of indexing out of
/// bounds.
///
/// Returns `(bytes_consumed, symbol)`:
///
/// * For a well-formed, shortest-form sequence of 1 to 4 bytes, `symbol` is
///   the decoded code point (at most `0x10_ffff`) and `bytes_consumed` is the
///   sequence length.
/// * Otherwise exactly one byte is consumed and `symbol` is
///   `0x11_0000 | input[0]`, i.e. a value outside the Unicode range that still
///   carries the offending byte.
///
/// A NUL byte (`0x00`) is deliberately reported through the invalid path, and
/// the three-byte form only enforces the `> 0x7ff` lower bound (the surrogate
/// range is not excluded).
///
/// # Panics
///
/// Panics if `input` is empty, because there is no byte to report.
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
    let lead = input[0];
    // Never trust `size` beyond what the slice actually holds.
    let size = size.min(input.len());

    // Both closures are only invoked after the matching `size > idx` check has
    // passed, so the indexing below cannot go out of bounds.
    let is_continuation = |idx: usize| (input[idx] & 0xc0) == 0x80;
    let payload = |idx: usize| i32::from(input[idx]) & 0x3f;

    // One byte: 0xxxxxxx, excluding NUL.
    if (lead & 0x80) == 0 && lead > 0 {
        return (1, i32::from(lead));
    }

    // Two bytes: 110xxxxx 10xxxxxx.
    if size > 1 && (lead & 0xe0) == 0xc0 && is_continuation(1) {
        let symbol = ((i32::from(lead) & 0x1f) << 6) | payload(1);
        // Values that fit in one byte would be an overlong encoding.
        if symbol > 0x7f {
            return (2, symbol);
        }
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    if size > 2 && (lead & 0xf0) == 0xe0 && is_continuation(1) && is_continuation(2) {
        let symbol = ((i32::from(lead) & 0x0f) << 12) | (payload(1) << 6) | payload(2);
        // Values that fit in two bytes would be an overlong encoding.
        if symbol > 0x7ff {
            return (3, symbol);
        }
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    if size > 3
        && (lead & 0xf8) == 0xf0
        && is_continuation(1)
        && is_continuation(2)
        && is_continuation(3)
    {
        let symbol = ((i32::from(lead) & 0x07) << 18)
            | (payload(1) << 12)
            | (payload(2) << 6)
            | payload(3);
        // Reject overlong encodings and anything above the Unicode maximum.
        if symbol > 0xffff && symbol <= 0x10_ffff {
            return (4, symbol);
        }
    }

    // Not valid UTF-8: consume one byte and flag it with an out-of-range symbol.
    (1, 0x11_0000 | i32::from(lead))
}
44 | | |
45 | 14.8k | pub(crate) fn is_mostly_utf8( |
46 | 14.8k | data: &[u8], |
47 | 14.8k | pos: usize, |
48 | 14.8k | mask: usize, |
49 | 14.8k | length: usize, |
50 | 14.8k | min_fraction: floatX, |
51 | 14.8k | ) -> bool { |
52 | 14.8k | let mut size_utf8: usize = 0; |
53 | 14.8k | let mut i: usize = 0; |
54 | 403M | while i < length { |
55 | 403M | let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i); |
56 | 403M | i = i.wrapping_add(bytes_read); |
57 | 403M | if symbol < 0x11_0000 { |
58 | 236M | size_utf8 = size_utf8.wrapping_add(bytes_read); |
59 | 236M | } |
60 | | } |
61 | 14.8k | size_utf8 as floatX > min_fraction * length as floatX |
62 | 14.8k | } |