/src/rust-brotli/src/enc/utf8_util.rs
Line | Count | Source |
1 | | use crate::enc::floatX; |
2 | | |
/// Decodes one UTF-8 encoded code point from the start of `input`.
///
/// `size` is the number of bytes the caller allows the decoder to look at.
/// The decoder additionally never looks past the end of `input`, so a `size`
/// larger than `input.len()` is treated as if it were `input.len()`.
///
/// Returns `(bytes_consumed, symbol)`:
///
/// * For an accepted sequence, `bytes_consumed` is its length (1 to 4) and
///   `symbol` is the decoded code point, which is always below `0x11_0000`.
/// * Otherwise `bytes_consumed` is 1 and `symbol` is `0x11_0000 | input[0]`,
///   i.e. a value at or above `0x11_0000` that callers can use to detect the
///   rejection. A NUL byte, overlong 2/3/4-byte forms, and 4-byte values
///   above `0x10_ffff` are all rejected this way.
///
/// # Panics
///
/// Panics if `input` is empty; the first byte is always read.
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
    /// Bit pattern OR-ed with the offending byte to flag a rejected sequence.
    const INVALID_FLAG: i32 = 0x11_0000;

    let lead = input[0];
    // Only bytes inside this window may be inspected as continuation bytes.
    // Clamping to the slice length keeps a too-large `size` from turning into
    // an out-of-bounds panic; a truncated sequence is simply rejected.
    let window = &input[..size.min(input.len())];

    let is_continuation = |byte: u8| byte & 0xc0 == 0x80;
    let payload = |byte: u8| i32::from(byte & 0x3f);

    // Single-byte (ASCII) form. NUL is deliberately not accepted here and
    // falls through to the rejection at the bottom.
    if lead & 0x80 == 0 && lead > 0 {
        return (1, i32::from(lead));
    }

    // Two-byte form: 110xxxxx 10xxxxxx.
    if let &[_, b1, ..] = window {
        if lead & 0xe0 == 0xc0 && is_continuation(b1) {
            let symbol = (i32::from(lead & 0x1f) << 6) | payload(b1);
            // Values that fit in one byte are overlong encodings.
            if symbol > 0x7f {
                return (2, symbol);
            }
        }
    }

    // Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
    if let &[_, b1, b2, ..] = window {
        if lead & 0xf0 == 0xe0 && is_continuation(b1) && is_continuation(b2) {
            let symbol = (i32::from(lead & 0x0f) << 12) | (payload(b1) << 6) | payload(b2);
            // Values that fit in two bytes are overlong encodings.
            if symbol > 0x7ff {
                return (3, symbol);
            }
        }
    }

    // Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    if let &[_, b1, b2, b3, ..] = window {
        if lead & 0xf8 == 0xf0
            && is_continuation(b1)
            && is_continuation(b2)
            && is_continuation(b3)
        {
            let symbol = (i32::from(lead & 0x07) << 18)
                | (payload(b1) << 12)
                | (payload(b2) << 6)
                | payload(b3);
            // Reject overlong encodings and values above the last code point.
            if symbol > 0xffff && symbol <= 0x10_ffff {
                return (4, symbol);
            }
        }
    }

    // Not an accepted sequence: consume one byte and flag it.
    (1, INVALID_FLAG | i32::from(lead))
}
44 | | |
45 | 14.4k | pub(crate) fn is_mostly_utf8( |
46 | 14.4k | data: &[u8], |
47 | 14.4k | pos: usize, |
48 | 14.4k | mask: usize, |
49 | 14.4k | length: usize, |
50 | 14.4k | min_fraction: floatX, |
51 | 14.4k | ) -> bool { |
52 | 14.4k | let mut size_utf8: usize = 0; |
53 | 14.4k | let mut i: usize = 0; |
54 | 434M | while i < length { |
55 | 434M | let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i); |
56 | 434M | i = i.wrapping_add(bytes_read); |
57 | 434M | if symbol < 0x11_0000 { |
58 | 249M | size_utf8 = size_utf8.wrapping_add(bytes_read); |
59 | 249M | } |
60 | | } |
61 | 14.4k | size_utf8 as floatX > min_fraction * length as floatX |
62 | 14.4k | } |