/src/libcbor/src/cbor/internal/unicode.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com> |
3 | | * |
4 | | * libcbor is free software; you can redistribute it and/or modify |
5 | | * it under the terms of the MIT license. See LICENSE for details. |
6 | | */ |
7 | | |
8 | | #include "unicode.h" |
9 | | #include <stdint.h> |
10 | | |
/* DFA states with a fixed meaning. Every other state value means "in the
 * middle of a multibyte sequence". */
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/*
 * Lookup table driving the UTF-8 decoding DFA.
 *
 * Entries 0..255 map an input byte to its character class. The entries from
 * index 256 onwards form the transition table: the successor of state `s` on
 * a byte of class `t` is stored at index 256 + s * 16 + t.
 */
static const uint8_t utf8d[] = {
    /* Character classes, indexed by byte value. */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..1f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..3f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..5f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..7f */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 80..9f */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..bf */
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..df */
    0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
    0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, /* e0..ef */
    0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, /* f0..ff */

    /* State transitions, 16 entries per state, indexed by character class. */
    0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1,
    0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, /* s0..s0 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1..s2 */
    1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s3..s4 */
    1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s5..s6 */
    1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1,
    1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s7..s8 */
};

/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
 * <bjoern@hoehrmann.de> */
/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
/*
 * Feeds one input byte to the UTF-8 decoding DFA.
 *
 * state: in/out DFA state; must be UTF8_ACCEPT before the first byte of a
 *        sequence and is overwritten with the successor state.
 * codep: in/out accumulator for the code point being decoded; it is
 *        (re)started whenever the incoming state is UTF8_ACCEPT.
 * byte:  the next input byte; only values 0x00..0xff are valid.
 *
 * Returns the new state: UTF8_ACCEPT once a complete code point is in *codep,
 * UTF8_REJECT for invalid input, and any other value while more continuation
 * bytes are required.
 */
uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
  /* `byte` is wider than the 256 character-class entries. Without this guard
   * a larger value would be classified using transition-table cells, or read
   * past the end of the table altogether. */
  if (byte > 0xffu) {
    *state = UTF8_REJECT;
    return UTF8_REJECT;
  }

  uint32_t type = utf8d[byte];

  /* Continuation bytes contribute their low six bits; a leading byte keeps
   * only the payload bits left after masking off its class-dependent prefix. */
  *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
                                   : (0xffu >> type) & byte;

  *state = utf8d[256 + *state * 16 + type];
  return *state;
}
68 | | |
69 | | size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length, |
70 | 302k | struct _cbor_unicode_status* status) { |
71 | 302k | *status = |
72 | 302k | (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK}; |
73 | 302k | uint32_t codepoint, state = UTF8_ACCEPT, res; |
74 | 302k | size_t pos = 0, count = 0; |
75 | | |
76 | 3.29M | for (; pos < source_length; pos++) { |
77 | 3.01M | res = _cbor_unicode_decode(&state, &codepoint, source[pos]); |
78 | | |
79 | 3.01M | if (res == UTF8_ACCEPT) { |
80 | 2.98M | count++; |
81 | 2.98M | } else if (res == UTF8_REJECT) { |
82 | 18.0k | goto error; |
83 | 18.0k | } |
84 | 3.01M | } |
85 | | |
86 | | /* Unfinished multibyte codepoint */ |
87 | 284k | if (state != UTF8_ACCEPT) goto error; |
88 | | |
89 | 284k | return count; |
90 | | |
91 | 18.3k | error: |
92 | 18.3k | *status = (struct _cbor_unicode_status){.location = pos, |
93 | 18.3k | .status = _CBOR_UNICODE_BADCP}; |
94 | 18.3k | return 0; |
95 | 284k | } |