/src/libprotobuf-mutator/src/utf8_fix.cc
Line | Count | Source |
1 | | // Copyright 2017 Google Inc. All rights reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "src/utf8_fix.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <cassert> |
19 | | |
20 | | namespace protobuf_mutator { |
21 | | |
22 | | namespace { |
23 | | |
// Writes `code` as a `size`-byte UTF-8 sequence that ends just before `e`:
// the last byte goes to e[-1] and the lead byte to e[-size].
//
// The trailing `size - 1` bytes are continuation bytes (0x80 plus the next six
// low bits of `code`). The lead byte is `prefix` OR-ed with whatever bits of
// `code` are left after those shifts, so the caller must already have limited
// `code` to the range that fits a sequence of this length.
void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
  for (uint8_t continuation = size - 1; continuation != 0; --continuation) {
    --e;
    *e = 0x80 | (code & 0x3F);
    code >>= 6;
  }
  --e;
  *e = prefix | code;
}
31 | | |
32 | 0 | char* FixCode(char* b, const char* e, RandomEngine* random) { |
33 | 0 | const char* start = b; |
34 | 0 | assert(b < e); |
35 | | |
36 | 0 | e = std::min<const char*>(e, b + 4); |
37 | 0 | char32_t c = *b++; |
38 | 0 | for (; b < e && (*b & 0xC0) == 0x80; ++b) { |
39 | 0 | c = (c << 6) + (*b & 0x3F); |
40 | 0 | } |
41 | 0 | uint8_t size = b - start; |
42 | 0 | switch (size) { |
43 | 0 | case 1: |
44 | 0 | c &= 0x7F; |
45 | 0 | StoreCode(b, c, size, 0); |
46 | 0 | break; |
47 | 0 | case 2: |
48 | 0 | c &= 0x7FF; |
49 | 0 | if (c < 0x80) { |
50 | | // Use uint32_t because uniform_int_distribution does not support |
51 | | // char32_t on Windows. |
52 | 0 | c = std::uniform_int_distribution<uint32_t>(0x80, 0x7FF)(*random); |
53 | 0 | } |
54 | 0 | StoreCode(b, c, size, 0xC0); |
55 | 0 | break; |
56 | 0 | case 3: |
57 | 0 | c &= 0xFFFF; |
58 | | |
59 | | // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves. |
60 | 0 | if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) { |
61 | 0 | uint32_t halves = 0xE000 - 0xD800; |
62 | 0 | c = std::uniform_int_distribution<uint32_t>(0x800, |
63 | 0 | 0xFFFF - halves)(*random); |
64 | 0 | if (c >= 0xD800) c += halves; |
65 | 0 | } |
66 | 0 | StoreCode(b, c, size, 0xE0); |
67 | 0 | break; |
68 | 0 | case 4: |
69 | 0 | c &= 0x1FFFFF; |
70 | 0 | if (c < 0x10000 || c > 0x10FFFF) { |
71 | 0 | c = std::uniform_int_distribution<uint32_t>(0x10000, 0x10FFFF)(*random); |
72 | 0 | } |
73 | 0 | StoreCode(b, c, size, 0xF0); |
74 | 0 | break; |
75 | 0 | default: |
76 | 0 | assert(false && "Unexpected size of UTF-8 sequence"); |
77 | 0 | } |
78 | 0 | return b; |
79 | 0 | } |
80 | | |
81 | | } // namespace |
82 | | |
83 | 0 | void FixUtf8String(std::string* str, RandomEngine* random) { |
84 | 0 | if (str->empty()) return; |
85 | 0 | char* b = &(*str)[0]; |
86 | 0 | const char* e = b + str->size(); |
87 | 0 | while (b < e) { |
88 | 0 | b = FixCode(b, e, random); |
89 | 0 | } |
90 | 0 | } |
91 | | |
92 | | } // namespace protobuf_mutator |