/src/ada-url/fuzz/parse.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <fuzzer/FuzzedDataProvider.h> |
2 | | |
3 | | #include <cstdio> |
4 | | #include <memory> |
5 | | #include <string> |
6 | | |
7 | | #include "ada.cpp" |
8 | | #include "ada.h" |
9 | | |
/**
 * Reports whether the `len` bytes starting at `buf` are well-formed UTF-8.
 *
 * A sequence is rejected when it is truncated, has a malformed continuation
 * byte, starts with an invalid lead byte, is an overlong encoding, encodes a
 * surrogate (U+D800..U+DFFF) or encodes a value above U+10FFFF.
 *
 * @param buf pointer to the bytes to inspect (not required to be
 *            NUL-terminated; only `len` bytes are read)
 * @param len number of bytes to inspect
 * @return true if every byte belongs to a valid sequence (an empty input is
 *         valid), false otherwise
 */
bool is_valid_utf8_string(const char *buf, size_t len) {
  const auto *bytes = reinterpret_cast<const uint8_t *>(buf);
  constexpr uint64_t high_bit_mask = 0x8080808080808080;
  constexpr uint64_t ascii_chunk = 2 * sizeof(uint64_t);

  uint64_t cursor = 0;
  while (cursor < len) {
    // Fast path: when a full 16-byte chunk is available, test all of its
    // high bits at once and skip the chunk if it is pure ASCII.
    if (cursor + ascii_chunk <= len) {
      uint64_t lo;
      uint64_t hi;
      std::memcpy(&lo, bytes + cursor, sizeof(lo));
      std::memcpy(&hi, bytes + cursor + sizeof(lo), sizeof(hi));
      if (((lo | hi) & high_bit_mask) == 0) {
        cursor += ascii_chunk;
        continue;
      }
    }

    // Walk over ASCII bytes one at a time; reaching the end here means the
    // whole input was valid.
    while (bytes[cursor] < 0x80) {
      if (++cursor == len) {
        return true;
      }
    }

    // Classify the lead byte: sequence width, payload bits carried by the
    // lead byte, and the code point range that width may legally encode
    // (the lower bound rejects overlong encodings).
    const uint8_t lead = bytes[cursor];
    uint64_t width = 0;
    uint32_t scalar = 0;
    uint32_t lowest = 0;
    uint32_t highest = 0;
    if ((lead & 0xE0) == 0xC0) {
      width = 2;
      scalar = lead & 0x1F;
      lowest = 0x80;
      highest = 0x7ff;
    } else if ((lead & 0xF0) == 0xE0) {
      width = 3;
      scalar = lead & 0x0F;
      lowest = 0x800;
      highest = 0xffff;
    } else if ((lead & 0xF8) == 0xF0) {
      width = 4;
      scalar = lead & 0x07;
      lowest = 0x10000;
      highest = 0x10ffff;
    } else {
      // Stray continuation byte or a lead byte of 0xF8 and above.
      return false;
    }

    // The length check must come before any continuation byte is read.
    if (cursor + width > len) {
      return false;
    }
    for (uint64_t k = 1; k < width; ++k) {
      const uint8_t trail = bytes[cursor + k];
      if ((trail & 0xC0) != 0x80) {
        return false;
      }
      scalar = (scalar << 6) | (trail & 0x3Fu);
    }

    if (scalar < lowest || highest < scalar) {
      return false;
    }
    // Surrogates; only a 3-byte sequence can produce a value in this range.
    if (0xd7ff < scalar && scalar < 0xe000) {
      return false;
    }
    cursor += width;
  }
  return true;
}
93 | | |
94 | 8.21k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |
95 | 8.21k | FuzzedDataProvider fdp(data, size); |
96 | 8.21k | std::string source = fdp.ConsumeRandomLengthString(256); |
97 | | |
98 | | // volatile forces the compiler to store the results without undue |
99 | | // optimizations |
100 | 8.21k | volatile size_t length = 0; |
101 | | |
102 | 8.21k | auto parse_url = ada::parse<ada::url>(source); |
103 | 8.21k | auto parse_url_aggregator = ada::parse<ada::url_aggregator>(source); |
104 | | |
105 | 8.21k | if (is_valid_utf8_string(source.data(), source.length())) { |
106 | 6.11k | if (parse_url.has_value() ^ parse_url_aggregator.has_value()) { |
107 | 0 | printf("Source used to parse: %s", source.c_str()); |
108 | 0 | abort(); |
109 | 0 | } |
110 | 6.11k | } |
111 | | |
112 | 8.21k | if (parse_url) { |
113 | 2.86k | length += parse_url->get_href().size(); |
114 | 2.86k | length += parse_url->get_origin().size(); |
115 | 2.86k | } |
116 | | |
117 | 8.21k | if (parse_url_aggregator) { |
118 | 2.72k | length += parse_url_aggregator->get_href().size(); |
119 | 2.72k | length += parse_url_aggregator->get_origin().size(); |
120 | | |
121 | 2.72k | volatile bool is_parse_url_aggregator_output_valid = false; |
122 | 2.72k | is_parse_url_aggregator_output_valid = parse_url_aggregator->validate(); |
123 | | |
124 | 2.72k | assert(parse_url->get_protocol() == parse_url_aggregator->get_protocol()); |
125 | 2.72k | assert(parse_url->get_href() == parse_url_aggregator->get_href()); |
126 | | |
127 | 2.72k | parse_url->set_href(source); |
128 | 2.72k | parse_url_aggregator->set_href(source); |
129 | 2.72k | assert(parse_url->get_href() == parse_url_aggregator->get_href()); |
130 | 2.72k | } |
131 | | |
132 | | /** |
133 | | * ada::parse<ada::url> |
134 | | */ |
135 | 8.21k | auto out_url = ada::parse<ada::url>("https://www.ada-url.com"); |
136 | | |
137 | 8.21k | if (out_url) { |
138 | 8.21k | out_url->set_protocol(source); |
139 | 8.21k | out_url->set_username(source); |
140 | 8.21k | out_url->set_password(source); |
141 | 8.21k | out_url->set_hostname(source); |
142 | 8.21k | out_url->set_host(source); |
143 | 8.21k | out_url->set_pathname(source); |
144 | 8.21k | out_url->set_search(source); |
145 | 8.21k | out_url->set_hash(source); |
146 | 8.21k | out_url->set_port(source); |
147 | | |
148 | | // getters |
149 | 8.21k | length += out_url->get_protocol().size(); |
150 | 8.21k | length += out_url->get_username().size(); |
151 | 8.21k | length += out_url->get_password().size(); |
152 | 8.21k | length += out_url->get_hostname().size(); |
153 | 8.21k | length += out_url->get_host().size(); |
154 | 8.21k | length += out_url->get_pathname().size(); |
155 | 8.21k | length += out_url->get_search().size(); |
156 | 8.21k | length += out_url->get_hash().size(); |
157 | 8.21k | length += out_url->get_origin().size(); |
158 | 8.21k | length += out_url->get_port().size(); |
159 | | |
160 | 8.21k | length += out_url->to_string().size(); |
161 | 8.21k | } |
162 | | |
163 | | /** |
164 | | * ada::parse<ada::url_aggregator> |
165 | | */ |
166 | 8.21k | auto out_aggregator = |
167 | 8.21k | ada::parse<ada::url_aggregator>("https://www.ada-url.com"); |
168 | | |
169 | 8.21k | if (out_aggregator) { |
170 | 8.21k | out_aggregator->set_protocol(source); |
171 | 8.21k | out_aggregator->set_username(source); |
172 | 8.21k | out_aggregator->set_password(source); |
173 | 8.21k | out_aggregator->set_hostname(source); |
174 | 8.21k | out_aggregator->set_host(source); |
175 | 8.21k | out_aggregator->set_pathname(source); |
176 | 8.21k | out_aggregator->set_search(source); |
177 | 8.21k | out_aggregator->set_hash(source); |
178 | 8.21k | out_aggregator->set_port(source); |
179 | | |
180 | | // getters |
181 | 8.21k | length += out_aggregator->get_protocol().size(); |
182 | 8.21k | length += out_aggregator->get_username().size(); |
183 | 8.21k | length += out_aggregator->get_password().size(); |
184 | 8.21k | length += out_aggregator->get_hostname().size(); |
185 | 8.21k | length += out_aggregator->get_host().size(); |
186 | 8.21k | length += out_aggregator->get_pathname().size(); |
187 | 8.21k | length += out_aggregator->get_search().size(); |
188 | 8.21k | length += out_aggregator->get_hash().size(); |
189 | 8.21k | length += out_aggregator->get_origin().size(); |
190 | 8.21k | length += out_aggregator->get_port().size(); |
191 | | |
192 | 8.21k | length += out_aggregator->to_string().size(); |
193 | | |
194 | 8.21k | volatile bool is_output_valid = false; |
195 | 8.21k | is_output_valid = out_aggregator->validate(); |
196 | | |
197 | | // Printing due to dead-code elimination |
198 | 8.21k | printf("diagram %s\n", out_aggregator->to_diagram().c_str()); |
199 | | |
200 | | // clear methods |
201 | 8.21k | out_aggregator->clear_port(); |
202 | 8.21k | out_aggregator->clear_search(); |
203 | 8.21k | out_aggregator->clear_hash(); |
204 | 8.21k | } |
205 | | |
206 | | /** |
207 | | * Node.js specific |
208 | | */ |
209 | 8.21k | length += ada::href_from_file(source).size(); |
210 | | |
211 | | /** |
212 | | * Others |
213 | | */ |
214 | 8.21k | bool is_valid = ada::checkers::verify_dns_length(source); |
215 | | |
216 | | // Only used for avoiding dead-code elimination |
217 | 8.21k | if (is_valid) { |
218 | 6.30k | printf("dns length is valid\n"); |
219 | 6.30k | } |
220 | | |
221 | | // Only used for avoiding dead-code elimination |
222 | 8.21k | printf("length of url is %zu\n", length); |
223 | | |
224 | 8.21k | return 0; |
225 | 8.21k | } // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |