/src/ada-url/fuzz/parse.cc
Line | Count | Source |
#include <fuzzer/FuzzedDataProvider.h>

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <utility>

#include "ada.cpp"
#include "ada.h"
10 | | |
/**
 * Checks whether a byte buffer is well-formed UTF-8.
 *
 * Rejects truncated sequences, stray continuation bytes, invalid lead bytes,
 * overlong encodings, surrogate code points (U+D800..U+DFFF) and code points
 * above U+10FFFF.
 *
 * @param buf  pointer to the first byte; only dereferenced when len > 0
 * @param len  number of bytes to examine
 * @return true if all `len` bytes form valid UTF-8 (an empty buffer is valid)
 */
bool is_valid_utf8_string(const char* buf, size_t len) {
  const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
  size_t pos = 0;

  while (pos < len) {
    // Fast path: when at least 16 bytes remain, load them as two 64-bit words
    // and skip the whole chunk if no byte has its high bit set (pure ASCII).
    if (len - pos >= 16) {
      uint64_t low_word;
      uint64_t high_word;
      std::memcpy(&low_word, data + pos, sizeof(low_word));
      std::memcpy(&high_word, data + pos + sizeof(low_word), sizeof(high_word));
      if (((low_word | high_word) & 0x8080808080808080ULL) == 0) {
        pos += 16;
        continue;
      }
    }

    // Consume ASCII bytes one at a time until a non-ASCII byte (or the end).
    uint8_t lead = data[pos];
    while (lead < 0x80) {
      if (++pos == len) {
        return true;
      }
      lead = data[pos];
    }

    // Classify the lead byte: total sequence length, the payload bits carried
    // by the lead byte, and the smallest code point that legitimately needs a
    // sequence of that length (anything smaller is an overlong encoding).
    size_t sequence_length = 0;
    uint32_t code_point = 0;
    uint32_t min_code_point = 0;
    if ((lead & 0xE0) == 0xC0) {
      sequence_length = 2;
      code_point = lead & 0x1F;
      min_code_point = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {
      sequence_length = 3;
      code_point = lead & 0x0F;
      min_code_point = 0x800;
    } else if ((lead & 0xF8) == 0xF0) {
      sequence_length = 4;
      code_point = lead & 0x07;
      min_code_point = 0x10000;
    } else {
      // Stray continuation byte or a lead byte that UTF-8 never uses.
      return false;
    }

    // The whole sequence must fit inside the buffer.
    if (len - pos < sequence_length) {
      return false;
    }

    // Every trailing byte must look like 10xxxxxx; fold its 6 payload bits in.
    for (size_t offset = 1; offset < sequence_length; ++offset) {
      const uint8_t trail = data[pos + offset];
      if ((trail & 0xC0) != 0x80) {
        return false;
      }
      code_point = (code_point << 6) | (trail & 0x3F);
    }

    if (code_point < min_code_point) {
      return false;  // overlong encoding
    }
    if (code_point > 0x10FFFF) {
      return false;  // beyond the Unicode range (only reachable with 4 bytes)
    }
    if (code_point >= 0xD800 && code_point <= 0xDFFF) {
      return false;  // surrogates are not valid scalar values
    }

    pos += sequence_length;
  }
  return true;
}
94 | | |
95 | | // Exercise all getters and boolean predicates on ada::url |
96 | 7.01k | static void exercise_url_predicates(const ada::url& u) { |
97 | 7.01k | volatile size_t length = 0; |
98 | 7.01k | length += u.get_href().size(); |
99 | 7.01k | length += u.get_origin().size(); |
100 | 7.01k | length += u.get_protocol().size(); |
101 | 7.01k | length += u.get_username().size(); |
102 | 7.01k | length += u.get_password().size(); |
103 | 7.01k | length += u.get_host().size(); |
104 | 7.01k | length += u.get_hostname().size(); |
105 | 7.01k | length += u.get_pathname().size(); |
106 | 7.01k | length += u.get_search().size(); |
107 | 7.01k | length += u.get_hash().size(); |
108 | 7.01k | length += u.get_port().size(); |
109 | 7.01k | length += u.to_string().size(); |
110 | 7.01k | length += u.get_pathname_length(); |
111 | 7.01k | (void)u.has_valid_domain(); |
112 | 7.01k | (void)u.has_credentials(); |
113 | 7.01k | (void)u.has_empty_hostname(); |
114 | 7.01k | (void)u.has_hostname(); |
115 | 7.01k | (void)u.has_port(); |
116 | 7.01k | (void)u.has_hash(); |
117 | 7.01k | (void)u.has_search(); |
118 | 7.01k | (void)u.get_components(); |
119 | 7.01k | } |
120 | | |
121 | | // Exercise all getters and boolean predicates on ada::url_aggregator |
122 | 19.4k | static void exercise_aggregator_predicates(const ada::url_aggregator& u) { |
123 | 19.4k | volatile size_t length = 0; |
124 | 19.4k | length += u.get_href().size(); |
125 | 19.4k | length += u.get_origin().size(); |
126 | 19.4k | length += u.get_protocol().size(); |
127 | 19.4k | length += u.get_username().size(); |
128 | 19.4k | length += u.get_password().size(); |
129 | 19.4k | length += u.get_host().size(); |
130 | 19.4k | length += u.get_hostname().size(); |
131 | 19.4k | length += u.get_pathname().size(); |
132 | 19.4k | length += u.get_search().size(); |
133 | 19.4k | length += u.get_hash().size(); |
134 | 19.4k | length += u.get_port().size(); |
135 | 19.4k | length += u.to_string().size(); |
136 | 19.4k | length += u.get_pathname_length(); |
137 | 19.4k | (void)u.has_valid_domain(); |
138 | 19.4k | (void)u.has_credentials(); |
139 | 19.4k | (void)u.has_empty_hostname(); |
140 | 19.4k | (void)u.has_hostname(); |
141 | 19.4k | (void)u.has_non_empty_username(); |
142 | 19.4k | (void)u.has_non_empty_password(); |
143 | 19.4k | (void)u.has_password(); |
144 | 19.4k | (void)u.has_port(); |
145 | 19.4k | (void)u.has_hash(); |
146 | 19.4k | (void)u.has_search(); |
147 | 19.4k | (void)u.get_components(); |
148 | 19.4k | volatile bool is_valid = u.validate(); |
149 | 19.4k | (void)is_valid; |
150 | 19.4k | (void)u.to_diagram(); |
151 | 19.4k | } |
152 | | |
153 | 13.8k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { |
154 | 13.8k | FuzzedDataProvider fdp(data, size); |
155 | 13.8k | std::string source = fdp.ConsumeRandomLengthString(256); |
156 | 13.8k | std::string base = fdp.ConsumeRandomLengthString(256); |
157 | | |
158 | | // volatile forces the compiler to store the results without undue |
159 | | // optimizations |
160 | 13.8k | volatile size_t length = 0; |
161 | | |
162 | 13.8k | auto parse_url = ada::parse<ada::url>(source); |
163 | 13.8k | auto parse_url_aggregator = ada::parse<ada::url_aggregator>(source); |
164 | | |
165 | 13.8k | if (is_valid_utf8_string(source.data(), source.length())) { |
166 | 11.1k | if (parse_url.has_value() ^ parse_url_aggregator.has_value()) { |
167 | 0 | printf("Source used to parse: %s", source.c_str()); |
168 | 0 | abort(); |
169 | 0 | } |
170 | 11.1k | } |
171 | | |
172 | 13.8k | if (parse_url) { |
173 | 4.14k | length += parse_url->get_href().size(); |
174 | 4.14k | length += parse_url->get_origin().size(); |
175 | 4.14k | } |
176 | | |
177 | 13.8k | if (parse_url_aggregator) { |
178 | 4.14k | length += parse_url_aggregator->get_href().size(); |
179 | 4.14k | length += parse_url_aggregator->get_origin().size(); |
180 | | |
181 | 4.14k | volatile bool is_parse_url_aggregator_output_valid = false; |
182 | 4.14k | is_parse_url_aggregator_output_valid = parse_url_aggregator->validate(); |
183 | | |
184 | 4.14k | assert(parse_url->get_protocol() == parse_url_aggregator->get_protocol()); |
185 | 4.14k | assert(parse_url->get_href() == parse_url_aggregator->get_href()); |
186 | 4.14k | assert(std::string(parse_url->get_hostname()) == |
187 | 4.14k | std::string(parse_url_aggregator->get_hostname())); |
188 | 4.14k | assert(std::string(parse_url->get_pathname()) == |
189 | 4.14k | std::string(parse_url_aggregator->get_pathname())); |
190 | 4.14k | assert(std::string(parse_url->get_search()) == |
191 | 4.14k | std::string(parse_url_aggregator->get_search())); |
192 | 4.14k | assert(std::string(parse_url->get_hash()) == |
193 | 4.14k | std::string(parse_url_aggregator->get_hash())); |
194 | 4.14k | assert(std::string(parse_url->get_port()) == |
195 | 4.14k | std::string(parse_url_aggregator->get_port())); |
196 | 4.14k | assert(parse_url->get_username() == |
197 | 4.14k | std::string(parse_url_aggregator->get_username())); |
198 | 4.14k | assert(parse_url->get_password() == |
199 | 4.14k | std::string(parse_url_aggregator->get_password())); |
200 | 4.14k | assert(std::string(parse_url->get_host()) == |
201 | 4.14k | std::string(parse_url_aggregator->get_host())); |
202 | | |
203 | | // Exercise all predicates on both types |
204 | 4.14k | exercise_url_predicates(*parse_url); |
205 | 4.14k | exercise_aggregator_predicates(*parse_url_aggregator); |
206 | | |
207 | | // Test set_href consistency |
208 | 4.14k | parse_url->set_href(source); |
209 | 4.14k | parse_url_aggregator->set_href(source); |
210 | 4.14k | assert(parse_url->get_href() == parse_url_aggregator->get_href()); |
211 | 4.14k | } |
212 | | |
213 | | /** |
214 | | * Test copy and move semantics |
215 | | */ |
216 | 13.8k | if (parse_url) { |
217 | | // Copy constructor |
218 | 4.14k | ada::url copied_url = *parse_url; |
219 | 4.14k | assert(copied_url.get_href() == parse_url->get_href()); |
220 | | |
221 | | // Copy assignment |
222 | 4.14k | ada::url assigned_url; |
223 | 4.14k | assigned_url = *parse_url; |
224 | 4.14k | assert(assigned_url.get_href() == parse_url->get_href()); |
225 | | |
226 | | // Move constructor |
227 | 4.14k | ada::url moved_url = std::move(copied_url); |
228 | 4.14k | assert(moved_url.get_href() == parse_url->get_href()); |
229 | 4.14k | } |
230 | | |
231 | 13.8k | if (parse_url_aggregator) { |
232 | | // Copy constructor |
233 | 4.14k | ada::url_aggregator copied_agg = *parse_url_aggregator; |
234 | 4.14k | assert(std::string(copied_agg.get_href()) == |
235 | 4.14k | std::string(parse_url_aggregator->get_href())); |
236 | | |
237 | | // Copy assignment |
238 | 4.14k | ada::url_aggregator assigned_agg; |
239 | 4.14k | assigned_agg = *parse_url_aggregator; |
240 | 4.14k | assert(std::string(assigned_agg.get_href()) == |
241 | 4.14k | std::string(parse_url_aggregator->get_href())); |
242 | | |
243 | | // Move constructor |
244 | 4.14k | ada::url_aggregator moved_agg = std::move(copied_agg); |
245 | 4.14k | assert(std::string(moved_agg.get_href()) == |
246 | 4.14k | std::string(parse_url_aggregator->get_href())); |
247 | | |
248 | | // Move assignment |
249 | 4.14k | ada::url_aggregator move_assigned_agg; |
250 | 4.14k | move_assigned_agg = std::move(assigned_agg); |
251 | 4.14k | assert(std::string(move_assigned_agg.get_href()) == |
252 | 4.14k | std::string(parse_url_aggregator->get_href())); |
253 | 4.14k | } |
254 | | |
255 | | /** |
256 | | * ada::parse<ada::url> |
257 | | */ |
258 | 13.8k | auto out_url = ada::parse<ada::url>("https://www.ada-url.com"); |
259 | | |
260 | 13.8k | if (out_url) { |
261 | 13.8k | out_url->set_protocol(source); |
262 | 13.8k | out_url->set_username(source); |
263 | 13.8k | out_url->set_password(source); |
264 | 13.8k | out_url->set_hostname(source); |
265 | 13.8k | out_url->set_host(source); |
266 | 13.8k | out_url->set_pathname(source); |
267 | 13.8k | out_url->set_search(source); |
268 | 13.8k | out_url->set_hash(source); |
269 | 13.8k | out_url->set_port(source); |
270 | | |
271 | | // getters |
272 | 13.8k | length += out_url->get_protocol().size(); |
273 | 13.8k | length += out_url->get_username().size(); |
274 | 13.8k | length += out_url->get_password().size(); |
275 | 13.8k | length += out_url->get_hostname().size(); |
276 | 13.8k | length += out_url->get_host().size(); |
277 | 13.8k | length += out_url->get_pathname().size(); |
278 | 13.8k | length += out_url->get_search().size(); |
279 | 13.8k | length += out_url->get_hash().size(); |
280 | 13.8k | length += out_url->get_origin().size(); |
281 | 13.8k | length += out_url->get_port().size(); |
282 | 13.8k | length += out_url->get_pathname_length(); |
283 | | |
284 | 13.8k | length += out_url->to_string().size(); |
285 | | |
286 | | // boolean predicates after setters |
287 | 13.8k | (void)out_url->has_valid_domain(); |
288 | 13.8k | (void)out_url->has_credentials(); |
289 | 13.8k | (void)out_url->has_empty_hostname(); |
290 | 13.8k | (void)out_url->has_hostname(); |
291 | 13.8k | (void)out_url->has_port(); |
292 | 13.8k | (void)out_url->has_hash(); |
293 | 13.8k | (void)out_url->has_search(); |
294 | 13.8k | (void)out_url->get_components(); |
295 | 13.8k | } |
296 | | |
297 | | /** |
298 | | * ada::parse<ada::url_aggregator> |
299 | | */ |
300 | 13.8k | auto out_aggregator = |
301 | 13.8k | ada::parse<ada::url_aggregator>("https://www.ada-url.com"); |
302 | | |
303 | 13.8k | if (out_aggregator) { |
304 | 13.8k | out_aggregator->set_protocol(source); |
305 | 13.8k | out_aggregator->set_username(source); |
306 | 13.8k | out_aggregator->set_password(source); |
307 | 13.8k | out_aggregator->set_hostname(source); |
308 | 13.8k | out_aggregator->set_host(source); |
309 | 13.8k | out_aggregator->set_pathname(source); |
310 | 13.8k | out_aggregator->set_search(source); |
311 | 13.8k | out_aggregator->set_hash(source); |
312 | 13.8k | out_aggregator->set_port(source); |
313 | | |
314 | | // getters |
315 | 13.8k | length += out_aggregator->get_protocol().size(); |
316 | 13.8k | length += out_aggregator->get_username().size(); |
317 | 13.8k | length += out_aggregator->get_password().size(); |
318 | 13.8k | length += out_aggregator->get_hostname().size(); |
319 | 13.8k | length += out_aggregator->get_host().size(); |
320 | 13.8k | length += out_aggregator->get_pathname().size(); |
321 | 13.8k | length += out_aggregator->get_search().size(); |
322 | 13.8k | length += out_aggregator->get_hash().size(); |
323 | 13.8k | length += out_aggregator->get_origin().size(); |
324 | 13.8k | length += out_aggregator->get_port().size(); |
325 | 13.8k | length += out_aggregator->get_pathname_length(); |
326 | | |
327 | 13.8k | length += out_aggregator->to_string().size(); |
328 | | |
329 | 13.8k | volatile bool is_output_valid = false; |
330 | 13.8k | is_output_valid = out_aggregator->validate(); |
331 | | |
332 | 13.8k | (void)out_aggregator->to_diagram(); |
333 | | |
334 | | // boolean predicates after setters |
335 | 13.8k | (void)out_aggregator->has_valid_domain(); |
336 | 13.8k | (void)out_aggregator->has_credentials(); |
337 | 13.8k | (void)out_aggregator->has_empty_hostname(); |
338 | 13.8k | (void)out_aggregator->has_hostname(); |
339 | 13.8k | (void)out_aggregator->has_non_empty_username(); |
340 | 13.8k | (void)out_aggregator->has_non_empty_password(); |
341 | 13.8k | (void)out_aggregator->has_password(); |
342 | 13.8k | (void)out_aggregator->has_port(); |
343 | 13.8k | (void)out_aggregator->has_hash(); |
344 | 13.8k | (void)out_aggregator->has_search(); |
345 | 13.8k | (void)out_aggregator->get_components(); |
346 | | |
347 | | // clear methods + postcondition assertions |
348 | 13.8k | out_aggregator->clear_port(); |
349 | 13.8k | if (out_aggregator->has_port()) { |
350 | 0 | printf("clear_port() did not clear has_port()\n"); |
351 | 0 | abort(); |
352 | 0 | } |
353 | 13.8k | if (!out_aggregator->get_port().empty()) { |
354 | 0 | printf("clear_port() left non-empty get_port()\n"); |
355 | 0 | abort(); |
356 | 0 | } |
357 | | |
358 | 13.8k | out_aggregator->clear_search(); |
359 | 13.8k | if (out_aggregator->has_search()) { |
360 | 0 | printf("clear_search() did not clear has_search()\n"); |
361 | 0 | abort(); |
362 | 0 | } |
363 | 13.8k | if (!out_aggregator->get_search().empty()) { |
364 | 0 | printf("clear_search() left non-empty get_search()\n"); |
365 | 0 | abort(); |
366 | 0 | } |
367 | | |
368 | 13.8k | out_aggregator->clear_hash(); |
369 | 13.8k | if (out_aggregator->has_hash()) { |
370 | 0 | printf("clear_hash() did not clear has_hash()\n"); |
371 | 0 | abort(); |
372 | 0 | } |
373 | 13.8k | if (!out_aggregator->get_hash().empty()) { |
374 | 0 | printf("clear_hash() left non-empty get_hash()\n"); |
375 | 0 | abort(); |
376 | 0 | } |
377 | 13.8k | } |
378 | | |
379 | | /** |
380 | | * Relative URL parsing with base (tests the base URL resolution code path) |
381 | | */ |
382 | 13.8k | auto base_url = ada::parse<ada::url>(base); |
383 | 13.8k | auto base_agg = ada::parse<ada::url_aggregator>(base); |
384 | | |
385 | 13.8k | if (base_url) { |
386 | 3.33k | auto result = ada::parse<ada::url>(source, &*base_url); |
387 | 3.33k | if (result) { |
388 | 2.86k | length += result->get_href().size(); |
389 | 2.86k | length += result->get_origin().size(); |
390 | 2.86k | exercise_url_predicates(*result); |
391 | 2.86k | } |
392 | 3.33k | } |
393 | | |
394 | 13.8k | if (base_agg) { |
395 | 3.33k | auto result = ada::parse<ada::url_aggregator>(source, &*base_agg); |
396 | 3.33k | if (result) { |
397 | 2.86k | length += result->get_href().size(); |
398 | 2.86k | length += result->get_origin().size(); |
399 | 2.86k | exercise_aggregator_predicates(*result); |
400 | 2.86k | } |
401 | 3.33k | } |
402 | | |
403 | | // Cross-type consistency: relative URL parsing with a base should agree |
404 | | // between url and url_aggregator representations for valid UTF-8 inputs. |
405 | 13.8k | if (is_valid_utf8_string(source.data(), source.length()) && |
406 | 11.1k | is_valid_utf8_string(base.data(), base.length()) && base_url && |
407 | 2.37k | base_agg) { |
408 | 2.37k | auto res_url = ada::parse<ada::url>(source, &*base_url); |
409 | 2.37k | auto res_agg = ada::parse<ada::url_aggregator>(source, &*base_agg); |
410 | 2.37k | if (res_url.has_value() ^ res_agg.has_value()) { |
411 | 0 | printf("Relative parse inconsistency for source=%s base=%s\n", |
412 | 0 | source.c_str(), base.c_str()); |
413 | 0 | abort(); |
414 | 0 | } |
415 | 2.37k | if (res_url && res_agg) { |
416 | 1.94k | if (res_url->get_href() != std::string(res_agg->get_href())) { |
417 | 0 | printf("Relative parse href mismatch for source=%s base=%s\n", |
418 | 0 | source.c_str(), base.c_str()); |
419 | 0 | abort(); |
420 | 0 | } |
421 | 1.94k | } |
422 | 2.37k | } |
423 | | |
424 | | /** |
425 | | * Chained relative URL resolution: parse source against base, then use the |
426 | | * result as the base for a second parse. Exercises multi-level inheritance. |
427 | | */ |
428 | 13.8k | if (base_agg) { |
429 | 3.33k | auto level1 = ada::parse<ada::url_aggregator>(source, &*base_agg); |
430 | 3.33k | if (level1) { |
431 | 2.86k | std::string input2 = fdp.ConsumeRandomLengthString(128); |
432 | 2.86k | auto level2 = ada::parse<ada::url_aggregator>(input2, &*level1); |
433 | 2.86k | if (level2) { |
434 | 2.33k | length += level2->get_href().size(); |
435 | 2.33k | volatile bool v = level2->validate(); |
436 | 2.33k | (void)v; |
437 | 2.33k | } |
438 | 2.86k | } |
439 | 3.33k | } |
440 | | |
441 | | /** |
442 | | * Known-good base URL with fuzzed relative input. Using a fixed valid base |
443 | | * lets the fuzzer focus entropy entirely on the relative-input code paths |
444 | | * (path resolution, query/fragment inheritance, scheme-relative URLs, etc.) |
445 | | */ |
446 | 13.8k | { |
447 | 13.8k | auto known_base = |
448 | 13.8k | ada::parse<ada::url_aggregator>("https://example.com/a/b/c?query#hash"); |
449 | 13.8k | if (known_base) { |
450 | 13.8k | auto result = ada::parse<ada::url_aggregator>(source, &*known_base); |
451 | 13.8k | if (result) { |
452 | 12.4k | length += result->get_href().size(); |
453 | 12.4k | exercise_aggregator_predicates(*result); |
454 | 12.4k | } |
455 | 13.8k | } |
456 | 13.8k | } |
457 | | |
458 | | /** |
459 | | * Node.js specific |
460 | | */ |
461 | 13.8k | length += ada::href_from_file(source).size(); |
462 | | |
463 | | /** |
464 | | * Others |
465 | | */ |
466 | 13.8k | bool is_valid = ada::checkers::verify_dns_length(source); |
467 | | |
468 | 13.8k | (void)is_valid; |
469 | | |
470 | | /** |
471 | | * Sequential setter interactions with FDP-controlled ordering. |
472 | | * |
473 | | * The existing code calls every setter with the same `source` value in a |
474 | | * fixed order. Here we let the fuzzer choose an arbitrary sequence of |
475 | | * setter/value pairs, checking that url and url_aggregator stay in sync |
476 | | * after every step. This exercises setter-interaction state bugs that |
477 | | * fixed-order testing would miss. |
478 | | */ |
479 | 13.8k | { |
480 | 13.8k | auto url_seq = ada::parse<ada::url>( |
481 | 13.8k | "https://user:pass@example.com:8080/path?query=1#hash"); |
482 | 13.8k | auto agg_seq = ada::parse<ada::url_aggregator>( |
483 | 13.8k | "https://user:pass@example.com:8080/path?query=1#hash"); |
484 | 13.8k | if (url_seq && agg_seq) { |
485 | 13.8k | int steps = fdp.ConsumeIntegralInRange(1, 8); |
486 | 46.0k | for (int i = 0; i < steps; ++i) { |
487 | 32.2k | std::string val = fdp.ConsumeRandomLengthString(64); |
488 | 32.2k | int which = fdp.ConsumeIntegralInRange(0, 8); |
489 | 32.2k | switch (which) { |
490 | 19.8k | case 0: |
491 | 19.8k | url_seq->set_protocol(val); |
492 | 19.8k | agg_seq->set_protocol(val); |
493 | 19.8k | break; |
494 | 1.42k | case 1: |
495 | 1.42k | url_seq->set_username(val); |
496 | 1.42k | agg_seq->set_username(val); |
497 | 1.42k | break; |
498 | 1.51k | case 2: |
499 | 1.51k | url_seq->set_password(val); |
500 | 1.51k | agg_seq->set_password(val); |
501 | 1.51k | break; |
502 | 2.45k | case 3: |
503 | 2.45k | url_seq->set_hostname(val); |
504 | 2.45k | agg_seq->set_hostname(val); |
505 | 2.45k | break; |
506 | 2.95k | case 4: |
507 | 2.95k | url_seq->set_host(val); |
508 | 2.95k | agg_seq->set_host(val); |
509 | 2.95k | break; |
510 | 1.71k | case 5: |
511 | 1.71k | url_seq->set_pathname(val); |
512 | 1.71k | agg_seq->set_pathname(val); |
513 | 1.71k | break; |
514 | 623 | case 6: |
515 | 623 | url_seq->set_search(val); |
516 | 623 | agg_seq->set_search(val); |
517 | 623 | break; |
518 | 445 | case 7: |
519 | 445 | url_seq->set_hash(val); |
520 | 445 | agg_seq->set_hash(val); |
521 | 445 | break; |
522 | 1.25k | case 8: |
523 | 1.25k | url_seq->set_port(val); |
524 | 1.25k | agg_seq->set_port(val); |
525 | 1.25k | break; |
526 | 32.2k | } |
527 | | // After every setter both representations must agree on href. |
528 | 32.2k | if (url_seq->get_href() != std::string(agg_seq->get_href())) { |
529 | 0 | printf( |
530 | 0 | "Sequential setter href mismatch after setter=%d val='%s'\n" |
531 | 0 | " url: %s\n agg: %s\n", |
532 | 0 | which, val.c_str(), url_seq->get_href().c_str(), |
533 | 0 | std::string(agg_seq->get_href()).c_str()); |
534 | 0 | abort(); |
535 | 0 | } |
536 | | // url_aggregator internal invariant must still hold. |
537 | 32.2k | volatile bool v = agg_seq->validate(); |
538 | 32.2k | (void)v; |
539 | 32.2k | } |
540 | 13.8k | } |
541 | 13.8k | } |
542 | | |
543 | | /** |
544 | | * Re-parse idempotency. |
545 | | * |
546 | | * If parse(source) succeeds, then parse(href) must also succeed and |
547 | | * produce the same href. Serialization and parsing must be consistent: |
548 | | * a normalized URL is always its own fixed point. |
549 | | */ |
550 | 13.8k | if (parse_url_aggregator) { |
551 | 4.14k | std::string href1 = std::string(parse_url_aggregator->get_href()); |
552 | 4.14k | auto reparsed = ada::parse<ada::url_aggregator>(href1); |
553 | 4.14k | if (!reparsed) { |
554 | 0 | printf("Re-parse of href failed unexpectedly: '%s'\n", href1.c_str()); |
555 | 0 | abort(); |
556 | 0 | } |
557 | 4.14k | std::string href2 = std::string(reparsed->get_href()); |
558 | 4.14k | if (href1 != href2) { |
559 | 0 | printf( |
560 | 0 | "Re-parse idempotency failure!\n" |
561 | 0 | " href1: %s\n href2: %s\n", |
562 | 0 | href1.c_str(), href2.c_str()); |
563 | 0 | abort(); |
564 | 0 | } |
565 | 4.14k | } |
566 | | |
567 | | /** |
568 | | * URL search params round-trip via URL integration. |
569 | | * |
570 | | * Construct a URL whose query is the fuzz source, extract the search |
571 | | * component as a url_search_params, mutate it, serialise it back, and |
572 | | * set it on the URL. Exercises the interaction between URL objects and |
573 | | * url_search_params and verifies that the combined pipeline doesn't crash. |
574 | | * |
575 | | * Also verifies the url_search_params serialisation idempotency property: |
576 | | * url_search_params(sp.to_string()).to_string() == sp.to_string() |
577 | | */ |
578 | 13.8k | { |
579 | 13.8k | std::string search_url = "https://example.com/?" + source; |
580 | 13.8k | auto url_with_search = ada::parse<ada::url_aggregator>(search_url); |
581 | 13.8k | if (url_with_search) { |
582 | | // Extract the search string (may include leading '?'). |
583 | 13.8k | std::string search_raw = std::string(url_with_search->get_search()); |
584 | 13.8k | std::string_view search_view = search_raw; |
585 | 13.8k | if (!search_view.empty() && search_view[0] == '?') { |
586 | 12.1k | search_view = search_view.substr(1); |
587 | 12.1k | } |
588 | | |
589 | 13.8k | ada::url_search_params sp(search_view); |
590 | | |
591 | | // Mutate with additional entries from the fuzz corpus. |
592 | 13.8k | sp.append(source, base); |
593 | | |
594 | 13.8k | std::string serialized = sp.to_string(); |
595 | | |
596 | | // Idempotency: re-parsing the serialised form must yield the same string. |
597 | 13.8k | ada::url_search_params sp2(serialized); |
598 | 13.8k | std::string serialized2 = sp2.to_string(); |
599 | 13.8k | if (serialized2 != serialized) { |
600 | 0 | printf( |
601 | 0 | "url_search_params serialisation not idempotent!\n" |
602 | 0 | " first: %s\n second: %s\n", |
603 | 0 | serialized.c_str(), serialized2.c_str()); |
604 | 0 | abort(); |
605 | 0 | } |
606 | | |
607 | | // Set the serialised params back on the URL. |
608 | 13.8k | url_with_search->set_search(serialized); |
609 | 13.8k | volatile bool v = url_with_search->validate(); |
610 | 13.8k | (void)v; |
611 | 13.8k | } |
612 | 13.8k | } |
613 | | |
614 | 13.8k | return 0; |
615 | 13.8k | } // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |