/src/ada-url/fuzz/parse.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <fuzzer/FuzzedDataProvider.h> |
2 | | |
3 | | #include <cstdio> |
4 | | #include <memory> |
5 | | #include <string> |
6 | | |
7 | | #include "ada.cpp" |
8 | | #include "ada.h" |
9 | | |
/**
 * Reports whether the `len` bytes starting at `buf` are well-formed UTF-8.
 *
 * Rejects stray continuation bytes, lead bytes of the form 11111xxx,
 * truncated sequences, overlong encodings, UTF-16 surrogates
 * (U+D800..U+DFFF) and code points above U+10FFFF. An empty range is
 * considered valid and `buf` is not read in that case.
 *
 * @param buf pointer to the first byte to inspect
 * @param len number of bytes to inspect
 * @return true when the entire range is valid UTF-8, false otherwise
 */
bool is_valid_utf8_string(const char *buf, size_t len) {
  const auto *bytes = reinterpret_cast<const uint8_t *>(buf);
  // A continuation byte has the bit pattern 10xxxxxx.
  const auto is_continuation = [](uint8_t b) { return (b & 0xC0) == 0x80; };

  uint64_t idx = 0;
  while (idx < len) {
    // Fast path: when 16 more bytes are readable, test them all at once.
    // If none has its top bit set they are all ASCII and can be skipped.
    if (idx + 16 <= len) {
      uint64_t low_half;
      uint64_t high_half;
      std::memcpy(&low_half, bytes + idx, sizeof(low_half));
      std::memcpy(&high_half, bytes + idx + sizeof(low_half),
                  sizeof(high_half));
      if (((low_half | high_half) & 0x8080808080808080) == 0) {
        idx += 16;
        continue;
      }
    }

    // Walk over ASCII bytes one at a time until a non-ASCII byte shows up
    // or the input is exhausted.
    uint8_t lead = bytes[idx];
    while (lead < 0x80) {
      ++idx;
      if (idx == len) {
        return true;
      }
      lead = bytes[idx];
    }

    // Classify the lead byte: total sequence length, the payload bits it
    // carries, and the smallest code point that legitimately needs this
    // many bytes (anything below that is an overlong encoding).
    uint64_t seq_len = 0;
    uint32_t code_point = 0;
    uint32_t smallest_allowed = 0;
    if ((lead & 0xE0) == 0xC0) {
      seq_len = 2;
      code_point = lead & 0x1F;
      smallest_allowed = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {
      seq_len = 3;
      code_point = lead & 0x0F;
      smallest_allowed = 0x800;
    } else if ((lead & 0xF8) == 0xF0) {
      seq_len = 4;
      code_point = lead & 0x07;
      smallest_allowed = 0x10000;
    } else {
      // Stray continuation byte or a lead byte of the form 11111xxx.
      return false;
    }

    // The sequence must fit in the remaining input before any of its
    // continuation bytes is read.
    if (idx + seq_len > len) {
      return false;
    }
    for (uint64_t k = 1; k < seq_len; ++k) {
      const uint8_t next = bytes[idx + k];
      if (!is_continuation(next)) {
        return false;
      }
      code_point = (code_point << 6) | (next & 0x3F);
    }

    const bool overlong = code_point < smallest_allowed;
    const bool out_of_range = code_point > 0x10FFFF;
    const bool surrogate = code_point >= 0xD800 && code_point <= 0xDFFF;
    if (overlong || out_of_range || surrogate) {
      return false;
    }

    idx += seq_len;
  }
  return true;
}
93 | | |
94 | 8.79k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |
95 | 8.79k | FuzzedDataProvider fdp(data, size); |
96 | 8.79k | std::string source = fdp.ConsumeRandomLengthString(256); |
97 | | |
98 | | // volatile forces the compiler to store the results without undue |
99 | | // optimizations |
100 | 8.79k | volatile size_t length = 0; |
101 | | |
102 | 8.79k | auto parse_url = ada::parse<ada::url>(source); |
103 | 8.79k | auto parse_url_aggregator = ada::parse<ada::url_aggregator>(source); |
104 | | |
105 | 8.79k | if (is_valid_utf8_string(source.data(), source.length())) { |
106 | 6.56k | if (parse_url.has_value() ^ parse_url_aggregator.has_value()) { |
107 | 0 | printf("Source used to parse: %s", source.c_str()); |
108 | 0 | abort(); |
109 | 0 | } |
110 | 6.56k | } |
111 | | |
112 | 8.79k | if (parse_url) { |
113 | 3.05k | length += parse_url->get_href().size(); |
114 | 3.05k | length += parse_url->get_origin().size(); |
115 | 3.05k | } |
116 | | |
117 | 8.79k | if (parse_url_aggregator) { |
118 | 2.90k | length += parse_url_aggregator->get_href().size(); |
119 | 2.90k | length += parse_url_aggregator->get_origin().size(); |
120 | | |
121 | 2.90k | volatile bool is_parse_url_aggregator_output_valid = false; |
122 | 2.90k | is_parse_url_aggregator_output_valid = parse_url_aggregator->validate(); |
123 | | |
124 | 2.90k | assert(parse_url->get_protocol() == parse_url_aggregator->get_protocol()); |
125 | 2.90k | assert(parse_url->get_href() == parse_url_aggregator->get_href()); |
126 | | |
127 | 2.90k | parse_url->set_href(source); |
128 | 2.90k | parse_url_aggregator->set_href(source); |
129 | 2.90k | assert(parse_url->get_href() == parse_url_aggregator->get_href()); |
130 | 2.90k | } |
131 | | |
132 | | /** |
133 | | * ada::parse<ada::url> |
134 | | */ |
135 | 8.79k | auto out_url = ada::parse<ada::url>("https://www.ada-url.com"); |
136 | | |
137 | 8.79k | if (out_url) { |
138 | 8.79k | out_url->set_protocol(source); |
139 | 8.79k | out_url->set_username(source); |
140 | 8.79k | out_url->set_password(source); |
141 | 8.79k | out_url->set_hostname(source); |
142 | 8.79k | out_url->set_host(source); |
143 | 8.79k | out_url->set_pathname(source); |
144 | 8.79k | out_url->set_search(source); |
145 | 8.79k | out_url->set_hash(source); |
146 | 8.79k | out_url->set_port(source); |
147 | | |
148 | | // getters |
149 | 8.79k | length += out_url->get_protocol().size(); |
150 | 8.79k | length += out_url->get_username().size(); |
151 | 8.79k | length += out_url->get_password().size(); |
152 | 8.79k | length += out_url->get_hostname().size(); |
153 | 8.79k | length += out_url->get_host().size(); |
154 | 8.79k | length += out_url->get_pathname().size(); |
155 | 8.79k | length += out_url->get_search().size(); |
156 | 8.79k | length += out_url->get_hash().size(); |
157 | 8.79k | length += out_url->get_origin().size(); |
158 | 8.79k | length += out_url->get_port().size(); |
159 | | |
160 | 8.79k | length += out_url->to_string().size(); |
161 | 8.79k | } |
162 | | |
163 | | /** |
164 | | * ada::parse<ada::url_aggregator> |
165 | | */ |
166 | 8.79k | auto out_aggregator = |
167 | 8.79k | ada::parse<ada::url_aggregator>("https://www.ada-url.com"); |
168 | | |
169 | 8.79k | if (out_aggregator) { |
170 | 8.79k | out_aggregator->set_protocol(source); |
171 | 8.79k | out_aggregator->set_username(source); |
172 | 8.79k | out_aggregator->set_password(source); |
173 | 8.79k | out_aggregator->set_hostname(source); |
174 | 8.79k | out_aggregator->set_host(source); |
175 | 8.79k | out_aggregator->set_pathname(source); |
176 | 8.79k | out_aggregator->set_search(source); |
177 | 8.79k | out_aggregator->set_hash(source); |
178 | 8.79k | out_aggregator->set_port(source); |
179 | | |
180 | | // getters |
181 | 8.79k | length += out_aggregator->get_protocol().size(); |
182 | 8.79k | length += out_aggregator->get_username().size(); |
183 | 8.79k | length += out_aggregator->get_password().size(); |
184 | 8.79k | length += out_aggregator->get_hostname().size(); |
185 | 8.79k | length += out_aggregator->get_host().size(); |
186 | 8.79k | length += out_aggregator->get_pathname().size(); |
187 | 8.79k | length += out_aggregator->get_search().size(); |
188 | 8.79k | length += out_aggregator->get_hash().size(); |
189 | 8.79k | length += out_aggregator->get_origin().size(); |
190 | 8.79k | length += out_aggregator->get_port().size(); |
191 | | |
192 | 8.79k | length += out_aggregator->to_string().size(); |
193 | | |
194 | 8.79k | volatile bool is_output_valid = false; |
195 | 8.79k | is_output_valid = out_aggregator->validate(); |
196 | | |
197 | | // Printing due to dead-code elimination |
198 | 8.79k | printf("diagram %s\n", out_aggregator->to_diagram().c_str()); |
199 | | |
200 | | // clear methods |
201 | 8.79k | out_aggregator->clear_port(); |
202 | 8.79k | out_aggregator->clear_search(); |
203 | 8.79k | out_aggregator->clear_hash(); |
204 | 8.79k | } |
205 | | |
206 | | /** |
207 | | * Node.js specific |
208 | | */ |
209 | 8.79k | length += ada::href_from_file(source).size(); |
210 | | |
211 | | /** |
212 | | * Others |
213 | | */ |
214 | 8.79k | bool is_valid = ada::checkers::verify_dns_length(source); |
215 | | |
216 | | // Only used for avoiding dead-code elimination |
217 | 8.79k | if (is_valid) { |
218 | 6.85k | printf("dns length is valid\n"); |
219 | 6.85k | } |
220 | | |
221 | | // Only used for avoiding dead-code elimination |
222 | 8.79k | printf("length of url is %zu\n", length); |
223 | | |
224 | 8.79k | return 0; |
225 | 8.79k | } // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |