/src/expat/expat/fuzz/xml_lpm_fuzzer.cpp
Line | Count | Source |
1 | | /* |
2 | | __ __ _ |
3 | | ___\ \/ /_ __ __ _| |_ |
4 | | / _ \\ /| '_ \ / _` | __| |
5 | | | __// \| |_) | (_| | |_ |
6 | | \___/_/\_\ .__/ \__,_|\__| |
7 | | |_| XML parser |
8 | | |
9 | | Copyright (c) 2022 Mark Brand <markbrand@google.com> |
10 | | Copyright (c) 2025 Sebastian Pipping <sebastian@pipping.org> |
11 | | Licensed under the MIT license: |
12 | | |
13 | | Permission is hereby granted, free of charge, to any person obtaining |
14 | | a copy of this software and associated documentation files (the |
15 | | "Software"), to deal in the Software without restriction, including |
16 | | without limitation the rights to use, copy, modify, merge, publish, |
17 | | distribute, sublicense, and/or sell copies of the Software, and to permit |
18 | | persons to whom the Software is furnished to do so, subject to the |
19 | | following conditions: |
20 | | |
21 | | The above copyright notice and this permission notice shall be included |
22 | | in all copies or substantial portions of the Software. |
23 | | |
24 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
25 | | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
26 | | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
27 | | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
28 | | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
29 | | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
30 | | USE OR OTHER DEALINGS IN THE SOFTWARE. |
31 | | */ |
32 | | |
33 | | #if defined(NDEBUG) |
34 | | # undef NDEBUG // because checks below rely on assert(...) |
35 | | #endif |
36 | | |
37 | | #include <assert.h> |
38 | | #include <stdint.h> |
39 | | #include <vector> |
40 | | |
41 | | #include "expat.h" |
42 | | #include "xml_lpm_fuzzer.pb.h" |
43 | | #include "src/libfuzzer/libfuzzer_macro.h" |
44 | | |
// Encoding name passed to Expat whenever a parser is created or reset. It is
// chosen by SetEncoding; a null pointer means that no encoding is requested.
static const char *g_encoding{nullptr};
// Data (and its length in bytes) that ExternalEntityRefHandler feeds to the
// external entity parser; null while no external entity has been supplied.
static const char *g_external_entity{nullptr};
static size_t g_external_entity_size{0};
48 | | |
49 | | void |
50 | 28.6k | SetEncoding(const xml_lpm_fuzzer::Encoding &e) { |
51 | 28.6k | switch (e) { |
52 | 14.4k | case xml_lpm_fuzzer::Encoding::UTF8: |
53 | 14.4k | g_encoding = "UTF-8"; |
54 | 14.4k | break; |
55 | | |
56 | 382 | case xml_lpm_fuzzer::Encoding::UTF16: |
57 | 382 | g_encoding = "UTF-16"; |
58 | 382 | break; |
59 | | |
60 | 1.04k | case xml_lpm_fuzzer::Encoding::ISO88591: |
61 | 1.04k | g_encoding = "ISO-8859-1"; |
62 | 1.04k | break; |
63 | | |
64 | 286 | case xml_lpm_fuzzer::Encoding::ASCII: |
65 | 286 | g_encoding = "US-ASCII"; |
66 | 286 | break; |
67 | | |
68 | 12.4k | case xml_lpm_fuzzer::Encoding::NONE: |
69 | 12.4k | g_encoding = NULL; |
70 | 12.4k | break; |
71 | | |
72 | 24 | default: |
73 | 24 | g_encoding = "UNKNOWN"; |
74 | 24 | break; |
75 | 28.6k | } |
76 | 28.6k | } |
77 | | |
// Running count of the requests that reached MallocHook and ReallocHook.
static int g_allocation_count{0};
// Request numbers (the first request is number 1) that the hooks answer with
// a null pointer instead of performing the allocation.
static std::vector<int> g_fail_allocations;
80 | | |
81 | | void * |
82 | 3.14M | MallocHook(size_t size) { |
83 | 3.14M | g_allocation_count += 1; |
84 | 3.14M | for (auto index : g_fail_allocations) { |
85 | 910k | if (index == g_allocation_count) { |
86 | 860 | return NULL; |
87 | 860 | } |
88 | 910k | } |
89 | 3.13M | return malloc(size); |
90 | 3.14M | } |
91 | | |
92 | | void * |
93 | 16.9k | ReallocHook(void *ptr, size_t size) { |
94 | 16.9k | g_allocation_count += 1; |
95 | 16.9k | for (auto index : g_fail_allocations) { |
96 | 9.04k | if (index == g_allocation_count) { |
97 | 243 | return NULL; |
98 | 243 | } |
99 | 9.04k | } |
100 | 16.7k | return realloc(ptr, size); |
101 | 16.9k | } |
102 | | |
// free replacement installed through memory_handling_suite; releases ptr with
// free and does no bookkeeping of its own.
void
FreeHook(void *ptr) {
  free(ptr);
}
107 | | |
108 | | XML_Memory_Handling_Suite memory_handling_suite |
109 | | = {MallocHook, ReallocHook, FreeHook}; |
110 | | |
111 | | void InitializeParser(XML_Parser parser); |
112 | | |
113 | | // We want a parse function that supports resumption, so that we can cover the |
114 | | // suspend/resume code. |
115 | | enum XML_Status |
116 | 486k | Parse(XML_Parser parser, const char *input, int input_len, int is_final) { |
117 | 486k | enum XML_Status status = XML_Parse(parser, input, input_len, is_final); |
118 | 489k | while (status == XML_STATUS_SUSPENDED) { |
119 | 3.30k | status = XML_ResumeParser(parser); |
120 | 3.30k | } |
121 | 486k | return status; |
122 | 486k | } |
123 | | |
124 | | // When the fuzzer is compiled with instrumentation such as ASan, then the |
125 | | // accesses in TouchString will fault if they access invalid memory (ie. detect |
126 | | // either a use-after-free or buffer-overflow). By calling TouchString in each |
127 | | // of the callbacks, we can check that the arguments meet the API specifications |
128 | | // in terms of length/null-termination. no_optimize is used to ensure that the |
129 | | // compiler has to emit actual memory reads, instead of removing them. |
130 | | static volatile size_t no_optimize = 0; |
131 | | static void |
132 | 2.83M | TouchString(const XML_Char *ptr, int len = -1) { |
133 | 2.83M | if (! ptr) { |
134 | 486k | return; |
135 | 486k | } |
136 | | |
137 | 2.35M | if (len == -1) { |
138 | 31.1M | for (XML_Char value = *ptr++; value; value = *ptr++) { |
139 | 29.7M | no_optimize += value; |
140 | 29.7M | } |
141 | 1.38M | } else { |
142 | 30.0M | for (int i = 0; i < len; ++i) { |
143 | 29.0M | no_optimize += ptr[i]; |
144 | 29.0M | } |
145 | 964k | } |
146 | 2.35M | } |
147 | | |
148 | | static void |
149 | 7.92k | TouchNodeAndRecurse(XML_Content *content) { |
150 | 7.92k | switch (content->type) { |
151 | 70 | case XML_CTYPE_EMPTY: |
152 | 364 | case XML_CTYPE_ANY: |
153 | 364 | assert(content->quant == XML_CQUANT_NONE); |
154 | 364 | assert(content->name == NULL); |
155 | 364 | assert(content->numchildren == 0); |
156 | 364 | assert(content->children == NULL); |
157 | 364 | break; |
158 | | |
159 | 364 | case XML_CTYPE_MIXED: |
160 | 236 | assert(content->quant == XML_CQUANT_NONE |
161 | 236 | || content->quant == XML_CQUANT_REP); |
162 | 236 | assert(content->name == NULL); |
163 | 959 | for (unsigned int i = 0; i < content->numchildren; ++i) { |
164 | 723 | assert(content->children[i].type == XML_CTYPE_NAME); |
165 | 723 | assert(content->children[i].quant == XML_CQUANT_NONE); |
166 | 723 | assert(content->children[i].numchildren == 0); |
167 | 723 | assert(content->children[i].children == NULL); |
168 | 723 | TouchString(content->children[i].name); |
169 | 723 | } |
170 | 236 | break; |
171 | | |
172 | 2.65k | case XML_CTYPE_NAME: |
173 | 2.65k | assert((content->quant == XML_CQUANT_NONE) |
174 | 2.65k | || (content->quant == XML_CQUANT_OPT) |
175 | 2.65k | || (content->quant == XML_CQUANT_REP) |
176 | 2.65k | || (content->quant == XML_CQUANT_PLUS)); |
177 | 2.65k | assert(content->numchildren == 0); |
178 | 2.65k | assert(content->children == NULL); |
179 | 2.65k | TouchString(content->name); |
180 | 2.65k | break; |
181 | | |
182 | 265 | case XML_CTYPE_CHOICE: |
183 | 4.67k | case XML_CTYPE_SEQ: |
184 | 4.67k | assert((content->quant == XML_CQUANT_NONE) |
185 | 4.67k | || (content->quant == XML_CQUANT_OPT) |
186 | 4.67k | || (content->quant == XML_CQUANT_REP) |
187 | 4.67k | || (content->quant == XML_CQUANT_PLUS)); |
188 | 4.67k | assert(content->name == NULL); |
189 | 10.8k | for (unsigned int i = 0; i < content->numchildren; ++i) { |
190 | 6.21k | TouchNodeAndRecurse(&content->children[i]); |
191 | 6.21k | } |
192 | 4.67k | break; |
193 | | |
194 | 0 | default: |
195 | 0 | assert(false); |
196 | 7.92k | } |
197 | 7.92k | } |
198 | | |
199 | | static void XMLCALL |
200 | 1.71k | ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) { |
201 | 1.71k | TouchString(name); |
202 | 1.71k | TouchNodeAndRecurse(model); |
203 | 1.71k | XML_FreeContentModel((XML_Parser)userData, model); |
204 | 1.71k | } |
205 | | |
206 | | static void XMLCALL |
207 | | AttlistDeclHandler(void *userData, const XML_Char *elname, |
208 | | const XML_Char *attname, const XML_Char *atttype, |
209 | 10.5k | const XML_Char *dflt, int isrequired) { |
210 | 10.5k | (void)userData; |
211 | 10.5k | TouchString(elname); |
212 | 10.5k | TouchString(attname); |
213 | 10.5k | TouchString(atttype); |
214 | 10.5k | TouchString(dflt); |
215 | 10.5k | (void)isrequired; |
216 | 10.5k | } |
217 | | |
218 | | static void XMLCALL |
219 | | XmlDeclHandler(void *userData, const XML_Char *version, |
220 | 2.60k | const XML_Char *encoding, int standalone) { |
221 | 2.60k | (void)userData; |
222 | 2.60k | TouchString(version); |
223 | 2.60k | TouchString(encoding); |
224 | 2.60k | (void)standalone; |
225 | 2.60k | } |
226 | | |
227 | | static void XMLCALL |
228 | | StartElementHandler(void *userData, const XML_Char *name, |
229 | 753k | const XML_Char **atts) { |
230 | 753k | (void)userData; |
231 | 753k | TouchString(name); |
232 | 777k | for (size_t i = 0; atts[i] != NULL; ++i) { |
233 | 24.0k | TouchString(atts[i]); |
234 | 24.0k | } |
235 | 753k | } |
236 | | |
237 | | static void XMLCALL |
238 | 14.9k | EndElementHandler(void *userData, const XML_Char *name) { |
239 | 14.9k | (void)userData; |
240 | 14.9k | TouchString(name); |
241 | 14.9k | } |
242 | | |
243 | | static void XMLCALL |
244 | 808k | CharacterDataHandler(void *userData, const XML_Char *s, int len) { |
245 | 808k | (void)userData; |
246 | 808k | TouchString(s, len); |
247 | 808k | } |
248 | | |
249 | | static void XMLCALL |
250 | | ProcessingInstructionHandler(void *userData, const XML_Char *target, |
251 | 10.8k | const XML_Char *data) { |
252 | 10.8k | (void)userData; |
253 | 10.8k | TouchString(target); |
254 | 10.8k | TouchString(data); |
255 | 10.8k | } |
256 | | |
257 | | static void XMLCALL |
258 | 11.6k | CommentHandler(void *userData, const XML_Char *data) { |
259 | 11.6k | TouchString(data); |
260 | | // Use the comment handler to trigger parser suspend, so that we can get |
261 | | // coverage of that code. |
262 | 11.6k | XML_StopParser((XML_Parser)userData, XML_TRUE); |
263 | 11.6k | } |
264 | | |
265 | | static void XMLCALL |
266 | 3.86k | StartCdataSectionHandler(void *userData) { |
267 | 3.86k | (void)userData; |
268 | 3.86k | } |
269 | | |
270 | | static void XMLCALL |
271 | 815 | EndCdataSectionHandler(void *userData) { |
272 | 815 | (void)userData; |
273 | 815 | } |
274 | | |
275 | | static void XMLCALL |
276 | 147k | DefaultHandler(void *userData, const XML_Char *s, int len) { |
277 | 147k | (void)userData; |
278 | 147k | TouchString(s, len); |
279 | 147k | } |
280 | | |
281 | | static void XMLCALL |
282 | | StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName, |
283 | | const XML_Char *sysid, const XML_Char *pubid, |
284 | 131k | int has_internal_subset) { |
285 | 131k | (void)userData; |
286 | 131k | TouchString(doctypeName); |
287 | 131k | TouchString(sysid); |
288 | 131k | TouchString(pubid); |
289 | 131k | (void)has_internal_subset; |
290 | 131k | } |
291 | | |
292 | | static void XMLCALL |
293 | 2.96k | EndDoctypeDeclHandler(void *userData) { |
294 | 2.96k | (void)userData; |
295 | 2.96k | } |
296 | | |
297 | | static void XMLCALL |
298 | | EntityDeclHandler(void *userData, const XML_Char *entityName, |
299 | | int is_parameter_entity, const XML_Char *value, |
300 | | int value_length, const XML_Char *base, |
301 | | const XML_Char *systemId, const XML_Char *publicId, |
302 | 11.7k | const XML_Char *notationName) { |
303 | 11.7k | (void)userData; |
304 | 11.7k | TouchString(entityName); |
305 | 11.7k | (void)is_parameter_entity; |
306 | 11.7k | TouchString(value, value_length); |
307 | 11.7k | TouchString(base); |
308 | 11.7k | TouchString(systemId); |
309 | 11.7k | TouchString(publicId); |
310 | 11.7k | TouchString(notationName); |
311 | 11.7k | } |
312 | | |
313 | | static void XMLCALL |
314 | | NotationDeclHandler(void *userData, const XML_Char *notationName, |
315 | | const XML_Char *base, const XML_Char *systemId, |
316 | 888 | const XML_Char *publicId) { |
317 | 888 | (void)userData; |
318 | 888 | TouchString(notationName); |
319 | 888 | TouchString(base); |
320 | 888 | TouchString(systemId); |
321 | 888 | TouchString(publicId); |
322 | 888 | } |
323 | | |
324 | | static void XMLCALL |
325 | | StartNamespaceDeclHandler(void *userData, const XML_Char *prefix, |
326 | 8.05k | const XML_Char *uri) { |
327 | 8.05k | (void)userData; |
328 | 8.05k | TouchString(prefix); |
329 | 8.05k | TouchString(uri); |
330 | 8.05k | } |
331 | | |
332 | | static void XMLCALL |
333 | 3.03k | EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) { |
334 | 3.03k | (void)userData; |
335 | 3.03k | TouchString(prefix); |
336 | 3.03k | } |
337 | | |
338 | | static int XMLCALL |
339 | 2.74k | NotStandaloneHandler(void *userData) { |
340 | 2.74k | (void)userData; |
341 | 2.74k | return XML_STATUS_OK; |
342 | 2.74k | } |
343 | | |
344 | | static int XMLCALL |
345 | | ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context, |
346 | | const XML_Char *base, const XML_Char *systemId, |
347 | 127k | const XML_Char *publicId) { |
348 | 127k | int rc = XML_STATUS_ERROR; |
349 | 127k | TouchString(context); |
350 | 127k | TouchString(base); |
351 | 127k | TouchString(systemId); |
352 | 127k | TouchString(publicId); |
353 | | |
354 | 127k | if (g_external_entity) { |
355 | 125k | XML_Parser ext_parser |
356 | 125k | = XML_ExternalEntityParserCreate(parser, context, g_encoding); |
357 | 125k | if (ext_parser != NULL) { |
358 | 125k | rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1); |
359 | 125k | XML_ParserFree(ext_parser); |
360 | 125k | } |
361 | 125k | } |
362 | | |
363 | 127k | return rc; |
364 | 127k | } |
365 | | |
366 | | static void XMLCALL |
367 | | SkippedEntityHandler(void *userData, const XML_Char *entityName, |
368 | 3.48k | int is_parameter_entity) { |
369 | 3.48k | (void)userData; |
370 | 3.48k | TouchString(entityName); |
371 | 3.48k | (void)is_parameter_entity; |
372 | 3.48k | } |
373 | | |
374 | | static int XMLCALL |
375 | | UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name, |
376 | 1.62k | XML_Encoding *info) { |
377 | 1.62k | (void)encodingHandlerData; |
378 | 1.62k | TouchString(name); |
379 | 1.62k | (void)info; |
380 | 1.62k | return XML_STATUS_ERROR; |
381 | 1.62k | } |
382 | | |
383 | | void |
384 | 375k | InitializeParser(XML_Parser parser) { |
385 | 375k | XML_SetUserData(parser, (void *)parser); |
386 | 375k | XML_SetHashSalt(parser, 0x41414141); |
387 | 375k | XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); |
388 | | |
389 | 375k | XML_SetElementDeclHandler(parser, ElementDeclHandler); |
390 | 375k | XML_SetAttlistDeclHandler(parser, AttlistDeclHandler); |
391 | 375k | XML_SetXmlDeclHandler(parser, XmlDeclHandler); |
392 | 375k | XML_SetElementHandler(parser, StartElementHandler, EndElementHandler); |
393 | 375k | XML_SetCharacterDataHandler(parser, CharacterDataHandler); |
394 | 375k | XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler); |
395 | 375k | XML_SetCommentHandler(parser, CommentHandler); |
396 | 375k | XML_SetCdataSectionHandler(parser, StartCdataSectionHandler, |
397 | 375k | EndCdataSectionHandler); |
398 | | // XML_SetDefaultHandler disables entity expansion |
399 | 375k | XML_SetDefaultHandlerExpand(parser, DefaultHandler); |
400 | 375k | XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler, |
401 | 375k | EndDoctypeDeclHandler); |
402 | | // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler, |
403 | | // and there isn't any significant code change between the two. |
404 | 375k | XML_SetEntityDeclHandler(parser, EntityDeclHandler); |
405 | 375k | XML_SetNotationDeclHandler(parser, NotationDeclHandler); |
406 | 375k | XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler, |
407 | 375k | EndNamespaceDeclHandler); |
408 | 375k | XML_SetNotStandaloneHandler(parser, NotStandaloneHandler); |
409 | 375k | XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler); |
410 | 375k | XML_SetSkippedEntityHandler(parser, SkippedEntityHandler); |
411 | 375k | XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser); |
412 | 375k | } |
413 | | |
414 | 28.6k | DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) { |
415 | 28.6k | g_external_entity = nullptr; |
416 | | |
417 | 28.6k | if (! testcase.actions_size()) { |
418 | 5 | return; |
419 | 5 | } |
420 | | |
421 | 28.6k | g_allocation_count = 0; |
422 | 28.6k | g_fail_allocations.clear(); |
423 | 31.6k | for (int i = 0; i < testcase.fail_allocations_size(); ++i) { |
424 | 3.03k | g_fail_allocations.push_back(testcase.fail_allocations(i)); |
425 | 3.03k | } |
426 | | |
427 | 28.6k | SetEncoding(testcase.encoding()); |
428 | 28.6k | XML_Parser parser |
429 | 28.6k | = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|"); |
430 | 28.6k | InitializeParser(parser); |
431 | | |
432 | 910k | for (int i = 0; i < testcase.actions_size(); ++i) { |
433 | 881k | const auto &action = testcase.actions(i); |
434 | 881k | switch (action.action_case()) { |
435 | 303k | case xml_lpm_fuzzer::Action::kChunk: |
436 | 303k | if (XML_STATUS_ERROR |
437 | 303k | == Parse(parser, action.chunk().data(), action.chunk().size(), 0)) { |
438 | | // Force a reset after parse error. |
439 | 236k | XML_ParserReset(parser, g_encoding); |
440 | 236k | InitializeParser(parser); |
441 | 236k | } |
442 | 303k | break; |
443 | | |
444 | 56.8k | case xml_lpm_fuzzer::Action::kLastChunk: |
445 | 56.8k | Parse(parser, action.last_chunk().data(), action.last_chunk().size(), 1); |
446 | 56.8k | XML_ParserReset(parser, g_encoding); |
447 | 56.8k | InitializeParser(parser); |
448 | 56.8k | break; |
449 | | |
450 | 53.0k | case xml_lpm_fuzzer::Action::kReset: |
451 | 53.0k | XML_ParserReset(parser, g_encoding); |
452 | 53.0k | InitializeParser(parser); |
453 | 53.0k | break; |
454 | | |
455 | 14.0k | case xml_lpm_fuzzer::Action::kExternalEntity: |
456 | 14.0k | g_external_entity = action.external_entity().data(); |
457 | 14.0k | g_external_entity_size = action.external_entity().size(); |
458 | 14.0k | break; |
459 | | |
460 | 453k | default: |
461 | 453k | break; |
462 | 881k | } |
463 | 881k | } |
464 | | |
465 | 28.6k | XML_ParserFree(parser); |
466 | 28.6k | } |