/src/libxslt/tests/fuzz/fuzz.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * fuzz.c: Fuzz targets for libxslt |
3 | | * |
4 | | * See Copyright for the status of this software. |
5 | | */ |
6 | | |
7 | | #include <stdio.h> |
8 | | #include <stdlib.h> |
9 | | #include <string.h> |
10 | | |
11 | | #include <libxml/parser.h> |
12 | | #include <libxml/parserInternals.h> |
13 | | #include <libxml/tree.h> |
14 | | #include <libxml/xpath.h> |
15 | | #include <libxml/xpathInternals.h> |
16 | | #include <libxslt/extensions.h> |
17 | | #include <libxslt/functions.h> |
18 | | #include <libxslt/security.h> |
19 | | #include <libxslt/transform.h> |
20 | | #include <libxslt/xslt.h> |
21 | | #include <libxslt/xsltInternals.h> |
22 | | #include <libxslt/xsltutils.h> |
23 | | #include <libexslt/exslt.h> |
24 | | #include "fuzz.h" |
25 | | |
26 | | #if defined(_WIN32) |
27 | | #define DIR_SEP '\\' |
28 | | #else |
29 | | #define DIR_SEP '/' |
30 | | #endif |
31 | | |
32 | | static xsltSecurityPrefsPtr globalSec; |
33 | | static xsltStylesheetPtr globalStyle; |
34 | | static xsltTransformContextPtr tctxt; |
35 | | |
36 | | static void |
37 | 180k | xsltFuzzXmlErrorFunc(void *vctxt, const char *msg ATTRIBUTE_UNUSED, ...) { |
38 | 180k | xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) vctxt; |
39 | | /* |
40 | | * Stopping the parser should be slightly faster and might catch some |
41 | | * issues related to recent libxml2 changes. |
42 | | */ |
43 | 180k | xmlStopParser(ctxt); |
44 | 180k | } |
45 | | |
46 | | static void |
47 | | xsltFuzzXsltErrorFunc(void *vctxt ATTRIBUTE_UNUSED, |
48 | 3.78M | const char *msg ATTRIBUTE_UNUSED, ...) { |
49 | 3.78M | } |
50 | | |
51 | | static void |
52 | 2 | xsltFuzzInit(void) { |
53 | 2 | xmlFuzzMemSetup(); |
54 | | |
55 | | /* Init libxml2, libxslt and libexslt */ |
56 | 2 | xmlInitParser(); |
57 | 2 | xsltInit(); |
58 | 2 | exsltRegisterAll(); |
59 | | |
60 | | /* Suppress error messages */ |
61 | 2 | xmlSetGenericErrorFunc(NULL, xsltFuzzXmlErrorFunc); |
62 | 2 | xsltSetGenericErrorFunc(NULL, xsltFuzzXsltErrorFunc); |
63 | | |
64 | | /* Disallow I/O */ |
65 | 2 | globalSec = xsltNewSecurityPrefs(); |
66 | 2 | xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_READ_FILE, |
67 | 2 | xsltSecurityForbid); |
68 | 2 | xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_WRITE_FILE, |
69 | 2 | xsltSecurityForbid); |
70 | 2 | xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_CREATE_DIRECTORY, |
71 | 2 | xsltSecurityForbid); |
72 | 2 | xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_READ_NETWORK, |
73 | 2 | xsltSecurityForbid); |
74 | 2 | xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_WRITE_NETWORK, |
75 | 2 | xsltSecurityForbid); |
76 | 2 | } |
77 | | |
78 | | /* XPath fuzzer |
79 | | * |
80 | | * This fuzz target parses and evaluates XPath expressions in an (E)XSLT |
81 | | * context using a static XML document. It heavily exercises the libxml2 |
82 | | * XPath engine (xpath.c), a few other parts of libxml2, and most of |
83 | | * libexslt. |
84 | | * |
85 | | * Some EXSLT functions need the transform context to create RVTs for |
86 | | * node-sets. A couple of functions also access the stylesheet. The |
87 | | * XPath context from the transform context is used to parse and |
88 | | * evaluate expressions. |
89 | | * |
90 | | * All these objects are created once at startup. After fuzzing each input, |
91 | | * they're reset as cheaply as possible. |
92 | | * |
93 | | * TODO |
94 | | * |
95 | | * - Some expressions can create lots of temporary node sets (RVTs) which |
96 | | * aren't freed until the whole expression was evaluated, leading to |
97 | | * extensive memory usage. Cleaning them up earlier would require |
98 | | * callbacks from the XPath engine, for example after evaluating a |
99 | | * predicate expression, which doesn't seem feasible. Terminating the |
100 | | * evaluation after creating a certain number of RVTs is a simple |
101 | | * workaround. |
102 | | * - Register a custom xsl:decimal-format declaration for format-number(). |
103 | | * - Some functions add strings to the stylesheet or transform context |
104 | | * dictionary, for example via xsltGetQName, requiring a clean up of the |
105 | | * dicts after fuzzing each input. This behavior seems questionable. |
106 | | * Extension functions shouldn't needlessly modify the transform context |
107 | | * or stylesheet. |
108 | | * - Register xsl:keys and fuzz the key() function. |
109 | | * - Add a few custom func:functions. |
110 | | * - Fuzz the document() function with external documents. |
111 | | */ |
112 | | |
113 | | int |
114 | 2 | xsltFuzzXPathInit(void) { |
115 | 2 | xsltFuzzInit(); |
116 | 2 | globalStyle = xsltNewStylesheet(); |
117 | 2 | return(0); |
118 | 2 | } |
119 | | |
120 | | xmlXPathObjectPtr |
121 | 302 | xsltFuzzXPath(const char *data, size_t size) { |
122 | 302 | xmlXPathContextPtr xpctxt = NULL; |
123 | 302 | xmlXPathObjectPtr xpathObj = NULL; |
124 | 302 | xmlDocPtr doc; |
125 | 302 | xmlNodePtr root; |
126 | 302 | const char *xpathExpr, *xml; |
127 | 302 | size_t maxAllocs, xmlSize; |
128 | | |
129 | 302 | xmlFuzzDataInit(data, size); |
130 | | |
131 | 302 | maxAllocs = xmlFuzzReadInt(4) % (size + 1); |
132 | 302 | xpathExpr = xmlFuzzReadString(NULL); |
133 | 302 | xml = xmlFuzzReadString(&xmlSize); |
134 | | |
135 | | /* Recovery mode allows more input to be fuzzed. */ |
136 | 302 | doc = xmlReadMemory(xml, xmlSize, NULL, NULL, XML_PARSE_RECOVER); |
137 | 302 | if (doc == NULL) |
138 | 2 | goto error; |
139 | 300 | root = xmlDocGetRootElement(doc); |
140 | 300 | if (root != NULL) { |
141 | 265 | xmlNewNs(root, BAD_CAST "a", BAD_CAST "a"); |
142 | 265 | xmlNewNs(root, BAD_CAST "b", BAD_CAST "b"); |
143 | 265 | xmlNewNs(root, BAD_CAST "c", BAD_CAST "c"); |
144 | 265 | } |
145 | | |
146 | 300 | tctxt = xsltNewTransformContext(globalStyle, doc); |
147 | 300 | if (tctxt == NULL) { |
148 | 0 | xmlFreeDoc(doc); |
149 | 0 | goto error; |
150 | 0 | } |
151 | 300 | xsltSetCtxtSecurityPrefs(globalSec, tctxt); |
152 | | |
153 | | /* |
154 | | * Some extension functions need the current instruction. |
155 | | * |
156 | | * - format-number() for namespaces. |
157 | | * - document() for the base URL. |
158 | | * - maybe others? |
159 | | * |
160 | | * For fuzzing, it's enough to use the source document's root element. |
161 | | */ |
162 | 300 | tctxt->inst = xmlDocGetRootElement(doc); |
163 | | |
164 | | /* Set up XPath context */ |
165 | 300 | xpctxt = tctxt->xpathCtxt; |
166 | | |
167 | | /* Resource limits to avoid timeouts and call stack overflows */ |
168 | 300 | xpctxt->opLimit = 500000; |
169 | | |
170 | | /* Test namespaces */ |
171 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a"); |
172 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b"); |
173 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c"); |
174 | | |
175 | | /* EXSLT namespaces */ |
176 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE); |
177 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE); |
178 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE); |
179 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE); |
180 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE); |
181 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE); |
182 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE); |
183 | 300 | xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE); |
184 | | |
185 | | /* Register variables */ |
186 | 300 | xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5)); |
187 | 300 | xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1)); |
188 | 300 | xmlXPathRegisterVariable(xpctxt, BAD_CAST "s", |
189 | 300 | xmlXPathNewString(BAD_CAST "var")); |
190 | 300 | xmlXPathRegisterVariable( |
191 | 300 | xpctxt, BAD_CAST "n", |
192 | 300 | xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt)); |
193 | | |
194 | | /* Compile and return early if the expression is invalid */ |
195 | 300 | xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt, |
196 | 300 | (const xmlChar *) xpathExpr); |
197 | 300 | if (compExpr == NULL) |
198 | 202 | goto error; |
199 | | |
200 | | /* Initialize XPath evaluation context and evaluate */ |
201 | 98 | xmlFuzzMemSetLimit(maxAllocs); |
202 | | /* Maybe test different context nodes? */ |
203 | 98 | xpctxt->node = (xmlNodePtr) doc; |
204 | 98 | xpctxt->contextSize = 1; |
205 | 98 | xpctxt->proximityPosition = 1; |
206 | 98 | xpctxt->opCount = 0; |
207 | 98 | xpathObj = xmlXPathCompiledEval(compExpr, xpctxt); |
208 | 98 | xmlXPathFreeCompExpr(compExpr); |
209 | | |
210 | 302 | error: |
211 | 302 | xmlFuzzMemSetLimit(0); |
212 | 302 | xmlXPathRegisteredNsCleanup(xpctxt); |
213 | 302 | xmlFuzzDataCleanup(); |
214 | | |
215 | 302 | return xpathObj; |
216 | 98 | } |
217 | | |
218 | | void |
219 | 302 | xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) { |
220 | 302 | xmlXPathFreeObject(obj); |
221 | | |
222 | 302 | if (tctxt != NULL) { |
223 | 300 | xmlDocPtr doc = tctxt->document->doc; |
224 | | |
225 | 300 | xsltFreeTransformContext(tctxt); |
226 | 300 | tctxt = NULL; |
227 | 300 | xmlFreeDoc(doc); |
228 | 300 | } |
229 | 302 | } |
230 | | |
231 | | void |
232 | 0 | xsltFuzzXPathCleanup(void) { |
233 | 0 | xsltFreeSecurityPrefs(globalSec); |
234 | 0 | globalSec = NULL; |
235 | 0 | xsltFreeStylesheet(globalStyle); |
236 | 0 | globalStyle = NULL; |
237 | 0 | } |
238 | | |
239 | | /* |
240 | | * XSLT fuzzer |
241 | | * |
242 | | * This is a rather naive fuzz target using a static XML document. |
243 | | * |
244 | | * TODO |
245 | | * |
246 | | * - Improve seed corpus |
247 | | * - Mutate multiple input documents: source, xsl:import, xsl:include |
248 | | * - format-number() with xsl:decimal-format |
249 | | * - Better coverage for xsl:key and key() function |
250 | | * - EXSLT func:function |
251 | | * - xsl:document |
252 | | */ |
253 | | |
254 | | int |
255 | 0 | xsltFuzzXsltInit(void) { |
256 | 0 | xsltFuzzInit(); |
257 | 0 | xmlSetExternalEntityLoader(xmlFuzzEntityLoader); |
258 | 0 | return(0); |
259 | 0 | } |
260 | | |
261 | | xmlChar * |
262 | 0 | xsltFuzzXslt(const char *data, size_t size) { |
263 | 0 | const char *xsltBuffer, *xsltUrl, *docBuffer, *docUrl; |
264 | 0 | xmlDocPtr xsltDoc = NULL, doc = NULL; |
265 | 0 | xmlDocPtr result = NULL; |
266 | 0 | xmlNodePtr root; |
267 | 0 | xsltStylesheetPtr sheet = NULL; |
268 | 0 | xsltTransformContextPtr ctxt = NULL; |
269 | 0 | xmlChar *ret = NULL; |
270 | 0 | size_t xsltSize, docSize, maxAllocs; |
271 | 0 | int retLen; |
272 | |
|
273 | 0 | xmlFuzzDataInit(data, size); |
274 | 0 | maxAllocs = xmlFuzzReadInt(4) % (size + 1); |
275 | |
|
276 | 0 | xmlFuzzReadEntities(); |
277 | 0 | xsltBuffer = xmlFuzzMainEntity(&xsltSize); |
278 | 0 | xsltUrl = xmlFuzzMainUrl(); |
279 | 0 | docBuffer = xmlFuzzSecondaryEntity(&docSize); |
280 | 0 | docUrl = xmlFuzzSecondaryUrl(); |
281 | 0 | if ((xsltBuffer == NULL) || (docBuffer == NULL)) |
282 | 0 | goto exit; |
283 | | |
284 | 0 | doc = xmlReadMemory(docBuffer, docSize, docUrl, NULL, XSLT_PARSE_OPTIONS); |
285 | 0 | if (doc == NULL) |
286 | 0 | goto exit; |
287 | | |
288 | 0 | xsltDoc = xmlReadMemory(xsltBuffer, xsltSize, xsltUrl, NULL, |
289 | 0 | XSLT_PARSE_OPTIONS); |
290 | 0 | if (xsltDoc == NULL) |
291 | 0 | goto exit; |
292 | 0 | root = xmlDocGetRootElement(xsltDoc); |
293 | 0 | if (root != NULL) { |
294 | 0 | xmlNewNs(root, XSLT_NAMESPACE, BAD_CAST "x"); |
295 | 0 | xmlNewNs(root, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl"); |
296 | 0 | xmlNewNs(root, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt"); |
297 | 0 | xmlNewNs(root, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto"); |
298 | 0 | xmlNewNs(root, EXSLT_DATE_NAMESPACE, BAD_CAST "date"); |
299 | 0 | xmlNewNs(root, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn"); |
300 | 0 | xmlNewNs(root, EXSLT_MATH_NAMESPACE, BAD_CAST "math"); |
301 | 0 | xmlNewNs(root, EXSLT_SETS_NAMESPACE, BAD_CAST "set"); |
302 | 0 | xmlNewNs(root, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str"); |
303 | 0 | xmlNewNs(root, SAXON_NAMESPACE, BAD_CAST "saxon"); |
304 | 0 | } |
305 | |
|
306 | 0 | xmlFuzzMemSetLimit(maxAllocs); |
307 | 0 | sheet = xsltNewStylesheet(); |
308 | 0 | if (sheet == NULL) |
309 | 0 | goto exit; |
310 | 0 | sheet->opLimit = 10000; |
311 | 0 | sheet->xpathCtxt->opLimit = 100000; |
312 | 0 | sheet->xpathCtxt->opCount = 0; |
313 | 0 | if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) |
314 | 0 | goto exit; |
315 | 0 | xsltDoc = NULL; |
316 | |
|
317 | 0 | root = xmlDocGetRootElement(doc); |
318 | 0 | if (root != NULL) { |
319 | 0 | xmlNewNs(root, BAD_CAST "a", BAD_CAST "a"); |
320 | 0 | xmlNewNs(root, BAD_CAST "b", BAD_CAST "b"); |
321 | 0 | xmlNewNs(root, BAD_CAST "c", BAD_CAST "c"); |
322 | 0 | } |
323 | |
|
324 | 0 | ctxt = xsltNewTransformContext(sheet, doc); |
325 | 0 | if (ctxt == NULL) |
326 | 0 | goto exit; |
327 | 0 | xsltSetCtxtSecurityPrefs(globalSec, ctxt); |
328 | 0 | ctxt->maxTemplateDepth = 100; |
329 | 0 | ctxt->opLimit = 20000; |
330 | 0 | ctxt->xpathCtxt->opLimit = 100000; |
331 | 0 | ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount; |
332 | |
|
333 | 0 | result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt); |
334 | 0 | if (result != NULL) |
335 | 0 | xsltSaveResultToString(&ret, &retLen, result, sheet); |
336 | |
|
337 | 0 | exit: |
338 | 0 | xmlFuzzMemSetLimit(0); |
339 | 0 | xmlFreeDoc(result); |
340 | 0 | xsltFreeTransformContext(ctxt); |
341 | 0 | xsltFreeStylesheet(sheet); |
342 | 0 | xmlFreeDoc(xsltDoc); |
343 | 0 | xmlFreeDoc(doc); |
344 | 0 | xmlFuzzDataCleanup(); |
345 | |
|
346 | 0 | return ret; |
347 | 0 | } |
348 | | |
349 | | void |
350 | 0 | xsltFuzzXsltCleanup(void) { |
351 | 0 | xsltFreeSecurityPrefs(globalSec); |
352 | 0 | globalSec = NULL; |
353 | 0 | } |
354 | | |
355 | | /* |
356 | | * Utility functions, copied from libxml2 |
357 | | */ |
358 | | |
359 | | typedef struct { |
360 | | const char *data; |
361 | | size_t size; |
362 | | } xmlFuzzEntityInfo; |
363 | | |
364 | | /* Single static instance for now */ |
365 | | static struct { |
366 | | /* Original data */ |
367 | | const char *data; |
368 | | size_t size; |
369 | | |
370 | | /* Remaining data */ |
371 | | const char *ptr; |
372 | | size_t remaining; |
373 | | |
374 | | /* Buffer for unescaped strings */ |
375 | | char *outBuf; |
376 | | char *outPtr; /* Free space at end of buffer */ |
377 | | |
378 | | xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */ |
379 | | |
380 | | /* The first entity is the main entity. */ |
381 | | const char *mainUrl; |
382 | | xmlFuzzEntityInfo *mainEntity; |
383 | | const char *secondaryUrl; |
384 | | xmlFuzzEntityInfo *secondaryEntity; |
385 | | } fuzzData; |
386 | | |
387 | | size_t fuzzNumAllocs; |
388 | | size_t fuzzMaxAllocs; |
389 | | |
390 | | /** |
391 | | * xmlFuzzErrorFunc: |
392 | | * |
393 | | * An error function that simply discards all errors. |
394 | | */ |
395 | | void |
396 | | xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED, |
397 | 0 | ...) { |
398 | 0 | } |
399 | | |
400 | | /* |
401 | | * Malloc failure injection. |
402 | | * |
403 | | * Quick tip to debug complicated issues: Increase MALLOC_OFFSET until |
404 | | * the crash disappears (or a different issue is triggered). Then set |
405 | | * the offset to the highest value that produces a crash and set |
406 | | * MALLOC_ABORT to 1 to see which failed memory allocation causes the |
407 | | * issue. |
408 | | */ |
409 | | |
410 | 78 | #define XML_FUZZ_MALLOC_OFFSET 0 |
411 | | #define XML_FUZZ_MALLOC_ABORT 0 |
412 | | |
413 | | static void * |
414 | 18.0M | xmlFuzzMalloc(size_t size) { |
415 | 18.0M | if (fuzzMaxAllocs > 0) { |
416 | 1.72M | if (fuzzNumAllocs >= fuzzMaxAllocs - 1) |
417 | | #if XML_FUZZ_MALLOC_ABORT |
418 | | abort(); |
419 | | #else |
420 | 46 | return(NULL); |
421 | 1.72M | #endif |
422 | 1.72M | fuzzNumAllocs += 1; |
423 | 1.72M | } |
424 | 18.0M | return malloc(size); |
425 | 18.0M | } |
426 | | |
427 | | static void * |
428 | 393k | xmlFuzzRealloc(void *ptr, size_t size) { |
429 | 393k | if (fuzzMaxAllocs > 0) { |
430 | 132k | if (fuzzNumAllocs >= fuzzMaxAllocs - 1) |
431 | | #if XML_FUZZ_MALLOC_ABORT |
432 | | abort(); |
433 | | #else |
434 | 29 | return(NULL); |
435 | 132k | #endif |
436 | 132k | fuzzNumAllocs += 1; |
437 | 132k | } |
438 | 393k | return realloc(ptr, size); |
439 | 393k | } |
440 | | |
441 | | void |
442 | 2 | xmlFuzzMemSetup(void) { |
443 | 2 | xmlMemSetup(free, xmlFuzzMalloc, xmlFuzzRealloc, xmlMemStrdup); |
444 | 2 | } |
445 | | |
446 | | void |
447 | 400 | xmlFuzzMemSetLimit(size_t limit) { |
448 | 400 | fuzzNumAllocs = 0; |
449 | 400 | fuzzMaxAllocs = limit ? limit + XML_FUZZ_MALLOC_OFFSET : 0; |
450 | 400 | } |
451 | | |
452 | | /** |
453 | | * xmlFuzzDataInit: |
454 | | * |
455 | | * Initialize fuzz data provider. |
456 | | */ |
457 | | void |
458 | 302 | xmlFuzzDataInit(const char *data, size_t size) { |
459 | 302 | fuzzData.data = data; |
460 | 302 | fuzzData.size = size; |
461 | 302 | fuzzData.ptr = data; |
462 | 302 | fuzzData.remaining = size; |
463 | | |
464 | 302 | fuzzData.outBuf = xmlMalloc(size + 1); |
465 | 302 | fuzzData.outPtr = fuzzData.outBuf; |
466 | | |
467 | 302 | fuzzData.entities = xmlHashCreate(8); |
468 | 302 | fuzzData.mainUrl = NULL; |
469 | 302 | fuzzData.mainEntity = NULL; |
470 | 302 | fuzzData.secondaryUrl = NULL; |
471 | 302 | fuzzData.secondaryEntity = NULL; |
472 | 302 | } |
473 | | |
474 | | /** |
475 | | * xmlFuzzDataFree: |
476 | | * |
477 | | * Cleanup fuzz data provider. |
478 | | */ |
479 | | void |
480 | 302 | xmlFuzzDataCleanup(void) { |
481 | 302 | xmlFree(fuzzData.outBuf); |
482 | 302 | xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator); |
483 | 302 | } |
484 | | |
/**
 * xmlFuzzWriteInt:
 * @out: output file
 * @v: integer to write
 * @size: size of integer in bytes
 *
 * Write @v to @out as @size bytes, most significant byte first. If
 * @size is larger than sizeof(size_t), the surplus leading bytes are
 * written as zeros. Nothing is written for a @size of zero or less.
 */
void
xmlFuzzWriteInt(FILE *out, size_t v, int size) {
    int bytesLeft = size;
    int bitPos;

    /* Bytes that do not fit into a size_t can only be zero padding. */
    for (; bytesLeft > (int) sizeof(size_t); bytesLeft--)
        putc(0, out);

    /* Emit the remaining bytes from the highest one downwards. */
    for (bitPos = bytesLeft * 8; bitPos > 0; bitPos -= 8)
        putc((v >> (bitPos - 8)) & 255, out);
}
508 | | |
509 | | /** |
510 | | * xmlFuzzReadInt: |
511 | | * @size: size of integer in bytes |
512 | | * |
513 | | * Read an integer from the fuzz data. |
514 | | */ |
515 | | size_t |
516 | 302 | xmlFuzzReadInt(int size) { |
517 | 302 | size_t ret = 0; |
518 | | |
519 | 1.51k | while ((size > 0) && (fuzzData.remaining > 0)) { |
520 | 1.20k | unsigned char c = (unsigned char) *fuzzData.ptr++; |
521 | 1.20k | fuzzData.remaining--; |
522 | 1.20k | ret = (ret << 8) | c; |
523 | 1.20k | size--; |
524 | 1.20k | } |
525 | | |
526 | 302 | return ret; |
527 | 302 | } |
528 | | |
529 | | /** |
530 | | * xmlFuzzReadRemaining: |
531 | | * @size: size of string in bytes |
532 | | * |
533 | | * Read remaining bytes from fuzz data. |
534 | | */ |
535 | | const char * |
536 | 0 | xmlFuzzReadRemaining(size_t *size) { |
537 | 0 | const char *ret = fuzzData.ptr; |
538 | |
|
539 | 0 | *size = fuzzData.remaining; |
540 | 0 | fuzzData.ptr += fuzzData.remaining; |
541 | 0 | fuzzData.remaining = 0; |
542 | |
|
543 | 0 | return(ret); |
544 | 0 | } |
545 | | |
/**
 * xmlFuzzWriteString:
 * @out: output file
 * @str: zero-terminated string to write
 *
 * Write a string to @out in the escaped format understood by
 * xmlFuzzReadString, which is similar to FuzzedDataProvider: every
 * backslash in @str is doubled, and a backslash followed by a newline
 * is appended to mark the end of the string.
 */
void
xmlFuzzWriteString(FILE *out, const char *str) {
    const unsigned char *cur = (const unsigned char *) str;

    while (*cur != 0) {
        int ch = *cur++;

        putc(ch, out);
        /* A literal backslash is escaped by writing it twice. */
        if (ch == '\\')
            putc(ch, out);
    }

    /* End-of-string marker */
    putc('\\', out);
    putc('\n', out);
}
566 | | |
567 | | /** |
568 | | * xmlFuzzReadString: |
569 | | * @size: size of string in bytes |
570 | | * |
571 | | * Read a random-length string from the fuzz data. |
572 | | * |
573 | | * The format is similar to libFuzzer's FuzzedDataProvider but treats |
574 | | * backslash followed by newline as end of string. This makes the fuzz data |
575 | | * more readable. A backslash character is escaped with another backslash. |
576 | | * |
577 | | * Returns a zero-terminated string or NULL if the fuzz data is exhausted. |
578 | | */ |
579 | | const char * |
580 | 604 | xmlFuzzReadString(size_t *size) { |
581 | 604 | const char *out = fuzzData.outPtr; |
582 | | |
583 | 122M | while (fuzzData.remaining > 0) { |
584 | 122M | int c = *fuzzData.ptr++; |
585 | 122M | fuzzData.remaining--; |
586 | | |
587 | 122M | if ((c == '\\') && (fuzzData.remaining > 0)) { |
588 | 1.85k | int c2 = *fuzzData.ptr; |
589 | | |
590 | 1.85k | if (c2 == '\n') { |
591 | 353 | fuzzData.ptr++; |
592 | 353 | fuzzData.remaining--; |
593 | 353 | if (size != NULL) |
594 | 52 | *size = fuzzData.outPtr - out; |
595 | 353 | *fuzzData.outPtr++ = '\0'; |
596 | 353 | return(out); |
597 | 353 | } |
598 | 1.49k | if (c2 == '\\') { |
599 | 121 | fuzzData.ptr++; |
600 | 121 | fuzzData.remaining--; |
601 | 121 | } |
602 | 1.49k | } |
603 | | |
604 | 122M | *fuzzData.outPtr++ = c; |
605 | 122M | } |
606 | | |
607 | 251 | if (fuzzData.outPtr > out) { |
608 | 250 | if (size != NULL) |
609 | 249 | *size = fuzzData.outPtr - out; |
610 | 250 | *fuzzData.outPtr++ = '\0'; |
611 | 250 | return(out); |
612 | 250 | } |
613 | | |
614 | 1 | if (size != NULL) |
615 | 1 | *size = 0; |
616 | 1 | return(NULL); |
617 | 251 | } |
618 | | |
619 | | /** |
620 | | * xmlFuzzReadEntities: |
621 | | * |
622 | | * Read entities like the main XML file, external DTDs, external parsed |
623 | | * entities from fuzz data. |
624 | | */ |
625 | | void |
626 | 0 | xmlFuzzReadEntities(void) { |
627 | 0 | size_t num = 0; |
628 | |
|
629 | 0 | while (1) { |
630 | 0 | const char *url, *entity; |
631 | 0 | size_t entitySize; |
632 | 0 | xmlFuzzEntityInfo *entityInfo; |
633 | |
|
634 | 0 | url = xmlFuzzReadString(NULL); |
635 | 0 | if (url == NULL) break; |
636 | | |
637 | 0 | entity = xmlFuzzReadString(&entitySize); |
638 | 0 | if (entity == NULL) break; |
639 | | |
640 | 0 | if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) { |
641 | 0 | entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo)); |
642 | 0 | if (entityInfo == NULL) |
643 | 0 | break; |
644 | 0 | entityInfo->data = entity; |
645 | 0 | entityInfo->size = entitySize; |
646 | |
|
647 | 0 | xmlHashAddEntry(fuzzData.entities, (xmlChar *)url, entityInfo); |
648 | |
|
649 | 0 | if (num == 0) { |
650 | 0 | fuzzData.mainUrl = url; |
651 | 0 | fuzzData.mainEntity = entityInfo; |
652 | 0 | } else if (num == 1) { |
653 | 0 | fuzzData.secondaryUrl = url; |
654 | 0 | fuzzData.secondaryEntity = entityInfo; |
655 | 0 | } |
656 | |
|
657 | 0 | num++; |
658 | 0 | } |
659 | 0 | } |
660 | 0 | } |
661 | | |
662 | | /** |
663 | | * xmlFuzzMainUrl: |
664 | | * |
665 | | * Returns the main URL. |
666 | | */ |
667 | | const char * |
668 | 0 | xmlFuzzMainUrl(void) { |
669 | 0 | return(fuzzData.mainUrl); |
670 | 0 | } |
671 | | |
672 | | /** |
673 | | * xmlFuzzMainEntity: |
674 | | * @size: size of the main entity in bytes |
675 | | * |
676 | | * Returns the main entity. |
677 | | */ |
678 | | const char * |
679 | 0 | xmlFuzzMainEntity(size_t *size) { |
680 | 0 | if (fuzzData.mainEntity == NULL) |
681 | 0 | return(NULL); |
682 | 0 | *size = fuzzData.mainEntity->size; |
683 | 0 | return(fuzzData.mainEntity->data); |
684 | 0 | } |
685 | | |
686 | | /** |
687 | | * xmlFuzzSecondaryUrl: |
688 | | * |
689 | | * Returns the secondary URL. |
690 | | */ |
691 | | const char * |
692 | 0 | xmlFuzzSecondaryUrl(void) { |
693 | 0 | return(fuzzData.secondaryUrl); |
694 | 0 | } |
695 | | |
696 | | /** |
697 | | * xmlFuzzSecondaryEntity: |
698 | | * @size: size of the secondary entity in bytes |
699 | | * |
700 | | * Returns the secondary entity. |
701 | | */ |
702 | | const char * |
703 | 0 | xmlFuzzSecondaryEntity(size_t *size) { |
704 | 0 | if (fuzzData.secondaryEntity == NULL) |
705 | 0 | return(NULL); |
706 | 0 | *size = fuzzData.secondaryEntity->size; |
707 | 0 | return(fuzzData.secondaryEntity->data); |
708 | 0 | } |
709 | | |
710 | | /** |
711 | | * xmlFuzzEntityLoader: |
712 | | * |
713 | | * The entity loader for fuzz data. |
714 | | */ |
715 | | xmlParserInputPtr |
716 | | xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED, |
717 | 0 | xmlParserCtxtPtr ctxt) { |
718 | 0 | xmlParserInputBufferPtr buf; |
719 | 0 | xmlFuzzEntityInfo *entity; |
720 | |
|
721 | 0 | if (URL == NULL) |
722 | 0 | return(NULL); |
723 | 0 | entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL); |
724 | 0 | if (entity == NULL) |
725 | 0 | return(NULL); |
726 | | |
727 | 0 | buf = xmlParserInputBufferCreateMem(entity->data, entity->size, |
728 | 0 | XML_CHAR_ENCODING_NONE); |
729 | 0 | if (buf == NULL) |
730 | 0 | return(NULL); |
731 | | |
732 | 0 | return(xmlNewIOInputStream(ctxt, buf, XML_CHAR_ENCODING_NONE)); |
733 | 0 | } |