Coverage Report

Created: 2025-07-18 06:31

/src/libxslt/tests/fuzz/fuzz.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * fuzz.c: Fuzz targets for libxslt
3
 *
4
 * See Copyright for the status of this software.
5
 */
6
7
#include <stdio.h>
8
#include <stdlib.h>
9
#include <string.h>
10
11
#include <libxml/parser.h>
12
#include <libxml/parserInternals.h>
13
#include <libxml/tree.h>
14
#include <libxml/xpath.h>
15
#include <libxml/xpathInternals.h>
16
#include <libxslt/extensions.h>
17
#include <libxslt/functions.h>
18
#include <libxslt/security.h>
19
#include <libxslt/transform.h>
20
#include <libxslt/xslt.h>
21
#include <libxslt/xsltInternals.h>
22
#include <libxslt/xsltutils.h>
23
#include <libexslt/exslt.h>
24
#include "fuzz.h"
25
26
#if defined(_WIN32)
27
  #define DIR_SEP '\\'
28
#else
29
  #define DIR_SEP '/'
30
#endif
31
32
static xsltSecurityPrefsPtr globalSec;
33
static xsltStylesheetPtr globalStyle;
34
static xsltTransformContextPtr tctxt;
35
36
static void
37
24.3M
xsltFuzzXmlErrorFunc(void *vctxt, const char *msg ATTRIBUTE_UNUSED, ...) {
38
24.3M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) vctxt;
39
    /*
40
     * Stopping the parser should be slightly faster and might catch some
41
     * issues related to recent libxml2 changes.
42
     */
43
24.3M
    xmlStopParser(ctxt);
44
24.3M
}
45
46
static void
47
xsltFuzzXsltErrorFunc(void *vctxt ATTRIBUTE_UNUSED,
48
17.1M
                      const char *msg ATTRIBUTE_UNUSED, ...) {
49
17.1M
}
50
51
static void
52
4
xsltFuzzInit(void) {
53
4
    xmlFuzzMemSetup();
54
55
    /* Init libxml2, libxslt and libexslt */
56
4
    xmlInitParser();
57
4
    xsltInit();
58
4
    exsltRegisterAll();
59
60
    /* Suppress error messages */
61
4
    xmlSetGenericErrorFunc(NULL, xsltFuzzXmlErrorFunc);
62
4
    xsltSetGenericErrorFunc(NULL, xsltFuzzXsltErrorFunc);
63
64
    /* Disallow I/O */
65
4
    globalSec = xsltNewSecurityPrefs();
66
4
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_READ_FILE,
67
4
                         xsltSecurityForbid);
68
4
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_WRITE_FILE,
69
4
                         xsltSecurityForbid);
70
4
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_CREATE_DIRECTORY,
71
4
                         xsltSecurityForbid);
72
4
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_READ_NETWORK,
73
4
                         xsltSecurityForbid);
74
4
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_WRITE_NETWORK,
75
4
                         xsltSecurityForbid);
76
4
}
77
78
/* XPath fuzzer
79
 *
80
 * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
81
 * context using a static XML document. It heavily exercises the libxml2
82
 * XPath engine (xpath.c), a few other parts of libxml2, and most of
83
 * libexslt.
84
 *
85
 * Some EXSLT functions need the transform context to create RVTs for
86
 * node-sets. A couple of functions also access the stylesheet. The
87
 * XPath context from the transform context is used to parse and
88
 * evaluate expressions.
89
 *
90
 * All these objects are created once at startup. After fuzzing each input,
91
 * they're reset as cheaply as possible.
92
 *
93
 * TODO
94
 *
95
 * - Some expressions can create lots of temporary node sets (RVTs) which
96
 *   aren't freed until the whole expression was evaluated, leading to
97
 *   extensive memory usage. Cleaning them up earlier would require
98
 *   callbacks from the XPath engine, for example after evaluating a
99
 *   predicate expression, which doesn't seem feasible. Terminating the
100
 *   evaluation after creating a certain number of RVTs is a simple
101
 *   workaround.
102
 * - Register a custom xsl:decimal-format declaration for format-number().
103
 * - Some functions add strings to the stylesheet or transform context
104
 *   dictionary, for example via xsltGetQName, requiring a clean up of the
105
 *   dicts after fuzzing each input. This behavior seems questionable.
106
 *   Extension functions shouldn't needlessly modify the transform context
107
 *   or stylesheet.
108
 * - Register xsl:keys and fuzz the key() function.
109
 * - Add a few custom func:functions.
110
 * - Fuzz the document() function with external documents.
111
 */
112
113
int
114
2
xsltFuzzXPathInit(void) {
115
2
    xsltFuzzInit();
116
2
    globalStyle = xsltNewStylesheet();
117
2
    return(0);
118
2
}
119
120
xmlXPathObjectPtr
121
543
xsltFuzzXPath(const char *data, size_t size) {
122
543
    xmlXPathContextPtr xpctxt = NULL;
123
543
    xmlXPathObjectPtr xpathObj = NULL;
124
543
    xmlDocPtr doc;
125
543
    xmlNodePtr root;
126
543
    const char *xpathExpr, *xml;
127
543
    size_t maxAllocs, xmlSize;
128
129
543
    xmlFuzzDataInit(data, size);
130
131
543
    maxAllocs = xmlFuzzReadInt(4) % (size + 1);
132
543
    xpathExpr = xmlFuzzReadString(NULL);
133
543
    xml = xmlFuzzReadString(&xmlSize);
134
135
    /* Recovery mode allows more input to be fuzzed. */
136
543
    doc = xmlReadMemory(xml, xmlSize, NULL, NULL, XML_PARSE_RECOVER);
137
543
    if (doc == NULL)
138
2
        goto error;
139
541
    root = xmlDocGetRootElement(doc);
140
541
    if (root != NULL) {
141
476
        xmlNewNs(root, BAD_CAST "a", BAD_CAST "a");
142
476
        xmlNewNs(root, BAD_CAST "b", BAD_CAST "b");
143
476
        xmlNewNs(root, BAD_CAST "c", BAD_CAST "c");
144
476
    }
145
146
541
    tctxt = xsltNewTransformContext(globalStyle, doc);
147
541
    if (tctxt == NULL) {
148
0
        xmlFreeDoc(doc);
149
0
        goto error;
150
0
    }
151
541
    xsltSetCtxtSecurityPrefs(globalSec, tctxt);
152
153
    /*
154
     * Some extension functions need the current instruction.
155
     *
156
     * - format-number() for namespaces.
157
     * - document() for the base URL.
158
     * - maybe others?
159
     *
160
     * For fuzzing, it's enough to use the source document's root element.
161
     */
162
541
    tctxt->inst = xmlDocGetRootElement(doc);
163
164
    /* Set up XPath context */
165
541
    xpctxt = tctxt->xpathCtxt;
166
167
    /* Resource limits to avoid timeouts and call stack overflows */
168
541
    xpctxt->opLimit = 500000;
169
170
    /* Test namespaces */
171
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
172
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
173
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");
174
175
    /* EXSLT namespaces */
176
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
177
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
178
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
179
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
180
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
181
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
182
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
183
541
    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);
184
185
    /* Register variables */
186
541
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
187
541
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
188
541
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
189
541
                             xmlXPathNewString(BAD_CAST "var"));
190
541
    xmlXPathRegisterVariable(
191
541
            xpctxt, BAD_CAST "n",
192
541
            xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));
193
194
    /* Compile and return early if the expression is invalid */
195
541
    xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt,
196
541
            (const xmlChar *) xpathExpr);
197
541
    if (compExpr == NULL)
198
389
        goto error;
199
200
    /* Initialize XPath evaluation context and evaluate */
201
152
    xmlFuzzMemSetLimit(maxAllocs);
202
    /* Maybe test different context nodes? */
203
152
    xpctxt->node = (xmlNodePtr) doc;
204
152
    xpctxt->contextSize = 1;
205
152
    xpctxt->proximityPosition = 1;
206
152
    xpctxt->opCount = 0;
207
152
    xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
208
152
    xmlXPathFreeCompExpr(compExpr);
209
210
543
error:
211
543
    xmlFuzzMemSetLimit(0);
212
543
    xmlXPathRegisteredNsCleanup(xpctxt);
213
543
    xmlFuzzDataCleanup();
214
215
543
    return xpathObj;
216
152
}
217
218
void
219
543
xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) {
220
543
    xmlXPathFreeObject(obj);
221
222
543
    if (tctxt != NULL) {
223
541
        xmlDocPtr doc = tctxt->document->doc;
224
225
541
        xsltFreeTransformContext(tctxt);
226
541
        tctxt = NULL;
227
541
        xmlFreeDoc(doc);
228
541
    }
229
543
}
230
231
void
232
0
xsltFuzzXPathCleanup(void) {
233
0
    xsltFreeSecurityPrefs(globalSec);
234
0
    globalSec = NULL;
235
0
    xsltFreeStylesheet(globalStyle);
236
0
    globalStyle = NULL;
237
0
}
238
239
/*
240
 * XSLT fuzzer
241
 *
242
 * This is a rather naive fuzz target using a static XML document.
243
 *
244
 * TODO
245
 *
246
 * - Improve seed corpus
247
 * - Mutate multiple input documents: source, xsl:import, xsl:include
248
 * - format-number() with xsl:decimal-format
249
 * - Better coverage for xsl:key and key() function
250
 * - EXSLT func:function
251
 * - xsl:document
252
 */
253
254
int
255
2
xsltFuzzXsltInit(void) {
256
2
    xsltFuzzInit();
257
2
    xmlSetExternalEntityLoader(xmlFuzzEntityLoader);
258
2
    return(0);
259
2
}
260
261
xmlChar *
262
38.8k
xsltFuzzXslt(const char *data, size_t size) {
263
38.8k
    const char *xsltBuffer, *xsltUrl, *docBuffer, *docUrl;
264
38.8k
    xmlDocPtr xsltDoc = NULL, doc = NULL;
265
38.8k
    xmlDocPtr result = NULL;
266
38.8k
    xmlNodePtr root;
267
38.8k
    xsltStylesheetPtr sheet = NULL;
268
38.8k
    xsltTransformContextPtr ctxt = NULL;
269
38.8k
    xmlChar *ret = NULL;
270
38.8k
    size_t xsltSize, docSize, maxAllocs;
271
38.8k
    int retLen;
272
273
38.8k
    xmlFuzzDataInit(data, size);
274
38.8k
    maxAllocs = xmlFuzzReadInt(4) % (size + 1);
275
276
38.8k
    xmlFuzzReadEntities();
277
38.8k
    xsltBuffer = xmlFuzzMainEntity(&xsltSize);
278
38.8k
    xsltUrl = xmlFuzzMainUrl();
279
38.8k
    docBuffer = xmlFuzzSecondaryEntity(&docSize);
280
38.8k
    docUrl = xmlFuzzSecondaryUrl();
281
38.8k
    if ((xsltBuffer == NULL) || (docBuffer == NULL))
282
151
        goto exit;
283
284
38.6k
    doc = xmlReadMemory(docBuffer, docSize, docUrl, NULL, XSLT_PARSE_OPTIONS);
285
38.6k
    if (doc == NULL)
286
11.8k
        goto exit;
287
288
26.7k
    xsltDoc = xmlReadMemory(xsltBuffer, xsltSize, xsltUrl, NULL,
289
26.7k
                            XSLT_PARSE_OPTIONS);
290
26.7k
    if (xsltDoc == NULL)
291
317
        goto exit;
292
26.4k
    root = xmlDocGetRootElement(xsltDoc);
293
26.4k
    if (root != NULL) {
294
26.4k
        xmlNewNs(root, XSLT_NAMESPACE, BAD_CAST "x");
295
26.4k
        xmlNewNs(root, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
296
26.4k
        xmlNewNs(root, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
297
26.4k
        xmlNewNs(root, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
298
26.4k
        xmlNewNs(root, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
299
26.4k
        xmlNewNs(root, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
300
26.4k
        xmlNewNs(root, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
301
26.4k
        xmlNewNs(root, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
302
26.4k
        xmlNewNs(root, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
303
26.4k
        xmlNewNs(root, SAXON_NAMESPACE, BAD_CAST "saxon");
304
26.4k
    }
305
306
26.4k
    xmlFuzzMemSetLimit(maxAllocs);
307
26.4k
    sheet = xsltNewStylesheet();
308
26.4k
    if (sheet == NULL)
309
14
        goto exit;
310
26.4k
    sheet->opLimit = 10000;
311
26.4k
    sheet->xpathCtxt->opLimit = 100000;
312
26.4k
    sheet->xpathCtxt->opCount = 0;
313
26.4k
    if (xsltParseStylesheetUser(sheet, xsltDoc) != 0)
314
9.58k
        goto exit;
315
16.8k
    xsltDoc = NULL;
316
317
16.8k
    root = xmlDocGetRootElement(doc);
318
16.8k
    if (root != NULL) {
319
16.8k
        xmlNewNs(root, BAD_CAST "a", BAD_CAST "a");
320
16.8k
        xmlNewNs(root, BAD_CAST "b", BAD_CAST "b");
321
16.8k
        xmlNewNs(root, BAD_CAST "c", BAD_CAST "c");
322
16.8k
    }
323
324
16.8k
    ctxt = xsltNewTransformContext(sheet, doc);
325
16.8k
    if (ctxt == NULL)
326
197
        goto exit;
327
16.6k
    xsltSetCtxtSecurityPrefs(globalSec, ctxt);
328
16.6k
    ctxt->maxTemplateDepth = 100;
329
16.6k
    ctxt->opLimit = 20000;
330
16.6k
    ctxt->xpathCtxt->opLimit = 100000;
331
16.6k
    ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
332
333
16.6k
    result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
334
16.6k
    if (result != NULL)
335
7.00k
        xsltSaveResultToString(&ret, &retLen, result, sheet);
336
337
38.8k
exit:
338
38.8k
    xmlFuzzMemSetLimit(0);
339
38.8k
    xmlFreeDoc(result);
340
38.8k
    xsltFreeTransformContext(ctxt);
341
38.8k
    xsltFreeStylesheet(sheet);
342
38.8k
    xmlFreeDoc(xsltDoc);
343
38.8k
    xmlFreeDoc(doc);
344
38.8k
    xmlFuzzDataCleanup();
345
346
38.8k
    return ret;
347
16.6k
}
348
349
void
350
0
xsltFuzzXsltCleanup(void) {
351
0
    xsltFreeSecurityPrefs(globalSec);
352
0
    globalSec = NULL;
353
0
}
354
355
/*
356
 * Utility functions, copied from libxml2
357
 */
358
359
/*
 * Content of one entity read from the fuzz data. Records are created by
 * xmlFuzzReadEntities() and stored in fuzzData.entities keyed by URL.
 */
typedef struct {
360
    const char *data; /* Unescaped entity content, as returned by xmlFuzzReadString */
361
    size_t size;      /* Length of data in bytes */
362
} xmlFuzzEntityInfo;
363
364
/* Single static instance for now */
365
/*
 * State of the fuzz data provider. It is (re)initialized for every input
 * by xmlFuzzDataInit() and released by xmlFuzzDataCleanup().
 */
static struct {
366
    /* Original data, exactly as passed to xmlFuzzDataInit */
367
    const char *data;
368
    size_t size;
369
370
    /* Remaining data: read cursor and number of unread bytes */
371
    const char *ptr;
372
    size_t remaining;
373
374
    /* Buffer for unescaped strings, allocated with size + 1 bytes */
375
    char *outBuf;
376
    char *outPtr; /* Free space at end of buffer */
377
378
    xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
379
380
    /* The first entity is the main entity, the second the secondary one. */
381
    const char *mainUrl;
382
    xmlFuzzEntityInfo *mainEntity;
383
    const char *secondaryUrl;
384
    xmlFuzzEntityInfo *secondaryEntity;
385
} fuzzData;
386
387
/* Allocations counted since the last xmlFuzzMemSetLimit() call. */
size_t fuzzNumAllocs;
388
/* Allocation limit; 0 disables failure injection. */
size_t fuzzMaxAllocs;
389
390
/**
391
 * xmlFuzzErrorFunc:
392
 *
393
 * An error function that simply discards all errors.
394
 */
395
void
396
xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
397
0
                 ...) {
398
0
}
399
400
/*
401
 * Malloc failure injection.
402
 *
403
 * Quick tip to debug complicated issues: Increase XML_FUZZ_MALLOC_OFFSET until
404
 * the crash disappears (or a different issue is triggered). Then set
405
 * the offset to the highest value that produces a crash and set
406
 * XML_FUZZ_MALLOC_ABORT to 1 to see which failed memory allocation causes the
407
 * issue.
408
 */
409
410
18.3k
/* Added to every non-zero limit passed to xmlFuzzMemSetLimit(). */
#define XML_FUZZ_MALLOC_OFFSET  0
411
/* When non-zero, reaching the limit calls abort() instead of returning NULL. */
#define XML_FUZZ_MALLOC_ABORT   0
412
413
static void *
414
185M
xmlFuzzMalloc(size_t size) {
415
185M
    if (fuzzMaxAllocs > 0) {
416
20.7M
        if (fuzzNumAllocs >= fuzzMaxAllocs - 1)
417
#if XML_FUZZ_MALLOC_ABORT
418
            abort();
419
#else
420
744k
            return(NULL);
421
20.0M
#endif
422
20.0M
        fuzzNumAllocs += 1;
423
20.0M
    }
424
184M
    return malloc(size);
425
185M
}
426
427
static void *
428
36.8M
xmlFuzzRealloc(void *ptr, size_t size) {
429
36.8M
    if (fuzzMaxAllocs > 0) {
430
3.53M
        if (fuzzNumAllocs >= fuzzMaxAllocs - 1)
431
#if XML_FUZZ_MALLOC_ABORT
432
            abort();
433
#else
434
74.7k
            return(NULL);
435
3.46M
#endif
436
3.46M
        fuzzNumAllocs += 1;
437
3.46M
    }
438
36.7M
    return realloc(ptr, size);
439
36.8M
}
440
441
void
442
4
xmlFuzzMemSetup(void) {
443
4
    xmlMemSetup(free, xmlFuzzMalloc, xmlFuzzRealloc, xmlMemStrdup);
444
4
}
445
446
void
447
65.9k
xmlFuzzMemSetLimit(size_t limit) {
448
65.9k
    fuzzNumAllocs = 0;
449
65.9k
    fuzzMaxAllocs = limit ? limit + XML_FUZZ_MALLOC_OFFSET : 0;
450
65.9k
}
451
452
/**
453
 * xmlFuzzDataInit:
454
 *
455
 * Initialize fuzz data provider.
456
 */
457
void
458
39.3k
xmlFuzzDataInit(const char *data, size_t size) {
459
39.3k
    fuzzData.data = data;
460
39.3k
    fuzzData.size = size;
461
39.3k
    fuzzData.ptr = data;
462
39.3k
    fuzzData.remaining = size;
463
464
39.3k
    fuzzData.outBuf = xmlMalloc(size + 1);
465
39.3k
    fuzzData.outPtr = fuzzData.outBuf;
466
467
39.3k
    fuzzData.entities = xmlHashCreate(8);
468
39.3k
    fuzzData.mainUrl = NULL;
469
39.3k
    fuzzData.mainEntity = NULL;
470
39.3k
    fuzzData.secondaryUrl = NULL;
471
39.3k
    fuzzData.secondaryEntity = NULL;
472
39.3k
}
473
474
/**
475
 * xmlFuzzDataCleanup:
476
 *
477
 * Cleanup fuzz data provider.
478
 */
479
void
480
39.3k
xmlFuzzDataCleanup(void) {
481
39.3k
    xmlFree(fuzzData.outBuf);
482
39.3k
    xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator);
483
39.3k
}
484
485
/**
486
 * xmlFuzzWriteInt:
487
 * @out:  output file
488
 * @v:  integer to write
489
 * @size:  size of integer in bytes
490
 *
491
 * Write an integer to the fuzz data.
492
 */
493
void
xmlFuzzWriteInt(FILE *out, size_t v, int size)
{
    int idx;

    /* Bytes beyond the width of size_t are always zero. */
    for (; size > (int) sizeof(size_t); size--)
        putc(0, out);

    /* Emit the remaining bytes, most significant first. */
    for (idx = size - 1; idx >= 0; idx--)
        putc((v >> (idx * 8)) & 255, out);
}
508
509
/**
510
 * xmlFuzzReadInt:
511
 * @size:  size of integer in bytes
512
 *
513
 * Read an integer from the fuzz data.
514
 */
515
size_t
516
39.3k
xmlFuzzReadInt(int size) {
517
39.3k
    size_t ret = 0;
518
519
196k
    while ((size > 0) && (fuzzData.remaining > 0)) {
520
157k
        unsigned char c = (unsigned char) *fuzzData.ptr++;
521
157k
        fuzzData.remaining--;
522
157k
        ret = (ret << 8) | c;
523
157k
        size--;
524
157k
    }
525
526
39.3k
    return ret;
527
39.3k
}
528
529
/**
530
 * xmlFuzzReadRemaining:
531
 * @size:  receives the number of bytes returned
532
 *
533
 * Read remaining bytes from fuzz data.
534
 */
535
const char *
536
0
xmlFuzzReadRemaining(size_t *size) {
537
0
    const char *ret = fuzzData.ptr;
538
539
0
    *size = fuzzData.remaining;
540
0
    fuzzData.ptr += fuzzData.remaining;
541
0
    fuzzData.remaining = 0;
542
543
0
    return(ret);
544
0
}
545
546
/*
547
 * xmlFuzzWriteString:
548
 * @out:  output file
549
 * @str:  string to write
550
 *
551
 * Write a random-length string to file in a format similar to
552
 * FuzzedDataProvider. Backslash followed by newline marks the end of the
553
 * string. Two backslashes are used to escape a backslash.
554
 */
555
void
xmlFuzzWriteString(FILE *out, const char *str)
{
    const unsigned char *cur = (const unsigned char *) str;

    while (*cur != 0) {
        putc(*cur, out);
        /* A literal backslash is escaped by doubling it. */
        if (*cur == '\\')
            putc('\\', out);
        cur++;
    }

    /* End-of-string marker: backslash followed by newline. */
    fputs("\\\n", out);
}
566
567
/**
568
 * xmlFuzzReadString:
569
 * @size:  size of string in bytes
570
 *
571
 * Read a random-length string from the fuzz data.
572
 *
573
 * The format is similar to libFuzzer's FuzzedDataProvider but treats
574
 * backslash followed by newline as end of string. This makes the fuzz data
575
 * more readable. A backslash character is escaped with another backslash.
576
 *
577
 * Returns a zero-terminated string or NULL if the fuzz data is exhausted.
578
 */
579
const char *
580
259k
xmlFuzzReadString(size_t *size) {
581
259k
    const char *out = fuzzData.outPtr;
582
583
399M
    while (fuzzData.remaining > 0) {
584
399M
        int c = *fuzzData.ptr++;
585
399M
        fuzzData.remaining--;
586
587
399M
        if ((c == '\\') && (fuzzData.remaining > 0)) {
588
227k
            int c2 = *fuzzData.ptr;
589
590
227k
            if (c2 == '\n') {
591
187k
                fuzzData.ptr++;
592
187k
                fuzzData.remaining--;
593
187k
                if (size != NULL)
594
81.8k
                    *size = fuzzData.outPtr - out;
595
187k
                *fuzzData.outPtr++ = '\0';
596
187k
                return(out);
597
187k
            }
598
39.7k
            if (c2 == '\\') {
599
11.0k
                fuzzData.ptr++;
600
11.0k
                fuzzData.remaining--;
601
11.0k
            }
602
39.7k
        }
603
604
398M
        *fuzzData.outPtr++ = c;
605
398M
    }
606
607
71.5k
    if (fuzzData.outPtr > out) {
608
32.7k
        if (size != NULL)
609
23.7k
            *size = fuzzData.outPtr - out;
610
32.7k
        *fuzzData.outPtr++ = '\0';
611
32.7k
        return(out);
612
32.7k
    }
613
614
38.8k
    if (size != NULL)
615
9.40k
        *size = 0;
616
38.8k
    return(NULL);
617
71.5k
}
618
619
/**
620
 * xmlFuzzReadEntities:
621
 *
622
 * Read entities like the main XML file, external DTDs, external parsed
623
 * entities from fuzz data.
624
 */
625
void
626
38.8k
xmlFuzzReadEntities(void) {
627
38.8k
    size_t num = 0;
628
629
143k
    while (1) {
630
143k
        const char *url, *entity;
631
143k
        size_t entitySize;
632
143k
        xmlFuzzEntityInfo *entityInfo;
633
634
143k
        url = xmlFuzzReadString(NULL);
635
143k
        if (url == NULL) break;
636
637
114k
        entity = xmlFuzzReadString(&entitySize);
638
114k
        if (entity == NULL) break;
639
640
105k
        if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) {
641
100k
            entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo));
642
100k
            if (entityInfo == NULL)
643
0
                break;
644
100k
            entityInfo->data = entity;
645
100k
            entityInfo->size = entitySize;
646
647
100k
            xmlHashAddEntry(fuzzData.entities, (xmlChar *)url, entityInfo);
648
649
100k
            if (num == 0) {
650
38.7k
                fuzzData.mainUrl = url;
651
38.7k
                fuzzData.mainEntity = entityInfo;
652
61.6k
            } else if (num == 1) {
653
38.6k
                fuzzData.secondaryUrl = url;
654
38.6k
                fuzzData.secondaryEntity = entityInfo;
655
38.6k
            }
656
657
100k
            num++;
658
100k
        }
659
105k
    }
660
38.8k
}
661
662
/**
663
 * xmlFuzzMainUrl:
664
 *
665
 * Returns the main URL.
666
 */
667
const char *
668
38.8k
xmlFuzzMainUrl(void) {
669
38.8k
    return(fuzzData.mainUrl);
670
38.8k
}
671
672
/**
673
 * xmlFuzzMainEntity:
674
 * @size:  size of the main entity in bytes
675
 *
676
 * Returns the main entity.
677
 */
678
const char *
679
38.8k
xmlFuzzMainEntity(size_t *size) {
680
38.8k
    if (fuzzData.mainEntity == NULL)
681
75
        return(NULL);
682
38.7k
    *size = fuzzData.mainEntity->size;
683
38.7k
    return(fuzzData.mainEntity->data);
684
38.8k
}
685
686
/**
687
 * xmlFuzzSecondaryUrl:
688
 *
689
 * Returns the secondary URL.
690
 */
691
const char *
692
38.8k
xmlFuzzSecondaryUrl(void) {
693
38.8k
    return(fuzzData.secondaryUrl);
694
38.8k
}
695
696
/**
697
 * xmlFuzzSecondaryEntity:
698
 * @size:  size of the secondary entity in bytes
699
 *
700
 * Returns the secondary entity.
701
 */
702
const char *
703
38.8k
xmlFuzzSecondaryEntity(size_t *size) {
704
38.8k
    if (fuzzData.secondaryEntity == NULL)
705
151
        return(NULL);
706
38.6k
    *size = fuzzData.secondaryEntity->size;
707
38.6k
    return(fuzzData.secondaryEntity->data);
708
38.8k
}
709
710
/**
711
 * xmlFuzzEntityLoader:
712
 *
713
 * The entity loader for fuzz data.
714
 */
715
xmlParserInputPtr
716
xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
717
1.03M
                    xmlParserCtxtPtr ctxt) {
718
1.03M
    xmlParserInputBufferPtr buf;
719
1.03M
    xmlFuzzEntityInfo *entity;
720
721
1.03M
    if (URL == NULL)
722
0
        return(NULL);
723
1.03M
    entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL);
724
1.03M
    if (entity == NULL)
725
181k
        return(NULL);
726
727
851k
    buf = xmlParserInputBufferCreateMem(entity->data, entity->size,
728
851k
                                        XML_CHAR_ENCODING_NONE);
729
851k
    if (buf == NULL)
730
148
        return(NULL);
731
732
851k
    return(xmlNewIOInputStream(ctxt, buf, XML_CHAR_ENCODING_NONE));
733
851k
}