Coverage Report

Created: 2025-07-12 06:41

/src/libxslt/tests/fuzz/fuzz.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * fuzz.c: Fuzz targets for libxslt
3
 *
4
 * See Copyright for the status of this software.
5
 */
6
7
#include <stdio.h>
8
#include <stdlib.h>
9
#include <string.h>
10
11
#include <libxml/parser.h>
12
#include <libxml/parserInternals.h>
13
#include <libxml/tree.h>
14
#include <libxml/xpath.h>
15
#include <libxml/xpathInternals.h>
16
#include <libxslt/extensions.h>
17
#include <libxslt/functions.h>
18
#include <libxslt/security.h>
19
#include <libxslt/transform.h>
20
#include <libxslt/xslt.h>
21
#include <libxslt/xsltInternals.h>
22
#include <libxslt/xsltutils.h>
23
#include <libexslt/exslt.h>
24
#include "fuzz.h"
25
26
#if defined(_WIN32)
27
  #define DIR_SEP '\\'
28
#else
29
  #define DIR_SEP '/'
30
#endif
31
32
static xsltSecurityPrefsPtr globalSec;
33
static xsltStylesheetPtr globalStyle;
34
static xsltTransformContextPtr tctxt;
35
36
static void
37
22.7M
xsltFuzzXmlErrorFunc(void *vctxt, const char *msg ATTRIBUTE_UNUSED, ...) {
38
22.7M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) vctxt;
39
    /*
40
     * Stopping the parser should be slightly faster and might catch some
41
     * issues related to recent libxml2 changes.
42
     */
43
22.7M
    xmlStopParser(ctxt);
44
22.7M
}
45
46
static void
47
xsltFuzzXsltErrorFunc(void *vctxt ATTRIBUTE_UNUSED,
48
12.8M
                      const char *msg ATTRIBUTE_UNUSED, ...) {
49
12.8M
}
50
51
static void
52
2
xsltFuzzInit(void) {
53
2
    xmlFuzzMemSetup();
54
55
    /* Init libxml2, libxslt and libexslt */
56
2
    xmlInitParser();
57
2
    xsltInit();
58
2
    exsltRegisterAll();
59
60
    /* Suppress error messages */
61
2
    xmlSetGenericErrorFunc(NULL, xsltFuzzXmlErrorFunc);
62
2
    xsltSetGenericErrorFunc(NULL, xsltFuzzXsltErrorFunc);
63
64
    /* Disallow I/O */
65
2
    globalSec = xsltNewSecurityPrefs();
66
2
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_READ_FILE,
67
2
                         xsltSecurityForbid);
68
2
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_WRITE_FILE,
69
2
                         xsltSecurityForbid);
70
2
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_CREATE_DIRECTORY,
71
2
                         xsltSecurityForbid);
72
2
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_READ_NETWORK,
73
2
                         xsltSecurityForbid);
74
2
    xsltSetSecurityPrefs(globalSec, XSLT_SECPREF_WRITE_NETWORK,
75
2
                         xsltSecurityForbid);
76
2
}
77
78
/* XPath fuzzer
79
 *
80
 * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
81
 * context using a static XML document. It heavily exercises the libxml2
82
 * XPath engine (xpath.c), a few other parts of libxml2, and most of
83
 * libexslt.
84
 *
85
 * Some EXSLT functions need the transform context to create RVTs for
86
 * node-sets. A couple of functions also access the stylesheet. The
87
 * XPath context from the transform context is used to parse and
88
 * evaluate expressions.
89
 *
90
 * All these objects are created once at startup. After fuzzing each input,
91
 * they're reset as cheaply as possible.
92
 *
93
 * TODO
94
 *
95
 * - Some expressions can create lots of temporary node sets (RVTs) which
96
 *   aren't freed until the whole expression was evaluated, leading to
97
 *   extensive memory usage. Cleaning them up earlier would require
98
 *   callbacks from the XPath engine, for example after evaluating a
99
 *   predicate expression, which doesn't seem feasible. Terminating the
100
 *   evaluation after creating a certain number of RVTs is a simple
101
 *   workaround.
102
 * - Register a custom xsl:decimal-format declaration for format-number().
103
 * - Some functions add strings to the stylesheet or transform context
104
 *   dictionary, for example via xsltGetQName, requiring a clean up of the
105
 *   dicts after fuzzing each input. This behavior seems questionable.
106
 *   Extension functions shouldn't needlessly modify the transform context
107
 *   or stylesheet.
108
 * - Register xsl:keys and fuzz the key() function.
109
 * - Add a few custom func:functions.
110
 * - Fuzz the document() function with external documents.
111
 */
112
113
int
114
0
xsltFuzzXPathInit(void) {
115
0
    xsltFuzzInit();
116
0
    globalStyle = xsltNewStylesheet();
117
0
    return(0);
118
0
}
119
120
xmlXPathObjectPtr
121
0
xsltFuzzXPath(const char *data, size_t size) {
122
0
    xmlXPathContextPtr xpctxt = NULL;
123
0
    xmlXPathObjectPtr xpathObj = NULL;
124
0
    xmlDocPtr doc;
125
0
    xmlNodePtr root;
126
0
    const char *xpathExpr, *xml;
127
0
    size_t maxAllocs, xmlSize;
128
129
0
    xmlFuzzDataInit(data, size);
130
131
0
    maxAllocs = xmlFuzzReadInt(4) % (size + 1);
132
0
    xpathExpr = xmlFuzzReadString(NULL);
133
0
    xml = xmlFuzzReadString(&xmlSize);
134
135
    /* Recovery mode allows more input to be fuzzed. */
136
0
    doc = xmlReadMemory(xml, xmlSize, NULL, NULL, XML_PARSE_RECOVER);
137
0
    if (doc == NULL)
138
0
        goto error;
139
0
    root = xmlDocGetRootElement(doc);
140
0
    if (root != NULL) {
141
0
        xmlNewNs(root, BAD_CAST "a", BAD_CAST "a");
142
0
        xmlNewNs(root, BAD_CAST "b", BAD_CAST "b");
143
0
        xmlNewNs(root, BAD_CAST "c", BAD_CAST "c");
144
0
    }
145
146
0
    tctxt = xsltNewTransformContext(globalStyle, doc);
147
0
    if (tctxt == NULL) {
148
0
        xmlFreeDoc(doc);
149
0
        goto error;
150
0
    }
151
0
    xsltSetCtxtSecurityPrefs(globalSec, tctxt);
152
153
    /*
154
     * Some extension functions need the current instruction.
155
     *
156
     * - format-number() for namespaces.
157
     * - document() for the base URL.
158
     * - maybe others?
159
     *
160
     * For fuzzing, it's enough to use the source document's root element.
161
     */
162
0
    tctxt->inst = xmlDocGetRootElement(doc);
163
164
    /* Set up XPath context */
165
0
    xpctxt = tctxt->xpathCtxt;
166
167
    /* Resource limits to avoid timeouts and call stack overflows */
168
0
    xpctxt->opLimit = 500000;
169
170
    /* Test namespaces */
171
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
172
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
173
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");
174
175
    /* EXSLT namespaces */
176
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
177
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
178
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
179
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
180
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
181
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
182
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
183
0
    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);
184
185
    /* Register variables */
186
0
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
187
0
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
188
0
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
189
0
                             xmlXPathNewString(BAD_CAST "var"));
190
0
    xmlXPathRegisterVariable(
191
0
            xpctxt, BAD_CAST "n",
192
0
            xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));
193
194
    /* Compile and return early if the expression is invalid */
195
0
    xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt,
196
0
            (const xmlChar *) xpathExpr);
197
0
    if (compExpr == NULL)
198
0
        goto error;
199
200
    /* Initialize XPath evaluation context and evaluate */
201
0
    xmlFuzzMemSetLimit(maxAllocs);
202
    /* Maybe test different context nodes? */
203
0
    xpctxt->node = (xmlNodePtr) doc;
204
0
    xpctxt->contextSize = 1;
205
0
    xpctxt->proximityPosition = 1;
206
0
    xpctxt->opCount = 0;
207
0
    xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
208
0
    xmlXPathFreeCompExpr(compExpr);
209
210
0
error:
211
0
    xmlFuzzMemSetLimit(0);
212
0
    xmlXPathRegisteredNsCleanup(xpctxt);
213
0
    xmlFuzzDataCleanup();
214
215
0
    return xpathObj;
216
0
}
217
218
void
219
0
xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) {
220
0
    xmlXPathFreeObject(obj);
221
222
0
    if (tctxt != NULL) {
223
0
        xmlDocPtr doc = tctxt->document->doc;
224
225
0
        xsltFreeTransformContext(tctxt);
226
0
        tctxt = NULL;
227
0
        xmlFreeDoc(doc);
228
0
    }
229
0
}
230
231
void
232
0
xsltFuzzXPathCleanup(void) {
233
0
    xsltFreeSecurityPrefs(globalSec);
234
0
    globalSec = NULL;
235
0
    xsltFreeStylesheet(globalStyle);
236
0
    globalStyle = NULL;
237
0
}
238
239
/*
240
 * XSLT fuzzer
241
 *
242
 * This is a rather naive fuzz target using a static XML document.
243
 *
244
 * TODO
245
 *
246
 * - Improve seed corpus
247
 * - Mutate multiple input documents: source, xsl:import, xsl:include
248
 * - format-number() with xsl:decimal-format
249
 * - Better coverage for xsl:key and key() function
250
 * - EXSLT func:function
251
 * - xsl:document
252
 */
253
254
int
255
2
xsltFuzzXsltInit(void) {
256
2
    xsltFuzzInit();
257
2
    xmlSetExternalEntityLoader(xmlFuzzEntityLoader);
258
2
    return(0);
259
2
}
260
261
xmlChar *
262
39.2k
xsltFuzzXslt(const char *data, size_t size) {
263
39.2k
    const char *xsltBuffer, *xsltUrl, *docBuffer, *docUrl;
264
39.2k
    xmlDocPtr xsltDoc = NULL, doc = NULL;
265
39.2k
    xmlDocPtr result = NULL;
266
39.2k
    xmlNodePtr root;
267
39.2k
    xsltStylesheetPtr sheet = NULL;
268
39.2k
    xsltTransformContextPtr ctxt = NULL;
269
39.2k
    xmlChar *ret = NULL;
270
39.2k
    size_t xsltSize, docSize, maxAllocs;
271
39.2k
    int retLen;
272
273
39.2k
    xmlFuzzDataInit(data, size);
274
39.2k
    maxAllocs = xmlFuzzReadInt(4) % (size + 1);
275
276
39.2k
    xmlFuzzReadEntities();
277
39.2k
    xsltBuffer = xmlFuzzMainEntity(&xsltSize);
278
39.2k
    xsltUrl = xmlFuzzMainUrl();
279
39.2k
    docBuffer = xmlFuzzSecondaryEntity(&docSize);
280
39.2k
    docUrl = xmlFuzzSecondaryUrl();
281
39.2k
    if ((xsltBuffer == NULL) || (docBuffer == NULL))
282
140
        goto exit;
283
284
39.0k
    doc = xmlReadMemory(docBuffer, docSize, docUrl, NULL, XSLT_PARSE_OPTIONS);
285
39.0k
    if (doc == NULL)
286
11.7k
        goto exit;
287
288
27.3k
    xsltDoc = xmlReadMemory(xsltBuffer, xsltSize, xsltUrl, NULL,
289
27.3k
                            XSLT_PARSE_OPTIONS);
290
27.3k
    if (xsltDoc == NULL)
291
356
        goto exit;
292
26.9k
    root = xmlDocGetRootElement(xsltDoc);
293
26.9k
    if (root != NULL) {
294
26.9k
        xmlNewNs(root, XSLT_NAMESPACE, BAD_CAST "x");
295
26.9k
        xmlNewNs(root, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
296
26.9k
        xmlNewNs(root, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
297
26.9k
        xmlNewNs(root, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
298
26.9k
        xmlNewNs(root, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
299
26.9k
        xmlNewNs(root, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
300
26.9k
        xmlNewNs(root, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
301
26.9k
        xmlNewNs(root, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
302
26.9k
        xmlNewNs(root, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
303
26.9k
        xmlNewNs(root, SAXON_NAMESPACE, BAD_CAST "saxon");
304
26.9k
    }
305
306
26.9k
    xmlFuzzMemSetLimit(maxAllocs);
307
26.9k
    sheet = xsltNewStylesheet();
308
26.9k
    if (sheet == NULL)
309
17
        goto exit;
310
26.9k
    sheet->opLimit = 10000;
311
26.9k
    sheet->xpathCtxt->opLimit = 100000;
312
26.9k
    sheet->xpathCtxt->opCount = 0;
313
26.9k
    if (xsltParseStylesheetUser(sheet, xsltDoc) != 0)
314
9.61k
        goto exit;
315
17.3k
    xsltDoc = NULL;
316
317
17.3k
    root = xmlDocGetRootElement(doc);
318
17.3k
    if (root != NULL) {
319
17.3k
        xmlNewNs(root, BAD_CAST "a", BAD_CAST "a");
320
17.3k
        xmlNewNs(root, BAD_CAST "b", BAD_CAST "b");
321
17.3k
        xmlNewNs(root, BAD_CAST "c", BAD_CAST "c");
322
17.3k
    }
323
324
17.3k
    ctxt = xsltNewTransformContext(sheet, doc);
325
17.3k
    if (ctxt == NULL)
326
200
        goto exit;
327
17.1k
    xsltSetCtxtSecurityPrefs(globalSec, ctxt);
328
17.1k
    ctxt->maxTemplateDepth = 100;
329
17.1k
    ctxt->opLimit = 20000;
330
17.1k
    ctxt->xpathCtxt->opLimit = 100000;
331
17.1k
    ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
332
333
17.1k
    result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
334
17.1k
    if (result != NULL)
335
7.43k
        xsltSaveResultToString(&ret, &retLen, result, sheet);
336
337
39.2k
exit:
338
39.2k
    xmlFuzzMemSetLimit(0);
339
39.2k
    xmlFreeDoc(result);
340
39.2k
    xsltFreeTransformContext(ctxt);
341
39.2k
    xsltFreeStylesheet(sheet);
342
39.2k
    xmlFreeDoc(xsltDoc);
343
39.2k
    xmlFreeDoc(doc);
344
39.2k
    xmlFuzzDataCleanup();
345
346
39.2k
    return ret;
347
17.1k
}
348
349
void
350
0
xsltFuzzXsltCleanup(void) {
351
0
    xsltFreeSecurityPrefs(globalSec);
352
0
    globalSec = NULL;
353
0
}
354
355
/*
356
 * Utility functions, copied from libxml2
357
 */
358
359
typedef struct {
360
    const char *data;
361
    size_t size;
362
} xmlFuzzEntityInfo;
363
364
/* Single static instance for now */
365
static struct {
366
    /* Original data */
367
    const char *data;
368
    size_t size;
369
370
    /* Remaining data */
371
    const char *ptr;
372
    size_t remaining;
373
374
    /* Buffer for unescaped strings */
375
    char *outBuf;
376
    char *outPtr; /* Free space at end of buffer */
377
378
    xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
379
380
    /* The first entity is the main entity. */
381
    const char *mainUrl;
382
    xmlFuzzEntityInfo *mainEntity;
383
    const char *secondaryUrl;
384
    xmlFuzzEntityInfo *secondaryEntity;
385
} fuzzData;
386
387
size_t fuzzNumAllocs;
388
size_t fuzzMaxAllocs;
389
390
/**
391
 * xmlFuzzErrorFunc:
392
 *
393
 * An error function that simply discards all errors.
394
 */
395
void
396
xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
397
0
                 ...) {
398
0
}
399
400
/*
401
 * Malloc failure injection.
402
 *
403
 * Quick tip to debug complicated issues: Increase MALLOC_OFFSET until
404
 * the crash disappears (or a different issue is triggered). Then set
405
 * the offset to the highest value that produces a crash and set
406
 * MALLOC_ABORT to 1 to see which failed memory allocation causes the
407
 * issue.
408
 */
409
410
18.4k
#define XML_FUZZ_MALLOC_OFFSET  0
411
#define XML_FUZZ_MALLOC_ABORT   0
412
413
static void *
414
156M
xmlFuzzMalloc(size_t size) {
415
156M
    if (fuzzMaxAllocs > 0) {
416
20.1M
        if (fuzzNumAllocs >= fuzzMaxAllocs - 1)
417
#if XML_FUZZ_MALLOC_ABORT
418
            abort();
419
#else
420
330k
            return(NULL);
421
19.7M
#endif
422
19.7M
        fuzzNumAllocs += 1;
423
19.7M
    }
424
156M
    return malloc(size);
425
156M
}
426
427
static void *
428
34.8M
xmlFuzzRealloc(void *ptr, size_t size) {
429
34.8M
    if (fuzzMaxAllocs > 0) {
430
3.79M
        if (fuzzNumAllocs >= fuzzMaxAllocs - 1)
431
#if XML_FUZZ_MALLOC_ABORT
432
            abort();
433
#else
434
153k
            return(NULL);
435
3.64M
#endif
436
3.64M
        fuzzNumAllocs += 1;
437
3.64M
    }
438
34.7M
    return realloc(ptr, size);
439
34.8M
}
440
441
void
442
2
xmlFuzzMemSetup(void) {
443
2
    xmlMemSetup(free, xmlFuzzMalloc, xmlFuzzRealloc, xmlMemStrdup);
444
2
}
445
446
void
447
66.1k
xmlFuzzMemSetLimit(size_t limit) {
448
66.1k
    fuzzNumAllocs = 0;
449
66.1k
    fuzzMaxAllocs = limit ? limit + XML_FUZZ_MALLOC_OFFSET : 0;
450
66.1k
}
451
452
/**
453
 * xmlFuzzDataInit:
454
 *
455
 * Initialize fuzz data provider.
456
 */
457
void
458
39.2k
xmlFuzzDataInit(const char *data, size_t size) {
459
39.2k
    fuzzData.data = data;
460
39.2k
    fuzzData.size = size;
461
39.2k
    fuzzData.ptr = data;
462
39.2k
    fuzzData.remaining = size;
463
464
39.2k
    fuzzData.outBuf = xmlMalloc(size + 1);
465
39.2k
    fuzzData.outPtr = fuzzData.outBuf;
466
467
39.2k
    fuzzData.entities = xmlHashCreate(8);
468
39.2k
    fuzzData.mainUrl = NULL;
469
39.2k
    fuzzData.mainEntity = NULL;
470
39.2k
    fuzzData.secondaryUrl = NULL;
471
39.2k
    fuzzData.secondaryEntity = NULL;
472
39.2k
}
473
474
/**
475
 * xmlFuzzDataFree:
476
 *
477
 * Cleanup fuzz data provider.
478
 */
479
void
480
39.2k
xmlFuzzDataCleanup(void) {
481
39.2k
    xmlFree(fuzzData.outBuf);
482
39.2k
    xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator);
483
39.2k
}
484
485
/**
 * xmlFuzzWriteInt:
 * @out:  output file
 * @v:  integer to write
 * @size:  size of integer in bytes
 *
 * Write @v to @out as @size bytes, most significant byte first. Bytes
 * beyond the width of size_t are written as zeros. Nothing is written
 * if @size is not positive.
 */
void
xmlFuzzWriteInt(FILE *out, size_t v, int size) {
    int remaining;

    /* Leading bytes that cannot be represented in a size_t. */
    for (; size > (int) sizeof(size_t); size--)
        putc(0, out);

    /* Remaining bytes, from most to least significant. */
    for (remaining = size; remaining > 0; remaining--)
        putc((v >> ((remaining - 1) * 8)) & 255, out);
}
508
509
/**
510
 * xmlFuzzReadInt:
511
 * @size:  size of integer in bytes
512
 *
513
 * Read an integer from the fuzz data.
514
 */
515
size_t
516
39.2k
xmlFuzzReadInt(int size) {
517
39.2k
    size_t ret = 0;
518
519
196k
    while ((size > 0) && (fuzzData.remaining > 0)) {
520
156k
        unsigned char c = (unsigned char) *fuzzData.ptr++;
521
156k
        fuzzData.remaining--;
522
156k
        ret = (ret << 8) | c;
523
156k
        size--;
524
156k
    }
525
526
39.2k
    return ret;
527
39.2k
}
528
529
/**
530
 * xmlFuzzReadRemaining:
531
 * @size:  size of string in bytes
532
 *
533
 * Read remaining bytes from fuzz data.
534
 */
535
const char *
536
0
xmlFuzzReadRemaining(size_t *size) {
537
0
    const char *ret = fuzzData.ptr;
538
539
0
    *size = fuzzData.remaining;
540
0
    fuzzData.ptr += fuzzData.remaining;
541
0
    fuzzData.remaining = 0;
542
543
0
    return(ret);
544
0
}
545
546
/*
 * xmlFuzzWriteString:
 * @out:  output file
 * @str:  string to write
 *
 * Write a string to file in the escaped format understood by
 * xmlFuzzReadString: every backslash is doubled and the string is
 * terminated by a backslash followed by a newline.
 */
void
xmlFuzzWriteString(FILE *out, const char *str) {
    const unsigned char *cur = (const unsigned char *) str;

    while (*cur != 0) {
        /* Escape a backslash by writing it twice. */
        if (*cur == '\\')
            putc('\\', out);
        putc(*cur, out);
        cur++;
    }

    /* End-of-string marker */
    putc('\\', out);
    putc('\n', out);
}
566
567
/**
568
 * xmlFuzzReadString:
569
 * @size:  size of string in bytes
570
 *
571
 * Read a random-length string from the fuzz data.
572
 *
573
 * The format is similar to libFuzzer's FuzzedDataProvider but treats
574
 * backslash followed by newline as end of string. This makes the fuzz data
575
 * more readable. A backslash character is escaped with another backslash.
576
 *
577
 * Returns a zero-terminated string or NULL if the fuzz data is exhausted.
578
 */
579
const char *
580
263k
xmlFuzzReadString(size_t *size) {
581
263k
    const char *out = fuzzData.outPtr;
582
583
189M
    while (fuzzData.remaining > 0) {
584
189M
        int c = *fuzzData.ptr++;
585
189M
        fuzzData.remaining--;
586
587
189M
        if ((c == '\\') && (fuzzData.remaining > 0)) {
588
238k
            int c2 = *fuzzData.ptr;
589
590
238k
            if (c2 == '\n') {
591
193k
                fuzzData.ptr++;
592
193k
                fuzzData.remaining--;
593
193k
                if (size != NULL)
594
85.0k
                    *size = fuzzData.outPtr - out;
595
193k
                *fuzzData.outPtr++ = '\0';
596
193k
                return(out);
597
193k
            }
598
45.7k
            if (c2 == '\\') {
599
22.8k
                fuzzData.ptr++;
600
22.8k
                fuzzData.remaining--;
601
22.8k
            }
602
45.7k
        }
603
604
188M
        *fuzzData.outPtr++ = c;
605
188M
    }
606
607
70.5k
    if (fuzzData.outPtr > out) {
608
31.3k
        if (size != NULL)
609
22.6k
            *size = fuzzData.outPtr - out;
610
31.3k
        *fuzzData.outPtr++ = '\0';
611
31.3k
        return(out);
612
31.3k
    }
613
614
39.2k
    if (size != NULL)
615
9.10k
        *size = 0;
616
39.2k
    return(NULL);
617
70.5k
}
618
619
/**
620
 * xmlFuzzReadEntities:
621
 *
622
 * Read entities like the main XML file, external DTDs, external parsed
623
 * entities from fuzz data.
624
 */
625
void
626
39.2k
xmlFuzzReadEntities(void) {
627
39.2k
    size_t num = 0;
628
629
146k
    while (1) {
630
146k
        const char *url, *entity;
631
146k
        size_t entitySize;
632
146k
        xmlFuzzEntityInfo *entityInfo;
633
634
146k
        url = xmlFuzzReadString(NULL);
635
146k
        if (url == NULL) break;
636
637
116k
        entity = xmlFuzzReadString(&entitySize);
638
116k
        if (entity == NULL) break;
639
640
107k
        if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) {
641
102k
            entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo));
642
102k
            if (entityInfo == NULL)
643
0
                break;
644
102k
            entityInfo->data = entity;
645
102k
            entityInfo->size = entitySize;
646
647
102k
            xmlHashAddEntry(fuzzData.entities, (xmlChar *)url, entityInfo);
648
649
102k
            if (num == 0) {
650
39.1k
                fuzzData.mainUrl = url;
651
39.1k
                fuzzData.mainEntity = entityInfo;
652
63.3k
            } else if (num == 1) {
653
39.0k
                fuzzData.secondaryUrl = url;
654
39.0k
                fuzzData.secondaryEntity = entityInfo;
655
39.0k
            }
656
657
102k
            num++;
658
102k
        }
659
107k
    }
660
39.2k
}
661
662
/**
663
 * xmlFuzzMainUrl:
664
 *
665
 * Returns the main URL.
666
 */
667
const char *
668
39.2k
xmlFuzzMainUrl(void) {
669
39.2k
    return(fuzzData.mainUrl);
670
39.2k
}
671
672
/**
673
 * xmlFuzzMainEntity:
674
 * @size:  size of the main entity in bytes
675
 *
676
 * Returns the main entity.
677
 */
678
const char *
679
39.2k
xmlFuzzMainEntity(size_t *size) {
680
39.2k
    if (fuzzData.mainEntity == NULL)
681
70
        return(NULL);
682
39.1k
    *size = fuzzData.mainEntity->size;
683
39.1k
    return(fuzzData.mainEntity->data);
684
39.2k
}
685
686
/**
687
 * xmlFuzzSecondaryUrl:
688
 *
689
 * Returns the secondary URL.
690
 */
691
const char *
692
39.2k
xmlFuzzSecondaryUrl(void) {
693
39.2k
    return(fuzzData.secondaryUrl);
694
39.2k
}
695
696
/**
697
 * xmlFuzzSecondaryEntity:
698
 * @size:  size of the secondary entity in bytes
699
 *
700
 * Returns the secondary entity.
701
 */
702
const char *
703
39.2k
xmlFuzzSecondaryEntity(size_t *size) {
704
39.2k
    if (fuzzData.secondaryEntity == NULL)
705
140
        return(NULL);
706
39.0k
    *size = fuzzData.secondaryEntity->size;
707
39.0k
    return(fuzzData.secondaryEntity->data);
708
39.2k
}
709
710
/**
711
 * xmlFuzzEntityLoader:
712
 *
713
 * The entity loader for fuzz data.
714
 */
715
xmlParserInputPtr
716
xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
717
1.00M
                    xmlParserCtxtPtr ctxt) {
718
1.00M
    xmlParserInputBufferPtr buf;
719
1.00M
    xmlFuzzEntityInfo *entity;
720
721
1.00M
    if (URL == NULL)
722
0
        return(NULL);
723
1.00M
    entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL);
724
1.00M
    if (entity == NULL)
725
181k
        return(NULL);
726
727
821k
    buf = xmlParserInputBufferCreateMem(entity->data, entity->size,
728
821k
                                        XML_CHAR_ENCODING_NONE);
729
821k
    if (buf == NULL)
730
161
        return(NULL);
731
732
821k
    return(xmlNewIOInputStream(ctxt, buf, XML_CHAR_ENCODING_NONE));
733
821k
}